@@ -5,30 +5,69 @@ benchmarks:
55 xlml_metrics_dir : " ../microbenchmarks/gemm_all_reduce"
66 xla_dump_dir : " ../microbenchmarks/gemm_all_reduce/hlo_graphs"
77 num_runs : 10
8- benchmark_params :
9- - m : 4096
10- k : 4096
11- n : 4096
12- dtype : " bfloat16"
8+ benchmark_sweep_params :
9+ - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
10+ - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
11+ - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
12+ - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
1313 - benchmark_name : all_reduce_only
1414 trace_dir : " ../microbenchmarks/gemm_all_reduce"
1515 csv_path : " ../microbenchmarks/gemm_all_reduce"
1616 xlml_metrics_dir : " ../microbenchmarks/gemm_all_reduce"
1717 xla_dump_dir : " ../microbenchmarks/gemm_all_reduce/hlo_graphs"
1818 num_runs : 10
19- benchmark_params :
20- - m : 4096
21- k : 4096 # Passed to maintain signature, though not used for shape of C
22- n : 4096
23- dtype : " bfloat16"
19+ benchmark_sweep_params :
20+ - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
21+ - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
22+ - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
23+ - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
2424 - benchmark_name : gemm_all_reduce
2525 trace_dir : " ../microbenchmarks/gemm_all_reduce"
2626 csv_path : " ../microbenchmarks/gemm_all_reduce"
2727 xlml_metrics_dir : " ../microbenchmarks/gemm_all_reduce"
2828 xla_dump_dir : " ../microbenchmarks/gemm_all_reduce/hlo_graphs"
2929 num_runs : 10
30- benchmark_params :
31- - m : 4096
32- k : 4096
33- n : 4096
34- dtype : " bfloat16"
30+ benchmark_sweep_params :
31+ - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
32+ - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
33+ - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
34+ - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
35+ - benchmark_name : gemm_reducescatter_allgather
36+ trace_dir : " ../microbenchmarks/gemm_all_reduce"
37+ csv_path : " ../microbenchmarks/gemm_all_reduce"
38+ xlml_metrics_dir : " ../microbenchmarks/gemm_all_reduce"
39+ xla_dump_dir : " ../microbenchmarks/gemm_all_reduce/hlo_graphs"
40+ num_runs : 10
41+ benchmark_sweep_params :
42+ - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
43+ - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
44+ - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
45+ - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
46+ - benchmark_name : gemm_sharded_all_gather
47+ trace_dir : " ../microbenchmarks/gemm_all_reduce"
48+ csv_path : " ../microbenchmarks/gemm_all_reduce"
49+ xlml_metrics_dir : " ../microbenchmarks/gemm_all_reduce"
50+ xla_dump_dir : " ../microbenchmarks/gemm_all_reduce/hlo_graphs"
51+ num_runs : 10
52+ benchmark_sweep_params :
53+ - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
54+ - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
55+ - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
56+ - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
57+ - {m: 65536, k: 65536, n: 65536, dtype: "bfloat16"}
58+ - {m: 131072, k: 131072, n: 131072, dtype: "bfloat16"}
59+ - {m: 262144, k: 262144, n: 262144, dtype: "bfloat16"}
60+ - {m: 524288, k: 524288, n: 524288, dtype: "bfloat16"}
61+ - benchmark_name : gemm_k_sharded_all_reduce
62+ trace_dir : " ../microbenchmarks/gemm_all_reduce"
63+ csv_path : " ../microbenchmarks/gemm_all_reduce"
64+ xlml_metrics_dir : " ../microbenchmarks/gemm_all_reduce"
65+ xla_dump_dir : " ../microbenchmarks/gemm_all_reduce/hlo_graphs"
66+ num_runs : 10
67+ benchmark_sweep_params :
68+ - {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
69+ - {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
70+ - {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
71+ - {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
72+ - {m: 65536, k: 65536, n: 65536, dtype: "bfloat16"}
73+ - {m: 131072, k: 131072, n: 131072, dtype: "bfloat16"}
0 commit comments