Skip to content

Commit 74d7fa7

Browse files
committed
Optimizing
1 parent 1907768 commit 74d7fa7

File tree

5 files changed

+613
-22
lines changed

5 files changed

+613
-22
lines changed

Ironwood/configs/training/gemm_all_reduce.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ benchmarks:
1111
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
1212
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
1313
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
14+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}

Ironwood/configs/training/local_separation_test.yaml

Lines changed: 54 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -5,30 +5,69 @@ benchmarks:
55
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
66
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
77
num_runs: 10
8-
benchmark_params:
9-
- m: 4096
10-
k: 4096
11-
n: 4096
12-
dtype: "bfloat16"
8+
benchmark_sweep_params:
9+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
10+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
11+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
12+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
1313
- benchmark_name: all_reduce_only
1414
trace_dir: "../microbenchmarks/gemm_all_reduce"
1515
csv_path: "../microbenchmarks/gemm_all_reduce"
1616
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
1717
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
1818
num_runs: 10
19-
benchmark_params:
20-
- m: 4096
21-
k: 4096 # Passed to maintain signature, though not used for shape of C
22-
n: 4096
23-
dtype: "bfloat16"
19+
benchmark_sweep_params:
20+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
21+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
22+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
23+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
2424
- benchmark_name: gemm_all_reduce
2525
trace_dir: "../microbenchmarks/gemm_all_reduce"
2626
csv_path: "../microbenchmarks/gemm_all_reduce"
2727
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
2828
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
2929
num_runs: 10
30-
benchmark_params:
31-
- m: 4096
32-
k: 4096
33-
n: 4096
34-
dtype: "bfloat16"
30+
benchmark_sweep_params:
31+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
32+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
33+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
34+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
35+
- benchmark_name: gemm_reducescatter_allgather
36+
trace_dir: "../microbenchmarks/gemm_all_reduce"
37+
csv_path: "../microbenchmarks/gemm_all_reduce"
38+
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
39+
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
40+
num_runs: 10
41+
benchmark_sweep_params:
42+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
43+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
44+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
45+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
46+
- benchmark_name: gemm_sharded_all_gather
47+
trace_dir: "../microbenchmarks/gemm_all_reduce"
48+
csv_path: "../microbenchmarks/gemm_all_reduce"
49+
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
50+
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
51+
num_runs: 10
52+
benchmark_sweep_params:
53+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
54+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
55+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
56+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
57+
- {m: 65536, k: 65536, n: 65536, dtype: "bfloat16"}
58+
- {m: 131072, k: 131072, n: 131072, dtype: "bfloat16"}
59+
- {m: 262144, k: 262144, n: 262144, dtype: "bfloat16"}
60+
- {m: 524288, k: 524288, n: 524288, dtype: "bfloat16"}
61+
- benchmark_name: gemm_k_sharded_all_reduce
62+
trace_dir: "../microbenchmarks/gemm_all_reduce"
63+
csv_path: "../microbenchmarks/gemm_all_reduce"
64+
xlml_metrics_dir: "../microbenchmarks/gemm_all_reduce"
65+
xla_dump_dir: "../microbenchmarks/gemm_all_reduce/hlo_graphs"
66+
num_runs: 10
67+
benchmark_sweep_params:
68+
- {m: 4096, k: 4096, n: 4096, dtype: "bfloat16"}
69+
- {m: 8192, k: 8192, n: 8192, dtype: "bfloat16"}
70+
- {m: 16384, k: 16384, n: 16384, dtype: "bfloat16"}
71+
- {m: 32768, k: 32768, n: 32768, dtype: "bfloat16"}
72+
- {m: 65536, k: 65536, n: 65536, dtype: "bfloat16"}
73+
- {m: 131072, k: 131072, n: 131072, dtype: "bfloat16"}

0 commit comments

Comments
 (0)