@@ -31,39 +31,41 @@ nvidia/Llama-3.1-405B-Instruct-FP4
3131
3232#### Llama 3.3 70B FP4
3333
34- | | GPU | B200 | | | |
35- | :------------------------| :--------| :----------| :----------| :----------| :----------|
36- | | TP Size | 1 | 2 | 4 | 8 |
37- | ISL, OSL | | | | | |
38- | | | | | | |
39- | 128, 128 | | 10,994.48 | 17,542.11 | 24,667.31 | 27,272.27 |
40- | 128, 2048 | | 9,580.46 | 15,432.35 | 23,568.12 | 31,174.31 |
41- | 128, 4096 | | 6,418.39 | 9,841.53 | 17,808.76 | 25,229.25 |
42- | 500, 2000 | | 7,343.32 | 11,850.57 | 20,709.67 | 28,038.78 |
43- | 1000, 1000 | | 6,752.53 | 10,815.88 | 16,413.04 | 20,060.66 |
44- | 1000, 2000 | | 6,670.07 | 9,830.73 | 15,597.49 | 20,672.37 |
45- | 1024, 2048 | | 6,636.75 | 9,807.13 | 15,519.23 | 20,617.28 |
46- | 2048, 128 | | 1,342.17 | 1,989.41 | 3,033.14 | 4,035.64 |
47- | 5000, 500 | | 1,429.67 | 2,419.67 | 3,686.84 | 5,182.96 |
48- | 20000, 2000 | | 629.77 | 1,177.01 | 2,120.66 | 3,429.03 |
34+ | | GPU: | B200 | GB200 180GB |
35+ | :-----------------------------| :---| :----------| :--------------|
36+ | | TP Size | 1 | 1 |
37+ | ISL, OSL | | | |
38+ | | | | |
39+ | 128, 128 | | 10,613.84 | 11,100.97 |
40+ | 128, 2048 | | 9,445.51 | 10,276.05 |
41+ | 128, 4096 | | 6,276.85 | 7,351.12 |
42+ | 500, 2000 | | 6,983.27 | 8,194.30 |
43+ | 1000, 1000 | | 6,434.29 | 7,401.80 |
44+ | 1000, 2000 | | 6,725.03 | 6,478.72 |
45+ | 1024, 2048 | | 6,546.61 | 7,922.88 |
46+ | 2048, 128 | | 1,330.35 | 1,418.47 |
47+ | 2048, 2048 | | 4,528.48 | 5,326.77 |
48+ | 5000, 500 | | 1,427.44 | 1,502.44 |
49+ | 20000, 2000 | | 636.36 | 732.43 |
4950
5051#### Llama 3.1 405B FP4
5152
52- | | GPU | B200 | |
53- | :------------------------| :------- | :---------| :----------|
54- | | TP Size | 4 | 8 |
55- | ISL, OSL | | | |
56- | | | | |
57- | 128, 128 | | 6,163.81 | 9,002.90 |
58- | 128, 2048 | | 7,081.21 | 10,288.28 |
59- | 128, 4096 | | 6,028.37 | 8,713.77 |
60- | 500, 2000 | | 5,858.75 | 9,125.86 |
61- | 1000, 1000 | | 4,848.00 | 7,582.97 |
62- | 1000, 2000 | | 5,375.25 | 7,626.28 |
63- | 1024, 2048 | | 5,345.70 | 7,464.03 |
64- | 2048, 128 | | 693.55 | 1,086.56 |
65- | 5000, 500 | | 947.49 | 1,532.45 |
66- | 20000, 2000 | | 641.11 | 1,097.84 |
53+ | | GPU: | B200 | GB200 180GB |
54+ | :-----------------------------| :---| :---------| :--------------|
55+ | | TP Size | 4 | 4 |
56+ | ISL, OSL | | | |
57+ | | | | |
58+ | 128, 128 | | 6,218.89 | 6,598.97 |
59+ | 128, 2048 | | 7,178.10 | 7,497.40 |
60+ | 128, 4096 | | 5,890.89 | 5,898.19 |
61+ | 500, 2000 | | 5,844.37 | 6,198.33 |
62+ | 1000, 1000 | | 4,958.53 | 5,243.35 |
63+ | 1000, 2000 | | 4,874.16 | 4,905.51 |
64+ | 1024, 2048 | | 4,833.19 | 4,686.38 |
65+ | 2048, 128 | | 737.95 | 761.58 |
66+ | 2048, 2048 | | 4,024.02 | 4,326.56 |
67+ | 5000, 500 | | 1,032.40 | 1,078.87 |
68+ | 20000, 2000 | | 667.39 | 649.95 |
6769
6870### FP8 Models:
6971```
@@ -75,74 +77,62 @@ nvidia/Llama-4-Maverick-17B-128E-Instruct-FP8
7577
7678#### Llama 3.1 8B FP8
7779
78- | | GPU | H200 141GB HBM3 | H100 80GB HBM3 |
79- | :-----------------------------| :---| :------------------| :-----------------|
80- | | TP Size | 1 | 1 |
81- | ISL, OSL | | | |
82- | | | | |
83- | 128, 128 | | 27,970.14 | 27,688.36 |
84- | 128, 2048 | | 23,326.38 | 21,841.15 |
85- | 128, 4096 | | 17,508.51 | 13,730.89 |
86- | 500, 2000 | | 21,390.41 | 17,833.34 |
87- | 1000, 1000 | | 17,366.89 | 15,270.62 |
88- | 1000, 2000 | | 16,831.31 | 13,798.08 |
89- | 1024, 2048 | | 16,737.03 | 13,385.50 |
90- | 2048, 128 | | 3,488.03 | 3,414.67 |
91- | 5000, 500 | | 3,813.69 | 3,394.54 |
92- | 20000, 2000 | | 1,696.66 | 1,345.42 |
80+ | | GPU: | GH200 480GB | H100 80GB HBM3 | H200 141GB HBM3 |
81+ | :-----------------------------| :---| :--------------| :-----------------| :------------------|
82+ | | TP Size | 1 | 1 | 1 |
83+ | ISL, OSL | | | | |
84+ | | | | | |
85+ | 128, 128 | | 27,304.25 | 26,401.48 | 27,027.80 |
86+ | 128, 2048 | | 24,045.60 | 21,413.21 | 23,102.25 |
87+ | 128, 4096 | | 15,409.85 | 13,541.54 | 17,396.83 |
88+ | 500, 2000 | | 20,123.88 | 17,571.01 | 19,759.16 |
89+ | 1000, 1000 | | 16,352.99 | 14,991.62 | 17,162.49 |
90+ | 1000, 2000 | | 15,705.82 | 13,505.23 | 16,227.11 |
91+ | 1024, 2048 | | 16,102.52 | 13,165.91 | 16,057.66 |
92+ | 2048, 128 | | 3,573.85 | 3,275.55 | 3,390.69 |
93+ | 2048, 2048 | | 10,767.05 | 9,462.43 | 11,822.14 |
94+ | 5000, 500 | | 3,584.74 | 3,276.47 | 3,758.08 |
95+ | 20000, 2000 | | 1,393.31 | 1,340.69 | 1,705.68 |
9396
9497#### Llama 3.3 70B FP8
9598
96- | | GPU | H200 141GB HBM3 | | | | H100 80GB HBM3 | | | |
97- | :-----------------------------| :---| :------------------ | :--------- | :---------- | :---------- | :----------------- | :--------- | :---------- | :---------- |
98- | | TP Size | 1 | 2 | 4 | 8 | 1 | 2 | 4 | 8 |
99- | ISL, OSL | | | | | | | | | |
100- | | | | | | | | | | |
101- | 128, 128 | | 3,605.47 | 6,427.69 | 10,407.42 | 15,434.37 | 3,128.33 | 6,216.91 | | |
102- | 128, 2048 | | 4,315.80 | 8,464.03 | 13,508.59 | 20,759.72 | 756.42 | 5,782.57 | 11,464.94 | 17,424.32 |
103- | 128, 4096 | | 2,701.17 | 5,573.55 | 11,458.56 | 16,668.75 | | 3,868.37 | 8,206.39 | 12,624.61 |
104- | 500, 2000 | | 3,478.76 | 6,740.06 | 12,200.18 | | | 4,684.06 | 9,903.53 | 14,553.93 |
105- | 1000, 1000 | | 2,744.32 | 5,119.72 | 8,685.44 | 12,744.51 | 742.14 | 4,247.19 | 7,435.65 | 11,018.81 |
106- | 1000, 2000 | | 2,896.44 | 5,847.26 | 9,031.21 | 13,141.17 | 533.74 | 3,866.53 | 7,611.12 | 11,139.22 |
107- | 1024, 2048 | | 2,874.18 | 5,568.61 | 8,946.71 | 13,082.62 | 530.16 | 3,796.68 | 7,575.24 | 11,004.31 |
108- | 2048, 128 | | 435.90 | 772.67 | 1,264.76 | | | 736.89 | 1,213.33 | 1,839.22 |
109- | 2048, 2048 | | | | | 10,412.85 | | | | |
110- | 5000, 500                   |     | 545.96             | 997.15     | 1,698.22    | 2,655.28    | 204.94             | 862.91     | 1,552.68    | 2,369.84    |
111- | 20000, 2000                 |     | 276.66             | 620.33     | 1,161.29    | 1,985.85    |                    | 416.13     | 903.66      | 1,554.10    |
99+ | | GPU: | H100 80GB HBM3 | H200 141GB HBM3 |
100+ | :-----------------------------| :---| :-----------------| :------------------|
101+ | | TP Size | 2 | 2 |
102+ | ISL, OSL | | | |
103+ | | | | |
104+ | 128, 128 | | 6,092.28 | 6,327.98 |
105+ | 128, 2048 | | 5,892.94 | 7,467.36 |
106+ | 128, 4096 | | 3,828.46 | 5,526.42 |
107+ | 500, 2000 | | 4,654.74 | 6,639.15 |
108+ | 1000, 1000 | | 4,181.06 | 4,773.33 |
109+ | 1000, 2000 | | 3,708.93 | 5,790.36 |
110+ | 1024, 2048 | | 3,785.04 | 5,480.44 |
111+ | 2048, 128 | | 723.40 | 747.55 |
112+ | 2048, 2048 | | 2,785.53 | 3,775.80 |
113+ | 5000, 500                   |     | 865.55             | 978.28             |
114+ | 20000, 2000                 |     | 411.85             | 609.42             |
112115
113116#### Llama 3.1 405B FP8
114-
115- | | GPU | H200 141GB HBM3 | H100 80GB HBM3 |
116- | :-----------------------------| :---| :------------------| :-----------------|
117- | | TP Size | 8 | 8 |
118- | ISL, OSL | | | |
119- | | | | |
120- | 128, 2048 | | 5,567.87 | |
121- | 128, 4096 | | 5,136.85 | |
122- | 500, 2000 | | 4,787.61 | 3,673.91 |
123- | 1000, 1000 | | 3,286.30 | 3,012.22 |
124- | 1000, 2000 | | 3,636.76 | 3,262.20 |
125- | 1024, 2048 | | 3,618.66 | 3,109.70 |
126- | 2048, 128 | | 443.10 | 449.02 |
127- | 5000, 500 | | 645.46 | |
128- | 20000, 2000 | | | 372.12 |
117+ | | GPU: | H100 80GB HBM3 | H200 141GB HBM3 |
118+ | :-----------------------------| :---| :-----------------| :------------------|
119+ | | TP Size | 8 | 8 |
120+ | ISL, OSL                     |     |                   |                    |
121+ | | | | |
122+ | 128, 128 | | | 3,705.18 |
123+ | 128, 2048 | | 4,517.39 | 4,715.13 |
124+ | 128, 4096 | | 2,910.31 | 4,475.91 |
125+ | 500, 2000 | | 3,664.62 | 4,804.10 |
126+ | 1000, 1000 | | 2,955.50 | 3,208.25 |
127+ | 1000, 2000 | | 2,884.69 | 3,630.29 |
128+ | 1024, 2048 | | 3,237.41 | 3,609.50 |
129+ | 2048, 128 | | 433.47 | 441.35 |
130+ | 2048, 2048 | | 2,216.55 | 2,840.86 |
131+ | 5000, 500 | | 579.05 | 645.26 |
132+ | 20000, 2000 | | 363.27 | 509.87 |
129133
130134#### Llama 4 Maverick FP8
131-
132- | | GPU | H200 141GB HBM3 | H100 80GB HBM3 |
133- | :-----------------------------| :---| :------------------| :-----------------|
134- | | TP Size | 8 | 8 |
135- | ISL, OSL | | | |
136- | | | | |
137- | 128, 2048 | | 27,543.87 | |
138- | 128, 4096 | | 18,541.01 | 11,163.12 |
139- | 500, 2000 | | 21,117.34 | |
140- | 1000, 2000 | | | 10,556.00 |
141- | 1024, 2048 | | 16,859.45 | 11,584.33 |
142- | 2048, 128 | | 4,364.06 | 3,832.38 |
143- | 2048, 2048 | | 12,800.89 | |
144- | 5000, 500 | | 5,128.60 | |
145- | 20000, 2000 | | 1,764.27 | 1,400.79 |
135+ TODO
146136
147137## Reproducing Benchmarked Results
148138
0 commit comments