diff --git a/docs/bert_guide.md b/docs/bert_guide.md index 94921ebe8..a618a9b4f 100644 --- a/docs/bert_guide.md +++ b/docs/bert_guide.md @@ -1288,15 +1288,15 @@ User can use `export NVIDIA_TF32_OVERRIDE=1` to enforce the program run under TF | | FT Dense (ms) | FT Sparse (ms) | Dense EFF (ms) | Sparse EFF (ms) | FT Sparse Speedup | EFF Sparse Speedup | | :-------------------: | :-----------: | :------------: | :------------: | :-------------: | :---------------: | :----------------: | -| <1, 32> | 0.80 | 0.86 | 1.30 | 1.61 | 0.84 | 0.83 | -| <1, 128> | 0.85 | 0.87 | 1.24 | 1.62 | 0.86 | 0.85 | -| <1, 384> | 1.18 | 1.18 | 1.22 | 1.68 | 0.95 | 0.86 | -| <8, 32> | 0.91 | 0.91 | 1.20 | 1.65 | 0.86 | 0.85 | -| <8, 128> | 1.66 | 1.58 | 1.59 | 1.86 | 1.05 | 0.99 | -| <8, 384> | 4.16 | 3.79 | 2.21 | 2.36 | 1.22 | 1.11 | -| <32, 32> | 1.68 | 1.58 | 1.42 | 1.77 | 1.04 | 0.91 | -| <32, 128> | 4.71 | 3.98 | 2.75 | 2.60 | 1.23 | 1.12 | -| <32, 384> | 14.79 | 12.20 | 7.90 | 7.72 | 1.17 | 1.07 | +| <1, 32> | 1.32 | 1.58 | 1.35 | 1.62 | 0.84 | 0.83 | +| <1, 128> | 1.37 | 1.59 | 1.37 | 1.61 | 0.86 | 0.85 | +| <1, 384> | 1.52 | 1.60 | 1.40 | 1.62 | 0.95 | 0.86 | +| <8, 32> | 1.39 | 1.61 | 1.39 | 1.63 | 0.86 | 0.85 | +| <8, 128> | 1.70 | 1.62 | 1.61 | 1.63 | 1.05 | 0.99 | +| <8, 384> | 3.38 | 2.78 | 2.10 | 1.89 | 1.22 | 1.11 | +| <32, 32> | 1.70 | 1.63 | 1.50 | 1.65 | 1.04 | 0.91 | +| <32, 128> | 3.62 | 2.94 | 2.28 | 2.04 | 1.23 | 1.12 | +| <32, 384> | 10.10 | 8.60 | 5.77 | 5.38 | 1.17 | 1.07 | #### BERT performance on T4 and PyTorch