Commit 5e47a62 1 parent 2260456 commit 5e47a62 Copy full SHA for 5e47a62
File tree 1 file changed +33
-0
lines changed
util/tuner/GPU_Microbenchmark/hw_def
1 file changed +33
-0
lines changed Original file line number Diff line number Diff line change
1
+ // These are the configuration parameters that can be found publicly
2
+ // Sources:
3
+ // https://www.nvidia.com/content/dam/en-zz/Solutions/geforce/ampere/pdf/NVIDIA-ampere-GA102-GPU-Architecture-Whitepaper-V1.pdf
4
+ // https://en.wikipedia.org/wiki/GeForce_30_series
5
+ // https://en.wikipedia.org/wiki/CUDA
6
+
7
+ #ifndef AMPERE_A100_80_PCIE_DEF_H
8
+ #define AMPERE_A100_80_PCIE_DEF_H
9
+
10
+ #include "./common/common.h"
11
+ #include "./common/deviceQuery.h"
12
+
13
+ #define L1_SIZE (192 * 1024) // Max L1 size in bytes
14
+
15
+ #define CLK_FREQUENCY 1410 // frequency in MHz
16
+
17
+ #define ISSUE_MODEL issue_model::single // single issue core or dual issue
18
+ #define CORE_MODEL core_model::subcore // subcore model or shared model
19
+ #define DRAM_MODEL dram_model::HBM // memory type
20
+ #define WARP_SCHEDS_PER_SM 4 // number of warp schedulers per SM
21
+
22
+ // number of SASS HMMA per 16x16 PTX WMMA for FP16 - FP32 accumulate operation
23
+ // see slide 22 at
24
+ // https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s21730-inside-the-nvidia-ampere-architecture.pdf
25
+ #define SASS_hmma_per_PTX_wmma 2
26
+
27
+ // These vars are almost constant across HW generations
28
+ // see slide 24 from Nvidia at
29
+ // https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s21730-inside-the-nvidia-ampere-architecture.pdf
30
+ #define L2_BANKS_PER_MEM_CHANNEL 2
31
+ #define L2_BANK_WIDTH_in_BYTE 32
32
+
33
+ #endif
You can’t perform that action at this time.
0 commit comments