accel-sim · MustafaFayez · Apr 5, 2022 · Nov 2, 2022 · Feb 12, 2023
diff --git a/util/tuner/GPU_Microbenchmark/hw_def/ampere_a100_80_pcie_hw_def.h b/util/tuner/GPU_Microbenchmark/hw_def/ampere_a100_80_pcie_hw_def.h
@@ -0,0 +1,33 @@
+// These are the configuration parameters that can be found publicly.
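+// Sources (NOTE(review): the GA102 / GeForce 30 series links below do not describe the A100 -- confirm values against A100 documentation):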
+// https://www.nvidia.com/content/dam/en-zz/Solutions/geforce/ampere/pdf/NVIDIA-ampere-GA102-GPU-Architecture-Whitepaper-V1.pdf
+// https://en.wikipedia.org/wiki/GeForce_30_series
+// https://en.wikipedia.org/wiki/CUDA
+
+#ifndef AMPERE_A100_80_PCIE_DEF_H
+#define AMPERE_A100_80_PCIE_DEF_H
+
+#include "./common/common.h"
+#include "./common/deviceQuery.h"
+
+#define L1_SIZE (192 * 1024) // Max L1 size in bytes (192 KiB)
+
+#define CLK_FREQUENCY 1410 // clock frequency in MHz (NOTE(review): presumably the boost clock -- confirm)
+
+#define ISSUE_MODEL issue_model::single // issue width: single-issue or dual-issue core
+#define CORE_MODEL core_model::subcore  // core organization: subcore model or shared model
+#define DRAM_MODEL dram_model::HBM    // DRAM memory type
+#define WARP_SCHEDS_PER_SM 4            // number of warp schedulers per SM
+
+// Number of SASS HMMA instructions per 16x16 PTX WMMA for the FP16 - FP32 accumulate operation;
+// see slide 22 at
+// https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s21730-inside-the-nvidia-ampere-architecture.pdf
+#define SASS_hmma_per_PTX_wmma 2
+
+// These values are almost constant between HW generations;
+// see slide 24 from Nvidia at
+// https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s21730-inside-the-nvidia-ampere-architecture.pdf
+#define L2_BANKS_PER_MEM_CHANNEL 2 // number of L2 banks per memory channel
+#define L2_BANK_WIDTH_in_BYTE 32 // width of one L2 bank in bytes
+
+#endif