File tree Expand file tree Collapse file tree 1 file changed +33
-0
lines changed
util/tuner/GPU_Microbenchmark/hw_def Expand file tree Collapse file tree 1 file changed +33
-0
lines changed Original file line number Diff line number Diff line change 1+ // These are the configuration parameters that can be found publicly
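2+ // Sources (NOTE(review): the links below cover GA102 / GeForce 30, while this header is guarded as A100 -- confirm they are the intended references):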
3+ // https://www.nvidia.com/content/dam/en-zz/Solutions/geforce/ampere/pdf/NVIDIA-ampere-GA102-GPU-Architecture-Whitepaper-V1.pdf
4+ // https://en.wikipedia.org/wiki/GeForce_30_series
5+ // https://en.wikipedia.org/wiki/CUDA
6+
7+ #ifndef AMPERE_A100_80_PCIE_DEF_H
8+ #define AMPERE_A100_80_PCIE_DEF_H
9+
10+ #include "./common/common.h"
11+ #include "./common/deviceQuery.h"
12+
13+ #define L1_SIZE (192 * 1024) // maximum L1 size in bytes (192 KiB)
14+
15+ #define CLK_FREQUENCY 1410 // clock frequency in MHz
16+
17+ #define ISSUE_MODEL issue_model::single // issue model: single-issue or dual-issue core
18+ #define CORE_MODEL core_model::subcore // core model: subcore or shared
19+ #define DRAM_MODEL dram_model::HBM // DRAM memory type
20+ #define WARP_SCHEDS_PER_SM 4 // number of warp schedulers per SM
21+
22+ // number of SASS HMMA instructions per 16x16 PTX WMMA for the FP16 - FP32 accumulate operation
23+ // see slide 22 at
24+ // https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s21730-inside-the-nvidia-ampere-architecture.pdf
25+ #define SASS_hmma_per_PTX_wmma 2
26+
27+ // These values are almost constant across HW generations
28+ // see slide 24 from Nvidia at
29+ // https://developer.download.nvidia.com/video/gputechconf/gtc/2020/presentations/s21730-inside-the-nvidia-ampere-architecture.pdf
30+ #define L2_BANKS_PER_MEM_CHANNEL 2
31+ #define L2_BANK_WIDTH_in_BYTE 32
32+
33+ #endif // AMPERE_A100_80_PCIE_DEF_H
You can’t perform that action at this time.
0 commit comments