-
Notifications
You must be signed in to change notification settings - Fork 1
/
deviceQuery.h
61 lines (56 loc) · 3.82 KB
/
deviceQuery.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
//Standard includes
#include <iostream>
//Cuda includes
#include <cuda_runtime_api.h>
//Code modified from the deviceQuery example supplied with the Nvidia CUDA SDK
inline void printDeviceInfo(int dev)
{
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, dev);
std::cout << "Using GPU device: " << dev << " - on a: " << deviceProp.name << std::endl;
#if CUDART_VERSION >= 2020
int driverVersion = 0, runtimeVersion = 0;
cudaDriverGetVersion(&driverVersion);
std::cout << "CUDA Driver Version: " << (driverVersion/1000) << "." << (driverVersion%100) << std::endl;;
cudaRuntimeGetVersion(&runtimeVersion);
std::cout << "CUDA Runtime Version: " << (runtimeVersion/1000) << "." << (runtimeVersion%100) << std::endl;;
#endif
std::cout << "CUDA Capability Major revision number: " << deviceProp.major << std::endl;
std::cout << "CUDA Capability Minor revision number: " << deviceProp.minor << std::endl;
std::cout << "Total amount of global memory: " << (deviceProp.totalGlobalMem/1048576) << " MB" << std::endl;
#if CUDART_VERSION >= 2000
std::cout << "Number of multiprocessors: " << deviceProp.multiProcessorCount << std::endl;
//std::cout << "Number of cores: " << nGpuArchCoresPerSM[deviceProp.major] * deviceProp.multiProcessorCount << std::endl;
#endif
std::cout << "Total amount of constant memory: " << deviceProp.totalConstMem << " bytes" << std::endl;
std::cout << "Total amount of shared memory per block: " << deviceProp.sharedMemPerBlock << " bytes" << std::endl;
std::cout << "Total number of registers available per block: " << deviceProp.regsPerBlock << std::endl;
std::cout << "Warp size: " << deviceProp.warpSize << std::endl;
std::cout << "Maximum number of threads per block: " << deviceProp.maxThreadsPerBlock << std::endl;
std::cout << "Maximum sizes of each dimension of a block: " << deviceProp.maxThreadsDim[0] << " x " << deviceProp.maxThreadsDim[1] << " x " << deviceProp.maxThreadsDim[2] << std::endl;
std::cout << "Maximum sizes of each dimension of a grid: " << deviceProp.maxGridSize[0] << " x " << deviceProp.maxGridSize[1] << " x " << deviceProp.maxGridSize[2] << std::endl;
std::cout << "Maximum memory pitch: " << (deviceProp.memPitch/1048576) << " MB" << std::endl;
std::cout << "Texture alignment: " << deviceProp.textureAlignment << " bytes" << std::endl;
std::cout << "Clock rate: " << (deviceProp.clockRate * 1e-6f) << " GHz" << std::endl;
#if CUDART_VERSION >= 2000
std::cout << "Concurrent copy and execution: " << (deviceProp.deviceOverlap ? "Yes" : "No") << std::endl;
#endif
#if CUDART_VERSION >= 2020
std::cout << "Run time limit on kernels: " << (deviceProp.kernelExecTimeoutEnabled ? "Yes" : "No") << std::endl;
std::cout << "Integrated: " << (deviceProp.integrated ? "Yes" : "No") << std::endl;
std::cout << "Support host page-locked memory mapping: " << (deviceProp.canMapHostMemory ? "Yes" : "No") << std::endl;
std::cout << "Compute mode: " << (deviceProp.computeMode == cudaComputeModeDefault ?
"Default (multiple host threads can use this device simultaneously)" :
deviceProp.computeMode == cudaComputeModeExclusive ?
"Exclusive (only one host thread at a time can use this device)" :
deviceProp.computeMode == cudaComputeModeProhibited ?
"Prohibited (no host thread can use this device)" :
"Unknown") << std::endl;
#endif
#if CUDART_VERSION >= 3000
std::cout << "Concurrent kernel execution: " << (deviceProp.concurrentKernels ? "Yes" : "No") << std::endl;
#endif
#if CUDART_VERSION >= 3010
std::cout << "Device has ECC support enabled: " << (deviceProp.ECCEnabled ? "Yes" : "No") << std::endl;
#endif
}