forked from ROCm/pytorch
-
Notifications
You must be signed in to change notification settings - Fork 0
/
THCGeneral.h.in
120 lines (93 loc) · 4.21 KB
/
THCGeneral.h.in
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#ifndef THC_GENERAL_INC
#define THC_GENERAL_INC
#include <TH/THGeneral.h>
#include "THAllocator.h"
#undef log10
#undef log1p
#undef log2
#undef expm1
#include "cuda.h"
#include "cuda_runtime.h"
#include "cublas_v2.h"
#include "cusparse.h"
/* Build-time switch filled in by CMake's configure_file(): the line below
 * becomes `#define USE_MAGMA` when the USE_MAGMA build variable is true, and
 * a commented-out undef otherwise. */
#cmakedefine USE_MAGMA
/* Linkage prefix for THC declarations: `extern "C"` when this header is
 * compiled as C++, plain `extern` when it is compiled as C. */
#ifndef __cplusplus
# define THC_EXTERNC extern
#else
# define THC_EXTERNC extern "C"
#endif
/* Symbol decoration for the THC API.
 *   - Not Windows: THC_API is only the linkage prefix; THC_CLASS is empty.
 *   - Windows with any of the export macros tested below defined: dllexport.
 *   - Windows otherwise: dllimport.
 */
#if !defined(_WIN32)
# define THC_API THC_EXTERNC
# define THC_CLASS
#elif defined(ATen_cuda_EXPORTS) || defined(caffe2_gpu_EXPORTS) || defined(CAFFE2_CUDA_BUILD_MAIN_LIB)
# define THC_API THC_EXTERNC __declspec(dllexport)
# define THC_CLASS __declspec(dllexport)
#else
# define THC_API THC_EXTERNC __declspec(dllimport)
# define THC_CLASS __declspec(dllimport)
#endif
#ifndef THAssert
/* Assertion that reports through _THError instead of aborting directly.
 * `cond` is evaluated once; when it is false, _THError receives the current
 * file, line, and the source text of the condition. The check is not gated
 * on NDEBUG in this header. The do/while(0) wrapper makes the expansion a
 * single statement. Skipped entirely if THAssert is already defined. */
#define THAssert(cond)                                                  \
  do {                                                                  \
    if (!(cond))                                                        \
    {                                                                   \
      _THError(__FILE__, __LINE__, "assert(%s) failed", #cond);         \
    }                                                                   \
  } while(0)
#endif
/* Forward declarations and aliases used by the prototypes below. None of
 * THCRNGState, CUDAStreamInternals or THCState is defined in this header, so
 * this header on its own only supports using them through pointers. */
struct THCRNGState; /* Random number generator state. */
/* THCStream is the C-visible name for struct CUDAStreamInternals. */
typedef struct CUDAStreamInternals THCStream;
typedef struct THCState THCState;
struct THCState;
/* THCDeviceAllocator is an alias for THAllocator, not a distinct type. */
typedef THAllocator THCDeviceAllocator;
/* Library handles and scratch-space size kept for one CUDA device. */
typedef struct _THCCudaResourcesPerDevice {
/* cuBLAS handle is lazily initialized */
cublasHandle_t blasHandle;
/* cuSparse handle is lazily initialized */
cusparseHandle_t sparseHandle;
/* Size of the scratch space available to each stream on this device
   (NOTE(review): unit is presumably bytes, given size_t — confirm). */
size_t scratchSpacePerStream;
} THCCudaResourcesPerDevice;
/* THCState lifecycle entry points.
 * NOTE(review): the implementations are not visible from this header.
 * Presumably THCState_alloc/THCState_free create and destroy the state object
 * and THCudaInit/THCudaShutdown set up and tear down the CUDA resources held
 * by it — confirm the required call order against the implementation. */
THC_API THCState* THCState_alloc(void);
THC_API void THCState_free(THCState* state);
THC_API void THCudaInit(THCState* state);
THC_API void THCudaShutdown(THCState* state);
/* If device `dev` can access allocations on device `devToAccess`, this will return */
/* 1; otherwise, 0. */
THC_API int THCState_getPeerToPeerAccess(THCState* state, int dev, int devToAccess);
/* Device property lookups: one for the current device, one for an explicit
 * `device` index. Both return a pointer to CUDA's cudaDeviceProp.
 * NOTE(review): ownership/lifetime of the returned pointer is not stated
 * here — presumably owned by the state; confirm before caching or freeing. */
THC_API struct cudaDeviceProp* THCState_getCurrentDeviceProperties(THCState* state);
THC_API struct cudaDeviceProp* THCState_getDeviceProperties(THCState* state, int device);
/* Accessors for the RNG state and the host allocator associated with `state`. */
THC_API struct THCRNGState* THCState_getRngState(THCState* state);
THC_API THAllocator* THCState_getCudaHostAllocator(THCState* state);
/* Declared unconditionally, i.e. not guarded by USE_MAGMA in this header.
 * NOTE(review): behavior when MAGMA is not enabled is not visible here. */
THC_API void THCMagma_init(THCState *state);
/* State manipulators and accessors */
THC_API int THCState_getNumDevices(THCState* state);
/* Stream API */
/* Setters take a THCStream*. The getCurrentStream* getters return the raw
 * cudaStream_t, while THCState_getStream returns the THCStream object.
 * The *OnDevice variants take an explicit `device` index.
 * NOTE(review): the variants without a device argument presumably act on the
 * current CUDA device — confirm against the implementation. */
THC_API void THCState_setStream(THCState *state, THCStream* stream);
THC_API void THCState_setStreamOnDevice(THCState *state, int device, THCStream *stream);
THC_API cudaStream_t THCState_getCurrentStreamOnDevice(THCState *state, int device);
THC_API cudaStream_t THCState_getCurrentStream(THCState *state);
THC_API THCStream* THCState_getStream(THCState *state);
/* BLAS and sparse handles */
/* Return the cuBLAS / cuSPARSE library handles to use for calls made through
 * `state`. NOTE(review): presumably these are the per-device handles stored in
 * THCCudaResourcesPerDevice and are created on first use — confirm. */
THC_API cublasHandle_t THCState_getCurrentBlasHandle(THCState *state);
THC_API cusparseHandle_t THCState_getCurrentSparseHandle(THCState *state);
/* For the current device and stream, returns the allocated scratch space */
/* (the return value is a size, not a pointer to the space itself). */
THC_API size_t THCState_getCurrentDeviceScratchSpaceSize(THCState* state);
/* Raises a TH error ("arguments are located on different GPUs") when `expr`
 * evaluates to false. `expr` is evaluated exactly once.
 *
 * The argument is parenthesised so that compound conditions such as
 * `a || b` are negated as a whole, and the body is wrapped in do/while(0) so
 * the macro expands to a single statement that cannot capture a following
 * `else`. Use it as a statement terminated by a semicolon. */
#define THCAssertSameGPU(expr)                                    \
  do {                                                            \
    if (!(expr)) {                                                \
      THError("arguments are located on different GPUs");         \
    }                                                             \
  } while (0)
/* Status-check wrappers. Each forwards the given status value, together with
 * the file name and line number of the call site, to the matching __THC*Check
 * function. */
#define THCudaCheck(result)      __THCudaCheck(result, __FILE__, __LINE__)
#define THCudaCheckWarn(result)  __THCudaCheckWarn(result, __FILE__, __LINE__)
#define THCublasCheck(result)    __THCublasCheck(result, __FILE__, __LINE__)
#define THCusparseCheck(result)  __THCusparseCheck(result, __FILE__, __LINE__)
/* Out-of-line targets of the THCudaCheck, THCudaCheckWarn, THCublasCheck and
 * THCusparseCheck macros, which supply __FILE__ and __LINE__ as `file` and
 * `line`. Prefer the macros over calling these directly.
 * NOTE(review): how a non-success status is reported (error vs. warning) is
 * decided in the implementation, which is not visible from this header. */
THC_API void __THCudaCheck(cudaError_t err, const char *file, const int line);
THC_API void __THCudaCheckWarn(cudaError_t err, const char *file, const int line);
THC_API void __THCublasCheck(cublasStatus_t status, const char *file, const int line);
THC_API void __THCusparseCheck(cusparseStatus_t status, const char *file, const int line);
/* Memory helpers.
 * NOTE(review): presumably THCudaMalloc/THCudaFree are an allocate/release
 * pair for device memory of `size` bytes — confirm failure behavior (error
 * vs. NULL return) against the implementation. */
THC_API void* THCudaMalloc(THCState *state, size_t size);
THC_API void THCudaFree(THCState *state, void* ptr);
/* C++-only declaration: the return type at::DataPtr is a C++ type, so the
 * prototype is hidden from C translation units. Unlike its neighbours it is
 * declared without THC_API, i.e. with C++ linkage and no dllexport/dllimport
 * decoration. */
#ifdef __cplusplus
at::DataPtr THCudaHostAlloc(THCState *state, size_t size);
#endif
THC_API void THCudaHostRecord(THCState *state, void *ptr);
/* Reports memory statistics through the three size_t out-parameters and
 * returns a cudaError_t status.
 * NOTE(review): presumably all three pointers must be non-NULL and the values
 * are in bytes — confirm. */
THC_API cudaError_t THCudaMemGetInfo(THCState *state, size_t* freeBytes, size_t* totalBytes, size_t* largestBlock);
#endif