Skip to content

Commit

Permalink
Rebase to CTK 12.5
Browse files Browse the repository at this point in the history
  • Loading branch information
vzhurba01 committed May 21, 2024
1 parent 2be0aac commit 6044e4e
Show file tree
Hide file tree
Showing 62 changed files with 4,624 additions and 482 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ There're differences in each of these options that are described further in [Ins
CUDA Python is supported on all platforms on which CUDA is supported. Specific dependencies are as follows:

* Driver: Linux (450.80.02 or later) Windows (456.38 or later)
* CUDA Toolkit 12.0 to 12.4
* CUDA Toolkit 12.0 to 12.5

Only the NVRTC redistributable component is required from the CUDA Toolkit. [CUDA Toolkit Documentation](https://docs.nvidia.com/cuda/index.html) Installation Guides can be used for guidance. Note that the NVRTC component in the Toolkit can be obtained via PyPI, Conda, or Local Installer.

Expand Down
30 changes: 30 additions & 0 deletions cuda/_cuda/ccuda.pxd.in
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@ cdef CUresult _cuCtxCreate_v2(CUcontext* pctx, unsigned int flags, CUdevice dev)
cdef CUresult _cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxCreate_v4' in found_functions}}

# Declaration of the underscore-prefixed wrapper `_cuCtxCreate_v4`, emitted by
# this template only when 'cuCtxCreate_v4' is present in `found_functions`.
# Arguments: output context pointer, pointer to a CUctxCreateParams struct,
# creation flags, and the device.
# `except ?CUDA_ERROR_NOT_FOUND` marks CUDA_ERROR_NOT_FOUND as the return value
# on which Cython checks for a pending exception; `nogil` permits calling
# without holding the GIL.
cdef CUresult _cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxDestroy_v2' in found_functions}}

cdef CUresult _cuCtxDestroy_v2(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -237,6 +242,16 @@ cdef CUresult _cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil
cdef CUresult _cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxRecordEvent' in found_functions}}

# Declaration of the wrapper `_cuCtxRecordEvent`, emitted only when
# 'cuCtxRecordEvent' is in `found_functions`. Takes a context handle and an
# event handle and returns a CUresult.
# NOTE(review): presumably records `hEvent` against the work of `hCtx` —
# confirm against the CUDA Driver API reference.
cdef CUresult _cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxWaitEvent' in found_functions}}

# Declaration of the wrapper `_cuCtxWaitEvent`, emitted only when
# 'cuCtxWaitEvent' is in `found_functions`. Same parameter list as
# `_cuCtxRecordEvent`: a context handle and an event handle.
# NOTE(review): presumably makes `hCtx` wait on `hEvent` — confirm against the
# CUDA Driver API reference.
cdef CUresult _cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxAttach' in found_functions}}

cdef CUresult _cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -382,6 +397,11 @@ cdef CUresult _cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUD
cdef CUresult _cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuKernelGetLibrary' in found_functions}}

# Declaration of the wrapper `_cuKernelGetLibrary`, emitted only when
# 'cuKernelGetLibrary' is in `found_functions`. Writes a CUlibrary through
# `pLib` for the given `kernel` handle and returns a CUresult.
cdef CUresult _cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuLibraryGetGlobal' in found_functions}}

cdef CUresult _cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -1027,6 +1047,11 @@ cdef CUresult _cuStreamGetId(CUstream hStream, unsigned long long* streamId) exc
cdef CUresult _cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuStreamGetCtx_v2' in found_functions}}

# Declaration of the wrapper `_cuStreamGetCtx_v2`, emitted only when
# 'cuStreamGetCtx_v2' is in `found_functions`. Unlike `_cuStreamGetCtx`, which
# has a single CUcontext* output, this version has two output pointers: a
# CUcontext* and a CUgreenCtx*.
cdef CUresult _cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuStreamWaitEvent' in found_functions}}

cdef CUresult _cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -2152,6 +2177,11 @@ cdef CUresult _cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA
cdef CUresult _cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuGreenCtxStreamCreate' in found_functions}}

# Declaration of the wrapper `_cuGreenCtxStreamCreate`, emitted only when
# 'cuGreenCtxStreamCreate' is in `found_functions`. Writes a CUstream through
# `phStream`; inputs are a green context handle, stream flags and a priority.
cdef CUresult _cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuProfilerStart' in found_functions}}

cdef CUresult _cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down
183 changes: 183 additions & 0 deletions cuda/_cuda/ccuda.pyx.in

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions cuda/_lib/ccudart/ccudart.pxd.in
Original file line number Diff line number Diff line change
Expand Up @@ -308,3 +308,4 @@ from libcpp cimport bool
{{if True}}cdef cudaError_t _getLocalRuntimeVersion(int* runtimeVersion) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
{{if 'cudaDeviceRegisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceRegisterAsyncNotification(int device, cudaAsyncCallback callbackFunc, void* userData, cudaAsyncCallbackHandle_t* callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
{{if 'cudaDeviceUnregisterAsyncNotification' in found_functions}}cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCallbackHandle_t callback) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
# Runtime-side declaration of `_cudaGetDriverEntryPointByVersion`; the guard on
# the next line emits it only when the name is in `found_functions`.
{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil{{endif}}
11 changes: 11 additions & 0 deletions cuda/_lib/ccudart/ccudart.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -4916,4 +4916,15 @@ cdef cudaError_t _cudaDeviceUnregisterAsyncNotification(int device, cudaAsyncCal
if err != cudaSuccess:
_setLastError(err)
return err

{{endif}}
{{if 'cudaGetDriverEntryPointByVersion' in found_functions}}

# Runtime wrapper that forwards to the driver-level `ccuda._cuGetProcAddress_v2`.
#
# Parameters are passed through unchanged (symbol, funcPtr, cudaVersion, flags);
# `driverStatus` is reinterpreted as a `ccuda.CUdriverProcAddressQueryResult*`.
# Returns the driver result cast to `cudaError_t`. Any non-success result is
# also stored via `_setLastError` before being returned.
cdef cudaError_t _cudaGetDriverEntryPointByVersion(const char* symbol, void** funcPtr, unsigned int cudaVersion, unsigned long long flags, cudaDriverEntryPointQueryResult* driverStatus) except ?cudaErrorCallRequiresNewerDriver nogil:
cdef cudaError_t err = cudaSuccess
# NOTE(review): the casts assume CUresult/cudaError_t and the two
# *QueryResult enums are numerically compatible — confirm against the headers.
err = <cudaError_t>ccuda._cuGetProcAddress_v2(symbol, funcPtr, cudaVersion, flags, <ccuda.CUdriverProcAddressQueryResult*>driverStatus)
if err != cudaSuccess:
# Remember the failure so it can be reported later through the last-error state.
_setLastError(err)
return err

{{endif}}
2 changes: 2 additions & 0 deletions cuda/_lib/utils.pyx.in
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,8 @@ cdef class HelperInputVoidPtr:
elif isinstance(ptr, (int)):
# Easy run, user gave us an already configured void** address
self._cptr = <void*><void_ptr>ptr
elif isinstance(ptr, (cuda.CUdeviceptr)):
self._cptr = <void*><void_ptr>int(ptr)
elif PyObject_CheckBuffer(ptr):
# Easy run, get address from Python Buffer Protocol
err_buffer = PyObject_GetBuffer(ptr, &self._pybuffer, PyBUF_SIMPLE | PyBUF_ANY_CONTIGUOUS)
Expand Down
100 changes: 93 additions & 7 deletions cuda/ccuda.pxd.in
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ cdef extern from "cuda.h":
pass
ctypedef CUasyncCallbackEntry_st* CUasyncCallbackHandle

# Opaque struct: no fields are declared (`pass`), so it is only ever used
# through the pointer typedef `CUgreenCtx` below.
cdef struct CUgreenCtx_st:
pass
# Handle type for a green context, as taken by the cuGreenCtx* / cuStreamGetCtx_v2
# declarations in this file.
ctypedef CUgreenCtx_st* CUgreenCtx

cdef struct CUuuid_st:
char bytes[16]

Expand Down Expand Up @@ -359,7 +363,20 @@ cdef extern from "cuda.h":
CU_AD_FORMAT_BC6H_SF16 = 156
CU_AD_FORMAT_BC7_UNORM = 157
CU_AD_FORMAT_BC7_UNORM_SRGB = 158
CU_AD_FORMAT_P010 = 159
CU_AD_FORMAT_P016 = 161
CU_AD_FORMAT_NV16 = 162
CU_AD_FORMAT_P210 = 163
CU_AD_FORMAT_P216 = 164
CU_AD_FORMAT_YUY2 = 165
CU_AD_FORMAT_Y210 = 166
CU_AD_FORMAT_Y216 = 167
CU_AD_FORMAT_AYUV = 168
CU_AD_FORMAT_Y410 = 169
CU_AD_FORMAT_NV12 = 176
CU_AD_FORMAT_Y416 = 177
CU_AD_FORMAT_Y444_PLANAR8 = 178
CU_AD_FORMAT_Y444_PLANAR10 = 179
CU_AD_FORMAT_UNORM_INT8X1 = 192
CU_AD_FORMAT_UNORM_INT8X2 = 193
CU_AD_FORMAT_UNORM_INT8X4 = 194
Expand All @@ -372,6 +389,7 @@ cdef extern from "cuda.h":
CU_AD_FORMAT_SNORM_INT16X1 = 201
CU_AD_FORMAT_SNORM_INT16X2 = 202
CU_AD_FORMAT_SNORM_INT16X4 = 203
CU_AD_FORMAT_MAX = 2147483647

ctypedef CUarray_format_enum CUarray_format

Expand Down Expand Up @@ -530,7 +548,8 @@ cdef extern from "cuda.h":
CU_DEVICE_ATTRIBUTE_MULTICAST_SUPPORTED = 132
CU_DEVICE_ATTRIBUTE_MPS_ENABLED = 133
CU_DEVICE_ATTRIBUTE_HOST_NUMA_ID = 134
CU_DEVICE_ATTRIBUTE_MAX = 135
CU_DEVICE_ATTRIBUTE_D3D12_CIG_SUPPORTED = 135
CU_DEVICE_ATTRIBUTE_MAX = 136

ctypedef CUdevice_attribute_enum CUdevice_attribute

Expand Down Expand Up @@ -778,7 +797,10 @@ cdef extern from "cuda.h":
CU_LIMIT_DEV_RUNTIME_PENDING_LAUNCH_COUNT = 4
CU_LIMIT_MAX_L2_FETCH_GRANULARITY = 5
CU_LIMIT_PERSISTING_L2_CACHE_SIZE = 6
CU_LIMIT_MAX = 7
CU_LIMIT_SHMEM_SIZE = 7
CU_LIMIT_CIG_ENABLED = 8
CU_LIMIT_CIG_SHMEM_FALLBACK_ENABLED = 9
CU_LIMIT_MAX = 10

ctypedef CUlimit_enum CUlimit

Expand Down Expand Up @@ -998,6 +1020,7 @@ cdef extern from "cuda.h":
CU_LAUNCH_ATTRIBUTE_MEM_SYNC_DOMAIN = 10
CU_LAUNCH_ATTRIBUTE_LAUNCH_COMPLETION_EVENT = 12
CU_LAUNCH_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13
CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14

ctypedef CUlaunchAttributeID_enum CUlaunchAttributeID

Expand Down Expand Up @@ -1033,6 +1056,7 @@ cdef extern from "cuda.h":
CUlaunchMemSyncDomainMap memSyncDomainMap
CUlaunchMemSyncDomain memSyncDomain
anon_struct4 deviceUpdatableKernelNode
unsigned int sharedMemCarveout

ctypedef CUlaunchAttributeValue_union CUlaunchAttributeValue

Expand Down Expand Up @@ -1120,6 +1144,24 @@ cdef extern from "cuda.h":

ctypedef CUexecAffinityParam_v1 CUexecAffinityParam

# Enumerates the kind of shared data carried by CUctxCigParam.sharedData.
# Only one value is declared here.
# NOTE(review): "CIG" is not expanded anywhere in this file — see the CUDA
# Driver API documentation for its meaning.
cdef enum CUcigDataType_enum:
CIG_DATA_TYPE_D3D12_COMMAND_QUEUE = 1

ctypedef CUcigDataType_enum CUcigDataType

# CIG parameter record referenced from CUctxCreateParams.cigParams.
cdef struct CUctxCigParam_st:
# Tag saying how `sharedData` should be interpreted.
CUcigDataType sharedDataType
# Untyped pointer to the shared object.
# NOTE(review): presumably a D3D12 command queue when the tag is
# CIG_DATA_TYPE_D3D12_COMMAND_QUEUE — confirm.
void* sharedData

ctypedef CUctxCigParam_st CUctxCigParam

# Parameter bundle passed by pointer to cuCtxCreate_v4 (`ctxCreateParams`).
cdef struct CUctxCreateParams_st:
# Pointer to CUexecAffinityParam entries; element count is in the next field.
CUexecAffinityParam* execAffinityParams
int numExecAffinityParams
# Pointer to a CUctxCigParam record.
# NOTE(review): whether this may be combined with execAffinityParams is not
# visible here — check the driver documentation.
CUctxCigParam* cigParams

ctypedef CUctxCreateParams_st CUctxCreateParams

cdef enum CUlibraryOption_enum:
CU_LIBRARY_HOST_UNIVERSAL_FUNCTION_AND_DATA_TABLE = 0
CU_LIBRARY_BINARY_IS_PRESERVED = 1
Expand Down Expand Up @@ -2123,13 +2165,19 @@ cdef extern from "cuda.h":
CU_COREDUMP_ENABLE_USER_TRIGGER = 4
CU_COREDUMP_FILE = 5
CU_COREDUMP_PIPE = 6
CU_COREDUMP_MAX = 7
CU_COREDUMP_GENERATION_FLAGS = 7
CU_COREDUMP_MAX = 8

ctypedef CUcoredumpSettings_enum CUcoredumpSettings

cdef struct CUgreenCtx_st:
pass
ctypedef CUgreenCtx_st* CUgreenCtx
# Bit flags for coredump generation. The individual SKIP_* values are distinct
# powers of two (1, 2, 4, 8, 16), so they can be combined with bitwise OR.
# NOTE(review): presumably the value type for the CU_COREDUMP_GENERATION_FLAGS
# setting — confirm against the driver documentation.
cdef enum CUCoredumpGenerationFlags:
CU_COREDUMP_DEFAULT_FLAGS = 0
CU_COREDUMP_SKIP_NONRELOCATED_ELF_IMAGES = 1
CU_COREDUMP_SKIP_GLOBAL_MEMORY = 2
CU_COREDUMP_SKIP_SHARED_MEMORY = 4
CU_COREDUMP_SKIP_LOCAL_MEMORY = 8
# 15 == 1 | 2 | 4 | 8: the four SKIP_* flags above combined (SKIP_ABORT excluded).
CU_COREDUMP_LIGHTWEIGHT_FLAGS = 15
CU_COREDUMP_SKIP_ABORT = 16

cdef struct CUdevResourceDesc_st:
pass
Expand All @@ -2138,6 +2186,10 @@ cdef extern from "cuda.h":
ctypedef enum CUgreenCtxCreate_flags:
CU_GREEN_CTX_DEFAULT_STREAM = 1

# Flags for splitting an SM device resource. The two values are distinct bits
# (1 and 2).
# NOTE(review): which API consumes these flags is not visible in this view.
ctypedef enum CUdevSmResourceSplit_flags:
CU_DEV_SM_RESOURCE_SPLIT_IGNORE_SM_COSCHEDULING = 1
CU_DEV_SM_RESOURCE_SPLIT_MAX_POTENTIAL_CLUSTER_SIZE = 2

ctypedef enum CUdevResourceType:
CU_DEV_RESOURCE_TYPE_INVALID = 0
CU_DEV_RESOURCE_TYPE_SM = 1
Expand Down Expand Up @@ -2504,6 +2556,11 @@ cdef CUresult cuCtxCreate(CUcontext* pctx, unsigned int flags, CUdevice dev) exc
cdef CUresult cuCtxCreate_v3(CUcontext* pctx, CUexecAffinityParam* paramsArray, int numParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxCreate_v4' in found_functions}}

# Public declaration of `cuCtxCreate_v4`, emitted only when the name is in
# `found_functions`. Compared with `cuCtxCreate_v3`, the affinity array and
# count arguments are replaced by a single CUctxCreateParams pointer.
# `except ?CUDA_ERROR_NOT_FOUND` marks the return value on which Cython checks
# for a pending exception; `nogil` permits calling without the GIL.
cdef CUresult cuCtxCreate_v4(CUcontext* pctx, CUctxCreateParams* ctxCreateParams, unsigned int flags, CUdevice dev) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxDestroy_v2' in found_functions}}

cdef CUresult cuCtxDestroy(CUcontext ctx) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -2594,6 +2651,16 @@ cdef CUresult cuCtxResetPersistingL2Cache() except ?CUDA_ERROR_NOT_FOUND nogil
cdef CUresult cuCtxGetExecAffinity(CUexecAffinityParam* pExecAffinity, CUexecAffinityType typename) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxRecordEvent' in found_functions}}

# Public declaration of `cuCtxRecordEvent`, emitted only when the name is in
# `found_functions`. Takes a context handle and an event handle.
# NOTE(review): presumably records `hEvent` against the work of `hCtx` —
# confirm against the CUDA Driver API reference.
cdef CUresult cuCtxRecordEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxWaitEvent' in found_functions}}

# Public declaration of `cuCtxWaitEvent`, emitted only when the name is in
# `found_functions`. Takes a context handle and an event handle.
# NOTE(review): presumably makes `hCtx` wait on `hEvent` — confirm against the
# CUDA Driver API reference.
cdef CUresult cuCtxWaitEvent(CUcontext hCtx, CUevent hEvent) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuCtxAttach' in found_functions}}

cdef CUresult cuCtxAttach(CUcontext* pctx, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -2739,6 +2806,11 @@ cdef CUresult cuLibraryGetModule(CUmodule* pMod, CUlibrary library) except ?CUDA
cdef CUresult cuKernelGetFunction(CUfunction* pFunc, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuKernelGetLibrary' in found_functions}}

# Public declaration of `cuKernelGetLibrary`, emitted only when the name is in
# `found_functions`. Writes a CUlibrary through `pLib` for the given `kernel`.
cdef CUresult cuKernelGetLibrary(CUlibrary* pLib, CUkernel kernel) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuLibraryGetGlobal' in found_functions}}

cdef CUresult cuLibraryGetGlobal(CUdeviceptr* dptr, size_t* numbytes, CUlibrary library, const char* name) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -3384,6 +3456,11 @@ cdef CUresult cuStreamGetId(CUstream hStream, unsigned long long* streamId) exce
cdef CUresult cuStreamGetCtx(CUstream hStream, CUcontext* pctx) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuStreamGetCtx_v2' in found_functions}}

# Public declaration of `cuStreamGetCtx_v2`, emitted only when the name is in
# `found_functions`. Adds a CUgreenCtx* output alongside the CUcontext* output
# that `cuStreamGetCtx` already has.
cdef CUresult cuStreamGetCtx_v2(CUstream hStream, CUcontext* pCtx, CUgreenCtx* pGreenCtx) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuStreamWaitEvent' in found_functions}}

cdef CUresult cuStreamWaitEvent(CUstream hStream, CUevent hEvent, unsigned int Flags) except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -4509,6 +4586,11 @@ cdef CUresult cuGreenCtxWaitEvent(CUgreenCtx hCtx, CUevent hEvent) except ?CUDA_
cdef CUresult cuStreamGetGreenCtx(CUstream hStream, CUgreenCtx* phCtx) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuGreenCtxStreamCreate' in found_functions}}

# Public declaration of `cuGreenCtxStreamCreate`, emitted only when the name is
# in `found_functions`. Writes a CUstream through `phStream`; inputs are a green
# context handle, stream flags and a priority.
cdef CUresult cuGreenCtxStreamCreate(CUstream* phStream, CUgreenCtx greenCtx, unsigned int flags, int priority) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

{{if 'cuProfilerStart' in found_functions}}

cdef CUresult cuProfilerStart() except ?CUDA_ERROR_NOT_FOUND nogil
Expand Down Expand Up @@ -4614,7 +4696,7 @@ cdef CUresult cuGraphicsVDPAURegisterVideoSurface(CUgraphicsResource* pCudaResou
cdef CUresult cuGraphicsVDPAURegisterOutputSurface(CUgraphicsResource* pCudaResource, VdpOutputSurface vdpSurface, unsigned int flags) except ?CUDA_ERROR_NOT_FOUND nogil
{{endif}}

cdef enum: CUDA_VERSION = 12040
cdef enum: CUDA_VERSION = 12050

cdef enum: CU_IPC_HANDLE_SIZE = 64

Expand Down Expand Up @@ -4648,6 +4730,8 @@ cdef enum: CU_KERNEL_NODE_ATTRIBUTE_MEM_SYNC_DOMAIN = 10

cdef enum: CU_KERNEL_NODE_ATTRIBUTE_DEVICE_UPDATABLE_KERNEL_NODE = 13

# Same numeric value (14) as CU_LAUNCH_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT
# in CUlaunchAttributeID.
cdef enum: CU_KERNEL_NODE_ATTRIBUTE_PREFERRED_SHARED_MEMORY_CARVEOUT = 14

cdef enum: CU_STREAM_ATTRIBUTE_ACCESS_POLICY_WINDOW = 1

cdef enum: CU_STREAM_ATTRIBUTE_SYNCHRONIZATION_POLICY = 3
Expand Down Expand Up @@ -4710,6 +4794,8 @@ cdef enum: CUDA_ARRAY3D_SPARSE = 64

cdef enum: CUDA_ARRAY3D_DEFERRED_MAPPING = 128

# CUDA_ARRAY3D_* flag bit 256, the next bit after CUDA_ARRAY3D_DEFERRED_MAPPING (128).
cdef enum: CUDA_ARRAY3D_VIDEO_ENCODE_DECODE = 256

cdef enum: CU_TRSA_OVERRIDE_FORMAT = 1

cdef enum: CU_TRSF_READ_AS_INTEGER = 1
Expand Down
Loading

0 comments on commit 6044e4e

Please sign in to comment.