JuliaGPU · maleadt · Aug 19, 2023 · Jun 11, 2023 · Jun 12, 2023 · Jun 12, 2023
diff --git a/lib/cudnn/src/convolution.jl b/lib/cudnn/src/convolution.jl
@@ -120,6 +120,28 @@ function cudnnConvolutionForwardAD(w, x, bias, z; y, activation, convDesc, wDesc
     return y
 end
 
+# Read back what is stored in convolution descriptor `d`. The result is the tuple
+# (data type, mode, dimensionality, padding, stride, dilation); the last three
+# are themselves tuples holding one entry per convolution dimension.
+function cudnnGetConvolutionDescriptor(d::cudnnConvolutionDescriptor)
+    # The dimensionality is unknown before the query, so size each buffer for the
+    # largest case; the call overwrites `ndimsref` with the actual value.
+    maxdims = CUDNN_DIM_MAX - 2
+    ndimsref = Ref{Cint}(maxdims)
+    pads = Array{Cint}(undef, maxdims)
+    strides = Array{Cint}(undef, maxdims)
+    dilations = Array{Cint}(undef, maxdims)
+    moderef = Ref{cuDNN.cudnnConvolutionMode_t}(CUDNN_CONVOLUTION)
+    dtyperef = Ref{cuDNN.cudnnDataType_t}(cuDNN.CUDNN_DATA_FLOAT)
+
+    cudnnGetConvolutionNdDescriptor(d, maxdims, ndimsref, pads, strides, dilations,
+                                    moderef, dtyperef)
+    elt = juliaDataType(dtyperef[])
+    n = ndimsref[]
+    leading(buf) = (buf[1:n]..., )  # keep only the first `n` entries as a tuple
+    return elt, moderef[], n, leading(pads), leading(strides), leading(dilations)
+end
+
 # Helper for cudnnConvolutionDescriptor
 function cudnnSetConvolutionDescriptor(
     ptr::cudnnConvolutionDescriptor_t,
@@ -179,9 +201,15 @@ const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation, allocateTmpBuf=true)
-    key = (xDesc, wDesc, convDesc, biasDesc, activation)
+    # Key the cache on what the cudnnGet*Descriptor queries return rather than on the
+    # descriptor objects themselves; the optional bias descriptor is keyed the same way.
+    xDesc_native = cudnnGetTensorDescriptor(xDesc)
+    wDesc_native = cudnnGetFilterDescriptor(wDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
+    biasDesc_native = isnothing(biasDesc) ? nothing : cudnnGetTensorDescriptor(biasDesc)
+    key = (xDesc_native, wDesc_native, convDesc_native, biasDesc_native, activation)
     val = lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
-         get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
+        get(cudnnConvolutionFwdAlgoPerfCache, key, nothing)
     end
     if val === nothing
         requestedAlgoCount = Int(CUDNN_CONVOLUTION_FWD_ALGO_COUNT)
@@ -210,7 +238,11 @@ const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, allocateTmpBuf=true)
-    key = (wDesc, dyDesc, convDesc)
+    wDesc_native = cudnnGetFilterDescriptor(wDesc)
+    dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
+    # The cache key is made of the results of the three queries above.
+    key = (wDesc_native, dyDesc_native, convDesc_native)
     val = lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
         get(cudnnConvolutionBwdDataAlgoPerfCache, key, nothing)
     end
@@ -241,7 +273,11 @@ const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw, allocateTmpBuf=true)
-    key = (xDesc, dyDesc, convDesc)
+    xDesc_native = cudnnGetTensorDescriptor(xDesc)
+    dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
+    # Probe the cache with `key` itself, matching the Fwd and BwdData lookups.
+    key = (xDesc_native, dyDesc_native, convDesc_native)
     val = lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do
-        get(cudnnConvolutionBwdFilterAlgoPerfCache, (xDesc, dyDesc, convDesc), nothing)
+        get(cudnnConvolutionBwdFilterAlgoPerfCache, key, nothing)
     end