From f74c5468fe1a20644db1ad69055a77c1cb88bae3 Mon Sep 17 00:00:00 2001
From: Romeo Valentin <romeov@stanford.edu>
Date: Mon, 12 Jun 2023 02:50:49 -0700
Subject: [PATCH] Store descriptor keys as native Julia dtypes

However, there are still a few `CUDNN_xyz_t` datatypes, which are Cenums.
We could still map those to Julia integers if serialization otherwise proves difficult.
---
 lib/cudnn/src/convolution.jl | 26 ++++++++++----------------
 1 file changed, 10 insertions(+), 16 deletions(-)

diff --git a/lib/cudnn/src/convolution.jl b/lib/cudnn/src/convolution.jl
index ec581f6511..8badfa595a 100644
--- a/lib/cudnn/src/convolution.jl
+++ b/lib/cudnn/src/convolution.jl
@@ -191,12 +191,6 @@ end
 
 ## Utilities to find a fast algorithm
 
-# Helper fct to recover cudnn descriptor tuples from cudnn descriptor pointers
-# so that we can cache algorithms based on data descriptors.
-# Actually just reverses the cache dict and returns the descriptor as a tuple.
-map_cudnn_ptr_to_jl_tuple(cache_dict, desc_ptr) = Dict(zip(values(cache_dict),
-                                                           keys(cache_dict)))[desc_ptr]
-
 const cudnnConvolutionFwdAlgoPerfCache = Dict{Tuple,cudnnConvolutionFwdAlgoPerf_t}()
 const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
 
@@ -207,11 +201,11 @@ const cudnnConvolutionFwdAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionFwdAlgoPerf(xDesc, x, wDesc, w, convDesc, yDesc, y, biasDesc, activation, allocateTmpBuf=true)
-    xDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnTensorDescriptorCache, xDesc)
-    wDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnFilterDescriptorCache, wDesc)
-    convDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnConvolutionDescriptorCache, convDesc)
+    xDesc_native = cudnnGetTensorDescriptor(xDesc)
+    wDesc_native = cudnnGetFilterDescriptor(wDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
     biasDesc_native = (isnothing(biasDesc) ? nothing
-                                           : map_cudnn_ptr_to_jl_tuple(cudnnTensorDescriptorCache, biasDesc))
+                                           : cudnnGetTensorDescriptor(biasDesc))
 
     key = (xDesc_native, wDesc_native, convDesc_native, biasDesc, activation)
     val = lock(cudnnConvolutionFwdAlgoPerfCacheLock) do
@@ -248,9 +242,9 @@ const cudnnConvolutionBwdDataAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionBwdDataAlgoPerf(wDesc, w, dyDesc, dy, convDesc, dxDesc, dx, allocateTmpBuf=true)
-    wDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnFilterDescriptorCache, wDesc)
-    dyDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnTensorDescriptorCache, dyDesc)
-    convDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnConvolutionDescriptorCache, convDesc)
+    wDesc_native = cudnnGetFilterDescriptor(wDesc)
+    dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
 
     key = (wDesc_native, dyDesc_native, convDesc_native)
     val = lock(cudnnConvolutionBwdDataAlgoPerfCacheLock) do
@@ -288,9 +282,9 @@ const cudnnConvolutionBwdFilterAlgoPerfCacheLock = ReentrantLock()
 It can be set to false when beta is zero to save an allocation and must otherwise be set to true.
 """
 function cudnnConvolutionBwdFilterAlgoPerf(xDesc, x, dyDesc, dy, convDesc, dwDesc, dw, allocateTmpBuf=true)
-    xDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnTensorDescriptorCache, xDesc)
-    dyDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnTensorDescriptorCache, dyDesc)
-    convDesc_native = map_cudnn_ptr_to_jl_tuple(cudnnConvolutionDescriptorCache, convDesc)
+    xDesc_native = cudnnGetTensorDescriptor(xDesc)
+    dyDesc_native = cudnnGetTensorDescriptor(dyDesc)
+    convDesc_native = cudnnGetConvolutionDescriptor(convDesc)
 
     key = (xDesc_native, dyDesc_native, convDesc_native)
     val = lock(cudnnConvolutionBwdFilterAlgoPerfCacheLock) do