isl-org · ssheorey · Oct 31, 2023 · Oct 21, 2023
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -20,6 +20,7 @@
 * Fix some bad triangle generation in TriangleMesh::SimplifyQuadricDecimation
 * Fix printing of tensor in gpu and add validation check for bounds of axis-aligned bounding box (PR #6444)  
 * Python 3.11 support. bump pybind11 v2.6.2 -> v2.11.1
+* Check for support of CUDA Memory Pools at runtime (#4679)
 
 ## 0.13
 

diff --git a/cpp/open3d/core/CUDAUtils.cpp b/cpp/open3d/core/CUDAUtils.cpp
@@ -108,6 +108,27 @@ void AssertCUDADeviceAvailable(const Device& device) {
     }
 }
 
+bool SupportsMemoryPools(const Device& device) {
+#if defined(BUILD_CUDA_MODULE) && (CUDART_VERSION >= 11020)
+    if (device.IsCUDA()) {
+        int driverVersion = 0;
+        int deviceSupportsMemoryPools = 0;
+        OPEN3D_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
+        if (driverVersion >=
+            11020) {  // avoid invalid value error in cudaDeviceGetAttribute
+            OPEN3D_CUDA_CHECK(cudaDeviceGetAttribute(
+                    &deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported,
+                    device.GetID()));
+        }
+        return !!deviceSupportsMemoryPools;
+    } else {
+        return false;
+    }
+#else
+    return false;
+#endif
+}
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice() {
     int device;

diff --git a/cpp/open3d/core/CUDAUtils.h b/cpp/open3d/core/CUDAUtils.h
@@ -255,6 +255,13 @@ void AssertCUDADeviceAvailable(int device_id);
 /// \param device The device to be checked.
 void AssertCUDADeviceAvailable(const Device& device);
 
+/// Checks if the CUDA device support Memory Pools
+/// used by the Stream Ordered Memory Allocator,
+/// see
+/// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
+/// \param device The device to be checked.
+bool SupportsMemoryPools(const Device& device);
+
 #ifdef BUILD_CUDA_MODULE
 
 int GetDevice();

diff --git a/cpp/open3d/core/MemoryManagerCUDA.cpp b/cpp/open3d/core/MemoryManagerCUDA.cpp
@@ -20,8 +20,12 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
     void* ptr;
     if (device.IsCUDA()) {
 #if CUDART_VERSION >= 11020
-        OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr), byte_size,
-                                          cuda::GetStream()));
+        if (cuda::SupportsMemoryPools(device)) {
+            OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr),
+                                              byte_size, cuda::GetStream()));
+        } else {
+            OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
+        }
 #else
         OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
 #endif
@@ -38,7 +42,11 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
     if (device.IsCUDA()) {
         if (ptr && IsCUDAPointer(ptr, device)) {
 #if CUDART_VERSION >= 11020
-            OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+            if (cuda::SupportsMemoryPools(device)) {
+                OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+            } else {
+                OPEN3D_CUDA_CHECK(cudaFree(ptr));
+            }
 #else
             OPEN3D_CUDA_CHECK(cudaFree(ptr));
 #endif