From aa1fb43c9c342990f9ae431db510102c2e42c74f Mon Sep 17 00:00:00 2001
From: Antonio Ospite
Date: Sat, 21 Oct 2023 11:02:43 +0200
Subject: [PATCH] Check for support of CUDA Memory Pools at runtime (#4679)

Some CUDA GPUs, like the Quadro M3000M, do not support Memory Pool
operations such as cudaMallocAsync/cudaFreeAsync even on driver versions
newer than 11.2 (11020), which can result in errors like:

  CUDA runtime error: operation not supported

So check for support at runtime instead of at compile time.
---
 cpp/open3d/core/CUDAUtils.cpp         | 18 ++++++++++++++++++
 cpp/open3d/core/CUDAUtils.h           |  6 ++++++
 cpp/open3d/core/MemoryManagerCUDA.cpp | 22 +++++++++++-----------
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/cpp/open3d/core/CUDAUtils.cpp b/cpp/open3d/core/CUDAUtils.cpp
index 630cbf38938d..0fc3683117c1 100644
--- a/cpp/open3d/core/CUDAUtils.cpp
+++ b/cpp/open3d/core/CUDAUtils.cpp
@@ -108,6 +108,24 @@ void AssertCUDADeviceAvailable(const Device& device) {
     }
 }
 
+bool SupportsMemoryPools(const Device& device) {
+#ifdef BUILD_CUDA_MODULE
+    if (device.IsCUDA()) {
+        int driverVersion = 0;
+        int deviceSupportsMemoryPools = 0;
+        OPEN3D_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
+        if (driverVersion >= 11020) {  // Avoid invalid value error in cudaDeviceGetAttribute.
+            OPEN3D_CUDA_CHECK(cudaDeviceGetAttribute(&deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported, device.GetID()));
+        }
+        return !!deviceSupportsMemoryPools;
+    } else {
+        return false;
+    }
+#else
+    return false;
+#endif
+}
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice() {
     int device;
diff --git a/cpp/open3d/core/CUDAUtils.h b/cpp/open3d/core/CUDAUtils.h
index 2996cd0987c2..6e5131d980f7 100644
--- a/cpp/open3d/core/CUDAUtils.h
+++ b/cpp/open3d/core/CUDAUtils.h
@@ -255,6 +255,12 @@ void AssertCUDADeviceAvailable(int device_id);
 /// \param device The device to be checked.
 void AssertCUDADeviceAvailable(const Device& device);
 
+/// Checks if the CUDA device supports Memory Pools,
+/// used by the Stream Ordered Memory Allocator, see
+/// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
+/// \param device The device to be checked.
+bool SupportsMemoryPools(const Device& device);
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice();
 
diff --git a/cpp/open3d/core/MemoryManagerCUDA.cpp b/cpp/open3d/core/MemoryManagerCUDA.cpp
index 3cc2f4730bcf..a235509d5010 100644
--- a/cpp/open3d/core/MemoryManagerCUDA.cpp
+++ b/cpp/open3d/core/MemoryManagerCUDA.cpp
@@ -19,12 +19,12 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
 
     void* ptr;
     if (device.IsCUDA()) {
-#if CUDART_VERSION >= 11020
-        OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr), byte_size,
-                                          cuda::GetStream()));
-#else
-        OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
-#endif
+        if (cuda::SupportsMemoryPools(device)) {
+            OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr),
+                                              byte_size, cuda::GetStream()));
+        } else {
+            OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
+        }
     } else {
         utility::LogError("Internal error: Unimplemented device {}.",
                           device.ToString());
@@ -37,11 +37,11 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
 
     if (device.IsCUDA()) {
         if (ptr && IsCUDAPointer(ptr, device)) {
-#if CUDART_VERSION >= 11020
-            OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
-#else
-            OPEN3D_CUDA_CHECK(cudaFree(ptr));
-#endif
+            if (cuda::SupportsMemoryPools(device)) {
+                OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+            } else {
+                OPEN3D_CUDA_CHECK(cudaFree(ptr));
+            }
         }
     } else {
         utility::LogError("Internal error: Unimplemented device {}.",
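
The runtime detection can also be exercised outside Open3D with a small
standalone program. The sketch below is illustrative only and not part of the
patch: the file name check_mempools.cu and the output format are made up, and
it assumes a CUDA 11.2+ toolkit so that cudaDevAttrMemoryPoolsSupported is
defined. It uses only documented CUDA runtime calls (cudaDriverGetVersion,
cudaGetDeviceCount, cudaDeviceGetAttribute) and mirrors the guard in
SupportsMemoryPools: query the attribute only when the driver reports at
least version 11.2 (11020).

// check_mempools.cu (hypothetical file name)
// Build with: nvcc check_mempools.cu -o check_mempools
#include <cstdio>

#include <cuda_runtime.h>

int main() {
    // The driver version gates the attribute query, as in SupportsMemoryPools().
    int driver_version = 0;
    if (cudaDriverGetVersion(&driver_version) != cudaSuccess) {
        std::printf("Could not query the CUDA driver version.\n");
        return 1;
    }

    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
        std::printf("No CUDA devices available.\n");
        return 1;
    }

    for (int id = 0; id < device_count; ++id) {
        int supports_pools = 0;
        // Querying cudaDevAttrMemoryPoolsSupported on drivers older than
        // 11.2 (11020) fails with an invalid value error, so guard the call.
        if (driver_version >= 11020) {
            cudaDeviceGetAttribute(&supports_pools,
                                   cudaDevAttrMemoryPoolsSupported, id);
        }
        std::printf("Device %d: Memory Pools %s\n", id,
                    supports_pools ? "supported" : "not supported");
    }
    return 0;
}

On a GPU like the Quadro M3000M this would be expected to report
"not supported" even with a recent driver, which is the case the patch
handles by falling back to cudaMalloc/cudaFree.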