From aa1fb43c9c342990f9ae431db510102c2e42c74f Mon Sep 17 00:00:00 2001
From: Antonio Ospite
Date: Sat, 21 Oct 2023 11:02:43 +0200
Subject: [PATCH] Check for support of CUDA Memory Pools at runtime (#4679)

Some CUDA GPUs, like the Quadro M3000M, do not support Memory Pool
operations such as cudaMallocAsync/cudaFreeAsync even on driver versions
newer than 11.2 (11020), which can result in errors like:

  CUDA runtime error: operation not supported

So check for support at runtime instead of at compile time.
---
 cpp/open3d/core/CUDAUtils.cpp         | 18 ++++++++++++++++++
 cpp/open3d/core/CUDAUtils.h           |  6 ++++++
 cpp/open3d/core/MemoryManagerCUDA.cpp | 22 +++++++++++-----------
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/cpp/open3d/core/CUDAUtils.cpp b/cpp/open3d/core/CUDAUtils.cpp
index 630cbf38938d..0fc3683117c1 100644
--- a/cpp/open3d/core/CUDAUtils.cpp
+++ b/cpp/open3d/core/CUDAUtils.cpp
@@ -108,6 +108,24 @@ void AssertCUDADeviceAvailable(const Device& device) {
     }
 }
 
+bool SupportsMemoryPools(const Device& device) {
+#ifdef BUILD_CUDA_MODULE
+    if (device.IsCUDA()) {
+        int driverVersion = 0;
+        int deviceSupportsMemoryPools = 0;
+        OPEN3D_CUDA_CHECK(cudaDriverGetVersion(&driverVersion));
+        if (driverVersion >= 11020) {  // Avoid invalid value error in cudaDeviceGetAttribute.
+            OPEN3D_CUDA_CHECK(cudaDeviceGetAttribute(&deviceSupportsMemoryPools, cudaDevAttrMemoryPoolsSupported, device.GetID()));
+        }
+        return !!deviceSupportsMemoryPools;
+    } else {
+        return false;
+    }
+#else
+    return false;
+#endif
+}
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice() {
     int device;
diff --git a/cpp/open3d/core/CUDAUtils.h b/cpp/open3d/core/CUDAUtils.h
index 2996cd0987c2..6e5131d980f7 100644
--- a/cpp/open3d/core/CUDAUtils.h
+++ b/cpp/open3d/core/CUDAUtils.h
@@ -255,6 +255,12 @@ void AssertCUDADeviceAvailable(int device_id);
 /// \param device The device to be checked.
 void AssertCUDADeviceAvailable(const Device& device);
 
+/// Checks if the CUDA device supports Memory Pools,
+/// used by the Stream Ordered Memory Allocator, see
+/// https://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__MEMORY__POOLS.html
+/// \param device The device to be checked.
+bool SupportsMemoryPools(const Device& device);
+
 #ifdef BUILD_CUDA_MODULE
 int GetDevice();
 
diff --git a/cpp/open3d/core/MemoryManagerCUDA.cpp b/cpp/open3d/core/MemoryManagerCUDA.cpp
index 3cc2f4730bcf..a235509d5010 100644
--- a/cpp/open3d/core/MemoryManagerCUDA.cpp
+++ b/cpp/open3d/core/MemoryManagerCUDA.cpp
@@ -19,12 +19,12 @@ void* MemoryManagerCUDA::Malloc(size_t byte_size, const Device& device) {
 
     void* ptr;
     if (device.IsCUDA()) {
-#if CUDART_VERSION >= 11020
-        OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr), byte_size,
-                                          cuda::GetStream()));
-#else
-        OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
-#endif
+        if (cuda::SupportsMemoryPools(device)) {
+            OPEN3D_CUDA_CHECK(cudaMallocAsync(static_cast<void**>(&ptr),
+                                              byte_size, cuda::GetStream()));
+        } else {
+            OPEN3D_CUDA_CHECK(cudaMalloc(static_cast<void**>(&ptr), byte_size));
+        }
     } else {
         utility::LogError("Internal error: Unimplemented device {}.",
                           device.ToString());
@@ -37,11 +37,11 @@ void MemoryManagerCUDA::Free(void* ptr, const Device& device) {
 
     if (device.IsCUDA()) {
         if (ptr && IsCUDAPointer(ptr, device)) {
-#if CUDART_VERSION >= 11020
-            OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
-#else
-            OPEN3D_CUDA_CHECK(cudaFree(ptr));
-#endif
+            if (cuda::SupportsMemoryPools(device)) {
+                OPEN3D_CUDA_CHECK(cudaFreeAsync(ptr, cuda::GetStream()));
+            } else {
+                OPEN3D_CUDA_CHECK(cudaFree(ptr));
+            }
         }
     } else {
         utility::LogError("Internal error: Unimplemented device {}.",
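
The runtime detection can also be exercised outside Open3D with a small
standalone program. The sketch below is illustrative only and not part of the
patch: the file name check_mempools.cu and the output format are made up, and
it assumes a CUDA 11.2+ toolkit so that cudaDevAttrMemoryPoolsSupported is
defined. It uses only documented CUDA runtime calls (cudaDriverGetVersion,
cudaGetDeviceCount, cudaDeviceGetAttribute) and mirrors the guard in
SupportsMemoryPools: query the attribute only when the driver reports at
least version 11.2 (11020).

// check_mempools.cu (hypothetical file name)
// Build with: nvcc check_mempools.cu -o check_mempools
#include <cstdio>

#include <cuda_runtime.h>

int main() {
    // The driver version gates the attribute query, as in SupportsMemoryPools().
    int driver_version = 0;
    if (cudaDriverGetVersion(&driver_version) != cudaSuccess) {
        std::printf("Could not query the CUDA driver version.\n");
        return 1;
    }

    int device_count = 0;
    if (cudaGetDeviceCount(&device_count) != cudaSuccess || device_count == 0) {
        std::printf("No CUDA devices available.\n");
        return 1;
    }

    for (int id = 0; id < device_count; ++id) {
        int supports_pools = 0;
        // Querying cudaDevAttrMemoryPoolsSupported on drivers older than
        // 11.2 (11020) fails with an invalid value error, so guard the call.
        if (driver_version >= 11020) {
            cudaDeviceGetAttribute(&supports_pools,
                                   cudaDevAttrMemoryPoolsSupported, id);
        }
        std::printf("Device %d: Memory Pools %s\n", id,
                    supports_pools ? "supported" : "not supported");
    }
    return 0;
}

On a GPU like the Quadro M3000M this would be expected to report
"not supported" even with a recent driver, which is the case the patch
handles by falling back to cudaMalloc/cudaFree.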