update interface and remove DPC++ allocator

ginkgo-project · Jul 7, 2023 · 9fd6482 · 9fd6482
1 parent f2be25c
commit 9fd6482
Show file tree

Hide file tree

Showing 15 changed files with 348 additions and 322 deletions.
diff --git a/core/base/memory.cpp b/core/base/memory.cpp
@@ -42,15 +42,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 namespace gko {
 
 
-void* CpuAllocator::allocate(size_type num_bytes) const
+void* CpuAllocator::allocate(size_type num_bytes)
 {
     auto ptr = ::operator new (num_bytes, std::nothrow_t{});
     GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes);
     return ptr;
 }
 
 
-void CpuAllocator::deallocate(void* ptr) const
+void CpuAllocator::deallocate(void* ptr)
 {
     ::operator delete (ptr, std::nothrow_t{});
 }

diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
@@ -54,43 +54,55 @@ version version_info::get_cuda_version() noexcept
 }
 
 
-void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda);
+void* CudaAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);
 
 
-void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+void CudaAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
 
 
 CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream)
     GKO_NOT_COMPILED(cuda);
 
 
-void* CudaAsyncAllocator::allocate(size_type num_bytes) const
-    GKO_NOT_COMPILED(cuda);
+void* CudaAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);
+
 
+void CudaAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
 
-void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+bool CudaAsyncAllocator::check_environment(int device_id,
+                                           CUstream_st* stream) const
+    GKO_NOT_COMPILED(cuda);
 
 
 CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
     GKO_NOT_COMPILED(cuda);
 
 
-void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
+void* CudaUnifiedAllocator::allocate(size_type num_bytes)
     GKO_NOT_COMPILED(cuda);
 
 
-void CudaUnifiedAllocator::deallocate(void* dev_ptr) const
+void CudaUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
+
+
+bool CudaUnifiedAllocator::check_environment(int device_id,
+                                             CUstream_st* stream) const
     GKO_NOT_COMPILED(cuda);
 
 
 CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda);
 
 
-void* CudaHostAllocator::allocate(size_type num_bytes) const
-    GKO_NOT_COMPILED(cuda);
+void* CudaHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);
+
 
+void CudaHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
 
-void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+bool CudaHostAllocator::check_environment(int device_id,
+                                          CUstream_st* stream) const
+    GKO_NOT_COMPILED(cuda);
 
 
 std::shared_ptr<CudaExecutor> CudaExecutor::create(

diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
@@ -53,34 +53,6 @@ version version_info::get_dpcpp_version() noexcept
 }
 
 
-DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp);
-
-
-void* DpcppAllocatorBase::allocate(size_type num_bytes) const
-    GKO_NOT_COMPILED(dpcpp);
-
-
-void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp);
-
-
-void* DpcppAllocator::allocate_impl(sycl::queue* queue,
-                                    size_type num_bytes) const
-    GKO_NOT_COMPILED(dpcpp);
-
-
-void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
-    GKO_NOT_COMPILED(dpcpp);
-
-
-void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
-                                           size_type num_bytes) const
-    GKO_NOT_COMPILED(dpcpp);
-
-
-void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
-    GKO_NOT_COMPILED(dpcpp);
-
-
 std::shared_ptr<DpcppExecutor> DpcppExecutor::create(
     int device_id, std::shared_ptr<Executor> master, std::string device_type,
     dpcpp_queue_property property)

diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
@@ -55,10 +55,54 @@ version version_info::get_hip_version() noexcept
 }
 
 
-void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip);
+void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
 
 
-void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip);
+void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+bool HipAsyncAllocator::check_environment(int device_id,
+                                          GKO_HIP_STREAM_STRUCT* stream) const
+    GKO_NOT_COMPILED(hip);
+
+
+HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+bool HipUnifiedAllocator::check_environment(int device_id,
+                                            GKO_HIP_STREAM_STRUCT* stream) const
+    GKO_NOT_COMPILED(hip);
+
+
+HipHostAllocator::HipHostAllocator(int device_id) GKO_NOT_COMPILED(hip);
+
+
+void* HipHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+bool HipHostAllocator::check_environment(int device_id,
+                                         GKO_HIP_STREAM_STRUCT* stream) const
+    GKO_NOT_COMPILED(hip);
 
 
 std::shared_ptr<HipExecutor> HipExecutor::create(
@@ -76,8 +120,7 @@ std::shared_ptr<HipExecutor> HipExecutor::create(
     std::shared_ptr<HipAllocatorBase> alloc, GKO_HIP_STREAM_STRUCT* stream)
 {
     return std::shared_ptr<HipExecutor>(
-        new HipExecutor(device_id, std::move(master),
-                        std::make_shared<HipAllocator>(), stream));
+        new HipExecutor(device_id, std::move(master), alloc, stream));
 }
 
 

diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp
@@ -386,7 +386,7 @@ TEST(Executor, CanVerifyMemory)
 
 
 struct MockAllocator : gko::CpuAllocator {
-    void deallocate(void* ptr) const noexcept override
+    void deallocate(void* ptr) noexcept override
     {
         called_free = true;
         CpuAllocator::deallocate(ptr);

diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp
@@ -80,16 +80,19 @@ std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, cudaStream_t stream)
 {
-    return std::shared_ptr<CudaExecutor>(
-        new CudaExecutor(device_id, std::move(master),
-                         allocator_from_mode(device_id, alloc_mode), stream));
+    return create(device_id, master, allocator_from_mode(device_id, alloc_mode),
+                  stream);
 }
 
 
 std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master,
     std::shared_ptr<CudaAllocatorBase> alloc, cudaStream_t stream)
 {
+    if (!alloc->check_environment(device_id, stream)) {
+        throw Error{__FILE__, __LINE__,
+                    "Allocator uses incorrect stream or device ID."};
+    }
     return std::shared_ptr<CudaExecutor>(new CudaExecutor(
         device_id, std::move(master), std::move(alloc), stream));
 }

diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp
@@ -82,7 +82,7 @@ namespace gko {
 #endif
 
 
-void* CudaAllocator::allocate(size_type num_bytes) const
+void* CudaAllocator::allocate(size_type num_bytes)
 {
     void* ptr{};
     GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
@@ -91,7 +91,7 @@ void* CudaAllocator::allocate(size_type num_bytes) const
 }
 
 
-void CudaAllocator::deallocate(void* ptr) const
+void CudaAllocator::deallocate(void* ptr)
 {
     GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
 }
@@ -103,7 +103,7 @@ void CudaAllocator::deallocate(void* ptr) const
 CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}
 
 
-void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+void* CudaAsyncAllocator::allocate(size_type num_bytes)
 {
     void* ptr{};
     GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(
@@ -112,7 +112,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const
 }
 
 
-void CudaAsyncAllocator::deallocate(void* ptr) const
+void CudaAsyncAllocator::deallocate(void* ptr)
 {
     GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_));
 }
@@ -121,10 +121,10 @@ void CudaAsyncAllocator::deallocate(void* ptr) const
 #else  // Fall back to regular allocation
 
 
-CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {}
+CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}
 
 
-void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+void* CudaAsyncAllocator::allocate(size_type num_bytes)
 {
     void* ptr{};
     GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
@@ -133,7 +133,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const
 }
 
 
-void CudaAsyncAllocator::deallocate(void* ptr) const
+void CudaAsyncAllocator::deallocate(void* ptr)
 {
     GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
 }
@@ -142,6 +142,13 @@ void CudaAsyncAllocator::deallocate(void* ptr) const
 #endif
 
 
+bool CudaAsyncAllocator::check_environment(int device_id,
+                                           CUstream_st* stream) const
+{
+    return stream == stream_;
+}
+
+
 CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id)
     : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal}
 {}
@@ -152,7 +159,7 @@ CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
 {}
 
 
-void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
+void* CudaUnifiedAllocator::allocate(size_type num_bytes)
 {
     // we need to set the device ID in case this gets used in a host executor
     detail::cuda_scoped_device_id_guard g(device_id_);
@@ -163,18 +170,25 @@ void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
 }
 
 
-void CudaUnifiedAllocator::deallocate(void* ptr) const
+void CudaUnifiedAllocator::deallocate(void* ptr)
 {
     // we need to set the device ID in case this gets used in a host executor
     detail::cuda_scoped_device_id_guard g(device_id_);
     GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
 }
 
 
+bool CudaUnifiedAllocator::check_environment(int device_id,
+                                             CUstream_st* stream) const
+{
+    return device_id == device_id_;
+}
+
+
 CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {}
 
 
-void* CudaHostAllocator::allocate(size_type num_bytes) const
+void* CudaHostAllocator::allocate(size_type num_bytes)
 {
     // we need to set the device ID in case this gets used in a host executor
     detail::cuda_scoped_device_id_guard g(device_id_);
@@ -185,12 +199,19 @@ void* CudaHostAllocator::allocate(size_type num_bytes) const
 }
 
 
-void CudaHostAllocator::deallocate(void* ptr) const
+void CudaHostAllocator::deallocate(void* ptr)
 {
     // we need to set the device ID in case this gets used in a host executor
     detail::cuda_scoped_device_id_guard g(device_id_);
     GKO_EXIT_ON_CUDA_ERROR(cudaFreeHost(ptr));
 }
 
 
+bool CudaHostAllocator::check_environment(int device_id,
+                                          CUstream_st* stream) const
+{
+    return device_id == device_id_;
+}
+
+
 }  // namespace gko
diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt
@@ -10,7 +10,6 @@ target_sources(ginkgo_dpcpp
     base/executor.dp.cpp
     base/helper.dp.cpp
     base/index_set_kernels.dp.cpp
-    base/memory.dp.cpp
     base/scoped_device_id.dp.cpp
     base/timer.dp.cpp
     base/version.dp.cpp