fix some compilation issues

ginkgo-project · Mar 30, 2023 · 9953f30 · 9953f30
1 parent 33645c0
commit 9953f30
Show file tree

Hide file tree

Showing 8 changed files with 86 additions and 57 deletions.
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
@@ -95,9 +95,19 @@ void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
 std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, CUstream_st* stream)
+{
+    return std::shared_ptr<CudaExecutor>(
+        new CudaExecutor(device_id, std::move(master),
+                         std::make_shared<CudaAllocator>(), stream));
+}
+
+
+std::shared_ptr<CudaExecutor> CudaExecutor::create(
+    int device_id, std::shared_ptr<Executor> master,
+    std::shared_ptr<CudaAllocatorBase> alloc, CUstream_st* stream)
 {
     return std::shared_ptr<CudaExecutor>(new CudaExecutor(
-        device_id, std::move(master), device_reset, alloc_mode, stream));
+        device_id, std::move(master), std::move(alloc), stream));
 }
 
 

diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
@@ -52,7 +52,18 @@ version version_info::get_dpcpp_version() noexcept
 }
 
 
-void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const
+DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp);
+
+
+void* DpcppAllocatorBase::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp);
+
+
+void* DpcppAllocator::allocate_impl(sycl::queue* queue,
+                                    size_type num_bytes) const
     GKO_NOT_COMPILED(dpcpp);
 
 
@@ -61,7 +72,7 @@ void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
 
 
 void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
-                                           size_type size) const
+                                           size_type num_bytes) const
     GKO_NOT_COMPILED(dpcpp);
 
 

diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
@@ -36,9 +36,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/stream.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 #include <ginkgo/core/log/profiler_hook.hpp>
+#include "ginkgo/core/base/memory.hpp"
 
 
 namespace gko {
@@ -52,29 +54,22 @@ version version_info::get_hip_version() noexcept
 }
 
 
-void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip);
 
 
-void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip);
 
 
-HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
+std::shared_ptr<HipExecutor> HipExecutor::create(
+    int device_id, std::shared_ptr<Executor> master, bool device_reset,
+    allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
     GKO_NOT_COMPILED(hip);
 
 
-void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
-
-
-void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
-
-
 std::shared_ptr<HipExecutor> HipExecutor::create(
-    int device_id, std::shared_ptr<Executor> master, bool device_reset,
-    allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
-{
-    return std::shared_ptr<HipExecutor>(new HipExecutor(
-        device_id, std::move(master), device_reset, alloc_mode, stream));
-}
+    int device_id, std::shared_ptr<Executor> master,
+    std::shared_ptr<HipAllocatorBase> alloc, GKO_HIP_STREAM_STRUCT* stream)
+    GKO_NOT_COMPILED(hip);
 
 
 void HipExecutor::populate_exec_info(const machine_topology* mach_topo)

diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp
@@ -97,6 +97,9 @@ void CudaAllocator::deallocate(void* ptr) const
 }
 
 
+#if CUDA_VERSION >= 11020
+
+
 CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}
 
 
@@ -108,12 +111,37 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const
     return ptr;
 }
 
+
 void CudaAsyncAllocator::deallocate(void* ptr) const
 {
     GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_));
 }
 
 
+#else  // Fall back to regular allocation
+
+
+CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {}
+
+
+void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+{
+    void* ptr{};
+    GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
+                                         num_bytes);
+    return ptr;
+}
+
+
+void CudaAsyncAllocator::deallocate(void* ptr) const
+{
+    GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
+}
+
+
+#endif
+
+
 CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id)
     : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal}
 {}

diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(ginkgo_dpcpp
     base/executor.dp.cpp
     base/helper.dp.cpp
     base/index_set_kernels.dp.cpp
+    base/memory.dp.cpp
     base/scoped_device_id.dp.cpp
     base/version.dp.cpp
     components/prefix_sum_kernels.dp.cpp

diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp
@@ -51,39 +51,6 @@ namespace gko {
 namespace detail {
 
 
-DpcppAllocator::DpcppAllocator(sycl::queue* queue) : queue_{queue} {}
-
-
-void* DpcppAllocator::allocate(size_type size)
-{
-    return sycl::malloc_device(size, *queue_);
-}
-
-
-void DpcppAllocator::deallocate(void* ptr)
-{
-    queue_->wait_and_throw();
-    sycl::free(ptr, queue_->get_context());
-}
-
-
-DpcppUnifiedAllocator::DpcppUnifiedAllocator(sycl::queue* queue) : queue_{queue}
-{}
-
-
-void* DpcppUnifiedAllocator::allocate(size_type size)
-{
-    return sycl::malloc_shared(size, *queue_);
-}
-
-
-void DpcppUnifiedAllocator::deallocate(void* ptr)
-{
-    queue_->wait_and_throw();
-    sycl::free(ptr, queue_->get_context());
-}
-
-
 const std::vector<sycl::device> get_devices(std::string device_type)
 {
     std::map<std::string, sycl::info::device_type> device_type_map{

diff --git a/dpcpp/base/memory.dp.cpp b/dpcpp/base/memory.dp.cpp
@@ -42,10 +42,22 @@ namespace gko {
 DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {}
 
 
+void* DpcppAllocatorBase::allocate(size_type num_bytes) const
+{
+    return this->allocate_impl(queue_, num_bytes);
+}
+
+
+void DpcppAllocatorBase::deallocate(void* ptr) const
+{
+    this->deallocate_impl(queue_, ptr);
+}
+
+
 void* DpcppAllocator::allocate_impl(sycl::queue* queue,
                                     size_type num_bytes) const
 {
-    return sycl::malloc_device(size, *queue);
+    return sycl::malloc_device(num_bytes, *queue);
 }
 
 
@@ -56,16 +68,17 @@ void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
 }
 
 
-void* DpcppUnifiedAllocator::allocate(size_type num_bytes)
+void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
+                                           size_type num_bytes)
 {
-    return sycl::malloc_shared(size, *queue_);
+    return sycl::malloc_shared(num_bytes, *queue);
 }
 
 
-void DpcppUnifiedAllocator::deallocate(void* ptr)
+void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr)
 {
-    queue_->wait_and_throw();
-    sycl::free(ptr, queue_->get_context());
+    queue->wait_and_throw();
+    sycl::free(ptr, queue->get_context());
 }
 
 

diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp
@@ -81,6 +81,10 @@ class DpcppAllocatorBase : public Allocator {
 public:
     DpcppAllocatorBase(sycl::queue* queue);
 
+    void* allocate(size_type num_bytes) const final;
+
+    void deallocate(void* ptr) const final;
+
 protected:
     virtual void* allocate_impl(sycl::queue* queue,
                                 size_type num_bytes) const = 0;