add allocator support to all executors

ginkgo-project · Mar 30, 2023 · ea94c89 · ea94c89
1 parent 8cb179c
commit ea94c89
Show file tree

Hide file tree

Showing 64 changed files with 1,735 additions and 756 deletions.
diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp
@@ -230,12 +230,12 @@ const std::map<std::string, std::function<std::shared_ptr<gko::Executor>(bool)>>
         {"cuda",
          [](bool) {
              return gko::CudaExecutor::create(FLAGS_device_id,
-                                              gko::OmpExecutor::create(), true);
+                                              gko::OmpExecutor::create());
          }},
         {"hip",
          [](bool) {
              return gko::HipExecutor::create(FLAGS_device_id,
-                                             gko::OmpExecutor::create(), true);
+                                             gko::OmpExecutor::create());
          }},
         {"dpcpp", [](bool use_gpu_timer) {
              auto property = dpcpp_queue_property::in_order;

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(ginkgo
     base/device_matrix_data.cpp
     base/executor.cpp
     base/index_set.cpp
+    base/memory.cpp
     base/mpi.cpp
     base/mtx_io.cpp
     base/perturbation.cpp

diff --git a/core/base/memory.cpp b/core/base/memory.cpp
@@ -0,0 +1,59 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <new>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace gko {
+
+
+// Allocates `num_bytes` bytes of host memory with the global non-throwing
+// `operator new` and returns the resulting pointer.
+//
+// @param num_bytes  the number of bytes to allocate
+//
+// @return the pointer obtained from `operator new`, after it has been checked
+//         by GKO_ENSURE_ALLOCATED
+void* CpuAllocator::allocate(size_type num_bytes) const
+{
+    // The std::nothrow_t overload signals failure with a null pointer instead
+    // of throwing std::bad_alloc, so the check below decides how to report it.
+    auto ptr = ::operator new (num_bytes, std::nothrow_t{});
+    // NOTE(review): GKO_ENSURE_ALLOCATED is defined outside this file;
+    // presumably it raises an allocation error for a null `ptr` - confirm.
+    GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes);
+    return ptr;
+}
+
+
+// Releases `ptr` with the global `operator delete` overload that takes a
+// std::nothrow_t tag, the counterpart of the non-throwing `operator new`
+// used in CpuAllocator::allocate.
+//
+// @param ptr  the pointer to release
+void CpuAllocator::deallocate(void* ptr) const
+{
+    ::operator delete (ptr, std::nothrow_t{});
+}
+
+
+}  // namespace gko
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
@@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
+#include <ginkgo/core/base/stream.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 #include <ginkgo/core/log/profiler_hook.hpp>
@@ -51,6 +53,45 @@ version version_info::get_cuda_version() noexcept
 }
 
 
+// Hook definitions for the CUDA allocator classes (CudaAllocator,
+// CudaAsyncAllocator, CudaUnifiedAllocator, CudaHostAllocator). Each member
+// gets its body from the GKO_NOT_COMPILED(cuda) macro.
+// NOTE(review): GKO_NOT_COMPILED is defined outside this file; presumably it
+// expands to a body that reports the cuda module as not compiled - confirm.
+void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaUnifiedAllocator::deallocate(void* dev_ptr) const
+    GKO_NOT_COMPILED(cuda);
+
+
+CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda);
+
+
+void* CudaHostAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
 std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, CUstream_st* stream)
@@ -153,6 +194,9 @@ scoped_device_id_guard::scoped_device_id_guard(const CudaExecutor* exec,
     GKO_NOT_COMPILED(cuda);
 
 
+// Hook for the default constructor of cuda_stream; the body comes from
+// GKO_NOT_COMPILED(cuda), like the other definitions in this file.
+cuda_stream::cuda_stream() GKO_NOT_COMPILED(cuda);
+
+
 cuda_stream::cuda_stream(int device_id) GKO_NOT_COMPILED(cuda);
 
 

diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 
@@ -51,6 +52,23 @@ version version_info::get_dpcpp_version() noexcept
 }
 
 
+// Hook definitions for the allocate_impl/deallocate_impl members of
+// DpcppAllocator and DpcppUnifiedAllocator. Each member gets its body from
+// the GKO_NOT_COMPILED(dpcpp) macro.
+// NOTE(review): GKO_NOT_COMPILED is defined outside this file; presumably it
+// expands to a body that reports the dpcpp module as not compiled - confirm.
+void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
+                                           size_type size) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
 std::shared_ptr<DpcppExecutor> DpcppExecutor::create(
     int device_id, std::shared_ptr<Executor> master, std::string device_type,
     dpcpp_queue_property property)

diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
@@ -52,6 +52,22 @@ version version_info::get_hip_version() noexcept
 }
 
 
+// Hook definitions for HipAllocator and HipAsyncAllocator. Each member gets
+// its body from the GKO_NOT_COMPILED(hip) macro.
+// NOTE(review): GKO_NOT_COMPILED is defined outside this file; presumably it
+// expands to a body that reports the hip module as not compiled - confirm.
+// NOTE(review): unlike the CudaAllocator hooks in cuda_hooks.cpp, allocate and
+// deallocate are not const-qualified here - confirm this matches the
+// declarations in ginkgo/core/base/memory.hpp.
+void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
 std::shared_ptr<HipExecutor> HipExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
@@ -154,6 +170,9 @@ scoped_device_id_guard::scoped_device_id_guard(const HipExecutor* exec,
     GKO_NOT_COMPILED(hip);
 
 
+// Hook for the default constructor of hip_stream; the body comes from
+// GKO_NOT_COMPILED(hip), like the other definitions in this file.
+hip_stream::hip_stream() GKO_NOT_COMPILED(hip);
+
+
 hip_stream::hip_stream(int device_id) GKO_NOT_COMPILED(hip);
 
 

diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp
@@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <thread>
 #include <type_traits>
+#include "ginkgo/core/base/memory.hpp"
 
 
 #if defined(__unix__) || defined(__APPLE__)
@@ -263,35 +264,6 @@ TEST(CudaExecutor, KnowsItsDeviceId)
 }
 
 
-TEST(CudaExecutor, CanGetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp);
-
-    ASSERT_EQ(false, cuda->get_device_reset());
-}
-
-
-TEST(CudaExecutor, CanSetDefaultDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp, true);
-
-    ASSERT_EQ(true, cuda->get_device_reset());
-}
-
-
-TEST(CudaExecutor, CanSetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp);
-
-    cuda->set_device_reset(true);
-
-    ASSERT_EQ(true, cuda->get_device_reset());
-}
-
-
 TEST(HipExecutor, KnowsItsMaster)
 {
     auto omp = gko::OmpExecutor::create();
@@ -310,35 +282,6 @@ TEST(HipExecutor, KnowsItsDeviceId)
 }
 
 
-TEST(HipExecutor, CanGetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp);
-
-    ASSERT_EQ(false, hip->get_device_reset());
-}
-
-
-TEST(HipExecutor, CanSetDefaultDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp, true);
-
-    ASSERT_EQ(true, hip->get_device_reset());
-}
-
-
-TEST(HipExecutor, CanSetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp);
-
-    hip->set_device_reset(true);
-
-    ASSERT_EQ(true, hip->get_device_reset());
-}
-
-
 TEST(DpcppExecutor, KnowsItsMaster)
 {
     auto omp = gko::OmpExecutor::create();
@@ -442,20 +385,11 @@ TEST(Executor, CanVerifyMemory)
 }
 
 
-template <typename T>
-struct mock_free : T {
-    /**
-     * @internal Due to a bug with gcc 5.3, the constructor needs to be called
-     * with `()` operator instead of `{}`.
-     */
-    template <typename... Params>
-    mock_free(Params&&... params) : T(std::forward<Params>(params)...)
-    {}
-
-    void raw_free(void* ptr) const noexcept override
+struct MockAllocator : gko::CpuAllocator {
+    void deallocate(void* ptr) const noexcept override
     {
         called_free = true;
-        T::raw_free(ptr);
+        CpuAllocator::deallocate(ptr);
     }
 
     mutable bool called_free{false};
@@ -464,12 +398,13 @@ struct mock_free : T {
 
+// Checks that gko::executor_deleter releases memory through the allocator the
+// executor was created with: the MockAllocator passed to
+// ReferenceExecutor::create records the deallocate call in `called_free`.
 TEST(ExecutorDeleter, DeletesObject)
 {
-    auto ref = std::make_shared<mock_free<gko::ReferenceExecutor>>();
+    auto alloc = std::make_shared<MockAllocator>();
+    auto ref = gko::ReferenceExecutor::create(alloc);
     auto x = ref->alloc<int>(5);
 
     gko::executor_deleter<int>{ref}(x);
 
-    ASSERT_TRUE(ref->called_free);
+    ASSERT_TRUE(alloc->called_free);
 }
 
 

diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt
@@ -1,11 +1,15 @@
 add_library(ginkgo_cuda $<TARGET_OBJECTS:ginkgo_cuda_device> "")
 target_sources(ginkgo_cuda
     PRIVATE
+    base/device.cpp
     base/device_matrix_data_kernels.cu
     base/exception.cpp
     base/executor.cpp
     base/index_set_kernels.cpp
+    base/memory.cpp
+    base/nvtx.cpp
     base/scoped_device_id.cpp
+    base/stream.cpp
     base/version.cpp
     components/prefix_sum_kernels.cu
     distributed/matrix_kernels.cu