Merge allocator support for executors

This allows CPU and CUDA/HIP executors to use a custom allocator.
* Deprecates the `device_reset` parameter
* Adds an additional `allocator` parameter to replace `allocation_mode`
* Provides implementations for regular, stream-ordered, unified and page-locked allocation

Related PR: #1315
ginkgo-project · Jul 20, 2023 · c2b122a · c2b122a
2 parents 1a9877b + 33e29c3
commit c2b122a
Show file tree

Hide file tree

Showing 63 changed files with 2,084 additions and 842 deletions.
diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp
@@ -337,12 +337,12 @@ const std::map<std::string, std::function<std::shared_ptr<gko::Executor>(bool)>>
         {"cuda",
          [](bool) {
              return gko::CudaExecutor::create(FLAGS_device_id,
-                                              gko::OmpExecutor::create(), true);
+                                              gko::OmpExecutor::create());
          }},
         {"hip",
          [](bool) {
              return gko::HipExecutor::create(FLAGS_device_id,
-                                             gko::OmpExecutor::create(), true);
+                                             gko::OmpExecutor::create());
          }},
         {"dpcpp", [](bool use_gpu_timer) {
              auto property = dpcpp_queue_property::in_order;
@@ -368,16 +368,15 @@ const std::map<std::string,
          [](MPI_Comm comm) {
              FLAGS_device_id = gko::experimental::mpi::map_rank_to_device_id(
                  comm, gko::CudaExecutor::get_num_devices());
-             return gko::CudaExecutor::create(
-                 FLAGS_device_id, gko::ReferenceExecutor::create(), false,
-                 gko::allocation_mode::device);
+             return gko::CudaExecutor::create(FLAGS_device_id,
+                                              gko::ReferenceExecutor::create());
          }},
         {"hip",
          [](MPI_Comm comm) {
              FLAGS_device_id = gko::experimental::mpi::map_rank_to_device_id(
                  comm, gko::HipExecutor::get_num_devices());
-             return gko::HipExecutor::create(
-                 FLAGS_device_id, gko::ReferenceExecutor::create(), true);
+             return gko::HipExecutor::create(FLAGS_device_id,
+                                             gko::ReferenceExecutor::create());
          }},
         {"dpcpp", [](MPI_Comm comm) {
              if (gko::DpcppExecutor::get_num_devices("gpu")) {

diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(ginkgo
     base/device_matrix_data.cpp
     base/executor.cpp
     base/index_set.cpp
+    base/memory.cpp
     base/mpi.cpp
     base/mtx_io.cpp
     base/perturbation.cpp

diff --git a/core/base/memory.cpp b/core/base/memory.cpp
@@ -0,0 +1,59 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <new>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace gko {
+
+
+void* CpuAllocator::allocate(size_type num_bytes)
+{
+    void* ptr = ::operator new(num_bytes, std::nothrow);  // nullptr on failure
+    GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes);
+    return ptr;
+}
+
+
+void CpuAllocator::deallocate(void* ptr)
+{
+    ::operator delete(ptr, std::nothrow);  // nothrow form, as in allocate()
+}
+
+
+}  // namespace gko
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
@@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
+#include <ginkgo/core/base/stream.hpp>
 #include <ginkgo/core/base/timer.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
@@ -52,12 +54,73 @@ version version_info::get_cuda_version() noexcept
 }
 
 
+void* CudaAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);
+
+
+void CudaAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
+
+
+CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);
+
+
+void CudaAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
+
+
+bool CudaAsyncAllocator::check_environment(int device_id,
+                                           CUstream_st* stream) const
+    GKO_NOT_COMPILED(cuda);
+
+
+CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaUnifiedAllocator::allocate(size_type num_bytes)
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
+
+
+bool CudaUnifiedAllocator::check_environment(int device_id,
+                                             CUstream_st* stream) const
+    GKO_NOT_COMPILED(cuda);
+
+
+CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda);
+
+
+void* CudaHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);
+
+
+void CudaHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);
+
+
+bool CudaHostAllocator::check_environment(int device_id,
+                                          CUstream_st* stream) const
+    GKO_NOT_COMPILED(cuda);
+
+
 std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, CUstream_st* stream)
+{  // device_reset and alloc_mode are not read; a CudaAllocator is always used
+    return std::shared_ptr<CudaExecutor>(
+        new CudaExecutor(device_id, std::move(master),
+                         std::make_shared<CudaAllocator>(), stream));
+}
+
+
+std::shared_ptr<CudaExecutor> CudaExecutor::create(
+    int device_id, std::shared_ptr<Executor> master,
+    std::shared_ptr<CudaAllocatorBase> alloc, CUstream_st* stream)
 {
     return std::shared_ptr<CudaExecutor>(new CudaExecutor(
-        device_id, std::move(master), device_reset, alloc_mode, stream));
+        device_id, std::move(master), std::move(alloc), stream));  // by move
 }
 
 
@@ -154,6 +217,9 @@ scoped_device_id_guard::scoped_device_id_guard(const CudaExecutor* exec,
     GKO_NOT_COMPILED(cuda);
 
 
+cuda_stream::cuda_stream() GKO_NOT_COMPILED(cuda);
+
+
 cuda_stream::cuda_stream(int device_id) GKO_NOT_COMPILED(cuda);
 
 

diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
 #include <ginkgo/core/base/timer.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>

diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
@@ -36,6 +36,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
+#include <ginkgo/core/base/stream.hpp>
 #include <ginkgo/core/base/timer.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
@@ -53,12 +55,72 @@ version version_info::get_hip_version() noexcept
 }
 
 
+void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+bool HipAsyncAllocator::check_environment(int device_id,
+                                          GKO_HIP_STREAM_STRUCT* stream) const
+    GKO_NOT_COMPILED(hip);
+
+
+HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+bool HipUnifiedAllocator::check_environment(int device_id,
+                                            GKO_HIP_STREAM_STRUCT* stream) const
+    GKO_NOT_COMPILED(hip);
+
+
+HipHostAllocator::HipHostAllocator(int device_id) GKO_NOT_COMPILED(hip);
+
+
+void* HipHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+bool HipHostAllocator::check_environment(int device_id,
+                                         GKO_HIP_STREAM_STRUCT* stream) const
+    GKO_NOT_COMPILED(hip);
+
+
 std::shared_ptr<HipExecutor> HipExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
 {
-    return std::shared_ptr<HipExecutor>(new HipExecutor(
-        device_id, std::move(master), device_reset, alloc_mode, stream));
+    return std::shared_ptr<HipExecutor>(  // device_reset, alloc_mode not read
+        new HipExecutor(device_id, std::move(master),
+                        std::make_shared<HipAllocator>(), stream));
+}
+
+
+std::shared_ptr<HipExecutor> HipExecutor::create(
+    int device_id, std::shared_ptr<Executor> master,
+    std::shared_ptr<HipAllocatorBase> alloc, GKO_HIP_STREAM_STRUCT* stream)
+{  // alloc is taken by value, so pass it on by move (no refcount copy)
+    return std::shared_ptr<HipExecutor>(new HipExecutor(
+        device_id, std::move(master), std::move(alloc), stream));
 }
 
 
@@ -155,6 +217,9 @@ scoped_device_id_guard::scoped_device_id_guard(const HipExecutor* exec,
     GKO_NOT_COMPILED(hip);
 
 
+hip_stream::hip_stream() GKO_NOT_COMPILED(hip);
+
+
 hip_stream::hip_stream(int device_id) GKO_NOT_COMPILED(hip);