From ea94c89e0256b2efdc77b0e13a0819d984c0e695 Mon Sep 17 00:00:00 2001
From: Tobias Ribizel <ribizel@kit.edu>
Date: Thu, 30 Mar 2023 12:20:20 +0200
Subject: [PATCH] add allocator support to all executors

---
 benchmark/utils/general.hpp                   |   4 +-
 core/CMakeLists.txt                           |   1 +
 core/base/memory.cpp                          |  59 ++++
 core/device_hooks/cuda_hooks.cpp              |  44 +++
 core/device_hooks/dpcpp_hooks.cpp             |  18 ++
 core/device_hooks/hip_hooks.cpp               |  19 ++
 core/test/base/executor.cpp                   |  79 +-----
 cuda/CMakeLists.txt                           |   4 +
 cuda/base/device.cpp                          |  59 ++++
 cuda/base/executor.cpp                        | 167 ++---------
 cuda/base/memory.cpp                          | 168 ++++++++++++
 cuda/base/nvtx.cpp                            |  96 +++++++
 .../stream.cpp}                               |  53 ++--
 cuda/test/base/CMakeLists.txt                 |   2 +-
 cuda/test/base/cuda_executor.cu               |  15 +-
 cuda/test/base/memory.cpp                     | 126 +++++++++
 cuda/test/utils.hpp                           |   4 +-
 devices/cuda/executor.cpp                     |  27 --
 devices/hip/executor.cpp                      |  34 ---
 devices/omp/executor.cpp                      |   7 +-
 dpcpp/base/executor.dp.cpp                    |  33 +++
 .../base/memory.dp.cpp                        |  47 ++--
 dpcpp/test/base/CMakeLists.txt                |   1 +
 dpcpp/test/base/memory.dp.cpp                 |  98 +++++++
 .../adaptiveprecision-blockjacobi.cpp         |   7 +-
 examples/cb-gmres/cb-gmres.cpp                |   7 +-
 examples/custom-logger/custom-logger.cpp      |   7 +-
 .../custom-matrix-format.cpp                  |   7 +-
 .../custom-stopping-criterion.cpp             |   7 +-
 .../ilu-preconditioned-solver.cpp             |   7 +-
 .../inverse-iteration/inverse-iteration.cpp   |   7 +-
 .../ir-ilu-preconditioned-solver.cpp          |   7 +-
 .../iterative-refinement.cpp                  |   7 +-
 .../minimal-cuda-solver.cpp                   |   2 +-
 .../mixed-multigrid-preconditioned-solver.cpp |   7 +-
 .../mixed-multigrid-solver.cpp                |   7 +-
 .../mixed-precision-ir/mixed-precision-ir.cpp |   7 +-
 examples/mixed-spmv/mixed-spmv.cpp            |   7 +-
 ...igrid-preconditioned-solver-customized.cpp |   7 +-
 .../multigrid-preconditioned-solver.cpp       |   7 +-
 .../nine-pt-stencil-solver.cpp                |   7 +-
 examples/papi-logging/papi-logging.cpp        |   7 +-
 .../performance-debugging.cpp                 |   7 +-
 examples/poisson-solver/poisson-solver.cpp    |   7 +-
 .../preconditioned-solver.cpp                 |   7 +-
 .../simple-solver-logging.cpp                 |   7 +-
 examples/simple-solver/simple-solver.cpp      |   7 +-
 .../three-pt-stencil-solver.cpp               |   7 +-
 hip/CMakeLists.txt                            |   4 +
 hip/base/device.hip.cpp                       |  61 +++++
 hip/base/executor.hip.cpp                     | 131 +--------
 hip/base/memory.hip.cpp                       |  97 +++++++
 hip/base/roctx.hip.cpp                        |  70 +++++
 hip/base/stream.hip.cpp                       |  78 ++++++
 hip/test/base/CMakeLists.txt                  |   1 -
 hip/test/base/hip_executor.hip.cpp            |  14 +-
 hip/test/utils.hip.hpp                        |   4 +-
 include/ginkgo/core/base/executor.hpp         | 259 +++++-------------
 include/ginkgo/core/base/fwd_defs.hpp         |  82 ++++++
 include/ginkgo/core/base/memory.hpp           | 211 ++++++++++++++
 include/ginkgo/core/base/stream.hpp           | 124 +++++++++
 include/ginkgo/ginkgo.hpp                     |   3 +
 test/utils/executor.hpp                       |  19 +-
 test/utils/mpi/executor.hpp                   |   5 +-
 64 files changed, 1735 insertions(+), 756 deletions(-)
 create mode 100644 core/base/memory.cpp
 create mode 100644 cuda/base/device.cpp
 create mode 100644 cuda/base/memory.cpp
 create mode 100644 cuda/base/nvtx.cpp
 rename cuda/{test/base/cuda_executor_reset.cpp => base/stream.cpp} (62%)
 create mode 100644 cuda/test/base/memory.cpp
 rename hip/test/base/hip_executor_reset.cpp => dpcpp/base/memory.dp.cpp (63%)
 create mode 100644 dpcpp/test/base/memory.dp.cpp
 create mode 100644 hip/base/device.hip.cpp
 create mode 100644 hip/base/memory.hip.cpp
 create mode 100644 hip/base/roctx.hip.cpp
 create mode 100644 hip/base/stream.hip.cpp
 create mode 100644 include/ginkgo/core/base/fwd_defs.hpp
 create mode 100644 include/ginkgo/core/base/memory.hpp
 create mode 100644 include/ginkgo/core/base/stream.hpp

diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp
index dcdb5552ed1..966a9438ee5 100644
--- a/benchmark/utils/general.hpp
+++ b/benchmark/utils/general.hpp
@@ -230,12 +230,12 @@ const std::map<std::string, std::function<std::shared_ptr<gko::Executor>(bool)>>
         {"cuda",
          [](bool) {
              return gko::CudaExecutor::create(FLAGS_device_id,
-                                              gko::OmpExecutor::create(), true);
+                                              gko::OmpExecutor::create());
          }},
         {"hip",
          [](bool) {
              return gko::HipExecutor::create(FLAGS_device_id,
-                                             gko::OmpExecutor::create(), true);
+                                             gko::OmpExecutor::create());
          }},
         {"dpcpp", [](bool use_gpu_timer) {
              auto property = dpcpp_queue_property::in_order;
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 7c821c471c2..1898ca69695 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -10,6 +10,7 @@ target_sources(ginkgo
     base/device_matrix_data.cpp
     base/executor.cpp
     base/index_set.cpp
+    base/memory.cpp
     base/mpi.cpp
     base/mtx_io.cpp
     base/perturbation.cpp
diff --git a/core/base/memory.cpp b/core/base/memory.cpp
new file mode 100644
index 00000000000..88d97bcc765
--- /dev/null
+++ b/core/base/memory.cpp
@@ -0,0 +1,59 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <new>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace gko {
+
+
+void* CpuAllocator::allocate(size_type num_bytes) const
+{
+    auto ptr = ::operator new (num_bytes, std::nothrow_t{});
+    GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes);
+    return ptr;
+}
+
+
+void CpuAllocator::deallocate(void* ptr) const
+{
+    ::operator delete (ptr, std::nothrow_t{});
+}
+
+
+}  // namespace gko
\ No newline at end of file
diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp
index 2d5121274db..70f7e668242 100644
--- a/core/device_hooks/cuda_hooks.cpp
+++ b/core/device_hooks/cuda_hooks.cpp
@@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
+#include <ginkgo/core/base/stream.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 #include <ginkgo/core/log/profiler_hook.hpp>
@@ -51,6 +53,45 @@ version version_info::get_cuda_version() noexcept
 }
 
 
+void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda);
+
+
+void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
+CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
+    GKO_NOT_COMPILED(cuda);
+
+
+void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaUnifiedAllocator::deallocate(void* dev_ptr) const
+    GKO_NOT_COMPILED(cuda);
+
+
+CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda);
+
+
+void* CudaHostAllocator::allocate(size_type num_bytes) const
+    GKO_NOT_COMPILED(cuda);
+
+
+void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
+
+
 std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, CUstream_st* stream)
@@ -153,6 +194,9 @@ scoped_device_id_guard::scoped_device_id_guard(const CudaExecutor* exec,
     GKO_NOT_COMPILED(cuda);
 
 
+cuda_stream::cuda_stream() GKO_NOT_COMPILED(cuda);
+
+
 cuda_stream::cuda_stream(int device_id) GKO_NOT_COMPILED(cuda);
 
 
diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
index cb99dc12c3c..62ee77f9cdc 100644
--- a/core/device_hooks/dpcpp_hooks.cpp
+++ b/core/device_hooks/dpcpp_hooks.cpp
@@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/base/version.hpp>
 
@@ -51,6 +52,23 @@ version version_info::get_dpcpp_version() noexcept
 }
 
 
+void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
+                                           size_type size) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
+void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
+    GKO_NOT_COMPILED(dpcpp);
+
+
 std::shared_ptr<DpcppExecutor> DpcppExecutor::create(
     int device_id, std::shared_ptr<Executor> master, std::string device_type,
     dpcpp_queue_property property)
diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp
index 95772cc0353..7f8ac45b4b9 100644
--- a/core/device_hooks/hip_hooks.cpp
+++ b/core/device_hooks/hip_hooks.cpp
@@ -52,6 +52,22 @@ version version_info::get_hip_version() noexcept
 }
 
 
+void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
+HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
+    GKO_NOT_COMPILED(hip);
+
+
+void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
+
+
+void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
+
+
 std::shared_ptr<HipExecutor> HipExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
@@ -154,6 +170,9 @@ scoped_device_id_guard::scoped_device_id_guard(const HipExecutor* exec,
     GKO_NOT_COMPILED(hip);
 
 
+hip_stream::hip_stream() GKO_NOT_COMPILED(hip);
+
+
 hip_stream::hip_stream(int device_id) GKO_NOT_COMPILED(hip);
 
 
diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp
index 0d64dfcf3cf..71064cf01d2 100644
--- a/core/test/base/executor.cpp
+++ b/core/test/base/executor.cpp
@@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <thread>
 #include <type_traits>
+#include "ginkgo/core/base/memory.hpp"
 
 
 #if defined(__unix__) || defined(__APPLE__)
@@ -263,35 +264,6 @@ TEST(CudaExecutor, KnowsItsDeviceId)
 }
 
 
-TEST(CudaExecutor, CanGetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp);
-
-    ASSERT_EQ(false, cuda->get_device_reset());
-}
-
-
-TEST(CudaExecutor, CanSetDefaultDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp, true);
-
-    ASSERT_EQ(true, cuda->get_device_reset());
-}
-
-
-TEST(CudaExecutor, CanSetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto cuda = gko::CudaExecutor::create(0, omp);
-
-    cuda->set_device_reset(true);
-
-    ASSERT_EQ(true, cuda->get_device_reset());
-}
-
-
 TEST(HipExecutor, KnowsItsMaster)
 {
     auto omp = gko::OmpExecutor::create();
@@ -310,35 +282,6 @@ TEST(HipExecutor, KnowsItsDeviceId)
 }
 
 
-TEST(HipExecutor, CanGetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp);
-
-    ASSERT_EQ(false, hip->get_device_reset());
-}
-
-
-TEST(HipExecutor, CanSetDefaultDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp, true);
-
-    ASSERT_EQ(true, hip->get_device_reset());
-}
-
-
-TEST(HipExecutor, CanSetDeviceResetBoolean)
-{
-    auto omp = gko::OmpExecutor::create();
-    auto hip = gko::HipExecutor::create(0, omp);
-
-    hip->set_device_reset(true);
-
-    ASSERT_EQ(true, hip->get_device_reset());
-}
-
-
 TEST(DpcppExecutor, KnowsItsMaster)
 {
     auto omp = gko::OmpExecutor::create();
@@ -442,20 +385,11 @@ TEST(Executor, CanVerifyMemory)
 }
 
 
-template <typename T>
-struct mock_free : T {
-    /**
-     * @internal Due to a bug with gcc 5.3, the constructor needs to be called
-     * with `()` operator instead of `{}`.
-     */
-    template <typename... Params>
-    mock_free(Params&&... params) : T(std::forward<Params>(params)...)
-    {}
-
-    void raw_free(void* ptr) const noexcept override
+struct MockAllocator : gko::CpuAllocator {
+    void deallocate(void* ptr) const noexcept override
     {
         called_free = true;
-        T::raw_free(ptr);
+        CpuAllocator::deallocate(ptr);
     }
 
     mutable bool called_free{false};
@@ -464,12 +398,13 @@ struct mock_free : T {
 
 TEST(ExecutorDeleter, DeletesObject)
 {
-    auto ref = std::make_shared<mock_free<gko::ReferenceExecutor>>();
+    auto alloc = std::make_shared<MockAllocator>();
+    auto ref = gko::ReferenceExecutor::create(alloc);
     auto x = ref->alloc<int>(5);
 
     gko::executor_deleter<int>{ref}(x);
 
-    ASSERT_TRUE(ref->called_free);
+    ASSERT_TRUE(alloc->called_free);
 }
 
 
diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt
index 2f9f37bfdfc..629515e8810 100644
--- a/cuda/CMakeLists.txt
+++ b/cuda/CMakeLists.txt
@@ -1,11 +1,15 @@
 add_library(ginkgo_cuda $<TARGET_OBJECTS:ginkgo_cuda_device> "")
 target_sources(ginkgo_cuda
     PRIVATE
+    base/device.cpp
     base/device_matrix_data_kernels.cu
     base/exception.cpp
     base/executor.cpp
     base/index_set_kernels.cpp
+    base/memory.cpp
+    base/nvtx.cpp
     base/scoped_device_id.cpp
+    base/stream.cpp
     base/version.cpp
     components/prefix_sum_kernels.cu
     distributed/matrix_kernels.cu
diff --git a/cuda/base/device.cpp b/cuda/base/device.cpp
new file mode 100644
index 00000000000..5aa9e7f1a17
--- /dev/null
+++ b/cuda/base/device.cpp
@@ -0,0 +1,59 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include "cuda/base/device.hpp"
+
+
+#include <cuda_runtime.h>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+#include "cuda/base/scoped_device_id.hpp"
+
+
+namespace gko {
+namespace kernels {
+namespace cuda {
+
+
+void reset_device(int device_id)
+{
+    gko::detail::cuda_scoped_device_id_guard guard{device_id};
+    cudaDeviceReset();
+}
+
+
+}  // namespace cuda
+}  // namespace kernels
+}  // namespace gko
diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp
index 2b0341fc4a6..f6e838dd2dd 100644
--- a/cuda/base/executor.cpp
+++ b/cuda/base/executor.cpp
@@ -39,18 +39,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <cuda_runtime.h>
-#ifdef GKO_LEGACY_NVTX
-#include <nvToolsExt.h>
-#else
-#include <nvtx3/nvToolsExt.h>
-#endif
 
 
 #include <ginkgo/config.hpp>
 #include <ginkgo/core/base/device.hpp>
 #include <ginkgo/core/base/exception.hpp>
 #include <ginkgo/core/base/exception_helpers.hpp>
-#include <ginkgo/core/log/profiler_hook.hpp>
+#include <ginkgo/core/base/memory.hpp>
 
 
 #include "cuda/base/config.hpp"
@@ -65,25 +60,38 @@ namespace gko {
 #include "common/cuda_hip/base/executor.hpp.inc"
 
 
+std::unique_ptr<CudaAllocatorBase> allocator_from_mode(int device_id,
+                                                       allocation_mode mode)
+{
+    switch (mode) {
+    case allocation_mode::device:
+        return std::make_unique<CudaAllocator>();
+    case allocation_mode::unified_global:
+        return std::make_unique<CudaUnifiedAllocator>(device_id);
+    case allocation_mode::unified_host:
+        return std::make_unique<CudaUnifiedAllocator>(device_id);
+    default:
+        GKO_NOT_SUPPORTED(mode);
+    }
+}
+
+
 std::shared_ptr<CudaExecutor> CudaExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, cudaStream_t stream)
 {
     return std::shared_ptr<CudaExecutor>(
-        new CudaExecutor(device_id, std::move(master), device_reset, alloc_mode,
-                         stream),
-        [device_id](CudaExecutor* exec) {
-            auto device_reset = exec->get_device_reset();
-            std::lock_guard<std::mutex> guard(
-                nvidia_device::get_mutex(device_id));
-            delete exec;
-            auto& num_execs = nvidia_device::get_num_execs(device_id);
-            num_execs--;
-            if (!num_execs && device_reset) {
-                detail::cuda_scoped_device_id_guard g(device_id);
-                cudaDeviceReset();
-            }
-        });
+        new CudaExecutor(device_id, std::move(master),
+                         allocator_from_mode(device_id, alloc_mode), stream));
+}
+
+
+std::shared_ptr<CudaExecutor> CudaExecutor::create(
+    int device_id, std::shared_ptr<Executor> master,
+    std::shared_ptr<CudaAllocatorBase> alloc, cudaStream_t stream)
+{
+    return std::shared_ptr<CudaExecutor>(new CudaExecutor(
+        device_id, std::move(master), std::move(alloc), stream));
 }
 
 
@@ -123,41 +131,14 @@ void OmpExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes,
 void CudaExecutor::raw_free(void* ptr) const noexcept
 {
     detail::cuda_scoped_device_id_guard g(this->get_device_id());
-    auto error_code = cudaFree(ptr);
-    if (error_code != cudaSuccess) {
-#if GKO_VERBOSE_LEVEL >= 1
-        // Unfortunately, if memory free fails, there's not much we can do
-        std::cerr << "Unrecoverable CUDA error on device "
-                  << this->get_device_id() << " in " << __func__ << ": "
-                  << cudaGetErrorName(error_code) << ": "
-                  << cudaGetErrorString(error_code) << std::endl
-                  << "Exiting program" << std::endl;
-#endif  // GKO_VERBOSE_LEVEL >= 1
-        std::exit(error_code);
-    }
+    alloc_->deallocate(ptr);
 }
 
 
 void* CudaExecutor::raw_alloc(size_type num_bytes) const
 {
-    void* dev_ptr = nullptr;
     detail::cuda_scoped_device_id_guard g(this->get_device_id());
-    int error_code = 0;
-    if (this->alloc_mode_ == allocation_mode::unified_host) {
-        error_code = cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachHost);
-    } else if (this->alloc_mode_ == allocation_mode::unified_global) {
-        error_code =
-            cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachGlobal);
-    } else if (this->alloc_mode_ == allocation_mode::device) {
-        error_code = cudaMalloc(&dev_ptr, num_bytes);
-    } else {
-        GKO_NOT_SUPPORTED(this->alloc_mode_);
-    }
-    if (error_code != cudaErrorMemoryAllocation) {
-        GKO_ASSERT_NO_CUDA_ERRORS(error_code);
-    }
-    GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes);
-    return dev_ptr;
+    return alloc_->allocate(num_bytes);
 }
 
 
@@ -298,92 +279,4 @@ void CudaExecutor::init_handles()
 }
 
 
-cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id)
-{
-    detail::cuda_scoped_device_id_guard g(device_id_);
-    GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamCreate(&stream_));
-}
-
-
-cuda_stream::~cuda_stream()
-{
-    if (stream_) {
-        detail::cuda_scoped_device_id_guard g(device_id_);
-        cudaStreamDestroy(stream_);
-    }
-}
-
-
-cuda_stream::cuda_stream(cuda_stream&& other)
-    : stream_{std::exchange(other.stream_, nullptr)},
-      device_id_(std::exchange(other.device_id_, -1))
-{}
-
-
-CUstream_st* cuda_stream::get() const { return stream_; }
-
-
-namespace log {
-
-
-// "GKO" in ASCII to avoid collision with other application's categories
-constexpr static uint32 category_magic_offset = 0x676B6FU;
-
-
-void init_nvtx()
-{
-#define NAMED_CATEGORY(_name)                                             \
-    nvtxNameCategory(static_cast<uint32>(profile_event_category::_name) + \
-                         category_magic_offset,                           \
-                     "gko::" #_name)
-    NAMED_CATEGORY(memory);
-    NAMED_CATEGORY(operation);
-    NAMED_CATEGORY(object);
-    NAMED_CATEGORY(linop);
-    NAMED_CATEGORY(factory);
-    NAMED_CATEGORY(solver);
-    NAMED_CATEGORY(criterion);
-    NAMED_CATEGORY(user);
-    NAMED_CATEGORY(internal);
-#undef NAMED_CATEGORY
-}
-
-
-std::function<void(const char*, profile_event_category)> begin_nvtx_fn(
-    uint32_t color_argb)
-{
-    return [color_argb](const char* name, profile_event_category category) {
-        nvtxEventAttributes_t attr{};
-        attr.version = NVTX_VERSION;
-        attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
-        attr.category = static_cast<uint32>(category) + category_magic_offset;
-        attr.colorType = NVTX_COLOR_ARGB;
-        attr.color = color_argb;
-        attr.payloadType = NVTX_PAYLOAD_UNKNOWN;
-        attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
-        attr.message.ascii = name;
-        nvtxRangePushEx(&attr);
-    };
-}
-
-
-void end_nvtx(const char* name, profile_event_category) { nvtxRangePop(); }
-
-
-}  // namespace log
-
-
-namespace kernels {
-namespace cuda {
-
-
-void reset_device(int device_id)
-{
-    gko::detail::cuda_scoped_device_id_guard guard{device_id};
-    cudaDeviceReset();
-}
-
-
-}  // namespace cuda
-}  // namespace kernels
 }  // namespace gko
diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp
new file mode 100644
index 00000000000..11dee81ad42
--- /dev/null
+++ b/cuda/base/memory.cpp
@@ -0,0 +1,168 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <cuda_runtime.h>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+#include "cuda/base/scoped_device_id.hpp"
+
+
+namespace gko {
+
+
+#define GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(_operation, _size)       \
+    {                                                                 \
+        auto error_code = _operation;                                 \
+        if (error_code == cudaErrorMemoryAllocation) {                \
+            throw AllocationError(__FILE__, __LINE__, "cuda", _size); \
+        } else {                                                      \
+            GKO_ASSERT_NO_CUDA_ERRORS(error_code);                    \
+        }                                                             \
+    }
+
+
+#if GKO_VERBOSE_LEVEL >= 1
+#define GKO_EXIT_ON_CUDA_ERROR(_operation)                                  \
+    {                                                                       \
+        const auto error_code = _operation;                                 \
+        if (error_code != cudaSuccess) {                                    \
+            int device_id{-1};                                              \
+            cudaGetDevice(&device_id);                                      \
+            std::cerr << "Unrecoverable CUDA error on device " << device_id \
+                      << " in " << __func__ << ":" << __LINE__ << ": "      \
+                      << cudaGetErrorName(error_code) << ": "               \
+                      << cudaGetErrorString(error_code) << std::endl        \
+                      << "Exiting program" << std::endl;                    \
+            std::exit(error_code);                                          \
+        }                                                                   \
+    }
+#else
+#define GKO_EXIT_ON_CUDA_ERROR(_operation)  \
+    {                                       \
+        const auto error_code = _operation; \
+        if (error_code != cudaSuccess) {    \
+            std::exit(error_code);          \
+        }                                   \
+    }
+#endif
+
+
+void* CudaAllocator::allocate(size_type num_bytes) const
+{
+    void* ptr{};
+    GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
+                                         num_bytes);
+    return ptr;
+}
+
+
+void CudaAllocator::deallocate(void* ptr) const
+{
+    GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
+}
+
+
+CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}
+
+
+void* CudaAsyncAllocator::allocate(size_type num_bytes) const
+{
+    void* ptr{};
+    GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(
+        cudaMallocAsync(&ptr, num_bytes, stream_), num_bytes);
+    return ptr;
+}
+
+void CudaAsyncAllocator::deallocate(void* ptr) const
+{
+    GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_));
+}
+
+
+CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id)
+    : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal}
+{}
+
+
+CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
+    : device_id_{device_id}, flags_{flags}
+{}
+
+
+void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
+{
+    // we need to set the device ID in case this gets used in a host executor
+    detail::cuda_scoped_device_id_guard g(device_id_);
+    void* ptr{};
+    GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(
+        cudaMallocManaged(&ptr, num_bytes, flags_), num_bytes);
+    return ptr;
+}
+
+
+void CudaUnifiedAllocator::deallocate(void* ptr) const
+{
+    // we need to set the device ID in case this gets used in a host executor
+    detail::cuda_scoped_device_id_guard g(device_id_);
+    GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
+}
+
+
+CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {}
+
+
+void* CudaHostAllocator::allocate(size_type num_bytes) const
+{
+    // we need to set the device ID in case this gets used in a host executor
+    detail::cuda_scoped_device_id_guard g(device_id_);
+    void* ptr{};
+    GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMallocHost(&ptr, num_bytes),
+                                         num_bytes);
+    return ptr;
+}
+
+
+void CudaHostAllocator::deallocate(void* ptr) const
+{
+    // we need to set the device ID in case this gets used in a host executor
+    detail::cuda_scoped_device_id_guard g(device_id_);
+    GKO_EXIT_ON_CUDA_ERROR(cudaFreeHost(ptr));
+}
+
+
+}  // namespace gko
\ No newline at end of file
diff --git a/cuda/base/nvtx.cpp b/cuda/base/nvtx.cpp
new file mode 100644
index 00000000000..e313c110ea2
--- /dev/null
+++ b/cuda/base/nvtx.cpp
@@ -0,0 +1,96 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/config.hpp>
+
+
+#include <cuda_runtime.h>
+#ifdef GKO_LEGACY_NVTX
+#include <nvToolsExt.h>
+#else
+#include <nvtx3/nvToolsExt.h>
+#endif
+
+
+#include <ginkgo/core/log/profiler_hook.hpp>
+
+
+namespace gko {
+namespace log {
+
+
+// "GKO" in ASCII to avoid collision with other application's categories
+constexpr static uint32 category_magic_offset = 0x676B6FU;
+
+
+void init_nvtx()
+{
+#define NAMED_CATEGORY(_name)                                             \
+    nvtxNameCategory(static_cast<uint32>(profile_event_category::_name) + \
+                         category_magic_offset,                           \
+                     "gko::" #_name)
+    NAMED_CATEGORY(memory);
+    NAMED_CATEGORY(operation);
+    NAMED_CATEGORY(object);
+    NAMED_CATEGORY(linop);
+    NAMED_CATEGORY(factory);
+    NAMED_CATEGORY(solver);
+    NAMED_CATEGORY(criterion);
+    NAMED_CATEGORY(user);
+    NAMED_CATEGORY(internal);
+#undef NAMED_CATEGORY
+}
+
+
+std::function<void(const char*, profile_event_category)> begin_nvtx_fn(
+    uint32_t color_argb)
+{
+    return [color_argb](const char* name, profile_event_category category) {
+        nvtxEventAttributes_t attr{};
+        attr.version = NVTX_VERSION;
+        attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE;
+        attr.category = static_cast<uint32>(category) + category_magic_offset;
+        attr.colorType = NVTX_COLOR_ARGB;
+        attr.color = color_argb;
+        attr.payloadType = NVTX_PAYLOAD_UNKNOWN;
+        attr.messageType = NVTX_MESSAGE_TYPE_ASCII;
+        attr.message.ascii = name;
+        nvtxRangePushEx(&attr);
+    };
+}
+
+
+void end_nvtx(const char* name, profile_event_category) { nvtxRangePop(); }
+
+
+}  // namespace log
+}  // namespace gko
diff --git a/cuda/test/base/cuda_executor_reset.cpp b/cuda/base/stream.cpp
similarity index 62%
rename from cuda/test/base/cuda_executor_reset.cpp
rename to cuda/base/stream.cpp
index c8159b9c4d7..8c6aa92c28b 100644
--- a/cuda/test/base/cuda_executor_reset.cpp
+++ b/cuda/base/stream.cpp
@@ -30,58 +30,45 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
-#include <ginkgo/core/base/executor.hpp>
+#include <cuda_runtime.h>
 
 
-#include <thread>
+#include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/base/stream.hpp>
 
 
-#include <gtest/gtest.h>
+#include "cuda/base/scoped_device_id.hpp"
 
 
-namespace {
+namespace gko {
 
 
-#define GTEST_ASSERT_NO_EXIT(statement) \
-    ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "")
+cuda_stream::cuda_stream() : stream_{}, device_id_{-1} {}
 
 
-TEST(DeviceReset, HipCuda)
+cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id)
 {
-    GTEST_ASSERT_NO_EXIT({
-        auto ref = gko::ReferenceExecutor::create();
-        auto hip = gko::HipExecutor::create(0, ref, true);
-        auto cuda = gko::CudaExecutor::create(0, ref, true);
-    });
+    detail::cuda_scoped_device_id_guard g(device_id_);
+    GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamCreate(&stream_));
 }
 
 
-TEST(DeviceReset, CudaHip)
+cuda_stream::~cuda_stream()
 {
-    GTEST_ASSERT_NO_EXIT({
-        auto ref = gko::ReferenceExecutor::create();
-        auto cuda = gko::CudaExecutor::create(0, ref, true);
-        auto hip = gko::HipExecutor::create(0, ref, true);
-    });
+    if (stream_) {
+        detail::cuda_scoped_device_id_guard g(device_id_);
+        cudaStreamDestroy(stream_);
+    }
 }
 
 
-void func()
-{
-    auto ref = gko::ReferenceExecutor::create();
-    auto exec = gko::CudaExecutor::create(0, ref, true);
-}
+cuda_stream::cuda_stream(cuda_stream&& other)
+    : stream_{std::exchange(other.stream_, nullptr)},
+      device_id_(std::exchange(other.device_id_, -1))
+{}
 
 
-TEST(DeviceReset, CudaCuda)
-{
-    GTEST_ASSERT_NO_EXIT({
-        std::thread t1(func);
-        std::thread t2(func);
-        t1.join();
-        t2.join();
-    });
-}
+CUstream_st* cuda_stream::get() const { return stream_; }
 
 
-}  // namespace
+}  // namespace gko
diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt
index 9be3caf9faa..a213e65277a 100644
--- a/cuda/test/base/CMakeLists.txt
+++ b/cuda/test/base/CMakeLists.txt
@@ -1,7 +1,6 @@
 ginkgo_create_cuda_test(array)
 ginkgo_create_cuda_test(cuda_executor)
 ginkgo_create_test(index_set)
-ginkgo_create_test(cuda_executor_reset ADDITIONAL_LIBRARIES Threads::Threads)
 if(GINKGO_HAVE_HWLOC)
     find_package(NUMA REQUIRED)
     ginkgo_create_cuda_test(cuda_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA)
@@ -10,4 +9,5 @@ ginkgo_create_cuda_test(exception_helpers)
 ginkgo_create_cuda_test(kernel_launch)
 ginkgo_create_cuda_test(lin_op)
 ginkgo_create_cuda_test(math)
+ginkgo_create_test(memory)
 ginkgo_create_cuda_test(scoped_device_id)
diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu
index 5f489ac22f0..afb23c06186 100644
--- a/cuda/test/base/cuda_executor.cu
+++ b/cuda/test/base/cuda_executor.cu
@@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 #include <ginkgo/core/base/exception.hpp>
 #include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/base/stream.hpp>
+
 
 #include "common/cuda_hip/base/executor.hpp.inc"
 #include "cuda/base/scoped_device_id.hpp"
@@ -103,18 +105,19 @@ protected:
         ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0);
 #ifdef GKO_TEST_NONDEFAULT_STREAM
         cuda = gko::CudaExecutor::create(
-            0, omp, false, gko::default_cuda_alloc_mode, stream.get());
+            0, omp, std::make_shared<gko::CudaAllocator>(), stream.get());
         cuda2 = gko::CudaExecutor::create(
-            gko::CudaExecutor::get_num_devices() - 1, omp, false,
-            gko::default_cuda_alloc_mode, other_stream.get());
+            gko::CudaExecutor::get_num_devices() - 1, omp,
+            std::make_shared<gko::CudaAllocator>(), other_stream.get());
         cuda3 = gko::CudaExecutor::create(
-            0, omp, false, gko::allocation_mode::unified_global, stream.get());
+            0, omp, std::make_shared<gko::CudaUnifiedAllocator>(0),
+            stream.get());
 #else
         cuda = gko::CudaExecutor::create(0, omp);
         cuda2 = gko::CudaExecutor::create(
             gko::CudaExecutor::get_num_devices() - 1, omp);
-        cuda3 = gko::CudaExecutor::create(0, omp, false,
-                                          gko::allocation_mode::unified_global);
+        cuda3 = gko::CudaExecutor::create(
+            0, omp, std::make_shared<gko::CudaUnifiedAllocator>(0));
 #endif
     }
 
diff --git a/cuda/test/base/memory.cpp b/cuda/test/base/memory.cpp
new file mode 100644
index 00000000000..a329817f4af
--- /dev/null
+++ b/cuda/test/base/memory.cpp
@@ -0,0 +1,126 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <memory>
+#include <type_traits>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/exception.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/base/executor.hpp>
+
+
+#include "cuda/test/utils.hpp"
+
+
+namespace {
+
+
+class Memory : public CudaTestFixture {
+protected:
+    Memory()
+        : host_exec_with_pinned{gko::OmpExecutor::create(
+              std::make_shared<gko::CudaHostAllocator>(0))},
+          host_exec_with_unified{gko::OmpExecutor::create(
+              std::make_shared<gko::CudaUnifiedAllocator>(0))},
+          exec_with_normal{gko::CudaExecutor::create(
+              0, ref, std::make_shared<gko::CudaAllocator>(),
+              exec->get_stream())},
+          exec_with_async{gko::CudaExecutor::create(
+              0, host_exec_with_pinned,
+              std::make_shared<gko::CudaAsyncAllocator>(exec->get_stream()),
+              exec->get_stream())},
+          exec_with_unified{gko::CudaExecutor::create(
+              0, host_exec_with_unified,
+              std::make_shared<gko::CudaUnifiedAllocator>(0),
+              exec->get_stream())}
+    {}
+
+    std::shared_ptr<gko::OmpExecutor> host_exec_with_pinned;
+    std::shared_ptr<gko::OmpExecutor> host_exec_with_unified;
+    std::shared_ptr<gko::CudaExecutor> exec_with_normal;
+    std::shared_ptr<gko::CudaExecutor> exec_with_async;
+    std::shared_ptr<gko::CudaExecutor> exec_with_unified;
+};
+
+
+TEST_F(Memory, DeviceAllocationWorks)
+{
+    gko::array<int> data{exec_with_normal, {1, 2}};
+
+    GKO_ASSERT_ARRAY_EQ(data, I<int>({1, 2}));
+}
+
+
+TEST_F(Memory, AsyncDeviceAllocationWorks)
+{
+    gko::array<int> data{exec_with_async, {1, 2}};
+
+    GKO_ASSERT_ARRAY_EQ(data, I<int>({1, 2}));
+}
+
+
+TEST_F(Memory, UnifiedDeviceAllocationWorks)
+{
+    gko::array<int> data{exec_with_unified, {1, 2}};
+    exec->synchronize();
+
+    ASSERT_EQ(data.get_const_data()[0], 1);
+    ASSERT_EQ(data.get_const_data()[1], 2);
+}
+
+
+TEST_F(Memory, HostUnifiedAllocationWorks)
+{
+    gko::array<int> data{host_exec_with_unified, {1, 2}};
+
+    ASSERT_EQ(data.get_const_data()[0], 1);
+    ASSERT_EQ(data.get_const_data()[1], 2);
+}
+
+
+TEST_F(Memory, HostPinnedAllocationWorks)
+{
+    gko::array<int> data{host_exec_with_pinned, {1, 2}};
+
+    ASSERT_EQ(data.get_const_data()[0], 1);
+    ASSERT_EQ(data.get_const_data()[1], 2);
+}
+
+
+}  // namespace
diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp
index 814405ba0d9..e1156b91903 100644
--- a/cuda/test/utils.hpp
+++ b/cuda/test/utils.hpp
@@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/stream.hpp>
 
 
 #include "cuda/base/device.hpp"
@@ -60,8 +61,9 @@ class CudaTestFixture : public ::testing::Test {
     CudaTestFixture()
         : ref(gko::ReferenceExecutor::create()),
 #ifdef GKO_TEST_NONDEFAULT_STREAM
+          stream(0),
           exec(gko::CudaExecutor::create(
-              0, ref, false, gko::default_cuda_alloc_mode, stream.get()))
+              0, ref, std::make_shared<gko::CudaAllocator>(), stream.get()))
 #else
           exec(gko::CudaExecutor::create(0, ref))
 #endif
diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp
index d066d272f81..3789274c3f3 100644
--- a/devices/cuda/executor.cpp
+++ b/devices/cuda/executor.cpp
@@ -64,31 +64,4 @@ bool CudaExecutor::verify_memory_to(const HipExecutor* dest_exec) const
 }
 
 
-void CudaExecutor::increase_num_execs(unsigned device_id)
-{
-#ifdef GKO_COMPILING_CUDA_DEVICE
-    // increase the Cuda Device count only when ginkgo build cuda
-    std::lock_guard<std::mutex> guard(nvidia_device::get_mutex(device_id));
-    nvidia_device::get_num_execs(device_id)++;
-#endif  // GKO_COMPILING_CUDA_DEVICE
-}
-
-
-void CudaExecutor::decrease_num_execs(unsigned device_id)
-{
-#ifdef GKO_COMPILING_CUDA_DEVICE
-    // increase the Cuda Device count only when ginkgo build cuda
-    std::lock_guard<std::mutex> guard(nvidia_device::get_mutex(device_id));
-    nvidia_device::get_num_execs(device_id)--;
-#endif  // GKO_COMPILING_CUDA_DEVICE
-}
-
-
-unsigned CudaExecutor::get_num_execs(unsigned device_id)
-{
-    std::lock_guard<std::mutex> guard(nvidia_device::get_mutex(device_id));
-    return nvidia_device::get_num_execs(device_id);
-}
-
-
 }  // namespace gko
diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp
index 60efb4c53a3..b044074c19e 100644
--- a/devices/hip/executor.cpp
+++ b/devices/hip/executor.cpp
@@ -61,38 +61,4 @@ bool HipExecutor::verify_memory_to(const CudaExecutor* dest_exec) const
 }
 
 
-#if (GINKGO_HIP_PLATFORM_NVCC == 1)
-using hip_device_class = nvidia_device;
-#else
-using hip_device_class = amd_device;
-#endif
-
-
-void HipExecutor::increase_num_execs(int device_id)
-{
-#ifdef GKO_COMPILING_HIP_DEVICE
-    // increase the HIP Device count only when ginkgo build hip
-    std::lock_guard<std::mutex> guard(hip_device_class::get_mutex(device_id));
-    hip_device_class::get_num_execs(device_id)++;
-#endif  // GKO_COMPILING_HIP_DEVICE
-}
-
-
-void HipExecutor::decrease_num_execs(int device_id)
-{
-#ifdef GKO_COMPILING_HIP_DEVICE
-    // increase the HIP Device count only when ginkgo build hip
-    std::lock_guard<std::mutex> guard(hip_device_class::get_mutex(device_id));
-    hip_device_class::get_num_execs(device_id)--;
-#endif  // GKO_COMPILING_HIP_DEVICE
-}
-
-
-int HipExecutor::get_num_execs(int device_id)
-{
-    std::lock_guard<std::mutex> guard(hip_device_class::get_mutex(device_id));
-    return hip_device_class::get_num_execs(device_id);
-}
-
-
 }  // namespace gko
diff --git a/devices/omp/executor.cpp b/devices/omp/executor.cpp
index 352216f7633..f8e700bc2d5 100644
--- a/devices/omp/executor.cpp
+++ b/devices/omp/executor.cpp
@@ -55,7 +55,10 @@ void OmpExecutor::populate_exec_info(const machine_topology* mach_topo)
 }
 
 
-void OmpExecutor::raw_free(void* ptr) const noexcept { std::free(ptr); }
+void OmpExecutor::raw_free(void* ptr) const noexcept
+{
+    return alloc_->deallocate(ptr);
+}
 
 
 std::shared_ptr<Executor> OmpExecutor::get_master() noexcept
@@ -72,7 +75,7 @@ std::shared_ptr<const Executor> OmpExecutor::get_master() const noexcept
 
 void* OmpExecutor::raw_alloc(size_type num_bytes) const
 {
-    return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "OMP", num_bytes);
+    return alloc_->allocate(num_bytes);
 }
 
 
diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp
index 3b9bfce07b8..b4503bf782e 100644
--- a/dpcpp/base/executor.dp.cpp
+++ b/dpcpp/base/executor.dp.cpp
@@ -51,6 +51,39 @@ namespace gko {
 namespace detail {
 
 
+DpcppAllocator::DpcppAllocator(sycl::queue* queue) : queue_{queue} {}
+
+
+void* DpcppAllocator::allocate(size_type size)
+{
+    return sycl::malloc_device(size, *queue_);
+}
+
+
+void DpcppAllocator::deallocate(void* ptr)
+{
+    queue_->wait_and_throw();
+    sycl::free(ptr, queue_->get_context());
+}
+
+
+DpcppUnifiedAllocator::DpcppUnifiedAllocator(sycl::queue* queue) : queue_{queue}
+{}
+
+
+void* DpcppUnifiedAllocator::allocate(size_type size)
+{
+    return sycl::malloc_shared(size, *queue_);
+}
+
+
+void DpcppUnifiedAllocator::deallocate(void* ptr)
+{
+    queue_->wait_and_throw();
+    sycl::free(ptr, queue_->get_context());
+}
+
+
 const std::vector<sycl::device> get_devices(std::string device_type)
 {
     std::map<std::string, sycl::info::device_type> device_type_map{
diff --git a/hip/test/base/hip_executor_reset.cpp b/dpcpp/base/memory.dp.cpp
similarity index 63%
rename from hip/test/base/hip_executor_reset.cpp
rename to dpcpp/base/memory.dp.cpp
index 39e3252e053..b1ccd007dea 100644
--- a/hip/test/base/hip_executor_reset.cpp
+++ b/dpcpp/base/memory.dp.cpp
@@ -30,58 +30,43 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 ******************************<GINKGO LICENSE>*******************************/
 
-#include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/memory.hpp>
 
 
-#include <thread>
+#include <CL/sycl.hpp>
 
 
-#include <gtest/gtest.h>
+namespace gko {
 
 
-namespace {
+DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {}
 
 
-#define GTEST_ASSERT_NO_EXIT(statement) \
-    ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "")
-
-
-TEST(DeviceReset, HipCuda)
+void* DpcppAllocator::allocate_impl(sycl::queue* queue,
+                                    size_type num_bytes) const
 {
-    GTEST_ASSERT_NO_EXIT({
-        auto ref = gko::ReferenceExecutor::create();
-        auto hip = gko::HipExecutor::create(0, ref, true);
-        auto cuda = gko::CudaExecutor::create(0, ref, true);
-    });
+    return sycl::malloc_device(size, *queue);
 }
 
 
-TEST(DeviceReset, CudaHip)
+void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
 {
-    GTEST_ASSERT_NO_EXIT({
-        auto ref = gko::ReferenceExecutor::create();
-        auto cuda = gko::CudaExecutor::create(0, ref, true);
-        auto hip = gko::HipExecutor::create(0, ref, true);
-    });
+    queue->wait_and_throw();
+    sycl::free(ptr, queue->get_context());
 }
 
 
-void func()
+void* DpcppUnifiedAllocator::allocate(size_type num_bytes)
 {
-    auto ref = gko::ReferenceExecutor::create();
-    auto exec = gko::HipExecutor::create(0, ref, true);
+    return sycl::malloc_shared(size, *queue_);
 }
 
 
-TEST(DeviceReset, HipHip)
+void DpcppUnifiedAllocator::deallocate(void* ptr)
 {
-    GTEST_ASSERT_NO_EXIT({
-        std::thread t1(func);
-        std::thread t2(func);
-        t1.join();
-        t2.join();
-    });
+    queue_->wait_and_throw();
+    sycl::free(ptr, queue_->get_context());
 }
 
 
-}  // namespace
+}  // namespace gko
diff --git a/dpcpp/test/base/CMakeLists.txt b/dpcpp/test/base/CMakeLists.txt
index bb9c8a75050..5c0ca601f04 100644
--- a/dpcpp/test/base/CMakeLists.txt
+++ b/dpcpp/test/base/CMakeLists.txt
@@ -3,3 +3,4 @@ ginkgo_create_dpcpp_test(dim3)
 ginkgo_create_dpcpp_test(kernel_launch)
 # set correct flags for kernel_launch.hpp
 target_compile_definitions(dpcpp_test_base_kernel_launch PRIVATE GKO_COMPILING_DPCPP)
+ginkgo_create_dpcpp_test(memory)
\ No newline at end of file
diff --git a/dpcpp/test/base/memory.dp.cpp b/dpcpp/test/base/memory.dp.cpp
new file mode 100644
index 00000000000..e587660cde3
--- /dev/null
+++ b/dpcpp/test/base/memory.dp.cpp
@@ -0,0 +1,98 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <memory>
+#include <type_traits>
+
+
+#include <gtest/gtest.h>
+
+
+#include <ginkgo/core/base/exception.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/base/executor.hpp>
+
+
+#include "dpcpp/test/utils.hpp"
+
+
+namespace {
+
+
+class Memory : public ::testing::Test {
+protected:
+    Memory()
+        : exec{gko::DpcppExecutor::create(0, gko::OmpExecutor::create())},
+          host_exec_with_unified{gko::OmpExecutor::create(
+              std::make_shared<gko::DpcppUnifiedAllocator>(exec->get_queue()))},
+          exec_with_unified{gko::DpcppExecutor::create(
+              exec->get_queue(), host_exec_with_unified,
+              std::make_shared<gko::DpcppUnifiedAllocator>(exec->get_queue()))}
+    {}
+
+    std::shared_ptr<gko::DpcppExecutor> exec;
+    std::shared_ptr<gko::OmpExecutor> host_exec_with_unified;
+    std::shared_ptr<gko::DpcppExecutor> exec_with_unified;
+};
+
+
+TEST_F(Memory, DeviceAllocationWorks)
+{
+    gko::array<int> data{exec, {1, 2}};
+
+    GKO_ASSERT_ARRAY_EQ(data, I<int>({1, 2}));
+}
+
+
+TEST_F(Memory, UnifiedDeviceAllocationWorks)
+{
+    gko::array<int> data{exec_with_unified, {1, 2}};
+    exec->synchronize();
+
+    ASSERT_EQ(data.get_const_data()[0], 1);
+    ASSERT_EQ(data.get_const_data()[1], 2);
+}
+
+
+TEST_F(Memory, HostUnifiedAllocationWorks)
+{
+    gko::array<int> data{host_exec_with_unified, {1, 2}};
+
+    ASSERT_EQ(data.get_const_data()[0], 1);
+    ASSERT_EQ(data.get_const_data()[1], 2);
+}
+
+
+}  // namespace
diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
index f6f28cdaab0..17639fefef2 100644
--- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
+++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp
@@ -68,13 +68,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp
index c0235f75e55..b096e48c71a 100644
--- a/examples/cb-gmres/cb-gmres.cpp
+++ b/examples/cb-gmres/cb-gmres.cpp
@@ -108,13 +108,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp
index e82d94ed117..8a17f651650 100644
--- a/examples/custom-logger/custom-logger.cpp
+++ b/examples/custom-logger/custom-logger.cpp
@@ -259,13 +259,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp
index af08dbdf226..4610413fe9c 100644
--- a/examples/custom-matrix-format/custom-matrix-format.cpp
+++ b/examples/custom-matrix-format/custom-matrix-format.cpp
@@ -255,13 +255,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
index 9389f86cc45..e07f1bf92fb 100644
--- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
+++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp
@@ -158,13 +158,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp
index aa32e0e879a..33946b7de44 100644
--- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp
+++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp
@@ -68,13 +68,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp
index 5d8270f1ca1..460370b7e00 100644
--- a/examples/inverse-iteration/inverse-iteration.cpp
+++ b/examples/inverse-iteration/inverse-iteration.cpp
@@ -72,13 +72,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
index 24db221a062..fa034772e60 100644
--- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
+++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp
@@ -71,13 +71,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp
index f38c5a6cb49..9b4d0ee5263 100644
--- a/examples/iterative-refinement/iterative-refinement.cpp
+++ b/examples/iterative-refinement/iterative-refinement.cpp
@@ -68,13 +68,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp
index 7182bc9ad8c..5a7a8c086af 100644
--- a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp
+++ b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp
@@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 int main()
 {
     // Instantiate a CUDA executor
-    auto gpu = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true);
+    auto gpu = gko::CudaExecutor::create(0, gko::OmpExecutor::create());
     // Read data
     auto A = gko::read<gko::matrix::Csr<>>(std::cin, gpu);
     auto b = gko::read<gko::matrix::Dense<>>(std::cin, gpu);
diff --git a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp
index 6f1600d2805..9edd7ff29a1 100644
--- a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp
+++ b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp
@@ -71,13 +71,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp
index 7da28611fcb..9d05e9c65b7 100644
--- a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp
+++ b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp
@@ -69,13 +69,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp
index 3510a2163e1..0882d755cdc 100644
--- a/examples/mixed-precision-ir/mixed-precision-ir.cpp
+++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp
@@ -76,13 +76,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp
index 78461de39ef..6b327c1c708 100644
--- a/examples/mixed-spmv/mixed-spmv.cpp
+++ b/examples/mixed-spmv/mixed-spmv.cpp
@@ -170,13 +170,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp
index 6f75ca29630..a455ca2e8ed 100644
--- a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp
+++ b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp
@@ -64,13 +64,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp
index 06ac006e4db..4590f1582dd 100644
--- a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp
+++ b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp
@@ -62,13 +62,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp
index 51fdf97d4a4..05ee0503a5f 100644
--- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp
+++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp
@@ -230,13 +230,12 @@ void solve_system(const std::string& executor_string,
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp
index bcca6fe648e..d73747e3069 100644
--- a/examples/papi-logging/papi-logging.cpp
+++ b/examples/papi-logging/papi-logging.cpp
@@ -151,13 +151,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp
index f357a8d4619..5f036728924 100644
--- a/examples/performance-debugging/performance-debugging.cpp
+++ b/examples/performance-debugging/performance-debugging.cpp
@@ -371,13 +371,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp
index 7602600a514..e16f0b26968 100644
--- a/examples/poisson-solver/poisson-solver.cpp
+++ b/examples/poisson-solver/poisson-solver.cpp
@@ -144,13 +144,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp
index 37963f205cc..b64b588c4ef 100644
--- a/examples/preconditioned-solver/preconditioned-solver.cpp
+++ b/examples/preconditioned-solver/preconditioned-solver.cpp
@@ -69,13 +69,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp
index 6ab3169b326..384bd9d997b 100644
--- a/examples/simple-solver-logging/simple-solver-logging.cpp
+++ b/examples/simple-solver-logging/simple-solver-logging.cpp
@@ -85,13 +85,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp
index 8f665f98496..81dc9ee6d74 100644
--- a/examples/simple-solver/simple-solver.cpp
+++ b/examples/simple-solver/simple-solver.cpp
@@ -89,13 +89,12 @@ int main(int argc, char* argv[])
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp
index 6bf3cc21a8a..63adfaa5571 100644
--- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp
+++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp
@@ -165,13 +165,12 @@ void solve_system(const std::string& executor_string,
             {"omp", [] { return gko::OmpExecutor::create(); }},
             {"cuda",
              [] {
-                 return gko::CudaExecutor::create(0, gko::OmpExecutor::create(),
-                                                  true);
+                 return gko::CudaExecutor::create(0,
+                                                  gko::OmpExecutor::create());
              }},
             {"hip",
              [] {
-                 return gko::HipExecutor::create(0, gko::OmpExecutor::create(),
-                                                 true);
+                 return gko::HipExecutor::create(0, gko::OmpExecutor::create());
              }},
             {"dpcpp",
              [] {
diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt
index cf3f68dc7fc..f1000f5b6ef 100644
--- a/hip/CMakeLists.txt
+++ b/hip/CMakeLists.txt
@@ -1,9 +1,13 @@
 set(GINKGO_HIP_SOURCES
+    base/device.hip.cpp
     base/device_matrix_data_kernels.hip.cpp
     base/exception.hip.cpp
     base/executor.hip.cpp
     base/index_set_kernels.hip.cpp
+    base/memory.hip.cpp
+    base/roctx.hip.cpp
     base/scoped_device_id.hip.cpp
+    base/stream.hip.cpp
     base/version.hip.cpp
     components/prefix_sum_kernels.hip.cpp
     distributed/matrix_kernels.hip.cpp
diff --git a/hip/base/device.hip.cpp b/hip/base/device.hip.cpp
new file mode 100644
index 00000000000..3fc01661cfe
--- /dev/null
+++ b/hip/base/device.hip.cpp
@@ -0,0 +1,61 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/stream.hpp>
+
+
+#include <hip/hip_runtime.h>
+
+
+#include <ginkgo/config.hpp>
+#include <ginkgo/core/base/device.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+#include "hip/base/scoped_device_id.hip.hpp"
+
+
+namespace gko {
+namespace kernels {
+namespace hip {
+
+
+void reset_device(int device_id)
+{
+    gko::detail::hip_scoped_device_id_guard guard{device_id};
+    hipDeviceReset();
+}
+
+
+}  // namespace hip
+}  // namespace kernels
+}  // namespace gko
diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp
index a67d1c06a3c..6b4b0fd5ddc 100644
--- a/hip/base/executor.hip.cpp
+++ b/hip/base/executor.hip.cpp
@@ -37,15 +37,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <hip/hip_runtime.h>
-#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX
-#include <roctx.h>
-#endif
 
 
 #include <ginkgo/config.hpp>
 #include <ginkgo/core/base/device.hpp>
 #include <ginkgo/core/base/exception_helpers.hpp>
-#include <ginkgo/core/log/profiler_hook.hpp>
 
 
 #include "hip/base/config.hip.hpp"
@@ -60,32 +56,22 @@ namespace gko {
 #include "common/cuda_hip/base/executor.hpp.inc"
 
 
-#if (GINKGO_HIP_PLATFORM_NVCC == 1)
-using hip_device_class = nvidia_device;
-#else
-using hip_device_class = amd_device;
-#endif
-
-
 std::shared_ptr<HipExecutor> HipExecutor::create(
     int device_id, std::shared_ptr<Executor> master, bool device_reset,
     allocation_mode alloc_mode, hipStream_t stream)
 {
     return std::shared_ptr<HipExecutor>(
-        new HipExecutor(device_id, std::move(master), device_reset, alloc_mode,
-                        stream),
-        [device_id](HipExecutor* exec) {
-            auto device_reset = exec->get_device_reset();
-            std::lock_guard<std::mutex> guard(
-                hip_device_class::get_mutex(device_id));
-            delete exec;
-            auto& num_execs = hip_device_class::get_num_execs(device_id);
-            num_execs--;
-            if (!num_execs && device_reset) {
-                detail::hip_scoped_device_id_guard g(device_id);
-                hipDeviceReset();
-            }
-        });
+        new HipExecutor(device_id, std::move(master),
+                        std::make_shared<HipAllocator>(), stream));
+}
+
+
+std::shared_ptr<HipExecutor> HipExecutor::create(
+    int device_id, std::shared_ptr<Executor> master,
+    std::shared_ptr<HipAllocatorBase> alloc, hipStream_t stream)
+{
+    return std::shared_ptr<HipExecutor>(new HipExecutor(
+        device_id, std::move(master), std::move(alloc), stream));
 }
 
 
@@ -125,42 +111,14 @@ void OmpExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes,
 void HipExecutor::raw_free(void* ptr) const noexcept
 {
     detail::hip_scoped_device_id_guard g(this->get_device_id());
-    auto error_code = hipFree(ptr);
-    if (error_code != hipSuccess) {
-#if GKO_VERBOSE_LEVEL >= 1
-        // Unfortunately, if memory free fails, there's not much we can do
-        std::cerr << "Unrecoverable HIP error on device "
-                  << this->get_device_id() << " in " << __func__ << ": "
-                  << hipGetErrorName(error_code) << ": "
-                  << hipGetErrorString(error_code) << std::endl
-                  << "Exiting program" << std::endl;
-#endif  // GKO_VERBOSE_LEVEL >= 1
-        std::exit(error_code);
-    }
+    alloc_->deallocate(ptr);
 }
 
 
 void* HipExecutor::raw_alloc(size_type num_bytes) const
 {
-    void* dev_ptr = nullptr;
     detail::hip_scoped_device_id_guard g(this->get_device_id());
-    int error_code = 0;
-    if (this->alloc_mode_ == allocation_mode::device) {
-        error_code = hipMalloc(&dev_ptr, num_bytes);
-#if !(GKO_HIP_PLATFORM_HCC == 1)
-    } else if (this->alloc_mode_ == allocation_mode::unified_global) {
-        error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachGlobal);
-    } else if (this->alloc_mode_ == allocation_mode::unified_host) {
-        error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachHost);
-#endif
-    } else {
-        GKO_NOT_SUPPORTED(this->alloc_mode_);
-    }
-    if (error_code != hipErrorMemoryAllocation) {
-        GKO_ASSERT_NO_HIP_ERRORS(error_code);
-    }
-    GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes);
-    return dev_ptr;
+    return alloc_->allocate(num_bytes);
 }
 
 
@@ -309,67 +267,4 @@ void HipExecutor::init_handles()
 }
 
 
-hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id)
-{
-    detail::hip_scoped_device_id_guard g(device_id_);
-    GKO_ASSERT_NO_HIP_ERRORS(hipStreamCreate(&stream_));
-}
-
-
-hip_stream::~hip_stream()
-{
-    if (stream_) {
-        detail::hip_scoped_device_id_guard g(device_id_);
-        hipStreamDestroy(stream_);
-    }
-}
-
-
-hip_stream::hip_stream(hip_stream&& other)
-    : stream_{std::exchange(other.stream_, nullptr)},
-      device_id_{std::exchange(other.device_id_, -1)}
-{}
-
-
-GKO_HIP_STREAM_STRUCT* hip_stream::get() const { return stream_; }
-
-
-namespace log {
-
-
-#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX
-
-void begin_roctx(const char* name, profile_event_category)
-{
-    roctxRangePush(name);
-}
-
-void end_roctx(const char*, profile_event_category) { roctxRangePop(); }
-
-#else
-
-void begin_roctx(const char* name, profile_event_category)
-    GKO_NOT_COMPILED(roctx);
-
-void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx);
-
-#endif
-
-
-}  // namespace log
-
-
-namespace kernels {
-namespace hip {
-
-
-void reset_device(int device_id)
-{
-    gko::detail::hip_scoped_device_id_guard guard{device_id};
-    hipDeviceReset();
-}
-
-
-}  // namespace hip
-}  // namespace kernels
 }  // namespace gko
diff --git a/hip/base/memory.hip.cpp b/hip/base/memory.hip.cpp
new file mode 100644
index 00000000000..f2a8977525f
--- /dev/null
+++ b/hip/base/memory.hip.cpp
@@ -0,0 +1,97 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/memory.hpp>
+
+
+#include <hip/hip_runtime.h>
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+namespace gko {
+
+
+#define GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(_operation, _size)       \
+    {                                                                \
+        auto error_code = _operation;                                \
+        if (error_code == hipErrorMemoryAllocation) {                \
+            throw AllocationError(__FILE__, __LINE__, "hip", _size); \
+        } else {                                                     \
+            GKO_ASSERT_NO_HIP_ERRORS(error_code);                    \
+        }                                                            \
+    }
+
+
+#if GKO_VERBOSE_LEVEL >= 1
+#define GKO_EXIT_ON_HIP_ERROR(_operation)                                  \
+    {                                                                      \
+        const auto error_code = _operation;                                \
+        if (error_code != hipSuccess) {                                    \
+            int device_id{-1};                                             \
+            hipGetDevice(&device_id);                                      \
+            std::cerr << "Unrecoverable HIP error on device " << device_id \
+                      << " in " << __func__ << ": "                        \
+                      << hipGetErrorName(error_code) << ": "               \
+                      << hipGetErrorString(error_code) << std::endl        \
+                      << "Exiting program" << std::endl;                   \
+            std::exit(error_code);                                         \
+        }                                                                  \
+    }
+#else
+#define GKO_EXIT_ON_HIP_ERROR(_operation)   \
+    {                                       \
+        const auto error_code = _operation; \
+        if (error_code != hipSuccess) {     \
+            std::exit(error_code);          \
+        }                                   \
+    }
+#endif
+
+
+void* HipAllocator::allocate(size_type num_bytes) const
+{
+    void* dev_ptr{};
+    GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&dev_ptr, num_bytes),
+                                        num_bytes);
+    return dev_ptr;
+}
+
+
+void HipAllocator::deallocate(void* dev_ptr) const
+{
+    GKO_EXIT_ON_HIP_ERROR(hipFree(dev_ptr));
+}
+
+
+}  // namespace gko
diff --git a/hip/base/roctx.hip.cpp b/hip/base/roctx.hip.cpp
new file mode 100644
index 00000000000..9f309b93362
--- /dev/null
+++ b/hip/base/roctx.hip.cpp
@@ -0,0 +1,70 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/config.hpp>
+
+
+#include <hip/hip_runtime.h>
+#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX
+#include <roctx.h>
+#endif
+
+
+#include <ginkgo/core/base/exception_helpers.hpp>
+#include <ginkgo/core/log/profiler_hook.hpp>
+
+
+namespace gko {
+namespace log {
+
+
+#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX
+
+void begin_roctx(const char* name, profile_event_category)
+{
+    roctxRangePush(name);
+}
+
+void end_roctx(const char*, profile_event_category) { roctxRangePop(); }
+
+#else
+
+void begin_roctx(const char* name, profile_event_category)
+    GKO_NOT_COMPILED(roctx);
+
+void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx);
+
+#endif
+
+
+}  // namespace log
+}  // namespace gko
diff --git a/hip/base/stream.hip.cpp b/hip/base/stream.hip.cpp
new file mode 100644
index 00000000000..e5817eb9ebd
--- /dev/null
+++ b/hip/base/stream.hip.cpp
@@ -0,0 +1,78 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#include <ginkgo/core/base/stream.hpp>
+
+
+#include <hip/hip_runtime.h>
+
+
+#include <ginkgo/config.hpp>
+#include <ginkgo/core/base/device.hpp>
+#include <ginkgo/core/base/exception_helpers.hpp>
+
+
+#include "hip/base/scoped_device_id.hip.hpp"
+
+
+namespace gko {
+
+
+hip_stream::hip_stream() : stream_{}, device_id_{-1} {}
+
+
+hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id)
+{
+    detail::hip_scoped_device_id_guard g(device_id_);
+    GKO_ASSERT_NO_HIP_ERRORS(hipStreamCreate(&stream_));
+}
+
+
+hip_stream::~hip_stream()
+{
+    if (stream_) {
+        detail::hip_scoped_device_id_guard g(device_id_);
+        hipStreamDestroy(stream_);
+    }
+}
+
+
+hip_stream::hip_stream(hip_stream&& other)
+    : stream_{std::exchange(other.stream_, nullptr)},
+      device_id_{std::exchange(other.device_id_, -1)}
+{}
+
+
+GKO_HIP_STREAM_STRUCT* hip_stream::get() const { return stream_; }
+
+
+}  // namespace gko
diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt
index 7ed0d2ceb52..f597a3d6e3d 100644
--- a/hip/test/base/CMakeLists.txt
+++ b/hip/test/base/CMakeLists.txt
@@ -1,6 +1,5 @@
 ginkgo_create_hip_test(hip_executor)
 ginkgo_create_test(index_set)
-ginkgo_create_test(hip_executor_reset ADDITIONAL_LIBRARIES Threads::Threads)
 if(GINKGO_HAVE_HWLOC)
     find_package(NUMA REQUIRED)
     ginkgo_create_hip_test(hip_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA)
diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp
index d27dd58d132..e531fa739e6 100644
--- a/hip/test/base/hip_executor.hip.cpp
+++ b/hip/test/base/hip_executor.hip.cpp
@@ -109,18 +109,18 @@ class HipExecutor : public ::testing::Test {
         ASSERT_GT(gko::HipExecutor::get_num_devices(), 0);
 #ifdef GKO_TEST_NONDEFAULT_STREAM
         hip = gko::HipExecutor::create(
-            0, omp, false, gko::default_hip_alloc_mode, stream.get());
-        hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1,
-                                        omp, false, gko::default_hip_alloc_mode,
-                                        other_stream.get());
+            0, omp, std::make_shared<gko::HipAllocator>(), stream.get());
+        hip2 = gko::HipExecutor::create(
+            gko::HipExecutor::get_num_devices() - 1, omp,
+            std::make_shared<gko::HipAllocator>(), other_stream.get());
         hip3 = gko::HipExecutor::create(
-            0, omp, false, gko::allocation_mode::unified_global, stream.get());
+            0, omp, std::make_shared<gko::HipAllocator>(), stream.get());
 #else
         hip = gko::HipExecutor::create(0, omp);
         hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1,
                                         omp);
-        hip3 = gko::HipExecutor::create(0, omp, false,
-                                        gko::allocation_mode::unified_global);
+        hip3 = gko::HipExecutor::create(0, omp,
+                                        std::make_shared<gko::HipAllocator>());
 #endif
     }
 
diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp
index 9337da14139..bf7073cf9a1 100644
--- a/hip/test/utils.hip.hpp
+++ b/hip/test/utils.hip.hpp
@@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/stream.hpp>
 
 
 #include "hip/base/device.hpp"
@@ -60,8 +61,9 @@ class HipTestFixture : public ::testing::Test {
     HipTestFixture()
         : ref(gko::ReferenceExecutor::create()),
 #ifdef GKO_TEST_NONDEFAULT_STREAM
+          stream(0),
           exec(gko::HipExecutor::create(
-              0, ref, false, gko::default_hip_alloc_mode, stream.get()))
+              0, ref, std::make_shared<gko::HipAllocator>(), stream.get()))
 #else
           exec(gko::HipExecutor::create(0, ref))
 #endif
diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp
index 00d1049b555..1bbcb458813 100644
--- a/include/ginkgo/core/base/executor.hpp
+++ b/include/ginkgo/core/base/executor.hpp
@@ -47,7 +47,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/device.hpp>
+#include <ginkgo/core/base/fwd_defs.hpp>
 #include <ginkgo/core/base/machine_topology.hpp>
+#include <ginkgo/core/base/memory.hpp>
 #include <ginkgo/core/base/scoped_device_id_guard.hpp>
 #include <ginkgo/core/base/types.hpp>
 #include <ginkgo/core/log/logger.hpp>
@@ -121,31 +123,6 @@ constexpr allocation_mode default_hip_alloc_mode =
 }  // namespace gko
 
 
-// after intel/llvm September'22 release, which uses major version 6, they
-// introduce another inline namespace _V1.
-#if GINKGO_DPCPP_MAJOR_VERSION >= 6
-namespace sycl {
-inline namespace _V1 {
-
-
-class queue;
-
-
-}  // namespace _V1
-}  // namespace sycl
-#else  // GINKGO_DPCPP_MAJOR_VERSION < 6
-inline namespace cl {
-namespace sycl {
-
-
-class queue;
-
-
-}  // namespace sycl
-}  // namespace cl
-#endif
-
-
 /**
  * The enum class is for the dpcpp queue property. It's legal to use a binary
  * or(|) operation to combine several properties.
@@ -170,24 +147,6 @@ GKO_ATTRIBUTES GKO_INLINE dpcpp_queue_property operator|(dpcpp_queue_property a,
 }
 
 
-struct cublasContext;
-
-struct cusparseContext;
-
-struct CUstream_st;
-
-struct hipblasContext;
-
-struct hipsparseContext;
-
-#if GINKGO_HIP_PLATFORM_HCC
-struct ihipStream_t;
-#define GKO_HIP_STREAM_STRUCT ihipStream_t
-#else
-#define GKO_HIP_STREAM_STRUCT CUstream_st
-#endif
-
-
 namespace gko {
 
 
@@ -1348,26 +1307,14 @@ class EnableDeviceReset {
      *
      * @param device_reset  whether to allow a device reset or not
      */
-    void set_device_reset(bool device_reset) { device_reset_ = device_reset; }
+    void set_device_reset(bool device_reset) {}
 
     /**
      * Returns the current status of the device reset boolean for this executor.
      *
      * @return the current status of the device reset boolean for this executor.
      */
-    bool get_device_reset() { return device_reset_; }
-
-protected:
-    /**
-     * Instantiate an EnableDeviceReset class
-     *
-     * @param device_reset  the starting device_reset status. Defaults to false.
-     */
-    EnableDeviceReset(bool device_reset = false) : device_reset_{device_reset}
-    {}
-
-private:
-    bool device_reset_{};
+    bool get_device_reset() { return false; }
 };
 
 
@@ -1404,9 +1351,11 @@ class OmpExecutor : public detail::ExecutorBase<OmpExecutor>,
     /**
      * Creates a new OmpExecutor.
      */
-    static std::shared_ptr<OmpExecutor> create()
+    static std::shared_ptr<OmpExecutor> create(
+        std::shared_ptr<CpuAllocatorBase> alloc =
+            std::make_shared<CpuAllocator>())
     {
-        return std::shared_ptr<OmpExecutor>(new OmpExecutor());
+        return std::shared_ptr<OmpExecutor>(new OmpExecutor(std::move(alloc)));
     }
 
     std::shared_ptr<Executor> get_master() noexcept override;
@@ -1428,7 +1377,8 @@ class OmpExecutor : public detail::ExecutorBase<OmpExecutor>,
     scoped_device_id_guard get_scoped_device_id_guard() const override;
 
 protected:
-    OmpExecutor()
+    OmpExecutor(std::shared_ptr<CpuAllocatorBase> alloc)
+        : alloc_{std::move(alloc)}
     {
         this->OmpExecutor::populate_exec_info(machine_topology::get_instance());
     }
@@ -1450,6 +1400,8 @@ class OmpExecutor : public detail::ExecutorBase<OmpExecutor>,
     GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false);
 
     bool verify_memory_to(const DpcppExecutor* dest_exec) const override;
+
+    std::shared_ptr<CpuAllocatorBase> alloc_;
 };
 
 
@@ -1469,9 +1421,12 @@ using DefaultExecutor = OmpExecutor;
  */
 class ReferenceExecutor : public OmpExecutor {
 public:
-    static std::shared_ptr<ReferenceExecutor> create()
+    static std::shared_ptr<ReferenceExecutor> create(
+        std::shared_ptr<CpuAllocatorBase> alloc =
+            std::make_shared<CpuAllocator>())
     {
-        return std::shared_ptr<ReferenceExecutor>(new ReferenceExecutor());
+        return std::shared_ptr<ReferenceExecutor>(
+            new ReferenceExecutor(std::move(alloc)));
     }
 
     scoped_device_id_guard get_scoped_device_id_guard() const override
@@ -1488,7 +1443,8 @@ class ReferenceExecutor : public OmpExecutor {
     }
 
 protected:
-    ReferenceExecutor()
+    ReferenceExecutor(std::shared_ptr<CpuAllocatorBase> alloc)
+        : OmpExecutor{std::move(alloc)}
     {
         this->ReferenceExecutor::populate_exec_info(
             machine_topology::get_instance());
@@ -1543,15 +1499,32 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
      * @param device_id  the CUDA device id of this device
      * @param master  an executor on the host that is used to invoke the device
      * kernels
-     * @param device_reset  whether to reset the device after the object exits
-     *                      the scope.
+     * @param device_reset  this option no longer has any effect.
      * @param alloc_mode  the allocation mode that the executor should operate
      *                    on. See @allocation_mode for more details
+     * @param stream  the stream to execute operations on.
+     */
+    [[deprecated(
+        "device_reset is deprecated entirely, call cudaDeviceReset directly. "
+        "alloc_mode was replaced by the Allocator type "
+        "hierarchy.")]] static std::shared_ptr<CudaExecutor>
+    create(int device_id, std::shared_ptr<Executor> master, bool device_reset,
+           allocation_mode alloc_mode = default_cuda_alloc_mode,
+           CUstream_st* stream = nullptr);
+
+    /**
+     * Creates a new CudaExecutor with a custom allocator and device stream.
+     *
+     * @param device_id  the CUDA device id of this device
+     * @param master  an executor on the host that is used to invoke the device
+     *                kernels.
+     * @param alloc  the allocator to use for device memory allocations.
+     * @param stream  the stream to execute operations on.
      */
     static std::shared_ptr<CudaExecutor> create(
         int device_id, std::shared_ptr<Executor> master,
-        bool device_reset = false,
-        allocation_mode alloc_mode = default_cuda_alloc_mode,
+        std::shared_ptr<CudaAllocatorBase> alloc =
+            std::make_shared<CudaAllocator>(),
         CUstream_st* stream = nullptr);
 
     std::shared_ptr<Executor> get_master() noexcept override;
@@ -1672,26 +1645,15 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
     void init_handles();
 
     CudaExecutor(int device_id, std::shared_ptr<Executor> master,
-                 bool device_reset = false,
-                 allocation_mode alloc_mode = default_cuda_alloc_mode,
-                 CUstream_st* stream = nullptr)
-        : EnableDeviceReset{device_reset},
-          master_(master),
-          alloc_mode_{alloc_mode},
-          stream_{stream}
+                 std::shared_ptr<CudaAllocatorBase> alloc, CUstream_st* stream)
+        : alloc_{std::move(alloc)}, master_(master), stream_{stream}
     {
         this->get_exec_info().device_id = device_id;
         this->get_exec_info().num_computing_units = 0;
         this->get_exec_info().num_pu_per_cu = 0;
         this->CudaExecutor::populate_exec_info(
             machine_topology::get_instance());
-
-        // it only gets attribute from device, so it should not be affected by
-        // DeviceReset.
         this->set_gpu_property();
-        // increase the number of executor before any operations may be affected
-        // by DeviceReset.
-        increase_num_execs(this->get_exec_info().device_id);
         this->init_handles();
     }
 
@@ -1711,12 +1673,6 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
 
     bool verify_memory_to(const CudaExecutor* dest_exec) const override;
 
-    static void increase_num_execs(unsigned device_id);
-
-    static void decrease_num_execs(unsigned device_id);
-
-    static unsigned get_num_execs(unsigned device_id);
-
     void populate_exec_info(const machine_topology* mach_topo) override;
 
 private:
@@ -1726,45 +1682,8 @@ class CudaExecutor : public detail::ExecutorBase<CudaExecutor>,
     using handle_manager = std::unique_ptr<T, std::function<void(T*)>>;
     handle_manager<cublasContext> cublas_handle_;
     handle_manager<cusparseContext> cusparse_handle_;
+    std::shared_ptr<CudaAllocatorBase> alloc_;
     CUstream_st* stream_;
-
-    allocation_mode alloc_mode_;
-};
-
-
-/**
- * An RAII wrapper for a custom CUDA stream.
- * The stream will be created on construction and destroyed when the lifetime of
- * the wrapper ends.
- */
-class cuda_stream {
-public:
-    /** Creates a new custom CUDA stream. */
-    cuda_stream(int device_id = 0);
-
-    /** Destroys the custom CUDA stream, if it wasn't moved-from already. */
-    ~cuda_stream();
-
-    cuda_stream(const cuda_stream&) = delete;
-
-    /** Move-constructs from an existing stream, which will be emptied. */
-    cuda_stream(cuda_stream&&);
-
-    cuda_stream& operator=(const cuda_stream&) = delete;
-
-    /** Move-assigns from an existing stream, which will be emptied. */
-    cuda_stream& operator=(cuda_stream&&) = delete;
-
-    /**
-     * Returns the native CUDA stream handle.
-     * In a moved-from cuda_stream, this will return nullptr.
-     */
-    CUstream_st* get() const;
-
-private:
-    CUstream_st* stream_;
-
-    int device_id_;
 };
 
 
@@ -1798,10 +1717,15 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
      * @param alloc_mode  the allocation mode that the executor should operate
      *                    on. See @allocation_mode for more details
      */
+    [[deprecated("")]] static std::shared_ptr<HipExecutor> create(
+        int device_id, std::shared_ptr<Executor> master, bool device_reset,
+        allocation_mode alloc_mode = default_hip_alloc_mode,
+        GKO_HIP_STREAM_STRUCT* stream = nullptr);
+
     static std::shared_ptr<HipExecutor> create(
         int device_id, std::shared_ptr<Executor> master,
-        bool device_reset = false,
-        allocation_mode alloc_mode = default_hip_alloc_mode,
+        std::shared_ptr<HipAllocatorBase> alloc =
+            std::make_shared<HipAllocator>(),
         GKO_HIP_STREAM_STRUCT* stream = nullptr);
 
     std::shared_ptr<Executor> get_master() noexcept override;
@@ -1916,25 +1840,15 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
     void init_handles();
 
     HipExecutor(int device_id, std::shared_ptr<Executor> master,
-                bool device_reset = false,
-                allocation_mode alloc_mode = default_hip_alloc_mode,
-                GKO_HIP_STREAM_STRUCT* stream = nullptr)
-        : EnableDeviceReset{device_reset},
-          master_(master),
-          alloc_mode_(alloc_mode),
-          stream_{stream}
+                std::shared_ptr<HipAllocatorBase> alloc,
+                GKO_HIP_STREAM_STRUCT* stream)
+        : master_{std::move(master)}, alloc_{std::move(alloc)}, stream_{stream}
     {
         this->get_exec_info().device_id = device_id;
         this->get_exec_info().num_computing_units = 0;
         this->get_exec_info().num_pu_per_cu = 0;
         this->HipExecutor::populate_exec_info(machine_topology::get_instance());
-
-        // it only gets attribute from device, so it should not be affected by
-        // DeviceReset.
         this->set_gpu_property();
-        // increase the number of executor before any operations may be affected
-        // by DeviceReset.
-        increase_num_execs(this->get_exec_info().device_id);
         this->init_handles();
     }
 
@@ -1954,12 +1868,6 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
 
     bool verify_memory_to(const HipExecutor* dest_exec) const override;
 
-    static void increase_num_execs(int device_id);
-
-    static void decrease_num_execs(int device_id);
-
-    static int get_num_execs(int device_id);
-
     void populate_exec_info(const machine_topology* mach_topo) override;
 
 private:
@@ -1969,45 +1877,8 @@ class HipExecutor : public detail::ExecutorBase<HipExecutor>,
     using handle_manager = std::unique_ptr<T, std::function<void(T*)>>;
     handle_manager<hipblasContext> hipblas_handle_;
     handle_manager<hipsparseContext> hipsparse_handle_;
-
-    allocation_mode alloc_mode_;
-    GKO_HIP_STREAM_STRUCT* stream_;
-};
-
-
-/**
- * An RAII wrapper for a custom HIP stream.
- * The stream will be created on construction and destroyed when the lifetime of
- * the wrapper ends.
- */
-class hip_stream {
-public:
-    /** Creates a new custom HIP stream. */
-    hip_stream(int device_id = 0);
-
-    /** Destroys the custom HIP stream, if it wasn't moved-from already. */
-    ~hip_stream();
-
-    hip_stream(const hip_stream&) = delete;
-
-    /** Move-constructs from an existing stream, which will be emptied. */
-    hip_stream(hip_stream&&);
-
-    hip_stream& operator=(const hip_stream&) = delete;
-
-    /** Move-assigns from an existing stream, which will be emptied. */
-    hip_stream& operator=(hip_stream&&) = delete;
-
-    /**
-     * Returns the native HIP stream handle.
-     * In a moved-from hip_stream, this will return nullptr.
-     */
-    GKO_HIP_STREAM_STRUCT* get() const;
-
-private:
+    std::shared_ptr<HipAllocatorBase> alloc_;
     GKO_HIP_STREAM_STRUCT* stream_;
-
-    int device_id_;
 };
 
 
@@ -2043,6 +1914,28 @@ class DpcppExecutor : public detail::ExecutorBase<DpcppExecutor>,
         std::string device_type = "all",
         dpcpp_queue_property property = dpcpp_queue_property::in_order);
 
+    /**
+     * Creates a new DpcppExecutor from an existing SYCL queue.
+     *
+     * @param queue  the DPCPP device id of this device
+     * @param master  an executor on the host that is used to invoke the device
+     *                kernels
+     */
+    static std::shared_ptr<DpcppExecutor> create(
+        sycl::queue* queue, std::shared_ptr<Executor> master);
+
+    /**
+     * Creates a new DpcppExecutor from an existing SYCL queue.
+     *
+     * @param queue  the DPCPP device id of this device
+     * @param master  an executor on the host that is used to invoke the device
+     *                kernels
+     * @param alloc  the allocator used for memory allocation
+     */
+    static std::shared_ptr<DpcppExecutor> create(
+        sycl::queue* queue, std::shared_ptr<Executor> master,
+        std::shared_ptr<DpcppAllocatorBase> alloc);
+
     std::shared_ptr<Executor> get_master() noexcept override;
 
     std::shared_ptr<const Executor> get_master() const noexcept override;
diff --git a/include/ginkgo/core/base/fwd_defs.hpp b/include/ginkgo/core/base/fwd_defs.hpp
new file mode 100644
index 00000000000..03c3a2d3b09
--- /dev/null
+++ b/include/ginkgo/core/base/fwd_defs.hpp
@@ -0,0 +1,82 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_
+#define GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_
+
+#include <ginkgo/config.hpp>
+
+
+struct cublasContext;
+
+struct cusparseContext;
+
+struct CUstream_st;
+
+struct hipblasContext;
+
+struct hipsparseContext;
+
+#if GINKGO_HIP_PLATFORM_HCC
+struct ihipStream_t;
+#define GKO_HIP_STREAM_STRUCT ihipStream_t
+#else
+#define GKO_HIP_STREAM_STRUCT CUstream_st
+#endif
+
+
+// after intel/llvm September'22 release, which uses major version 6, they
+// introduce another inline namespace _V1.
+#if GINKGO_DPCPP_MAJOR_VERSION >= 6
+namespace sycl {
+inline namespace _V1 {
+
+
+class queue;
+
+
+}  // namespace _V1
+}  // namespace sycl
+#else  // GINKGO_DPCPP_MAJOR_VERSION < 6
+inline namespace cl {
+namespace sycl {
+
+
+class queue;
+
+
+}  // namespace sycl
+}  // namespace cl
+#endif
+
+
+#endif  // GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_
diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp
new file mode 100644
index 00000000000..ec25920dcea
--- /dev/null
+++ b/include/ginkgo/core/base/memory.hpp
@@ -0,0 +1,211 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_PUBLIC_CORE_BASE_MEMORY_HPP_
+#define GKO_PUBLIC_CORE_BASE_MEMORY_HPP_
+
+
+#include <ginkgo/core/base/fwd_defs.hpp>
+#include <ginkgo/core/base/types.hpp>
+
+
+namespace gko {
+
+
+/**
+ * Provides generic allocation and deallocation functionality to be used by an
+ * Executor.
+ */
+class Allocator {
+public:
+    virtual ~Allocator() = default;
+
+    virtual void* allocate(size_type num_bytes) const = 0;
+
+    virtual void deallocate(void* ptr) const = 0;
+};
+
+
+/**
+ * Implement this interface to provide an allocator for OmpExecutor or
+ * ReferenceExecutor.
+ */
+class CpuAllocatorBase : public Allocator {};
+
+
+/**
+ * Implement this interface to provide an allocator for CudaExecutor.
+ */
+class CudaAllocatorBase : public Allocator {};
+
+
+/**
+ * Implement this interface to provide an allocator for HipExecutor.
+ */
+class HipAllocatorBase : public Allocator {};
+
+
+/**
+ * Implement this interface to provide an allocator for DpcppExecutor.
+ */
+class DpcppAllocatorBase : public Allocator {
+public:
+    DpcppAllocatorBase(sycl::queue* queue);
+
+protected:
+    virtual void* allocate_impl(sycl::queue* queue,
+                                size_type num_bytes) const = 0;
+
+    virtual void deallocate_impl(sycl::queue* queue, void* ptr) const = 0;
+
+private:
+    sycl::queue* queue_;
+};
+
+
+/**
+ * Allocator using new/delete.
+ */
+class CpuAllocator : public CpuAllocatorBase {
+public:
+    void* allocate(size_type num_bytes) const override;
+
+    void deallocate(void* ptr) const override;
+};
+
+
+/**
+ * Allocator using cudaMalloc.
+ */
+class CudaAllocator : public CudaAllocatorBase {
+public:
+    void* allocate(size_type num_bytes) const override;
+
+    void deallocate(void* ptr) const override;
+};
+
+
+/*
+ * Allocator using cudaMallocAsync.
+ */
+class CudaAsyncAllocator : public CudaAllocatorBase {
+public:
+    void* allocate(size_type num_bytes) const override;
+
+    void deallocate(void* ptr) const override;
+
+    CudaAsyncAllocator(CUstream_st* stream);
+
+private:
+    CUstream_st* stream_;
+};
+
+
+/*
+ * Allocator using cudaMallocManaged
+ */
+class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase {
+public:
+    void* allocate(size_type num_bytes) const override;
+
+    void deallocate(void* ptr) const override;
+
+    CudaUnifiedAllocator(int device_id);
+
+    CudaUnifiedAllocator(int device_id, unsigned int flags);
+
+private:
+    int device_id_;
+    unsigned int flags_;
+};
+
+
+/*
+ * Allocator using cudaMallocHost.
+ */
+class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase {
+public:
+    void* allocate(size_type num_bytes) const override;
+
+    void deallocate(void* ptr) const override;
+
+    CudaHostAllocator(int device_id);
+
+private:
+    int device_id_;
+};
+
+
+/*
+ * Allocator using hipMalloc.
+ */
+class HipAllocator : public HipAllocatorBase {
+public:
+    void* allocate(size_type num_bytes) const override;
+
+    void deallocate(void* ptr) const override;
+};
+
+
+/*
+ * Allocator using sycl::malloc_device.
+ */
+class DpcppAllocator : public DpcppAllocatorBase {
+public:
+    using DpcppAllocatorBase::DpcppAllocatorBase;
+
+protected:
+    void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override;
+
+    void deallocate_impl(sycl::queue* queue, void* ptr) const override;
+};
+
+
+/*
+ * Allocator using sycl::malloc_shared.
+ */
+class DpcppUnifiedAllocator : public DpcppAllocatorBase,
+                              public CpuAllocatorBase {
+public:
+    using DpcppAllocatorBase::DpcppAllocatorBase;
+
+protected:
+    void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override;
+
+    void deallocate_impl(sycl::queue* queue, void* ptr) const override;
+};
+
+
+}  // namespace gko
+
+
+#endif  // GKO_PUBLIC_CORE_BASE_MEMORY_HPP_
diff --git a/include/ginkgo/core/base/stream.hpp b/include/ginkgo/core/base/stream.hpp
new file mode 100644
index 00000000000..4bb4aeecf9e
--- /dev/null
+++ b/include/ginkgo/core/base/stream.hpp
@@ -0,0 +1,124 @@
+/*******************************<GINKGO LICENSE>******************************
+Copyright (c) 2017-2023, the Ginkgo authors
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions
+are met:
+
+1. Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+
+2. Redistributions in binary form must reproduce the above copyright
+notice, this list of conditions and the following disclaimer in the
+documentation and/or other materials provided with the distribution.
+
+3. Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+******************************<GINKGO LICENSE>*******************************/
+
+#ifndef GKO_PUBLIC_CORE_BASE_STREAM_HPP_
+#define GKO_PUBLIC_CORE_BASE_STREAM_HPP_
+
+
+#include <ginkgo/core/base/executor.hpp>
+
+
+namespace gko {
+
+
+/**
+ * An RAII wrapper for a custom CUDA stream.
+ * The stream will be created on construction and destroyed when the lifetime of
+ * the wrapper ends.
+ */
+class cuda_stream {
+public:
+    /** Creates an empty stream wrapper, representing the default stream. */
+    cuda_stream();
+
+    /** Creates a new custom CUDA stream. */
+    cuda_stream(int device_id);
+
+    /** Destroys the custom CUDA stream, if it isn't empty. */
+    ~cuda_stream();
+
+    cuda_stream(const cuda_stream&) = delete;
+
+    /** Move-constructs from an existing stream, which will be emptied. */
+    cuda_stream(cuda_stream&&);
+
+    cuda_stream& operator=(const cuda_stream&) = delete;
+
+    /** Move-assigns from an existing stream, which will be emptied. */
+    cuda_stream& operator=(cuda_stream&&) = delete;
+
+    /**
+     * Returns the native CUDA stream handle.
+     * In an empty cuda_stream, this will return nullptr.
+     */
+    CUstream_st* get() const;
+
+private:
+    CUstream_st* stream_;
+
+    int device_id_;
+};
+
+
+/**
+ * An RAII wrapper for a custom HIP stream.
+ * The stream will be created on construction and destroyed when the lifetime of
+ * the wrapper ends.
+ */
+class hip_stream {
+public:
+    /** Creates an empty stream wrapper, representing the default stream. */
+    hip_stream();
+
+    /** Creates a new custom HIP stream. */
+    hip_stream(int device_id);
+
+    /** Destroys the custom HIP stream, if it isn't empty. */
+    ~hip_stream();
+
+    hip_stream(const hip_stream&) = delete;
+
+    /** Move-constructs from an existing stream, which will be emptied. */
+    hip_stream(hip_stream&&);
+
+    hip_stream& operator=(const hip_stream&) = delete;
+
+    /** Move-assigns from an existing stream, which will be emptied. */
+    hip_stream& operator=(hip_stream&&) = delete;
+
+    /**
+     * Returns the native HIP stream handle.
+     * In an empty hip_stream, this will return nullptr.
+     */
+    GKO_HIP_STREAM_STRUCT* get() const;
+
+private:
+    GKO_HIP_STREAM_STRUCT* stream_;
+
+    int device_id_;
+};
+
+
+}  // namespace gko
+
+
+#endif  // GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_
diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp
index 3e788ed65ee..4c460f2e1e9 100644
--- a/include/ginkgo/ginkgo.hpp
+++ b/include/ginkgo/ginkgo.hpp
@@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/exception.hpp>
 #include <ginkgo/core/base/exception_helpers.hpp>
 #include <ginkgo/core/base/executor.hpp>
+#include <ginkgo/core/base/fwd_defs.hpp>
 #include <ginkgo/core/base/index_set.hpp>
 #include <ginkgo/core/base/intrinsics.hpp>
 #include <ginkgo/core/base/lin_op.hpp>
@@ -55,6 +56,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/math.hpp>
 #include <ginkgo/core/base/matrix_assembly_data.hpp>
 #include <ginkgo/core/base/matrix_data.hpp>
+#include <ginkgo/core/base/memory.hpp>
 #include <ginkgo/core/base/mpi.hpp>
 #include <ginkgo/core/base/mtx_io.hpp>
 #include <ginkgo/core/base/name_demangling.hpp>
@@ -65,6 +67,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <ginkgo/core/base/range_accessors.hpp>
 #include <ginkgo/core/base/scoped_device_id_guard.hpp>
 #include <ginkgo/core/base/std_extensions.hpp>
+#include <ginkgo/core/base/stream.hpp>
 #include <ginkgo/core/base/temporary_clone.hpp>
 #include <ginkgo/core/base/temporary_conversion.hpp>
 #include <ginkgo/core/base/types.hpp>
diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp
index 25482cf18c8..33e6258fbbd 100644
--- a/test/utils/executor.hpp
+++ b/test/utils/executor.hpp
@@ -44,6 +44,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #include <gtest/gtest.h>
 
 
+#include <ginkgo/core/base/stream.hpp>
+
+
 #ifdef GKO_COMPILING_CUDA
 
 #include "cuda/base/device.hpp"
@@ -106,8 +109,8 @@ inline void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
         if (gko::CudaExecutor::get_num_devices() == 0) {
             throw std::runtime_error{"No suitable CUDA devices"};
         }
-        exec = gko::CudaExecutor::create(0, ref, false,
-                                         gko::default_cuda_alloc_mode, stream);
+        exec = gko::CudaExecutor::create(
+            0, ref, std::make_shared<gko::CudaAsyncAllocator>(stream), stream);
     }
 }
 
@@ -119,8 +122,8 @@ inline void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
     if (gko::HipExecutor::get_num_devices() == 0) {
         throw std::runtime_error{"No suitable HIP devices"};
     }
-    exec = gko::HipExecutor::create(0, ref, false, gko::default_hip_alloc_mode,
-                                    stream);
+    exec = gko::HipExecutor::create(
+        0, ref, std::make_shared<gko::HipAllocator>(), stream);
 }
 
 
@@ -146,7 +149,13 @@ class CommonTestFixture : public ::testing::Test {
 #endif
     using index_type = int;
 
-    CommonTestFixture() : ref{gko::ReferenceExecutor::create()}
+    CommonTestFixture()
+        :
+#if defined(GKO_TEST_NONDEFAULT_STREAM) && \
+    (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP))
+          stream{0},
+#endif
+          ref{gko::ReferenceExecutor::create()}
     {
 #if defined(GKO_TEST_NONDEFAULT_STREAM) && \
     (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP))
diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp
index 59c3f1e3f3b..d8c94e01804 100644
--- a/test/utils/mpi/executor.hpp
+++ b/test/utils/mpi/executor.hpp
@@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
 
 #include <ginkgo/core/base/mpi.hpp>
+#include <ginkgo/core/base/stream.hpp>
 
 
 inline void init_executor(std::shared_ptr<gko::ReferenceExecutor>,
@@ -71,7 +72,7 @@ inline void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
         exec = gko::CudaExecutor::create(
             gko::experimental::mpi::map_rank_to_device_id(
                 MPI_COMM_WORLD, gko::CudaExecutor::get_num_devices()),
-            ref, false, gko::default_cuda_alloc_mode, stream);
+            ref, std::make_shared<gko::CudaAllocator>(), stream);
     }
 }
 
@@ -86,7 +87,7 @@ inline void init_executor(std::shared_ptr<gko::ReferenceExecutor> ref,
     exec = gko::HipExecutor::create(
         gko::experimental::mpi::map_rank_to_device_id(
             MPI_COMM_WORLD, gko::HipExecutor::get_num_devices()),
-        ref, false, gko::default_hip_alloc_mode, stream);
+        ref, std::make_shared<gko::HipAllocator>(), stream);
 }