From ea94c89e0256b2efdc77b0e13a0819d984c0e695 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 30 Mar 2023 12:20:20 +0200 Subject: [PATCH] add allocator support to all executors --- benchmark/utils/general.hpp | 4 +- core/CMakeLists.txt | 1 + core/base/memory.cpp | 59 ++++ core/device_hooks/cuda_hooks.cpp | 44 +++ core/device_hooks/dpcpp_hooks.cpp | 18 ++ core/device_hooks/hip_hooks.cpp | 19 ++ core/test/base/executor.cpp | 79 +----- cuda/CMakeLists.txt | 4 + cuda/base/device.cpp | 59 ++++ cuda/base/executor.cpp | 167 ++--------- cuda/base/memory.cpp | 168 ++++++++++++ cuda/base/nvtx.cpp | 96 +++++++ .../stream.cpp} | 53 ++-- cuda/test/base/CMakeLists.txt | 2 +- cuda/test/base/cuda_executor.cu | 15 +- cuda/test/base/memory.cpp | 126 +++++++++ cuda/test/utils.hpp | 4 +- devices/cuda/executor.cpp | 27 -- devices/hip/executor.cpp | 34 --- devices/omp/executor.cpp | 7 +- dpcpp/base/executor.dp.cpp | 33 +++ .../base/memory.dp.cpp | 47 ++-- dpcpp/test/base/CMakeLists.txt | 1 + dpcpp/test/base/memory.dp.cpp | 98 +++++++ .../adaptiveprecision-blockjacobi.cpp | 7 +- examples/cb-gmres/cb-gmres.cpp | 7 +- examples/custom-logger/custom-logger.cpp | 7 +- .../custom-matrix-format.cpp | 7 +- .../custom-stopping-criterion.cpp | 7 +- .../ilu-preconditioned-solver.cpp | 7 +- .../inverse-iteration/inverse-iteration.cpp | 7 +- .../ir-ilu-preconditioned-solver.cpp | 7 +- .../iterative-refinement.cpp | 7 +- .../minimal-cuda-solver.cpp | 2 +- .../mixed-multigrid-preconditioned-solver.cpp | 7 +- .../mixed-multigrid-solver.cpp | 7 +- .../mixed-precision-ir/mixed-precision-ir.cpp | 7 +- examples/mixed-spmv/mixed-spmv.cpp | 7 +- ...igrid-preconditioned-solver-customized.cpp | 7 +- .../multigrid-preconditioned-solver.cpp | 7 +- .../nine-pt-stencil-solver.cpp | 7 +- examples/papi-logging/papi-logging.cpp | 7 +- .../performance-debugging.cpp | 7 +- examples/poisson-solver/poisson-solver.cpp | 7 +- .../preconditioned-solver.cpp | 7 +- .../simple-solver-logging.cpp | 7 +- examples/simple-solver/simple-solver.cpp | 7 +- .../three-pt-stencil-solver.cpp | 7 +- hip/CMakeLists.txt | 4 + hip/base/device.hip.cpp | 61 +++++ hip/base/executor.hip.cpp | 131 +-------- hip/base/memory.hip.cpp | 97 +++++++ hip/base/roctx.hip.cpp | 70 +++++ hip/base/stream.hip.cpp | 78 ++++++ hip/test/base/CMakeLists.txt | 1 - hip/test/base/hip_executor.hip.cpp | 14 +- hip/test/utils.hip.hpp | 4 +- include/ginkgo/core/base/executor.hpp | 259 +++++------------- include/ginkgo/core/base/fwd_defs.hpp | 82 ++++++ include/ginkgo/core/base/memory.hpp | 211 ++++++++++++++ include/ginkgo/core/base/stream.hpp | 124 +++++++++ include/ginkgo/ginkgo.hpp | 3 + test/utils/executor.hpp | 19 +- test/utils/mpi/executor.hpp | 5 +- 64 files changed, 1735 insertions(+), 756 deletions(-) create mode 100644 core/base/memory.cpp create mode 100644 cuda/base/device.cpp create mode 100644 cuda/base/memory.cpp create mode 100644 cuda/base/nvtx.cpp rename cuda/{test/base/cuda_executor_reset.cpp => base/stream.cpp} (62%) create mode 100644 cuda/test/base/memory.cpp rename hip/test/base/hip_executor_reset.cpp => dpcpp/base/memory.dp.cpp (63%) create mode 100644 dpcpp/test/base/memory.dp.cpp create mode 100644 hip/base/device.hip.cpp create mode 100644 hip/base/memory.hip.cpp create mode 100644 hip/base/roctx.hip.cpp create mode 100644 hip/base/stream.hip.cpp create mode 100644 include/ginkgo/core/base/fwd_defs.hpp create mode 100644 include/ginkgo/core/base/memory.hpp create mode 100644 include/ginkgo/core/base/stream.hpp diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index dcdb5552ed1..966a9438ee5 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -230,12 +230,12 @@ const std::map(bool)>> {"cuda", [](bool) { return gko::CudaExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create(), true); + gko::OmpExecutor::create()); }}, {"hip", [](bool) { return gko::HipExecutor::create(FLAGS_device_id, - gko::OmpExecutor::create(), true); + gko::OmpExecutor::create()); }}, {"dpcpp", [](bool use_gpu_timer) { auto property = dpcpp_queue_property::in_order; diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 7c821c471c2..1898ca69695 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -10,6 +10,7 @@ target_sources(ginkgo base/device_matrix_data.cpp base/executor.cpp base/index_set.cpp + base/memory.cpp base/mpi.cpp base/mtx_io.cpp base/perturbation.cpp diff --git a/core/base/memory.cpp b/core/base/memory.cpp new file mode 100644 index 00000000000..88d97bcc765 --- /dev/null +++ b/core/base/memory.cpp @@ -0,0 +1,59 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +namespace gko { + + +void* CpuAllocator::allocate(size_type num_bytes) const +{ + auto ptr = ::operator new (num_bytes, std::nothrow_t{}); + GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes); + return ptr; +} + + +void CpuAllocator::deallocate(void* ptr) const +{ + ::operator delete (ptr, std::nothrow_t{}); +} + + +} // namespace gko \ No newline at end of file diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index 2d5121274db..70f7e668242 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -35,6 +35,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include +#include #include #include #include @@ -51,6 +53,45 @@ version version_info::get_cuda_version() noexcept } +void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda); + + +void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + + +CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream) + GKO_NOT_COMPILED(cuda); + + +void* CudaAsyncAllocator::allocate(size_type num_bytes) const + GKO_NOT_COMPILED(cuda); + + +void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + + +CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) + GKO_NOT_COMPILED(cuda); + + +void* CudaUnifiedAllocator::allocate(size_type num_bytes) const + GKO_NOT_COMPILED(cuda); + + +void CudaUnifiedAllocator::deallocate(void* dev_ptr) const + GKO_NOT_COMPILED(cuda); + + +CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda); + + +void* CudaHostAllocator::allocate(size_type num_bytes) const + GKO_NOT_COMPILED(cuda); + + +void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + + std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, CUstream_st* stream) @@ -153,6 +194,9 @@ scoped_device_id_guard::scoped_device_id_guard(const CudaExecutor* exec, GKO_NOT_COMPILED(cuda); +cuda_stream::cuda_stream() GKO_NOT_COMPILED(cuda); + + cuda_stream::cuda_stream(int device_id) GKO_NOT_COMPILED(cuda); diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index cb99dc12c3c..62ee77f9cdc 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -36,6 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include #include #include @@ -51,6 +52,23 @@ version version_info::get_dpcpp_version() noexcept } +void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const + GKO_NOT_COMPILED(dpcpp); + + +void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, + size_type size) const + GKO_NOT_COMPILED(dpcpp); + + +void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const + GKO_NOT_COMPILED(dpcpp); + + std::shared_ptr DpcppExecutor::create( int device_id, std::shared_ptr master, std::string device_type, dpcpp_queue_property property) diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 95772cc0353..7f8ac45b4b9 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -52,6 +52,22 @@ version version_info::get_hip_version() noexcept } +void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream) + GKO_NOT_COMPILED(hip); + + +void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream) @@ -154,6 +170,9 @@ scoped_device_id_guard::scoped_device_id_guard(const HipExecutor* exec, GKO_NOT_COMPILED(hip); +hip_stream::hip_stream() GKO_NOT_COMPILED(hip); + + hip_stream::hip_stream(int device_id) GKO_NOT_COMPILED(hip); diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 0d64dfcf3cf..71064cf01d2 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -35,6 +35,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include "ginkgo/core/base/memory.hpp" #if defined(__unix__) || defined(__APPLE__) @@ -263,35 +264,6 @@ TEST(CudaExecutor, KnowsItsDeviceId) } -TEST(CudaExecutor, CanGetDeviceResetBoolean) -{ - auto omp = gko::OmpExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, omp); - - ASSERT_EQ(false, cuda->get_device_reset()); -} - - -TEST(CudaExecutor, CanSetDefaultDeviceResetBoolean) -{ - auto omp = gko::OmpExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, omp, true); - - ASSERT_EQ(true, cuda->get_device_reset()); -} - - -TEST(CudaExecutor, CanSetDeviceResetBoolean) -{ - auto omp = gko::OmpExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, omp); - - cuda->set_device_reset(true); - - ASSERT_EQ(true, cuda->get_device_reset()); -} - - TEST(HipExecutor, KnowsItsMaster) { auto omp = gko::OmpExecutor::create(); @@ -310,35 +282,6 @@ TEST(HipExecutor, KnowsItsDeviceId) } -TEST(HipExecutor, CanGetDeviceResetBoolean) -{ - auto omp = gko::OmpExecutor::create(); - auto hip = gko::HipExecutor::create(0, omp); - - ASSERT_EQ(false, hip->get_device_reset()); -} - - -TEST(HipExecutor, CanSetDefaultDeviceResetBoolean) -{ - auto omp = gko::OmpExecutor::create(); - auto hip = gko::HipExecutor::create(0, omp, true); - - ASSERT_EQ(true, hip->get_device_reset()); -} - - -TEST(HipExecutor, CanSetDeviceResetBoolean) -{ - auto omp = gko::OmpExecutor::create(); - auto hip = gko::HipExecutor::create(0, omp); - - hip->set_device_reset(true); - - ASSERT_EQ(true, hip->get_device_reset()); -} - - TEST(DpcppExecutor, KnowsItsMaster) { auto omp = gko::OmpExecutor::create(); @@ -442,20 +385,11 @@ TEST(Executor, CanVerifyMemory) } -template -struct mock_free : T { - /** - * @internal Due to a bug with gcc 5.3, the constructor needs to be called - * with `()` operator instead of `{}`. - */ - template - mock_free(Params&&... params) : T(std::forward(params)...) - {} - - void raw_free(void* ptr) const noexcept override +struct MockAllocator : gko::CpuAllocator { + void deallocate(void* ptr) const noexcept override { called_free = true; - T::raw_free(ptr); + CpuAllocator::deallocate(ptr); } mutable bool called_free{false}; @@ -464,12 +398,13 @@ struct mock_free : T { TEST(ExecutorDeleter, DeletesObject) { - auto ref = std::make_shared>(); + auto alloc = std::make_shared(); + auto ref = gko::ReferenceExecutor::create(alloc); auto x = ref->alloc(5); gko::executor_deleter{ref}(x); - ASSERT_TRUE(ref->called_free); + ASSERT_TRUE(alloc->called_free); } diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 2f9f37bfdfc..629515e8810 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -1,11 +1,15 @@ add_library(ginkgo_cuda $ "") target_sources(ginkgo_cuda PRIVATE + base/device.cpp base/device_matrix_data_kernels.cu base/exception.cpp base/executor.cpp base/index_set_kernels.cpp + base/memory.cpp + base/nvtx.cpp base/scoped_device_id.cpp + base/stream.cpp base/version.cpp components/prefix_sum_kernels.cu distributed/matrix_kernels.cu diff --git a/cuda/base/device.cpp b/cuda/base/device.cpp new file mode 100644 index 00000000000..5aa9e7f1a17 --- /dev/null +++ b/cuda/base/device.cpp @@ -0,0 +1,59 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include "cuda/base/device.hpp" + + +#include + + +#include + + +#include "cuda/base/scoped_device_id.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { + + +void reset_device(int device_id) +{ + gko::detail::cuda_scoped_device_id_guard guard{device_id}; + cudaDeviceReset(); +} + + +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index 2b0341fc4a6..f6e838dd2dd 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -39,18 +39,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#ifdef GKO_LEGACY_NVTX -#include -#else -#include -#endif #include #include #include #include -#include +#include #include "cuda/base/config.hpp" @@ -65,25 +60,38 @@ namespace gko { #include "common/cuda_hip/base/executor.hpp.inc" +std::unique_ptr allocator_from_mode(int device_id, + allocation_mode mode) +{ + switch (mode) { + case allocation_mode::device: + return std::make_unique(); + case allocation_mode::unified_global: + return std::make_unique(device_id); + case allocation_mode::unified_host: + return std::make_unique(device_id); + default: + GKO_NOT_SUPPORTED(mode); + } +} + + std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, cudaStream_t stream) { return std::shared_ptr( - new CudaExecutor(device_id, std::move(master), device_reset, alloc_mode, - stream), - [device_id](CudaExecutor* exec) { - auto device_reset = exec->get_device_reset(); - std::lock_guard guard( - nvidia_device::get_mutex(device_id)); - delete exec; - auto& num_execs = nvidia_device::get_num_execs(device_id); - num_execs--; - if (!num_execs && device_reset) { - detail::cuda_scoped_device_id_guard g(device_id); - cudaDeviceReset(); - } - }); + new CudaExecutor(device_id, std::move(master), + allocator_from_mode(device_id, alloc_mode), stream)); +} + + +std::shared_ptr CudaExecutor::create( + int device_id, std::shared_ptr master, + std::shared_ptr alloc, cudaStream_t stream) +{ + return std::shared_ptr(new CudaExecutor( + device_id, std::move(master), std::move(alloc), stream)); } @@ -123,41 +131,14 @@ void OmpExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, void CudaExecutor::raw_free(void* ptr) const noexcept { detail::cuda_scoped_device_id_guard g(this->get_device_id()); - auto error_code = cudaFree(ptr); - if (error_code != cudaSuccess) { -#if GKO_VERBOSE_LEVEL >= 1 - // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable CUDA error on device " - << this->get_device_id() << " in " << __func__ << ": " - << cudaGetErrorName(error_code) << ": " - << cudaGetErrorString(error_code) << std::endl - << "Exiting program" << std::endl; -#endif // GKO_VERBOSE_LEVEL >= 1 - std::exit(error_code); - } + alloc_->deallocate(ptr); } void* CudaExecutor::raw_alloc(size_type num_bytes) const { - void* dev_ptr = nullptr; detail::cuda_scoped_device_id_guard g(this->get_device_id()); - int error_code = 0; - if (this->alloc_mode_ == allocation_mode::unified_host) { - error_code = cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachHost); - } else if (this->alloc_mode_ == allocation_mode::unified_global) { - error_code = - cudaMallocManaged(&dev_ptr, num_bytes, cudaMemAttachGlobal); - } else if (this->alloc_mode_ == allocation_mode::device) { - error_code = cudaMalloc(&dev_ptr, num_bytes); - } else { - GKO_NOT_SUPPORTED(this->alloc_mode_); - } - if (error_code != cudaErrorMemoryAllocation) { - GKO_ASSERT_NO_CUDA_ERRORS(error_code); - } - GKO_ENSURE_ALLOCATED(dev_ptr, "cuda", num_bytes); - return dev_ptr; + return alloc_->allocate(num_bytes); } @@ -298,92 +279,4 @@ void CudaExecutor::init_handles() } -cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id) -{ - detail::cuda_scoped_device_id_guard g(device_id_); - GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamCreate(&stream_)); -} - - -cuda_stream::~cuda_stream() -{ - if (stream_) { - detail::cuda_scoped_device_id_guard g(device_id_); - cudaStreamDestroy(stream_); - } -} - - -cuda_stream::cuda_stream(cuda_stream&& other) - : stream_{std::exchange(other.stream_, nullptr)}, - device_id_(std::exchange(other.device_id_, -1)) -{} - - -CUstream_st* cuda_stream::get() const { return stream_; } - - -namespace log { - - -// "GKO" in ASCII to avoid collision with other application's categories -constexpr static uint32 category_magic_offset = 0x676B6FU; - - -void init_nvtx() -{ -#define NAMED_CATEGORY(_name) \ - nvtxNameCategory(static_cast(profile_event_category::_name) + \ - category_magic_offset, \ - "gko::" #_name) - NAMED_CATEGORY(memory); - NAMED_CATEGORY(operation); - NAMED_CATEGORY(object); - NAMED_CATEGORY(linop); - NAMED_CATEGORY(factory); - NAMED_CATEGORY(solver); - NAMED_CATEGORY(criterion); - NAMED_CATEGORY(user); - NAMED_CATEGORY(internal); -#undef NAMED_CATEGORY -} - - -std::function begin_nvtx_fn( - uint32_t color_argb) -{ - return [color_argb](const char* name, profile_event_category category) { - nvtxEventAttributes_t attr{}; - attr.version = NVTX_VERSION; - attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; - attr.category = static_cast(category) + category_magic_offset; - attr.colorType = NVTX_COLOR_ARGB; - attr.color = color_argb; - attr.payloadType = NVTX_PAYLOAD_UNKNOWN; - attr.messageType = NVTX_MESSAGE_TYPE_ASCII; - attr.message.ascii = name; - nvtxRangePushEx(&attr); - }; -} - - -void end_nvtx(const char* name, profile_event_category) { nvtxRangePop(); } - - -} // namespace log - - -namespace kernels { -namespace cuda { - - -void reset_device(int device_id) -{ - gko::detail::cuda_scoped_device_id_guard guard{device_id}; - cudaDeviceReset(); -} - - -} // namespace cuda -} // namespace kernels } // namespace gko diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp new file mode 100644 index 00000000000..11dee81ad42 --- /dev/null +++ b/cuda/base/memory.cpp @@ -0,0 +1,168 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +#include "cuda/base/scoped_device_id.hpp" + + +namespace gko { + + +#define GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(_operation, _size) \ + { \ + auto error_code = _operation; \ + if (error_code == cudaErrorMemoryAllocation) { \ + throw AllocationError(__FILE__, __LINE__, "cuda", _size); \ + } else { \ + GKO_ASSERT_NO_CUDA_ERRORS(error_code); \ + } \ + } + + +#if GKO_VERBOSE_LEVEL >= 1 +#define GKO_EXIT_ON_CUDA_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != cudaSuccess) { \ + int device_id{-1}; \ + cudaGetDevice(&device_id); \ + std::cerr << "Unrecoverable CUDA error on device " << device_id \ + << " in " << __func__ << ":" << __LINE__ << ": " \ + << cudaGetErrorName(error_code) << ": " \ + << cudaGetErrorString(error_code) << std::endl \ + << "Exiting program" << std::endl; \ + std::exit(error_code); \ + } \ + } +#else +#define GKO_EXIT_ON_CUDA_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != cudaSuccess) { \ + std::exit(error_code); \ + } \ + } +#endif + + +void* CudaAllocator::allocate(size_type num_bytes) const +{ + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void CudaAllocator::deallocate(void* ptr) const +{ + GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); +} + + +CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} + + +void* CudaAsyncAllocator::allocate(size_type num_bytes) const +{ + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS( + cudaMallocAsync(&ptr, num_bytes, stream_), num_bytes); + return ptr; +} + +void CudaAsyncAllocator::deallocate(void* ptr) const +{ + GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_)); +} + + +CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id) + : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal} +{} + + +CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) + : device_id_{device_id}, flags_{flags} +{} + + +void* CudaUnifiedAllocator::allocate(size_type num_bytes) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS( + cudaMallocManaged(&ptr, num_bytes, flags_), num_bytes); + return ptr; +} + + +void CudaUnifiedAllocator::deallocate(void* ptr) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); +} + + +CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {} + + +void* CudaHostAllocator::allocate(size_type num_bytes) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMallocHost(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void CudaHostAllocator::deallocate(void* ptr) const +{ + // we need to set the device ID in case this gets used in a host executor + detail::cuda_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_CUDA_ERROR(cudaFreeHost(ptr)); +} + + +} // namespace gko \ No newline at end of file diff --git a/cuda/base/nvtx.cpp b/cuda/base/nvtx.cpp new file mode 100644 index 00000000000..e313c110ea2 --- /dev/null +++ b/cuda/base/nvtx.cpp @@ -0,0 +1,96 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#ifdef GKO_LEGACY_NVTX +#include +#else +#include +#endif + + +#include + + +namespace gko { +namespace log { + + +// "GKO" in ASCII to avoid collision with other application's categories +constexpr static uint32 category_magic_offset = 0x676B6FU; + + +void init_nvtx() +{ +#define NAMED_CATEGORY(_name) \ + nvtxNameCategory(static_cast(profile_event_category::_name) + \ + category_magic_offset, \ + "gko::" #_name) + NAMED_CATEGORY(memory); + NAMED_CATEGORY(operation); + NAMED_CATEGORY(object); + NAMED_CATEGORY(linop); + NAMED_CATEGORY(factory); + NAMED_CATEGORY(solver); + NAMED_CATEGORY(criterion); + NAMED_CATEGORY(user); + NAMED_CATEGORY(internal); +#undef NAMED_CATEGORY +} + + +std::function begin_nvtx_fn( + uint32_t color_argb) +{ + return [color_argb](const char* name, profile_event_category category) { + nvtxEventAttributes_t attr{}; + attr.version = NVTX_VERSION; + attr.size = NVTX_EVENT_ATTRIB_STRUCT_SIZE; + attr.category = static_cast(category) + category_magic_offset; + attr.colorType = NVTX_COLOR_ARGB; + attr.color = color_argb; + attr.payloadType = NVTX_PAYLOAD_UNKNOWN; + attr.messageType = NVTX_MESSAGE_TYPE_ASCII; + attr.message.ascii = name; + nvtxRangePushEx(&attr); + }; +} + + +void end_nvtx(const char* name, profile_event_category) { nvtxRangePop(); } + + +} // namespace log +} // namespace gko diff --git a/cuda/test/base/cuda_executor_reset.cpp b/cuda/base/stream.cpp similarity index 62% rename from cuda/test/base/cuda_executor_reset.cpp rename to cuda/base/stream.cpp index c8159b9c4d7..8c6aa92c28b 100644 --- a/cuda/test/base/cuda_executor_reset.cpp +++ b/cuda/base/stream.cpp @@ -30,58 +30,45 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include -#include +#include +#include -#include +#include "cuda/base/scoped_device_id.hpp" -namespace { +namespace gko { -#define GTEST_ASSERT_NO_EXIT(statement) \ - ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") +cuda_stream::cuda_stream() : stream_{}, device_id_{-1} {} -TEST(DeviceReset, HipCuda) +cuda_stream::cuda_stream(int device_id) : stream_{}, device_id_(device_id) { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto hip = gko::HipExecutor::create(0, ref, true); - auto cuda = gko::CudaExecutor::create(0, ref, true); - }); + detail::cuda_scoped_device_id_guard g(device_id_); + GKO_ASSERT_NO_CUDA_ERRORS(cudaStreamCreate(&stream_)); } -TEST(DeviceReset, CudaHip) +cuda_stream::~cuda_stream() { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, ref, true); - auto hip = gko::HipExecutor::create(0, ref, true); - }); + if (stream_) { + detail::cuda_scoped_device_id_guard g(device_id_); + cudaStreamDestroy(stream_); + } } -void func() -{ - auto ref = gko::ReferenceExecutor::create(); - auto exec = gko::CudaExecutor::create(0, ref, true); -} +cuda_stream::cuda_stream(cuda_stream&& other) + : stream_{std::exchange(other.stream_, nullptr)}, + device_id_(std::exchange(other.device_id_, -1)) +{} -TEST(DeviceReset, CudaCuda) -{ - GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); - t1.join(); - t2.join(); - }); -} +CUstream_st* cuda_stream::get() const { return stream_; } -} // namespace +} // namespace gko diff --git a/cuda/test/base/CMakeLists.txt b/cuda/test/base/CMakeLists.txt index 9be3caf9faa..a213e65277a 100644 --- a/cuda/test/base/CMakeLists.txt +++ b/cuda/test/base/CMakeLists.txt @@ -1,7 +1,6 @@ ginkgo_create_cuda_test(array) ginkgo_create_cuda_test(cuda_executor) ginkgo_create_test(index_set) -ginkgo_create_test(cuda_executor_reset ADDITIONAL_LIBRARIES Threads::Threads) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_cuda_test(cuda_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) @@ -10,4 +9,5 @@ ginkgo_create_cuda_test(exception_helpers) ginkgo_create_cuda_test(kernel_launch) ginkgo_create_cuda_test(lin_op) ginkgo_create_cuda_test(math) +ginkgo_create_test(memory) ginkgo_create_cuda_test(scoped_device_id) diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index 5f489ac22f0..afb23c06186 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -42,6 +42,8 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include +#include + #include "common/cuda_hip/base/executor.hpp.inc" #include "cuda/base/scoped_device_id.hpp" @@ -103,18 +105,19 @@ protected: ASSERT_GT(gko::CudaExecutor::get_num_devices(), 0); #ifdef GKO_TEST_NONDEFAULT_STREAM cuda = gko::CudaExecutor::create( - 0, omp, false, gko::default_cuda_alloc_mode, stream.get()); + 0, omp, std::make_shared(), stream.get()); cuda2 = gko::CudaExecutor::create( - gko::CudaExecutor::get_num_devices() - 1, omp, false, - gko::default_cuda_alloc_mode, other_stream.get()); + gko::CudaExecutor::get_num_devices() - 1, omp, + std::make_shared(), other_stream.get()); cuda3 = gko::CudaExecutor::create( - 0, omp, false, gko::allocation_mode::unified_global, stream.get()); + 0, omp, std::make_shared(0), + stream.get()); #else cuda = gko::CudaExecutor::create(0, omp); cuda2 = gko::CudaExecutor::create( gko::CudaExecutor::get_num_devices() - 1, omp); - cuda3 = gko::CudaExecutor::create(0, omp, false, - gko::allocation_mode::unified_global); + cuda3 = gko::CudaExecutor::create( + 0, omp, std::make_shared(0)); #endif } diff --git a/cuda/test/base/memory.cpp b/cuda/test/base/memory.cpp new file mode 100644 index 00000000000..a329817f4af --- /dev/null +++ b/cuda/test/base/memory.cpp @@ -0,0 +1,126 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "cuda/test/utils.hpp" + + +namespace { + + +class Memory : public CudaTestFixture { +protected: + Memory() + : host_exec_with_pinned{gko::OmpExecutor::create( + std::make_shared(0))}, + host_exec_with_unified{gko::OmpExecutor::create( + std::make_shared(0))}, + exec_with_normal{gko::CudaExecutor::create( + 0, ref, std::make_shared(), + exec->get_stream())}, + exec_with_async{gko::CudaExecutor::create( + 0, host_exec_with_pinned, + std::make_shared(exec->get_stream()), + exec->get_stream())}, + exec_with_unified{gko::CudaExecutor::create( + 0, host_exec_with_unified, + std::make_shared(0), + exec->get_stream())} + {} + + std::shared_ptr host_exec_with_pinned; + std::shared_ptr host_exec_with_unified; + std::shared_ptr exec_with_normal; + std::shared_ptr exec_with_async; + std::shared_ptr exec_with_unified; +}; + + +TEST_F(Memory, DeviceAllocationWorks) +{ + gko::array data{exec_with_normal, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, AsyncDeviceAllocationWorks) +{ + gko::array data{exec_with_async, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, UnifiedDeviceAllocationWorks) +{ + gko::array data{exec_with_unified, {1, 2}}; + exec->synchronize(); + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostUnifiedAllocationWorks) +{ + gko::array data{host_exec_with_unified, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostPinnedAllocationWorks) +{ + gko::array data{host_exec_with_pinned, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +} // namespace diff --git a/cuda/test/utils.hpp b/cuda/test/utils.hpp index 814405ba0d9..e1156b91903 100644 --- a/cuda/test/utils.hpp +++ b/cuda/test/utils.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "cuda/base/device.hpp" @@ -60,8 +61,9 @@ class CudaTestFixture : public ::testing::Test { CudaTestFixture() : ref(gko::ReferenceExecutor::create()), #ifdef GKO_TEST_NONDEFAULT_STREAM + stream(0), exec(gko::CudaExecutor::create( - 0, ref, false, gko::default_cuda_alloc_mode, stream.get())) + 0, ref, std::make_shared(), stream.get())) #else exec(gko::CudaExecutor::create(0, ref)) #endif diff --git a/devices/cuda/executor.cpp b/devices/cuda/executor.cpp index d066d272f81..3789274c3f3 100644 --- a/devices/cuda/executor.cpp +++ b/devices/cuda/executor.cpp @@ -64,31 +64,4 @@ bool CudaExecutor::verify_memory_to(const HipExecutor* dest_exec) const } -void CudaExecutor::increase_num_execs(unsigned device_id) -{ -#ifdef GKO_COMPILING_CUDA_DEVICE - // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(nvidia_device::get_mutex(device_id)); - nvidia_device::get_num_execs(device_id)++; -#endif // GKO_COMPILING_CUDA_DEVICE -} - - -void CudaExecutor::decrease_num_execs(unsigned device_id) -{ -#ifdef GKO_COMPILING_CUDA_DEVICE - // increase the Cuda Device count only when ginkgo build cuda - std::lock_guard guard(nvidia_device::get_mutex(device_id)); - nvidia_device::get_num_execs(device_id)--; -#endif // GKO_COMPILING_CUDA_DEVICE -} - - -unsigned CudaExecutor::get_num_execs(unsigned device_id) -{ - std::lock_guard guard(nvidia_device::get_mutex(device_id)); - return nvidia_device::get_num_execs(device_id); -} - - } // namespace gko diff --git a/devices/hip/executor.cpp b/devices/hip/executor.cpp index 60efb4c53a3..b044074c19e 100644 --- a/devices/hip/executor.cpp +++ b/devices/hip/executor.cpp @@ -61,38 +61,4 @@ bool HipExecutor::verify_memory_to(const CudaExecutor* dest_exec) const } -#if (GINKGO_HIP_PLATFORM_NVCC == 1) -using hip_device_class = nvidia_device; -#else -using hip_device_class = amd_device; -#endif - - -void HipExecutor::increase_num_execs(int device_id) -{ -#ifdef GKO_COMPILING_HIP_DEVICE - // increase the HIP Device count only when ginkgo build hip - std::lock_guard guard(hip_device_class::get_mutex(device_id)); - hip_device_class::get_num_execs(device_id)++; -#endif // GKO_COMPILING_HIP_DEVICE -} - - -void HipExecutor::decrease_num_execs(int device_id) -{ -#ifdef GKO_COMPILING_HIP_DEVICE - // increase the HIP Device count only when ginkgo build hip - std::lock_guard guard(hip_device_class::get_mutex(device_id)); - hip_device_class::get_num_execs(device_id)--; -#endif // GKO_COMPILING_HIP_DEVICE -} - - -int HipExecutor::get_num_execs(int device_id) -{ - std::lock_guard guard(hip_device_class::get_mutex(device_id)); - return hip_device_class::get_num_execs(device_id); -} - - } // namespace gko diff --git a/devices/omp/executor.cpp b/devices/omp/executor.cpp index 352216f7633..f8e700bc2d5 100644 --- a/devices/omp/executor.cpp +++ b/devices/omp/executor.cpp @@ -55,7 +55,10 @@ void OmpExecutor::populate_exec_info(const machine_topology* mach_topo) } -void OmpExecutor::raw_free(void* ptr) const noexcept { std::free(ptr); } +void OmpExecutor::raw_free(void* ptr) const noexcept +{ + return alloc_->deallocate(ptr); +} std::shared_ptr OmpExecutor::get_master() noexcept @@ -72,7 +75,7 @@ std::shared_ptr OmpExecutor::get_master() const noexcept void* OmpExecutor::raw_alloc(size_type num_bytes) const { - return GKO_ENSURE_ALLOCATED(std::malloc(num_bytes), "OMP", num_bytes); + return alloc_->allocate(num_bytes); } diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 3b9bfce07b8..b4503bf782e 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -51,6 +51,39 @@ namespace gko { namespace detail { +DpcppAllocator::DpcppAllocator(sycl::queue* queue) : queue_{queue} {} + + +void* DpcppAllocator::allocate(size_type size) +{ + return sycl::malloc_device(size, *queue_); +} + + +void DpcppAllocator::deallocate(void* ptr) +{ + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); +} + + +DpcppUnifiedAllocator::DpcppUnifiedAllocator(sycl::queue* queue) : queue_{queue} +{} + + +void* DpcppUnifiedAllocator::allocate(size_type size) +{ + return sycl::malloc_shared(size, *queue_); +} + + +void DpcppUnifiedAllocator::deallocate(void* ptr) +{ + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); +} + + const std::vector get_devices(std::string device_type) { std::map device_type_map{ diff --git a/hip/test/base/hip_executor_reset.cpp b/dpcpp/base/memory.dp.cpp similarity index 63% rename from hip/test/base/hip_executor_reset.cpp rename to dpcpp/base/memory.dp.cpp index 39e3252e053..b1ccd007dea 100644 --- a/hip/test/base/hip_executor_reset.cpp +++ b/dpcpp/base/memory.dp.cpp @@ -30,58 +30,43 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. *************************************************************/ -#include +#include -#include +#include -#include +namespace gko { -namespace { +DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {} -#define GTEST_ASSERT_NO_EXIT(statement) \ - ASSERT_EXIT({ {statement} exit(0); }, ::testing::ExitedWithCode(0), "") - - -TEST(DeviceReset, HipCuda) +void* DpcppAllocator::allocate_impl(sycl::queue* queue, + size_type num_bytes) const { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto hip = gko::HipExecutor::create(0, ref, true); - auto cuda = gko::CudaExecutor::create(0, ref, true); - }); + return sycl::malloc_device(size, *queue); } -TEST(DeviceReset, CudaHip) +void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const { - GTEST_ASSERT_NO_EXIT({ - auto ref = gko::ReferenceExecutor::create(); - auto cuda = gko::CudaExecutor::create(0, ref, true); - auto hip = gko::HipExecutor::create(0, ref, true); - }); + queue->wait_and_throw(); + sycl::free(ptr, queue->get_context()); } -void func() +void* DpcppUnifiedAllocator::allocate(size_type num_bytes) { - auto ref = gko::ReferenceExecutor::create(); - auto exec = gko::HipExecutor::create(0, ref, true); + return sycl::malloc_shared(size, *queue_); } -TEST(DeviceReset, HipHip) +void DpcppUnifiedAllocator::deallocate(void* ptr) { - GTEST_ASSERT_NO_EXIT({ - std::thread t1(func); - std::thread t2(func); - t1.join(); - t2.join(); - }); + queue_->wait_and_throw(); + sycl::free(ptr, queue_->get_context()); } -} // namespace +} // namespace gko diff --git a/dpcpp/test/base/CMakeLists.txt b/dpcpp/test/base/CMakeLists.txt index bb9c8a75050..5c0ca601f04 100644 --- a/dpcpp/test/base/CMakeLists.txt +++ b/dpcpp/test/base/CMakeLists.txt @@ -3,3 +3,4 @@ ginkgo_create_dpcpp_test(dim3) ginkgo_create_dpcpp_test(kernel_launch) # set correct flags for kernel_launch.hpp target_compile_definitions(dpcpp_test_base_kernel_launch PRIVATE GKO_COMPILING_DPCPP) +ginkgo_create_dpcpp_test(memory) \ No newline at end of file diff --git a/dpcpp/test/base/memory.dp.cpp b/dpcpp/test/base/memory.dp.cpp new file mode 100644 index 00000000000..e587660cde3 --- /dev/null +++ b/dpcpp/test/base/memory.dp.cpp @@ -0,0 +1,98 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#include + + +#include + + +#include +#include +#include + + +#include "dpcpp/test/utils.hpp" + + +namespace { + + +class Memory : public ::testing::Test { +protected: + Memory() + : exec{gko::DpcppExecutor::create(0, gko::OmpExecutor::create())}, + host_exec_with_unified{gko::OmpExecutor::create( + std::make_shared(exec->get_queue()))}, + exec_with_unified{gko::DpcppExecutor::create( + exec->get_queue(), host_exec_with_unified, + std::make_shared(exec->get_queue()))} + {} + + std::shared_ptr exec; + std::shared_ptr host_exec_with_unified; + std::shared_ptr exec_with_unified; +}; + + +TEST_F(Memory, DeviceAllocationWorks) +{ + gko::array data{exec, {1, 2}}; + + GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); +} + + +TEST_F(Memory, UnifiedDeviceAllocationWorks) +{ + gko::array data{exec_with_unified, {1, 2}}; + exec->synchronize(); + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +TEST_F(Memory, HostUnifiedAllocationWorks) +{ + gko::array data{host_exec_with_unified, {1, 2}}; + + ASSERT_EQ(data.get_const_data()[0], 1); + ASSERT_EQ(data.get_const_data()[1], 2); +} + + +} // namespace diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index f6f28cdaab0..17639fefef2 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -68,13 +68,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp index c0235f75e55..b096e48c71a 100644 --- a/examples/cb-gmres/cb-gmres.cpp +++ b/examples/cb-gmres/cb-gmres.cpp @@ -108,13 +108,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index e82d94ed117..8a17f651650 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -259,13 +259,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp index af08dbdf226..4610413fe9c 100644 --- a/examples/custom-matrix-format/custom-matrix-format.cpp +++ b/examples/custom-matrix-format/custom-matrix-format.cpp @@ -255,13 +255,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index 9389f86cc45..e07f1bf92fb 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -158,13 +158,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp index aa32e0e879a..33946b7de44 100644 --- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp +++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp @@ -68,13 +68,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp index 5d8270f1ca1..460370b7e00 100644 --- a/examples/inverse-iteration/inverse-iteration.cpp +++ b/examples/inverse-iteration/inverse-iteration.cpp @@ -72,13 +72,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index 24db221a062..fa034772e60 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -71,13 +71,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index f38c5a6cb49..9b4d0ee5263 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ b/examples/iterative-refinement/iterative-refinement.cpp @@ -68,13 +68,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp index 7182bc9ad8c..5a7a8c086af 100644 --- a/examples/minimal-cuda-solver/minimal-cuda-solver.cpp +++ b/examples/minimal-cuda-solver/minimal-cuda-solver.cpp @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int main() { // Instantiate a CUDA executor - auto gpu = gko::CudaExecutor::create(0, gko::OmpExecutor::create(), true); + auto gpu = gko::CudaExecutor::create(0, gko::OmpExecutor::create()); // Read data auto A = gko::read>(std::cin, gpu); auto b = gko::read>(std::cin, gpu); diff --git a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp index 6f1600d2805..9edd7ff29a1 100644 --- a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp +++ b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp @@ -71,13 +71,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp index 7da28611fcb..9d05e9c65b7 100644 --- a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp +++ b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp @@ -69,13 +69,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp index 3510a2163e1..0882d755cdc 100644 --- a/examples/mixed-precision-ir/mixed-precision-ir.cpp +++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp @@ -76,13 +76,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp index 78461de39ef..6b327c1c708 100644 --- a/examples/mixed-spmv/mixed-spmv.cpp +++ b/examples/mixed-spmv/mixed-spmv.cpp @@ -170,13 +170,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp index 6f75ca29630..a455ca2e8ed 100644 --- a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp +++ b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp @@ -64,13 +64,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp index 06ac006e4db..4590f1582dd 100644 --- a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp +++ b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp @@ -62,13 +62,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index 51fdf97d4a4..05ee0503a5f 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -230,13 +230,12 @@ void solve_system(const std::string& executor_string, {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index bcca6fe648e..d73747e3069 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -151,13 +151,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index f357a8d4619..5f036728924 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -371,13 +371,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp index 7602600a514..e16f0b26968 100644 --- a/examples/poisson-solver/poisson-solver.cpp +++ b/examples/poisson-solver/poisson-solver.cpp @@ -144,13 +144,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp index 37963f205cc..b64b588c4ef 100644 --- a/examples/preconditioned-solver/preconditioned-solver.cpp +++ b/examples/preconditioned-solver/preconditioned-solver.cpp @@ -69,13 +69,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index 6ab3169b326..384bd9d997b 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -85,13 +85,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp index 8f665f98496..81dc9ee6d74 100644 --- a/examples/simple-solver/simple-solver.cpp +++ b/examples/simple-solver/simple-solver.cpp @@ -89,13 +89,12 @@ int main(int argc, char* argv[]) {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index 6bf3cc21a8a..63adfaa5571 100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -165,13 +165,12 @@ void solve_system(const std::string& executor_string, {"omp", [] { return gko::OmpExecutor::create(); }}, {"cuda", [] { - return gko::CudaExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::CudaExecutor::create(0, + gko::OmpExecutor::create()); }}, {"hip", [] { - return gko::HipExecutor::create(0, gko::OmpExecutor::create(), - true); + return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, {"dpcpp", [] { diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index cf3f68dc7fc..f1000f5b6ef 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -1,9 +1,13 @@ set(GINKGO_HIP_SOURCES + base/device.hip.cpp base/device_matrix_data_kernels.hip.cpp base/exception.hip.cpp base/executor.hip.cpp base/index_set_kernels.hip.cpp + base/memory.hip.cpp + base/roctx.hip.cpp base/scoped_device_id.hip.cpp + base/stream.hip.cpp base/version.hip.cpp components/prefix_sum_kernels.hip.cpp distributed/matrix_kernels.hip.cpp diff --git a/hip/base/device.hip.cpp b/hip/base/device.hip.cpp new file mode 100644 index 00000000000..3fc01661cfe --- /dev/null +++ b/hip/base/device.hip.cpp @@ -0,0 +1,61 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "hip/base/scoped_device_id.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { + + +void reset_device(int device_id) +{ + gko::detail::hip_scoped_device_id_guard guard{device_id}; + hipDeviceReset(); +} + + +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index a67d1c06a3c..6b4b0fd5ddc 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -37,15 +37,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include -#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX -#include -#endif #include #include #include -#include #include "hip/base/config.hip.hpp" @@ -60,32 +56,22 @@ namespace gko { #include "common/cuda_hip/base/executor.hpp.inc" -#if (GINKGO_HIP_PLATFORM_NVCC == 1) -using hip_device_class = nvidia_device; -#else -using hip_device_class = amd_device; -#endif - - std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, hipStream_t stream) { return std::shared_ptr( - new HipExecutor(device_id, std::move(master), device_reset, alloc_mode, - stream), - [device_id](HipExecutor* exec) { - auto device_reset = exec->get_device_reset(); - std::lock_guard guard( - hip_device_class::get_mutex(device_id)); - delete exec; - auto& num_execs = hip_device_class::get_num_execs(device_id); - num_execs--; - if (!num_execs && device_reset) { - detail::hip_scoped_device_id_guard g(device_id); - hipDeviceReset(); - } - }); + new HipExecutor(device_id, std::move(master), + std::make_shared(), stream)); +} + + +std::shared_ptr HipExecutor::create( + int device_id, std::shared_ptr master, + std::shared_ptr alloc, hipStream_t stream) +{ + return std::shared_ptr(new HipExecutor( + device_id, std::move(master), std::move(alloc), stream)); } @@ -125,42 +111,14 @@ void OmpExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, void HipExecutor::raw_free(void* ptr) const noexcept { detail::hip_scoped_device_id_guard g(this->get_device_id()); - auto error_code = hipFree(ptr); - if (error_code != hipSuccess) { -#if GKO_VERBOSE_LEVEL >= 1 - // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable HIP error on device " - << this->get_device_id() << " in " << __func__ << ": " - << hipGetErrorName(error_code) << ": " - << hipGetErrorString(error_code) << std::endl - << "Exiting program" << std::endl; -#endif // GKO_VERBOSE_LEVEL >= 1 - std::exit(error_code); - } + alloc_->deallocate(ptr); } void* HipExecutor::raw_alloc(size_type num_bytes) const { - void* dev_ptr = nullptr; detail::hip_scoped_device_id_guard g(this->get_device_id()); - int error_code = 0; - if (this->alloc_mode_ == allocation_mode::device) { - error_code = hipMalloc(&dev_ptr, num_bytes); -#if !(GKO_HIP_PLATFORM_HCC == 1) - } else if (this->alloc_mode_ == allocation_mode::unified_global) { - error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachGlobal); - } else if (this->alloc_mode_ == allocation_mode::unified_host) { - error_code = hipMallocManaged(&dev_ptr, num_bytes, hipMemAttachHost); -#endif - } else { - GKO_NOT_SUPPORTED(this->alloc_mode_); - } - if (error_code != hipErrorMemoryAllocation) { - GKO_ASSERT_NO_HIP_ERRORS(error_code); - } - GKO_ENSURE_ALLOCATED(dev_ptr, "hip", num_bytes); - return dev_ptr; + return alloc_->allocate(num_bytes); } @@ -309,67 +267,4 @@ void HipExecutor::init_handles() } -hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id) -{ - detail::hip_scoped_device_id_guard g(device_id_); - GKO_ASSERT_NO_HIP_ERRORS(hipStreamCreate(&stream_)); -} - - -hip_stream::~hip_stream() -{ - if (stream_) { - detail::hip_scoped_device_id_guard g(device_id_); - hipStreamDestroy(stream_); - } -} - - -hip_stream::hip_stream(hip_stream&& other) - : stream_{std::exchange(other.stream_, nullptr)}, - device_id_{std::exchange(other.device_id_, -1)} -{} - - -GKO_HIP_STREAM_STRUCT* hip_stream::get() const { return stream_; } - - -namespace log { - - -#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX - -void begin_roctx(const char* name, profile_event_category) -{ - roctxRangePush(name); -} - -void end_roctx(const char*, profile_event_category) { roctxRangePop(); } - -#else - -void begin_roctx(const char* name, profile_event_category) - GKO_NOT_COMPILED(roctx); - -void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx); - -#endif - - -} // namespace log - - -namespace kernels { -namespace hip { - - -void reset_device(int device_id) -{ - gko::detail::hip_scoped_device_id_guard guard{device_id}; - hipDeviceReset(); -} - - -} // namespace hip -} // namespace kernels } // namespace gko diff --git a/hip/base/memory.hip.cpp b/hip/base/memory.hip.cpp new file mode 100644 index 00000000000..f2a8977525f --- /dev/null +++ b/hip/base/memory.hip.cpp @@ -0,0 +1,97 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include + + +namespace gko { + + +#define GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(_operation, _size) \ + { \ + auto error_code = _operation; \ + if (error_code == hipErrorMemoryAllocation) { \ + throw AllocationError(__FILE__, __LINE__, "hip", _size); \ + } else { \ + GKO_ASSERT_NO_HIP_ERRORS(error_code); \ + } \ + } + + +#if GKO_VERBOSE_LEVEL >= 1 +#define GKO_EXIT_ON_HIP_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != hipSuccess) { \ + int device_id{-1}; \ + hipGetDevice(&device_id); \ + std::cerr << "Unrecoverable HIP error on device " << device_id \ + << " in " << __func__ << ": " \ + << hipGetErrorName(error_code) << ": " \ + << hipGetErrorString(error_code) << std::endl \ + << "Exiting program" << std::endl; \ + std::exit(error_code); \ + } \ + } +#else +#define GKO_EXIT_ON_HIP_ERROR(_operation) \ + { \ + const auto error_code = _operation; \ + if (error_code != hipSuccess) { \ + std::exit(error_code); \ + } \ + } +#endif + + +void* HipAllocator::allocate(size_type num_bytes) const +{ + void* dev_ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&dev_ptr, num_bytes), + num_bytes); + return dev_ptr; +} + + +void HipAllocator::deallocate(void* dev_ptr) const +{ + GKO_EXIT_ON_HIP_ERROR(hipFree(dev_ptr)); +} + + +} // namespace gko diff --git a/hip/base/roctx.hip.cpp b/hip/base/roctx.hip.cpp new file mode 100644 index 00000000000..9f309b93362 --- /dev/null +++ b/hip/base/roctx.hip.cpp @@ -0,0 +1,70 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include +#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX +#include +#endif + + +#include +#include + + +namespace gko { +namespace log { + + +#if GINKGO_HIP_PLATFORM_HCC && GKO_HAVE_ROCTX + +void begin_roctx(const char* name, profile_event_category) +{ + roctxRangePush(name); +} + +void end_roctx(const char*, profile_event_category) { roctxRangePop(); } + +#else + +void begin_roctx(const char* name, profile_event_category) + GKO_NOT_COMPILED(roctx); + +void end_roctx(const char*, profile_event_category) GKO_NOT_COMPILED(roctx); + +#endif + + +} // namespace log +} // namespace gko diff --git a/hip/base/stream.hip.cpp b/hip/base/stream.hip.cpp new file mode 100644 index 00000000000..e5817eb9ebd --- /dev/null +++ b/hip/base/stream.hip.cpp @@ -0,0 +1,78 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#include + + +#include + + +#include +#include +#include + + +#include "hip/base/scoped_device_id.hip.hpp" + + +namespace gko { + + +hip_stream::hip_stream() : stream_{}, device_id_{-1} {} + + +hip_stream::hip_stream(int device_id) : stream_{}, device_id_(device_id) +{ + detail::hip_scoped_device_id_guard g(device_id_); + GKO_ASSERT_NO_HIP_ERRORS(hipStreamCreate(&stream_)); +} + + +hip_stream::~hip_stream() +{ + if (stream_) { + detail::hip_scoped_device_id_guard g(device_id_); + hipStreamDestroy(stream_); + } +} + + +hip_stream::hip_stream(hip_stream&& other) + : stream_{std::exchange(other.stream_, nullptr)}, + device_id_{std::exchange(other.device_id_, -1)} +{} + + +GKO_HIP_STREAM_STRUCT* hip_stream::get() const { return stream_; } + + +} // namespace gko diff --git a/hip/test/base/CMakeLists.txt b/hip/test/base/CMakeLists.txt index 7ed0d2ceb52..f597a3d6e3d 100644 --- a/hip/test/base/CMakeLists.txt +++ b/hip/test/base/CMakeLists.txt @@ -1,6 +1,5 @@ ginkgo_create_hip_test(hip_executor) ginkgo_create_test(index_set) -ginkgo_create_test(hip_executor_reset ADDITIONAL_LIBRARIES Threads::Threads) if(GINKGO_HAVE_HWLOC) find_package(NUMA REQUIRED) ginkgo_create_hip_test(hip_executor_topology ADDITIONAL_LIBRARIES NUMA::NUMA) diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index d27dd58d132..e531fa739e6 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -109,18 +109,18 @@ class HipExecutor : public ::testing::Test { ASSERT_GT(gko::HipExecutor::get_num_devices(), 0); #ifdef GKO_TEST_NONDEFAULT_STREAM hip = gko::HipExecutor::create( - 0, omp, false, gko::default_hip_alloc_mode, stream.get()); - hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, - omp, false, gko::default_hip_alloc_mode, - other_stream.get()); + 0, omp, std::make_shared(), stream.get()); + hip2 = gko::HipExecutor::create( + gko::HipExecutor::get_num_devices() - 1, omp, + std::make_shared(), other_stream.get()); hip3 = gko::HipExecutor::create( - 0, omp, false, gko::allocation_mode::unified_global, stream.get()); + 0, omp, std::make_shared(), stream.get()); #else hip = gko::HipExecutor::create(0, omp); hip2 = gko::HipExecutor::create(gko::HipExecutor::get_num_devices() - 1, omp); - hip3 = gko::HipExecutor::create(0, omp, false, - gko::allocation_mode::unified_global); + hip3 = gko::HipExecutor::create(0, omp, + std::make_shared()); #endif } diff --git a/hip/test/utils.hip.hpp b/hip/test/utils.hip.hpp index 9337da14139..bf7073cf9a1 100644 --- a/hip/test/utils.hip.hpp +++ b/hip/test/utils.hip.hpp @@ -38,6 +38,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include "hip/base/device.hpp" @@ -60,8 +61,9 @@ class HipTestFixture : public ::testing::Test { HipTestFixture() : ref(gko::ReferenceExecutor::create()), #ifdef GKO_TEST_NONDEFAULT_STREAM + stream(0), exec(gko::HipExecutor::create( - 0, ref, false, gko::default_hip_alloc_mode, stream.get())) + 0, ref, std::make_shared(), stream.get())) #else exec(gko::HipExecutor::create(0, ref)) #endif diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 00d1049b555..1bbcb458813 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -47,7 +47,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include +#include #include #include #include @@ -121,31 +123,6 @@ constexpr allocation_mode default_hip_alloc_mode = } // namespace gko -// after intel/llvm September'22 release, which uses major version 6, they -// introduce another inline namespace _V1. -#if GINKGO_DPCPP_MAJOR_VERSION >= 6 -namespace sycl { -inline namespace _V1 { - - -class queue; - - -} // namespace _V1 -} // namespace sycl -#else // GINKGO_DPCPP_MAJOR_VERSION < 6 -inline namespace cl { -namespace sycl { - - -class queue; - - -} // namespace sycl -} // namespace cl -#endif - - /** * The enum class is for the dpcpp queue property. It's legal to use a binary * or(|) operation to combine several properties. @@ -170,24 +147,6 @@ GKO_ATTRIBUTES GKO_INLINE dpcpp_queue_property operator|(dpcpp_queue_property a, } -struct cublasContext; - -struct cusparseContext; - -struct CUstream_st; - -struct hipblasContext; - -struct hipsparseContext; - -#if GINKGO_HIP_PLATFORM_HCC -struct ihipStream_t; -#define GKO_HIP_STREAM_STRUCT ihipStream_t -#else -#define GKO_HIP_STREAM_STRUCT CUstream_st -#endif - - namespace gko { @@ -1348,26 +1307,14 @@ class EnableDeviceReset { * * @param device_reset whether to allow a device reset or not */ - void set_device_reset(bool device_reset) { device_reset_ = device_reset; } + void set_device_reset(bool device_reset) {} /** * Returns the current status of the device reset boolean for this executor. * * @return the current status of the device reset boolean for this executor. */ - bool get_device_reset() { return device_reset_; } - -protected: - /** - * Instantiate an EnableDeviceReset class - * - * @param device_reset the starting device_reset status. Defaults to false. - */ - EnableDeviceReset(bool device_reset = false) : device_reset_{device_reset} - {} - -private: - bool device_reset_{}; + bool get_device_reset() { return false; } }; @@ -1404,9 +1351,11 @@ class OmpExecutor : public detail::ExecutorBase, /** * Creates a new OmpExecutor. */ - static std::shared_ptr create() + static std::shared_ptr create( + std::shared_ptr alloc = + std::make_shared()) { - return std::shared_ptr(new OmpExecutor()); + return std::shared_ptr(new OmpExecutor(std::move(alloc))); } std::shared_ptr get_master() noexcept override; @@ -1428,7 +1377,8 @@ class OmpExecutor : public detail::ExecutorBase, scoped_device_id_guard get_scoped_device_id_guard() const override; protected: - OmpExecutor() + OmpExecutor(std::shared_ptr alloc) + : alloc_{std::move(alloc)} { this->OmpExecutor::populate_exec_info(machine_topology::get_instance()); } @@ -1450,6 +1400,8 @@ class OmpExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); bool verify_memory_to(const DpcppExecutor* dest_exec) const override; + + std::shared_ptr alloc_; }; @@ -1469,9 +1421,12 @@ using DefaultExecutor = OmpExecutor; */ class ReferenceExecutor : public OmpExecutor { public: - static std::shared_ptr create() + static std::shared_ptr create( + std::shared_ptr alloc = + std::make_shared()) { - return std::shared_ptr(new ReferenceExecutor()); + return std::shared_ptr( + new ReferenceExecutor(std::move(alloc))); } scoped_device_id_guard get_scoped_device_id_guard() const override @@ -1488,7 +1443,8 @@ class ReferenceExecutor : public OmpExecutor { } protected: - ReferenceExecutor() + ReferenceExecutor(std::shared_ptr alloc) + : OmpExecutor{std::move(alloc)} { this->ReferenceExecutor::populate_exec_info( machine_topology::get_instance()); @@ -1543,15 +1499,32 @@ class CudaExecutor : public detail::ExecutorBase, * @param device_id the CUDA device id of this device * @param master an executor on the host that is used to invoke the device * kernels - * @param device_reset whether to reset the device after the object exits - * the scope. + * @param device_reset this option no longer has any effect. * @param alloc_mode the allocation mode that the executor should operate * on. See @allocation_mode for more details + * @param stream the stream to execute operations on. + */ + [[deprecated( + "device_reset is deprecated entirely, call cudaDeviceReset directly. " + "alloc_mode was replaced by the Allocator type " + "hierarchy.")]] static std::shared_ptr + create(int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode = default_cuda_alloc_mode, + CUstream_st* stream = nullptr); + + /** + * Creates a new CudaExecutor with a custom allocator and device stream. + * + * @param device_id the CUDA device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels. + * @param alloc the allocator to use for device memory allocations. + * @param stream the stream to execute operations on. */ static std::shared_ptr create( int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_cuda_alloc_mode, + std::shared_ptr alloc = + std::make_shared(), CUstream_st* stream = nullptr); std::shared_ptr get_master() noexcept override; @@ -1672,26 +1645,15 @@ class CudaExecutor : public detail::ExecutorBase, void init_handles(); CudaExecutor(int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_cuda_alloc_mode, - CUstream_st* stream = nullptr) - : EnableDeviceReset{device_reset}, - master_(master), - alloc_mode_{alloc_mode}, - stream_{stream} + std::shared_ptr alloc, CUstream_st* stream) + : alloc_{std::move(alloc)}, master_(master), stream_{stream} { this->get_exec_info().device_id = device_id; this->get_exec_info().num_computing_units = 0; this->get_exec_info().num_pu_per_cu = 0; this->CudaExecutor::populate_exec_info( machine_topology::get_instance()); - - // it only gets attribute from device, so it should not be affected by - // DeviceReset. this->set_gpu_property(); - // increase the number of executor before any operations may be affected - // by DeviceReset. - increase_num_execs(this->get_exec_info().device_id); this->init_handles(); } @@ -1711,12 +1673,6 @@ class CudaExecutor : public detail::ExecutorBase, bool verify_memory_to(const CudaExecutor* dest_exec) const override; - static void increase_num_execs(unsigned device_id); - - static void decrease_num_execs(unsigned device_id); - - static unsigned get_num_execs(unsigned device_id); - void populate_exec_info(const machine_topology* mach_topo) override; private: @@ -1726,45 +1682,8 @@ class CudaExecutor : public detail::ExecutorBase, using handle_manager = std::unique_ptr>; handle_manager cublas_handle_; handle_manager cusparse_handle_; + std::shared_ptr alloc_; CUstream_st* stream_; - - allocation_mode alloc_mode_; -}; - - -/** - * An RAII wrapper for a custom CUDA stream. - * The stream will be created on construction and destroyed when the lifetime of - * the wrapper ends. - */ -class cuda_stream { -public: - /** Creates a new custom CUDA stream. */ - cuda_stream(int device_id = 0); - - /** Destroys the custom CUDA stream, if it wasn't moved-from already. */ - ~cuda_stream(); - - cuda_stream(const cuda_stream&) = delete; - - /** Move-constructs from an existing stream, which will be emptied. */ - cuda_stream(cuda_stream&&); - - cuda_stream& operator=(const cuda_stream&) = delete; - - /** Move-assigns from an existing stream, which will be emptied. */ - cuda_stream& operator=(cuda_stream&&) = delete; - - /** - * Returns the native CUDA stream handle. - * In a moved-from cuda_stream, this will return nullptr. - */ - CUstream_st* get() const; - -private: - CUstream_st* stream_; - - int device_id_; }; @@ -1798,10 +1717,15 @@ class HipExecutor : public detail::ExecutorBase, * @param alloc_mode the allocation mode that the executor should operate * on. See @allocation_mode for more details */ + [[deprecated("")]] static std::shared_ptr create( + int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode = default_hip_alloc_mode, + GKO_HIP_STREAM_STRUCT* stream = nullptr); + static std::shared_ptr create( int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_hip_alloc_mode, + std::shared_ptr alloc = + std::make_shared(), GKO_HIP_STREAM_STRUCT* stream = nullptr); std::shared_ptr get_master() noexcept override; @@ -1916,25 +1840,15 @@ class HipExecutor : public detail::ExecutorBase, void init_handles(); HipExecutor(int device_id, std::shared_ptr master, - bool device_reset = false, - allocation_mode alloc_mode = default_hip_alloc_mode, - GKO_HIP_STREAM_STRUCT* stream = nullptr) - : EnableDeviceReset{device_reset}, - master_(master), - alloc_mode_(alloc_mode), - stream_{stream} + std::shared_ptr alloc, + GKO_HIP_STREAM_STRUCT* stream) + : master_{std::move(master)}, alloc_{std::move(alloc)}, stream_{stream} { this->get_exec_info().device_id = device_id; this->get_exec_info().num_computing_units = 0; this->get_exec_info().num_pu_per_cu = 0; this->HipExecutor::populate_exec_info(machine_topology::get_instance()); - - // it only gets attribute from device, so it should not be affected by - // DeviceReset. this->set_gpu_property(); - // increase the number of executor before any operations may be affected - // by DeviceReset. - increase_num_execs(this->get_exec_info().device_id); this->init_handles(); } @@ -1954,12 +1868,6 @@ class HipExecutor : public detail::ExecutorBase, bool verify_memory_to(const HipExecutor* dest_exec) const override; - static void increase_num_execs(int device_id); - - static void decrease_num_execs(int device_id); - - static int get_num_execs(int device_id); - void populate_exec_info(const machine_topology* mach_topo) override; private: @@ -1969,45 +1877,8 @@ class HipExecutor : public detail::ExecutorBase, using handle_manager = std::unique_ptr>; handle_manager hipblas_handle_; handle_manager hipsparse_handle_; - - allocation_mode alloc_mode_; - GKO_HIP_STREAM_STRUCT* stream_; -}; - - -/** - * An RAII wrapper for a custom HIP stream. - * The stream will be created on construction and destroyed when the lifetime of - * the wrapper ends. - */ -class hip_stream { -public: - /** Creates a new custom HIP stream. */ - hip_stream(int device_id = 0); - - /** Destroys the custom HIP stream, if it wasn't moved-from already. */ - ~hip_stream(); - - hip_stream(const hip_stream&) = delete; - - /** Move-constructs from an existing stream, which will be emptied. */ - hip_stream(hip_stream&&); - - hip_stream& operator=(const hip_stream&) = delete; - - /** Move-assigns from an existing stream, which will be emptied. */ - hip_stream& operator=(hip_stream&&) = delete; - - /** - * Returns the native HIP stream handle. - * In a moved-from hip_stream, this will return nullptr. - */ - GKO_HIP_STREAM_STRUCT* get() const; - -private: + std::shared_ptr alloc_; GKO_HIP_STREAM_STRUCT* stream_; - - int device_id_; }; @@ -2043,6 +1914,28 @@ class DpcppExecutor : public detail::ExecutorBase, std::string device_type = "all", dpcpp_queue_property property = dpcpp_queue_property::in_order); + /** + * Creates a new DpcppExecutor from an existing SYCL queue. + * + * @param queue the DPCPP device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels + */ + static std::shared_ptr create( + sycl::queue* queue, std::shared_ptr master); + + /** + * Creates a new DpcppExecutor from an existing SYCL queue. + * + * @param queue the DPCPP device id of this device + * @param master an executor on the host that is used to invoke the device + * kernels + * @param alloc the allocator used for memory allocation + */ + static std::shared_ptr create( + sycl::queue* queue, std::shared_ptr master, + std::shared_ptr alloc); + std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; diff --git a/include/ginkgo/core/base/fwd_defs.hpp b/include/ginkgo/core/base/fwd_defs.hpp new file mode 100644 index 00000000000..03c3a2d3b09 --- /dev/null +++ b/include/ginkgo/core/base/fwd_defs.hpp @@ -0,0 +1,82 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ +#define GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ + +#include + + +struct cublasContext; + +struct cusparseContext; + +struct CUstream_st; + +struct hipblasContext; + +struct hipsparseContext; + +#if GINKGO_HIP_PLATFORM_HCC +struct ihipStream_t; +#define GKO_HIP_STREAM_STRUCT ihipStream_t +#else +#define GKO_HIP_STREAM_STRUCT CUstream_st +#endif + + +// after intel/llvm September'22 release, which uses major version 6, they +// introduce another inline namespace _V1. +#if GINKGO_DPCPP_MAJOR_VERSION >= 6 +namespace sycl { +inline namespace _V1 { + + +class queue; + + +} // namespace _V1 +} // namespace sycl +#else // GINKGO_DPCPP_MAJOR_VERSION < 6 +inline namespace cl { +namespace sycl { + + +class queue; + + +} // namespace sycl +} // namespace cl +#endif + + +#endif // GKO_PUBLIC_CORE_BASE_FWD_DEFS_HPP_ diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp new file mode 100644 index 00000000000..ec25920dcea --- /dev/null +++ b/include/ginkgo/core/base/memory.hpp @@ -0,0 +1,211 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ +#define GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ + + +#include +#include + + +namespace gko { + + +/** + * Provides generic allocation and deallocation functionality to be used by an + * Executor. + */ +class Allocator { +public: + virtual ~Allocator() = default; + + virtual void* allocate(size_type num_bytes) const = 0; + + virtual void deallocate(void* ptr) const = 0; +}; + + +/** + * Implement this interface to provide an allocator for OmpExecutor or + * ReferenceExecutor. + */ +class CpuAllocatorBase : public Allocator {}; + + +/** + * Implement this interface to provide an allocator for CudaExecutor. + */ +class CudaAllocatorBase : public Allocator {}; + + +/** + * Implement this interface to provide an allocator for HipExecutor. + */ +class HipAllocatorBase : public Allocator {}; + + +/** + * Implement this interface to provide an allocator for DpcppExecutor. + */ +class DpcppAllocatorBase : public Allocator { +public: + DpcppAllocatorBase(sycl::queue* queue); + +protected: + virtual void* allocate_impl(sycl::queue* queue, + size_type num_bytes) const = 0; + + virtual void deallocate_impl(sycl::queue* queue, void* ptr) const = 0; + +private: + sycl::queue* queue_; +}; + + +/** + * Allocator using new/delete. + */ +class CpuAllocator : public CpuAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; +}; + + +/** + * Allocator using cudaMalloc. + */ +class CudaAllocator : public CudaAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; +}; + + +/* + * Allocator using cudaMallocAsync. + */ +class CudaAsyncAllocator : public CudaAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; + + CudaAsyncAllocator(CUstream_st* stream); + +private: + CUstream_st* stream_; +}; + + +/* + * Allocator using cudaMallocManaged + */ +class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; + + CudaUnifiedAllocator(int device_id); + + CudaUnifiedAllocator(int device_id, unsigned int flags); + +private: + int device_id_; + unsigned int flags_; +}; + + +/* + * Allocator using cudaMallocHost. + */ +class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; + + CudaHostAllocator(int device_id); + +private: + int device_id_; +}; + + +/* + * Allocator using hipMalloc. + */ +class HipAllocator : public HipAllocatorBase { +public: + void* allocate(size_type num_bytes) const override; + + void deallocate(void* ptr) const override; +}; + + +/* + * Allocator using sycl::malloc_device. + */ +class DpcppAllocator : public DpcppAllocatorBase { +public: + using DpcppAllocatorBase::DpcppAllocatorBase; + +protected: + void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + + void deallocate_impl(sycl::queue* queue, void* ptr) const override; +}; + + +/* + * Allocator using sycl::malloc_shared. + */ +class DpcppUnifiedAllocator : public DpcppAllocatorBase, + public CpuAllocatorBase { +public: + using DpcppAllocatorBase::DpcppAllocatorBase; + +protected: + void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + + void deallocate_impl(sycl::queue* queue, void* ptr) const override; +}; + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_MEMORY_HPP_ diff --git a/include/ginkgo/core/base/stream.hpp b/include/ginkgo/core/base/stream.hpp new file mode 100644 index 00000000000..4bb4aeecf9e --- /dev/null +++ b/include/ginkgo/core/base/stream.hpp @@ -0,0 +1,124 @@ +/************************************************************* +Copyright (c) 2017-2023, the Ginkgo authors +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*************************************************************/ + +#ifndef GKO_PUBLIC_CORE_BASE_STREAM_HPP_ +#define GKO_PUBLIC_CORE_BASE_STREAM_HPP_ + + +#include + + +namespace gko { + + +/** + * An RAII wrapper for a custom CUDA stream. + * The stream will be created on construction and destroyed when the lifetime of + * the wrapper ends. + */ +class cuda_stream { +public: + /** Creates an empty stream wrapper, representing the default stream. */ + cuda_stream(); + + /** Creates a new custom CUDA stream. */ + cuda_stream(int device_id); + + /** Destroys the custom CUDA stream, if it isn't empty. */ + ~cuda_stream(); + + cuda_stream(const cuda_stream&) = delete; + + /** Move-constructs from an existing stream, which will be emptied. */ + cuda_stream(cuda_stream&&); + + cuda_stream& operator=(const cuda_stream&) = delete; + + /** Move-assigns from an existing stream, which will be emptied. */ + cuda_stream& operator=(cuda_stream&&) = delete; + + /** + * Returns the native CUDA stream handle. + * In an empty cuda_stream, this will return nullptr. + */ + CUstream_st* get() const; + +private: + CUstream_st* stream_; + + int device_id_; +}; + + +/** + * An RAII wrapper for a custom HIP stream. + * The stream will be created on construction and destroyed when the lifetime of + * the wrapper ends. + */ +class hip_stream { +public: + /** Creates an empty stream wrapper, representing the default stream. */ + hip_stream(); + + /** Creates a new custom HIP stream. */ + hip_stream(int device_id); + + /** Destroys the custom HIP stream, if it isn't empty. */ + ~hip_stream(); + + hip_stream(const hip_stream&) = delete; + + /** Move-constructs from an existing stream, which will be emptied. */ + hip_stream(hip_stream&&); + + hip_stream& operator=(const hip_stream&) = delete; + + /** Move-assigns from an existing stream, which will be emptied. */ + hip_stream& operator=(hip_stream&&) = delete; + + /** + * Returns the native HIP stream handle. + * In an empty hip_stream, this will return nullptr. + */ + GKO_HIP_STREAM_STRUCT* get() const; + +private: + GKO_HIP_STREAM_STRUCT* stream_; + + int device_id_; +}; + + +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_BASE_EXECUTOR_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 3e788ed65ee..4c460f2e1e9 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -48,6 +48,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -55,6 +56,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include @@ -65,6 +67,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #include #include +#include #include #include #include diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 25482cf18c8..33e6258fbbd 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -44,6 +44,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include + + #ifdef GKO_COMPILING_CUDA #include "cuda/base/device.hpp" @@ -106,8 +109,8 @@ inline void init_executor(std::shared_ptr ref, if (gko::CudaExecutor::get_num_devices() == 0) { throw std::runtime_error{"No suitable CUDA devices"}; } - exec = gko::CudaExecutor::create(0, ref, false, - gko::default_cuda_alloc_mode, stream); + exec = gko::CudaExecutor::create( + 0, ref, std::make_shared(stream), stream); } } @@ -119,8 +122,8 @@ inline void init_executor(std::shared_ptr ref, if (gko::HipExecutor::get_num_devices() == 0) { throw std::runtime_error{"No suitable HIP devices"}; } - exec = gko::HipExecutor::create(0, ref, false, gko::default_hip_alloc_mode, - stream); + exec = gko::HipExecutor::create( + 0, ref, std::make_shared(), stream); } @@ -146,7 +149,13 @@ class CommonTestFixture : public ::testing::Test { #endif using index_type = int; - CommonTestFixture() : ref{gko::ReferenceExecutor::create()} + CommonTestFixture() + : +#if defined(GKO_TEST_NONDEFAULT_STREAM) && \ + (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) + stream{0}, +#endif + ref{gko::ReferenceExecutor::create()} { #if defined(GKO_TEST_NONDEFAULT_STREAM) && \ (defined(GKO_COMPILING_CUDA) || defined(GKO_COMPILING_HIP)) diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index 59c3f1e3f3b..d8c94e01804 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -44,6 +44,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include inline void init_executor(std::shared_ptr, @@ -71,7 +72,7 @@ inline void init_executor(std::shared_ptr ref, exec = gko::CudaExecutor::create( gko::experimental::mpi::map_rank_to_device_id( MPI_COMM_WORLD, gko::CudaExecutor::get_num_devices()), - ref, false, gko::default_cuda_alloc_mode, stream); + ref, std::make_shared(), stream); } } @@ -86,7 +87,7 @@ inline void init_executor(std::shared_ptr ref, exec = gko::HipExecutor::create( gko::experimental::mpi::map_rank_to_device_id( MPI_COMM_WORLD, gko::HipExecutor::get_num_devices()), - ref, false, gko::default_hip_alloc_mode, stream); + ref, std::make_shared(), stream); }