From 9fd6482ce850497e84cabc7d61f423d024624ba0 Mon Sep 17 00:00:00 2001 From: Tobias Ribizel Date: Thu, 6 Jul 2023 15:45:46 +0200 Subject: [PATCH] update interface and remove DPC++ allocator --- core/base/memory.cpp | 4 +- core/device_hooks/cuda_hooks.cpp | 32 ++++-- core/device_hooks/dpcpp_hooks.cpp | 28 ----- core/device_hooks/hip_hooks.cpp | 51 ++++++++- core/test/base/executor.cpp | 2 +- cuda/base/executor.cpp | 9 +- cuda/base/memory.cpp | 43 ++++++-- dpcpp/CMakeLists.txt | 1 - dpcpp/base/memory.dp.cpp | 85 --------------- dpcpp/test/base/CMakeLists.txt | 1 - dpcpp/test/base/memory.dp.cpp | 98 ----------------- hip/base/executor.hip.cpp | 9 +- hip/base/memory.hip.cpp | 123 ++++++++++++++++++++- include/ginkgo/core/base/executor.hpp | 33 ++---- include/ginkgo/core/base/memory.hpp | 151 ++++++++++++++++++-------- 15 files changed, 348 insertions(+), 322 deletions(-) delete mode 100644 dpcpp/base/memory.dp.cpp delete mode 100644 dpcpp/test/base/memory.dp.cpp diff --git a/core/base/memory.cpp b/core/base/memory.cpp index 4e9f0b7e24a..b6c6f8f265c 100644 --- a/core/base/memory.cpp +++ b/core/base/memory.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -void* CpuAllocator::allocate(size_type num_bytes) const +void* CpuAllocator::allocate(size_type num_bytes) { auto ptr = ::operator new (num_bytes, std::nothrow_t{}); GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes); @@ -50,7 +50,7 @@ void* CpuAllocator::allocate(size_type num_bytes) const } -void CpuAllocator::deallocate(void* ptr) const +void CpuAllocator::deallocate(void* ptr) { ::operator delete (ptr, std::nothrow_t{}); } diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index f8489908cc9..03ab12deb46 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -54,43 +54,55 @@ version version_info::get_cuda_version() noexcept } -void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda); +void* CudaAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); -void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); +void CudaAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream) GKO_NOT_COMPILED(cuda); -void* CudaAsyncAllocator::allocate(size_type num_bytes) const - GKO_NOT_COMPILED(cuda); +void* CudaAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); + +void CudaAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); -void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + +bool CudaAsyncAllocator::check_environment(int device_id, + CUstream_st* stream) const + GKO_NOT_COMPILED(cuda); CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) GKO_NOT_COMPILED(cuda); -void* CudaUnifiedAllocator::allocate(size_type num_bytes) const +void* CudaUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); -void CudaUnifiedAllocator::deallocate(void* dev_ptr) const +void CudaUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); + + +bool CudaUnifiedAllocator::check_environment(int device_id, + CUstream_st* stream) const GKO_NOT_COMPILED(cuda); CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda); -void* CudaHostAllocator::allocate(size_type num_bytes) const - GKO_NOT_COMPILED(cuda); +void* CudaHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda); + +void CudaHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda); -void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda); + +bool CudaHostAllocator::check_environment(int device_id, + CUstream_st* stream) const + GKO_NOT_COMPILED(cuda); std::shared_ptr CudaExecutor::create( diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 1981c712872..532e9c55bbe 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -53,34 +53,6 @@ version version_info::get_dpcpp_version() noexcept } -DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp); - - -void* DpcppAllocatorBase::allocate(size_type num_bytes) const - GKO_NOT_COMPILED(dpcpp); - - -void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp); - - -void* DpcppAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) const - GKO_NOT_COMPILED(dpcpp); - - -void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const - GKO_NOT_COMPILED(dpcpp); - - -void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) const - GKO_NOT_COMPILED(dpcpp); - - -void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const - GKO_NOT_COMPILED(dpcpp); - - std::shared_ptr DpcppExecutor::create( int device_id, std::shared_ptr master, std::string device_type, dpcpp_queue_property property) diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index ba7563f1ef0..dec1de15933 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -55,10 +55,54 @@ version version_info::get_hip_version() noexcept } -void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip); +void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); -void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip); +void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream) + GKO_NOT_COMPILED(hip); + + +void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +bool HipAsyncAllocator::check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + GKO_NOT_COMPILED(hip); + + +HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags) + GKO_NOT_COMPILED(hip); + + +void* HipUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +bool HipUnifiedAllocator::check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + GKO_NOT_COMPILED(hip); + + +HipHostAllocator::HipHostAllocator(int device_id) GKO_NOT_COMPILED(hip); + + +void* HipHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip); + + +void HipHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip); + + +bool HipHostAllocator::check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + GKO_NOT_COMPILED(hip); std::shared_ptr HipExecutor::create( @@ -76,8 +120,7 @@ std::shared_ptr HipExecutor::create( std::shared_ptr alloc, GKO_HIP_STREAM_STRUCT* stream) { return std::shared_ptr( - new HipExecutor(device_id, std::move(master), - std::make_shared(), stream)); + new HipExecutor(device_id, std::move(master), alloc, stream)); } diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index 94e7bc02d79..13cba09e2b6 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -386,7 +386,7 @@ TEST(Executor, CanVerifyMemory) struct MockAllocator : gko::CpuAllocator { - void deallocate(void* ptr) const noexcept override + void deallocate(void* ptr) noexcept override { called_free = true; CpuAllocator::deallocate(ptr); diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index f6e838dd2dd..faf90037a0f 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -80,9 +80,8 @@ std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, cudaStream_t stream) { - return std::shared_ptr( - new CudaExecutor(device_id, std::move(master), - allocator_from_mode(device_id, alloc_mode), stream)); + return create(device_id, master, allocator_from_mode(device_id, alloc_mode), + stream); } @@ -90,6 +89,10 @@ std::shared_ptr CudaExecutor::create( int device_id, std::shared_ptr master, std::shared_ptr alloc, cudaStream_t stream) { + if (!alloc->check_environment(device_id, stream)) { + throw Error{__FILE__, __LINE__, + "Allocator uses incorrect stream or device ID."}; + } return std::shared_ptr(new CudaExecutor( device_id, std::move(master), std::move(alloc), stream)); } diff --git a/cuda/base/memory.cpp b/cuda/base/memory.cpp index afc1f9f62fa..08c64c0ba05 100644 --- a/cuda/base/memory.cpp +++ b/cuda/base/memory.cpp @@ -82,7 +82,7 @@ namespace gko { #endif -void* CudaAllocator::allocate(size_type num_bytes) const +void* CudaAllocator::allocate(size_type num_bytes) { void* ptr{}; GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), @@ -91,7 +91,7 @@ void* CudaAllocator::allocate(size_type num_bytes) const } -void CudaAllocator::deallocate(void* ptr) const +void CudaAllocator::deallocate(void* ptr) { GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); } @@ -103,7 +103,7 @@ void CudaAllocator::deallocate(void* ptr) const CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} -void* CudaAsyncAllocator::allocate(size_type num_bytes) const +void* CudaAsyncAllocator::allocate(size_type num_bytes) { void* ptr{}; GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS( @@ -112,7 +112,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const } -void CudaAsyncAllocator::deallocate(void* ptr) const +void CudaAsyncAllocator::deallocate(void* ptr) { GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_)); } @@ -121,10 +121,10 @@ void CudaAsyncAllocator::deallocate(void* ptr) const #else // Fall back to regular allocation -CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {} +CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {} -void* CudaAsyncAllocator::allocate(size_type num_bytes) const +void* CudaAsyncAllocator::allocate(size_type num_bytes) { void* ptr{}; GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes), @@ -133,7 +133,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const } -void CudaAsyncAllocator::deallocate(void* ptr) const +void CudaAsyncAllocator::deallocate(void* ptr) { GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr)); } @@ -142,6 +142,13 @@ void CudaAsyncAllocator::deallocate(void* ptr) const #endif +bool CudaAsyncAllocator::check_environment(int device_id, + CUstream_st* stream) const +{ + return stream == stream_; +} + + CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id) : CudaUnifiedAllocator{device_id, cudaMemAttachGlobal} {} @@ -152,7 +159,7 @@ CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags) {} -void* CudaUnifiedAllocator::allocate(size_type num_bytes) const +void* CudaUnifiedAllocator::allocate(size_type num_bytes) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -163,7 +170,7 @@ void* CudaUnifiedAllocator::allocate(size_type num_bytes) const } -void CudaUnifiedAllocator::deallocate(void* ptr) const +void CudaUnifiedAllocator::deallocate(void* ptr) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -171,10 +178,17 @@ void CudaUnifiedAllocator::deallocate(void* ptr) const } +bool CudaUnifiedAllocator::check_environment(int device_id, + CUstream_st* stream) const +{ + return device_id == device_id_; +} + + CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {} -void* CudaHostAllocator::allocate(size_type num_bytes) const +void* CudaHostAllocator::allocate(size_type num_bytes) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -185,7 +199,7 @@ void* CudaHostAllocator::allocate(size_type num_bytes) const } -void CudaHostAllocator::deallocate(void* ptr) const +void CudaHostAllocator::deallocate(void* ptr) { // we need to set the device ID in case this gets used in a host executor detail::cuda_scoped_device_id_guard g(device_id_); @@ -193,4 +207,11 @@ void CudaHostAllocator::deallocate(void* ptr) const } +bool CudaHostAllocator::check_environment(int device_id, + CUstream_st* stream) const +{ + return device_id == device_id_; +} + + } // namespace gko diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 55763ca5525..31b5e0543ba 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -10,7 +10,6 @@ target_sources(ginkgo_dpcpp base/executor.dp.cpp base/helper.dp.cpp base/index_set_kernels.dp.cpp - base/memory.dp.cpp base/scoped_device_id.dp.cpp base/timer.dp.cpp base/version.dp.cpp diff --git a/dpcpp/base/memory.dp.cpp b/dpcpp/base/memory.dp.cpp deleted file mode 100644 index 2582fa331a0..00000000000 --- a/dpcpp/base/memory.dp.cpp +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include - - -namespace gko { - - -DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {} - - -void* DpcppAllocatorBase::allocate(size_type num_bytes) const -{ - return this->allocate_impl(queue_, num_bytes); -} - - -void DpcppAllocatorBase::deallocate(void* ptr) const -{ - this->deallocate_impl(queue_, ptr); -} - - -void* DpcppAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) const -{ - return sycl::malloc_device(num_bytes, *queue); -} - - -void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const -{ - queue->wait_and_throw(); - sycl::free(ptr, queue->get_context()); -} - - -void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue, - size_type num_bytes) -{ - return sycl::malloc_shared(num_bytes, *queue); -} - - -void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) -{ - queue->wait_and_throw(); - sycl::free(ptr, queue->get_context()); -} - - -} // namespace gko diff --git a/dpcpp/test/base/CMakeLists.txt b/dpcpp/test/base/CMakeLists.txt index 5c0ca601f04..bb9c8a75050 100644 --- a/dpcpp/test/base/CMakeLists.txt +++ b/dpcpp/test/base/CMakeLists.txt @@ -3,4 +3,3 @@ ginkgo_create_dpcpp_test(dim3) ginkgo_create_dpcpp_test(kernel_launch) # set correct flags for kernel_launch.hpp target_compile_definitions(dpcpp_test_base_kernel_launch PRIVATE GKO_COMPILING_DPCPP) -ginkgo_create_dpcpp_test(memory) \ No newline at end of file diff --git a/dpcpp/test/base/memory.dp.cpp b/dpcpp/test/base/memory.dp.cpp deleted file mode 100644 index e587660cde3..00000000000 --- a/dpcpp/test/base/memory.dp.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/************************************************************* -Copyright (c) 2017-2023, the Ginkgo authors -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright -notice, this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright -notice, this list of conditions and the following disclaimer in the -documentation and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from -this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED -TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A -PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT -HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT -LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, -DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY -THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*************************************************************/ - -#include - - -#include -#include - - -#include - - -#include -#include -#include - - -#include "dpcpp/test/utils.hpp" - - -namespace { - - -class Memory : public ::testing::Test { -protected: - Memory() - : exec{gko::DpcppExecutor::create(0, gko::OmpExecutor::create())}, - host_exec_with_unified{gko::OmpExecutor::create( - std::make_shared(exec->get_queue()))}, - exec_with_unified{gko::DpcppExecutor::create( - exec->get_queue(), host_exec_with_unified, - std::make_shared(exec->get_queue()))} - {} - - std::shared_ptr exec; - std::shared_ptr host_exec_with_unified; - std::shared_ptr exec_with_unified; -}; - - -TEST_F(Memory, DeviceAllocationWorks) -{ - gko::array data{exec, {1, 2}}; - - GKO_ASSERT_ARRAY_EQ(data, I({1, 2})); -} - - -TEST_F(Memory, UnifiedDeviceAllocationWorks) -{ - gko::array data{exec_with_unified, {1, 2}}; - exec->synchronize(); - - ASSERT_EQ(data.get_const_data()[0], 1); - ASSERT_EQ(data.get_const_data()[1], 2); -} - - -TEST_F(Memory, HostUnifiedAllocationWorks) -{ - gko::array data{host_exec_with_unified, {1, 2}}; - - ASSERT_EQ(data.get_const_data()[0], 1); - ASSERT_EQ(data.get_const_data()[1], 2); -} - - -} // namespace diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 6b4b0fd5ddc..2df5c9a4847 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -60,9 +60,8 @@ std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, bool device_reset, allocation_mode alloc_mode, hipStream_t stream) { - return std::shared_ptr( - new HipExecutor(device_id, std::move(master), - std::make_shared(), stream)); + return create(device_id, std::move(master), + std::make_shared(), stream); } @@ -70,6 +69,10 @@ std::shared_ptr HipExecutor::create( int device_id, std::shared_ptr master, std::shared_ptr alloc, hipStream_t stream) { + if (!alloc->check_environment(device_id, stream)) { + throw Error{__FILE__, __LINE__, + "Allocator uses incorrect stream or device ID."}; + } return std::shared_ptr(new HipExecutor( device_id, std::move(master), std::move(alloc), stream)); } diff --git a/hip/base/memory.hip.cpp b/hip/base/memory.hip.cpp index f2a8977525f..7acb208173a 100644 --- a/hip/base/memory.hip.cpp +++ b/hip/base/memory.hip.cpp @@ -39,6 +39,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include "hip/base/scoped_device_id.hip.hpp" + + namespace gko { @@ -79,7 +82,7 @@ namespace gko { #endif -void* HipAllocator::allocate(size_type num_bytes) const +void* HipAllocator::allocate(size_type num_bytes) { void* dev_ptr{}; GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&dev_ptr, num_bytes), @@ -88,10 +91,126 @@ void* HipAllocator::allocate(size_type num_bytes) const } -void HipAllocator::deallocate(void* dev_ptr) const +void HipAllocator::deallocate(void* dev_ptr) { GKO_EXIT_ON_HIP_ERROR(hipFree(dev_ptr)); } +#if HIP_VERSION_MAJOR >= 5 + + +HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} {} + + +void* HipAsyncAllocator::allocate(size_type num_bytes) +{ + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS( + hipMallocAsync(&ptr, num_bytes, stream_), num_bytes); + return ptr; +} + + +void HipAsyncAllocator::deallocate(void* ptr) +{ + GKO_EXIT_ON_HIP_ERROR(hipFreeAsync(ptr, stream_)); +} + + +#else // Fall back to regular allocation + + +HipAsyncAllocator::HipAsyncAllocator(hipStream_t stream) : stream_{stream} {} + + +void* HipAsyncAllocator::allocate(size_type num_bytes) +{ + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipMalloc(&ptr, num_bytes), num_bytes); + return ptr; +} + + +void HipAsyncAllocator::deallocate(void* ptr) +{ + GKO_EXIT_ON_HIP_ERROR(hipFree(ptr)); +} + + +#endif + + +bool HipAsyncAllocator::check_environment(int device_id, + hipStream_t stream) const +{ + return stream == stream_; +} + + +HipUnifiedAllocator::HipUnifiedAllocator(int device_id) + : HipUnifiedAllocator{device_id, hipMemAttachGlobal} +{} + + +HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags) + : device_id_{device_id}, flags_{flags} +{} + + +void* HipUnifiedAllocator::allocate(size_type num_bytes) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS( + hipMallocManaged(&ptr, num_bytes, flags_), num_bytes); + return ptr; +} + + +void HipUnifiedAllocator::deallocate(void* ptr) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_HIP_ERROR(hipFree(ptr)); +} + + +bool HipUnifiedAllocator::check_environment(int device_id, + hipStream_t stream) const +{ + return device_id == device_id_; +} + + +HipHostAllocator::HipHostAllocator(int device_id) : device_id_{device_id} {} + + +void* HipHostAllocator::allocate(size_type num_bytes) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + void* ptr{}; + GKO_ASSERT_NO_HIP_ALLOCATION_ERRORS(hipHostMalloc(&ptr, num_bytes), + num_bytes); + return ptr; +} + + +void HipHostAllocator::deallocate(void* ptr) +{ + // we need to set the device ID in case this gets used in a host executor + detail::hip_scoped_device_id_guard g(device_id_); + GKO_EXIT_ON_HIP_ERROR(hipFreeHost(ptr)); +} + + +bool HipHostAllocator::check_environment(int device_id, + hipStream_t stream) const +{ + return device_id == device_id_; +} + + } // namespace gko diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 4545b216f86..f033873e392 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -1717,10 +1717,13 @@ class HipExecutor : public detail::ExecutorBase, * @param alloc_mode the allocation mode that the executor should operate * on. See @allocation_mode for more details */ - [[deprecated("")]] static std::shared_ptr create( - int device_id, std::shared_ptr master, bool device_reset, - allocation_mode alloc_mode = default_hip_alloc_mode, - GKO_HIP_STREAM_STRUCT* stream = nullptr); + [[deprecated( + "device_reset is deprecated entirely, call hipDeviceReset directly. " + "alloc_mode was replaced by the Allocator type " + "hierarchy.")]] static std::shared_ptr + create(int device_id, std::shared_ptr master, bool device_reset, + allocation_mode alloc_mode = default_hip_alloc_mode, + GKO_HIP_STREAM_STRUCT* stream = nullptr); static std::shared_ptr create( int device_id, std::shared_ptr master, @@ -1914,28 +1917,6 @@ class DpcppExecutor : public detail::ExecutorBase, std::string device_type = "all", dpcpp_queue_property property = dpcpp_queue_property::in_order); - /** - * Creates a new DpcppExecutor from an existing SYCL queue. - * - * @param queue the DPCPP device id of this device - * @param master an executor on the host that is used to invoke the device - * kernels - */ - static std::shared_ptr create( - sycl::queue* queue, std::shared_ptr master); - - /** - * Creates a new DpcppExecutor from an existing SYCL queue. - * - * @param queue the DPCPP device id of this device - * @param master an executor on the host that is used to invoke the device - * kernels - * @param alloc the allocator used for memory allocation - */ - static std::shared_ptr create( - sycl::queue* queue, std::shared_ptr master, - std::shared_ptr alloc); - std::shared_ptr get_master() noexcept override; std::shared_ptr get_master() const noexcept override; diff --git a/include/ginkgo/core/base/memory.hpp b/include/ginkgo/core/base/memory.hpp index 872a25a9a33..1086c9aacb4 100644 --- a/include/ginkgo/core/base/memory.hpp +++ b/include/ginkgo/core/base/memory.hpp @@ -49,9 +49,9 @@ class Allocator { public: virtual ~Allocator() = default; - virtual void* allocate(size_type num_bytes) const = 0; + virtual void* allocate(size_type num_bytes) = 0; - virtual void deallocate(void* ptr) const = 0; + virtual void deallocate(void* ptr) = 0; }; @@ -65,34 +65,49 @@ class CpuAllocatorBase : public Allocator {}; /** * Implement this interface to provide an allocator for CudaExecutor. */ -class CudaAllocatorBase : public Allocator {}; +class CudaAllocatorBase : public Allocator { + friend class CudaExecutor; - -/** - * Implement this interface to provide an allocator for HipExecutor. - */ -class HipAllocatorBase : public Allocator {}; +protected: + /** + * Checks if the allocator can be used safely with the provided device ID + * and stream. The check is necessary to ensure safe usage of stream-ordered + * allocators and unified shared memory allocators. + * + * @param device_id the device ID the allocator will be used in. + * @param stream the stream the allocator will be used with. + * @return true if and only if the allocator can be used by CudaExecutor in + * the given environment. + */ + virtual bool check_environment(int device_id, CUstream_st* stream) const + { + return true; + } +}; /** - * Implement this interface to provide an allocator for DpcppExecutor. + * Implement this interface to provide an allocator for HipExecutor. */ -class DpcppAllocatorBase : public Allocator { -public: - DpcppAllocatorBase(sycl::queue* queue); - - void* allocate(size_type num_bytes) const final; - - void deallocate(void* ptr) const final; +class HipAllocatorBase : public Allocator { + friend class HipExecutor; protected: - virtual void* allocate_impl(sycl::queue* queue, - size_type num_bytes) const = 0; - - virtual void deallocate_impl(sycl::queue* queue, void* ptr) const = 0; - -private: - sycl::queue* queue_; + /** + * Checks if the allocator can be used safely with the provided device ID + * and stream. The check is necessary to ensure safe usage of stream-ordered + * allocators and unified shared memory allocators. + * + * @param device_id the device ID the allocator will be used in. + * @param stream the stream the allocator will be used with. + * @return true if and only if the allocator can be used by CudaExecutor in + * the given environment. + */ + virtual bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const + { + return true; + } }; @@ -101,9 +116,9 @@ class DpcppAllocatorBase : public Allocator { */ class CpuAllocator : public CpuAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; }; @@ -112,9 +127,9 @@ class CpuAllocator : public CpuAllocatorBase { */ class CudaAllocator : public CudaAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; }; @@ -123,12 +138,14 @@ class CudaAllocator : public CudaAllocatorBase { */ class CudaAsyncAllocator : public CudaAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; CudaAsyncAllocator(CUstream_st* stream); + bool check_environment(int device_id, CUstream_st* stream) const override; + private: CUstream_st* stream_; }; @@ -139,14 +156,17 @@ class CudaAsyncAllocator : public CudaAllocatorBase { */ class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; CudaUnifiedAllocator(int device_id); CudaUnifiedAllocator(int device_id, unsigned int flags); +protected: + bool check_environment(int device_id, CUstream_st* stream) const override; + private: int device_id_; unsigned int flags_; @@ -154,16 +174,19 @@ class CudaUnifiedAllocator : public CudaAllocatorBase, public CpuAllocatorBase { /* - * Allocator using cudaMallocHost. + * Allocator using cudaHostMalloc. */ class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; CudaHostAllocator(int device_id); +protected: + bool check_environment(int device_id, CUstream_st* stream) const override; + private: int device_id_; }; @@ -174,38 +197,72 @@ class CudaHostAllocator : public CudaAllocatorBase, public CpuAllocatorBase { */ class HipAllocator : public HipAllocatorBase { public: - void* allocate(size_type num_bytes) const override; + void* allocate(size_type num_bytes) override; + + void deallocate(void* ptr) override; +}; + + +/* + * Allocator using hipMallocAsync. + */ +class HipAsyncAllocator : public HipAllocatorBase { +public: + void* allocate(size_type num_bytes) override; - void deallocate(void* ptr) const override; + void deallocate(void* ptr) override; + + HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream); + +protected: + bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const override; + +private: + GKO_HIP_STREAM_STRUCT* stream_; }; /* - * Allocator using sycl::malloc_device. + * Allocator using hipMallocManaged */ -class DpcppAllocator : public DpcppAllocatorBase { +class HipUnifiedAllocator : public HipAllocatorBase, public CpuAllocatorBase { public: - using DpcppAllocatorBase::DpcppAllocatorBase; + void* allocate(size_type num_bytes) override; + + void deallocate(void* ptr) override; + + HipUnifiedAllocator(int device_id); + + HipUnifiedAllocator(int device_id, unsigned int flags); protected: - void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const override; - void deallocate_impl(sycl::queue* queue, void* ptr) const override; +private: + int device_id_; + unsigned int flags_; }; /* - * Allocator using sycl::malloc_shared. + * Allocator using hipHostAlloc. */ -class DpcppUnifiedAllocator : public DpcppAllocatorBase, - public CpuAllocatorBase { +class HipHostAllocator : public HipAllocatorBase, public CpuAllocatorBase { public: - using DpcppAllocatorBase::DpcppAllocatorBase; + void* allocate(size_type num_bytes) override; + + void deallocate(void* ptr) override; + + HipHostAllocator(int device_id); protected: - void* allocate_impl(sycl::queue* queue, size_type num_bytes) const override; + bool check_environment(int device_id, + GKO_HIP_STREAM_STRUCT* stream) const override; - void deallocate_impl(sycl::queue* queue, void* ptr) const override; +private: + int device_id_; };