Skip to content

Commit

Permalink
update interface and remove DPC++ allocator
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed Jul 7, 2023
1 parent f2be25c commit 9fd6482
Show file tree
Hide file tree
Showing 15 changed files with 348 additions and 322 deletions.
4 changes: 2 additions & 2 deletions core/base/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,15 +42,15 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
namespace gko {


void* CpuAllocator::allocate(size_type num_bytes) const
void* CpuAllocator::allocate(size_type num_bytes)
{
auto ptr = ::operator new (num_bytes, std::nothrow_t{});
GKO_ENSURE_ALLOCATED(ptr, "cpu", num_bytes);
return ptr;
}


void CpuAllocator::deallocate(void* ptr) const
void CpuAllocator::deallocate(void* ptr)
{
::operator delete (ptr, std::nothrow_t{});
}
Expand Down
32 changes: 22 additions & 10 deletions core/device_hooks/cuda_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -54,43 +54,55 @@ version version_info::get_cuda_version() noexcept
}


void* CudaAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(cuda);
void* CudaAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);


void CudaAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
void CudaAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);


CudaAsyncAllocator::CudaAsyncAllocator(CUstream_st* stream)
GKO_NOT_COMPILED(cuda);


void* CudaAsyncAllocator::allocate(size_type num_bytes) const
GKO_NOT_COMPILED(cuda);
void* CudaAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);


void CudaAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);

void CudaAsyncAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);

bool CudaAsyncAllocator::check_environment(int device_id,
CUstream_st* stream) const
GKO_NOT_COMPILED(cuda);


CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
GKO_NOT_COMPILED(cuda);


void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
void* CudaUnifiedAllocator::allocate(size_type num_bytes)
GKO_NOT_COMPILED(cuda);


void CudaUnifiedAllocator::deallocate(void* dev_ptr) const
void CudaUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);


bool CudaUnifiedAllocator::check_environment(int device_id,
CUstream_st* stream) const
GKO_NOT_COMPILED(cuda);


CudaHostAllocator::CudaHostAllocator(int device_id) GKO_NOT_COMPILED(cuda);


void* CudaHostAllocator::allocate(size_type num_bytes) const
GKO_NOT_COMPILED(cuda);
void* CudaHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(cuda);


void CudaHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(cuda);

void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);

bool CudaHostAllocator::check_environment(int device_id,
CUstream_st* stream) const
GKO_NOT_COMPILED(cuda);


std::shared_ptr<CudaExecutor> CudaExecutor::create(
Expand Down
28 changes: 0 additions & 28 deletions core/device_hooks/dpcpp_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,34 +53,6 @@ version version_info::get_dpcpp_version() noexcept
}


DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp);


void* DpcppAllocatorBase::allocate(size_type num_bytes) const
GKO_NOT_COMPILED(dpcpp);


void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp);


void* DpcppAllocator::allocate_impl(sycl::queue* queue,
size_type num_bytes) const
GKO_NOT_COMPILED(dpcpp);


void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
GKO_NOT_COMPILED(dpcpp);


void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
size_type num_bytes) const
GKO_NOT_COMPILED(dpcpp);


void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
GKO_NOT_COMPILED(dpcpp);


std::shared_ptr<DpcppExecutor> DpcppExecutor::create(
int device_id, std::shared_ptr<Executor> master, std::string device_type,
dpcpp_queue_property property)
Expand Down
51 changes: 47 additions & 4 deletions core/device_hooks/hip_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,54 @@ version version_info::get_hip_version() noexcept
}


void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip);
void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);


void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip);
void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);


HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
GKO_NOT_COMPILED(hip);


void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);


void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);


bool HipAsyncAllocator::check_environment(int device_id,
GKO_HIP_STREAM_STRUCT* stream) const
GKO_NOT_COMPILED(hip);


HipUnifiedAllocator::HipUnifiedAllocator(int device_id, unsigned int flags)
GKO_NOT_COMPILED(hip);


void* HipUnifiedAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);


void HipUnifiedAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);


bool HipUnifiedAllocator::check_environment(int device_id,
GKO_HIP_STREAM_STRUCT* stream) const
GKO_NOT_COMPILED(hip);


HipHostAllocator::HipHostAllocator(int device_id) GKO_NOT_COMPILED(hip);


void* HipHostAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);


void HipHostAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);


bool HipHostAllocator::check_environment(int device_id,
GKO_HIP_STREAM_STRUCT* stream) const
GKO_NOT_COMPILED(hip);


std::shared_ptr<HipExecutor> HipExecutor::create(
Expand All @@ -76,8 +120,7 @@ std::shared_ptr<HipExecutor> HipExecutor::create(
std::shared_ptr<HipAllocatorBase> alloc, GKO_HIP_STREAM_STRUCT* stream)
{
return std::shared_ptr<HipExecutor>(
new HipExecutor(device_id, std::move(master),
std::make_shared<HipAllocator>(), stream));
new HipExecutor(device_id, std::move(master), alloc, stream));
}


Expand Down
2 changes: 1 addition & 1 deletion core/test/base/executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -386,7 +386,7 @@ TEST(Executor, CanVerifyMemory)


struct MockAllocator : gko::CpuAllocator {
void deallocate(void* ptr) const noexcept override
void deallocate(void* ptr) noexcept override
{
called_free = true;
CpuAllocator::deallocate(ptr);
Expand Down
9 changes: 6 additions & 3 deletions cuda/base/executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,16 +80,19 @@ std::shared_ptr<CudaExecutor> CudaExecutor::create(
int device_id, std::shared_ptr<Executor> master, bool device_reset,
allocation_mode alloc_mode, cudaStream_t stream)
{
return std::shared_ptr<CudaExecutor>(
new CudaExecutor(device_id, std::move(master),
allocator_from_mode(device_id, alloc_mode), stream));
return create(device_id, master, allocator_from_mode(device_id, alloc_mode),
stream);
}


std::shared_ptr<CudaExecutor> CudaExecutor::create(
int device_id, std::shared_ptr<Executor> master,
std::shared_ptr<CudaAllocatorBase> alloc, cudaStream_t stream)
{
if (!alloc->check_environment(device_id, stream)) {
throw Error{__FILE__, __LINE__,
"Allocator uses incorrect stream or device ID."};
}
return std::shared_ptr<CudaExecutor>(new CudaExecutor(
device_id, std::move(master), std::move(alloc), stream));
}
Expand Down
43 changes: 32 additions & 11 deletions cuda/base/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ namespace gko {
#endif


void* CudaAllocator::allocate(size_type num_bytes) const
void* CudaAllocator::allocate(size_type num_bytes)
{
void* ptr{};
GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
Expand All @@ -91,7 +91,7 @@ void* CudaAllocator::allocate(size_type num_bytes) const
}


void CudaAllocator::deallocate(void* ptr) const
void CudaAllocator::deallocate(void* ptr)
{
GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
}
Expand All @@ -103,7 +103,7 @@ void CudaAllocator::deallocate(void* ptr) const
CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}


void* CudaAsyncAllocator::allocate(size_type num_bytes) const
void* CudaAsyncAllocator::allocate(size_type num_bytes)
{
void* ptr{};
GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(
Expand All @@ -112,7 +112,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const
}


void CudaAsyncAllocator::deallocate(void* ptr) const
void CudaAsyncAllocator::deallocate(void* ptr)
{
GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_));
}
Expand All @@ -121,10 +121,10 @@ void CudaAsyncAllocator::deallocate(void* ptr) const
#else // Fall back to regular allocation


CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {}
CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}


void* CudaAsyncAllocator::allocate(size_type num_bytes) const
void* CudaAsyncAllocator::allocate(size_type num_bytes)
{
void* ptr{};
GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
Expand All @@ -133,7 +133,7 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const
}


void CudaAsyncAllocator::deallocate(void* ptr) const
void CudaAsyncAllocator::deallocate(void* ptr)
{
GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
}
Expand All @@ -142,6 +142,13 @@ void CudaAsyncAllocator::deallocate(void* ptr) const
#endif


bool CudaAsyncAllocator::check_environment(int device_id,
CUstream_st* stream) const
{
return stream == stream_;
}


CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id)
: CudaUnifiedAllocator{device_id, cudaMemAttachGlobal}
{}
Expand All @@ -152,7 +159,7 @@ CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id, unsigned int flags)
{}


void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
void* CudaUnifiedAllocator::allocate(size_type num_bytes)
{
// we need to set the device ID in case this gets used in a host executor
detail::cuda_scoped_device_id_guard g(device_id_);
Expand All @@ -163,18 +170,25 @@ void* CudaUnifiedAllocator::allocate(size_type num_bytes) const
}


void CudaUnifiedAllocator::deallocate(void* ptr) const
void CudaUnifiedAllocator::deallocate(void* ptr)
{
// we need to set the device ID in case this gets used in a host executor
detail::cuda_scoped_device_id_guard g(device_id_);
GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
}


bool CudaUnifiedAllocator::check_environment(int device_id,
CUstream_st* stream) const
{
return device_id == device_id_;
}


CudaHostAllocator::CudaHostAllocator(int device_id) : device_id_{device_id} {}


void* CudaHostAllocator::allocate(size_type num_bytes) const
void* CudaHostAllocator::allocate(size_type num_bytes)
{
// we need to set the device ID in case this gets used in a host executor
detail::cuda_scoped_device_id_guard g(device_id_);
Expand All @@ -185,12 +199,19 @@ void* CudaHostAllocator::allocate(size_type num_bytes) const
}


void CudaHostAllocator::deallocate(void* ptr) const
void CudaHostAllocator::deallocate(void* ptr)
{
// we need to set the device ID in case this gets used in a host executor
detail::cuda_scoped_device_id_guard g(device_id_);
GKO_EXIT_ON_CUDA_ERROR(cudaFreeHost(ptr));
}


bool CudaHostAllocator::check_environment(int device_id,
CUstream_st* stream) const
{
return device_id == device_id_;
}


} // namespace gko
1 change: 0 additions & 1 deletion dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ target_sources(ginkgo_dpcpp
base/executor.dp.cpp
base/helper.dp.cpp
base/index_set_kernels.dp.cpp
base/memory.dp.cpp
base/scoped_device_id.dp.cpp
base/timer.dp.cpp
base/version.dp.cpp
Expand Down
Loading

0 comments on commit 9fd6482

Please sign in to comment.