Skip to content

Commit

Permalink
fix some compilation issues
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed Mar 30, 2023
1 parent 33645c0 commit 9953f30
Show file tree
Hide file tree
Showing 8 changed files with 86 additions and 57 deletions.
12 changes: 11 additions & 1 deletion core/device_hooks/cuda_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,19 @@ void CudaHostAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(cuda);
std::shared_ptr<CudaExecutor> CudaExecutor::create(
int device_id, std::shared_ptr<Executor> master, bool device_reset,
allocation_mode alloc_mode, CUstream_st* stream)
{
return std::shared_ptr<CudaExecutor>(
new CudaExecutor(device_id, std::move(master),
std::make_shared<CudaAllocator>(), stream));
}


std::shared_ptr<CudaExecutor> CudaExecutor::create(
int device_id, std::shared_ptr<Executor> master,
std::shared_ptr<CudaAllocatorBase> alloc, CUstream_st* stream)
{
return std::shared_ptr<CudaExecutor>(new CudaExecutor(
device_id, std::move(master), device_reset, alloc_mode, stream));
device_id, std::move(master), std::move(alloc), stream));
}


Expand Down
15 changes: 13 additions & 2 deletions core/device_hooks/dpcpp_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,18 @@ version version_info::get_dpcpp_version() noexcept
}


void* DpcppAllocator::allocate_impl(sycl::queue* queue, size_type size) const
DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue*) GKO_NOT_COMPILED(dpcpp);


void* DpcppAllocatorBase::allocate(size_type num_bytes) const
GKO_NOT_COMPILED(dpcpp);


void DpcppAllocatorBase::deallocate(void* ptr) const GKO_NOT_COMPILED(dpcpp);


void* DpcppAllocator::allocate_impl(sycl::queue* queue,
size_type num_bytes) const
GKO_NOT_COMPILED(dpcpp);


Expand All @@ -61,7 +72,7 @@ void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const


void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
size_type size) const
size_type num_bytes) const
GKO_NOT_COMPILED(dpcpp);


Expand Down
25 changes: 10 additions & 15 deletions core/device_hooks/hip_hooks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,11 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include <ginkgo/core/base/exception_helpers.hpp>
#include <ginkgo/core/base/executor.hpp>
#include <ginkgo/core/base/stream.hpp>
#include <ginkgo/core/base/types.hpp>
#include <ginkgo/core/base/version.hpp>
#include <ginkgo/core/log/profiler_hook.hpp>
#include "ginkgo/core/base/memory.hpp"


namespace gko {
Expand All @@ -52,29 +54,22 @@ version version_info::get_hip_version() noexcept
}


void* HipAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);
void* HipAllocator::allocate(size_type num_bytes) const GKO_NOT_COMPILED(hip);


void HipAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);
void HipAllocator::deallocate(void* dev_ptr) const GKO_NOT_COMPILED(hip);


HipAsyncAllocator::HipAsyncAllocator(GKO_HIP_STREAM_STRUCT* stream)
std::shared_ptr<HipExecutor> HipExecutor::create(
int device_id, std::shared_ptr<Executor> master, bool device_reset,
allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
GKO_NOT_COMPILED(hip);


void* HipAsyncAllocator::allocate(size_type num_bytes) GKO_NOT_COMPILED(hip);


void HipAsyncAllocator::deallocate(void* dev_ptr) GKO_NOT_COMPILED(hip);


std::shared_ptr<HipExecutor> HipExecutor::create(
int device_id, std::shared_ptr<Executor> master, bool device_reset,
allocation_mode alloc_mode, GKO_HIP_STREAM_STRUCT* stream)
{
return std::shared_ptr<HipExecutor>(new HipExecutor(
device_id, std::move(master), device_reset, alloc_mode, stream));
}
int device_id, std::shared_ptr<Executor> master,
std::shared_ptr<HipAllocatorBase> alloc, GKO_HIP_STREAM_STRUCT* stream)
GKO_NOT_COMPILED(hip);


void HipExecutor::populate_exec_info(const machine_topology* mach_topo)
Expand Down
28 changes: 28 additions & 0 deletions cuda/base/memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ void CudaAllocator::deallocate(void* ptr) const
}


#if CUDA_VERSION >= 11020


CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{stream} {}


Expand All @@ -108,12 +111,37 @@ void* CudaAsyncAllocator::allocate(size_type num_bytes) const
return ptr;
}


void CudaAsyncAllocator::deallocate(void* ptr) const
{
GKO_EXIT_ON_CUDA_ERROR(cudaFreeAsync(ptr, stream_));
}


#else // Fall back to regular allocation


CudaAsyncAllocator::CudaAsyncAllocator(cudaStream_t stream) : stream_{} {}


void* CudaAsyncAllocator::allocate(size_type num_bytes) const
{
void* ptr{};
GKO_ASSERT_NO_CUDA_ALLOCATION_ERRORS(cudaMalloc(&ptr, num_bytes),
num_bytes);
return ptr;
}


void CudaAsyncAllocator::deallocate(void* ptr) const
{
GKO_EXIT_ON_CUDA_ERROR(cudaFree(ptr));
}


#endif


CudaUnifiedAllocator::CudaUnifiedAllocator(int device_id)
: CudaUnifiedAllocator{device_id, cudaMemAttachGlobal}
{}
Expand Down
1 change: 1 addition & 0 deletions dpcpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ target_sources(ginkgo_dpcpp
base/executor.dp.cpp
base/helper.dp.cpp
base/index_set_kernels.dp.cpp
base/memory.dp.cpp
base/scoped_device_id.dp.cpp
base/version.dp.cpp
components/prefix_sum_kernels.dp.cpp
Expand Down
33 changes: 0 additions & 33 deletions dpcpp/base/executor.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -51,39 +51,6 @@ namespace gko {
namespace detail {


DpcppAllocator::DpcppAllocator(sycl::queue* queue) : queue_{queue} {}


void* DpcppAllocator::allocate(size_type size)
{
return sycl::malloc_device(size, *queue_);
}


void DpcppAllocator::deallocate(void* ptr)
{
queue_->wait_and_throw();
sycl::free(ptr, queue_->get_context());
}


DpcppUnifiedAllocator::DpcppUnifiedAllocator(sycl::queue* queue) : queue_{queue}
{}


void* DpcppUnifiedAllocator::allocate(size_type size)
{
return sycl::malloc_shared(size, *queue_);
}


void DpcppUnifiedAllocator::deallocate(void* ptr)
{
queue_->wait_and_throw();
sycl::free(ptr, queue_->get_context());
}


const std::vector<sycl::device> get_devices(std::string device_type)
{
std::map<std::string, sycl::info::device_type> device_type_map{
Expand Down
25 changes: 19 additions & 6 deletions dpcpp/base/memory.dp.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,22 @@ namespace gko {
DpcppAllocatorBase::DpcppAllocatorBase(sycl::queue* queue) : queue_{queue} {}


void* DpcppAllocatorBase::allocate(size_type num_bytes) const
{
return this->allocate_impl(queue_, num_bytes);
}


void DpcppAllocatorBase::deallocate(void* ptr) const
{
this->deallocate_impl(queue_, ptr);
}


void* DpcppAllocator::allocate_impl(sycl::queue* queue,
size_type num_bytes) const
{
return sycl::malloc_device(size, *queue);
return sycl::malloc_device(num_bytes, *queue);
}


Expand All @@ -56,16 +68,17 @@ void DpcppAllocator::deallocate_impl(sycl::queue* queue, void* ptr) const
}


void* DpcppUnifiedAllocator::allocate(size_type num_bytes)
void* DpcppUnifiedAllocator::allocate_impl(sycl::queue* queue,
size_type num_bytes)
{
return sycl::malloc_shared(size, *queue_);
return sycl::malloc_shared(num_bytes, *queue);
}


void DpcppUnifiedAllocator::deallocate(void* ptr)
void DpcppUnifiedAllocator::deallocate_impl(sycl::queue* queue, void* ptr)
{
queue_->wait_and_throw();
sycl::free(ptr, queue_->get_context());
queue->wait_and_throw();
sycl::free(ptr, queue->get_context());
}


Expand Down
4 changes: 4 additions & 0 deletions include/ginkgo/core/base/memory.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,10 @@ class DpcppAllocatorBase : public Allocator {
public:
DpcppAllocatorBase(sycl::queue* queue);

void* allocate(size_type num_bytes) const final;

void deallocate(void* ptr) const final;

protected:
virtual void* allocate_impl(sycl::queue* queue,
size_type num_bytes) const = 0;
Expand Down

0 comments on commit 9953f30

Please sign in to comment.