Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Prepare for more CUDA/HIP unification #1616

Merged
merged 8 commits into from
Jun 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions accessor/cuda_hip_helper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_ACCESSOR_CUDA_HIP_HELPER_HPP_
#define GKO_ACCESSOR_CUDA_HIP_HELPER_HPP_


#include <utility>


#ifdef GKO_COMPILING_HIP
#include "accessor/hip_helper.hpp"
#else // GKO_COMPILING_CUDA
#include "accessor/cuda_helper.hpp"
#endif


namespace gko {
namespace acc {


template <typename AccType>
GKO_ACC_INLINE auto as_device_range(AccType&& acc)
{
#ifdef GKO_COMPILING_HIP
return as_hip_range(std::forward<AccType>(acc));
#else // GKO_COMPILING_CUDA
return as_cuda_range(std::forward<AccType>(acc));
#endif
}


} // namespace acc
} // namespace gko


#endif // GKO_ACCESSOR_CUDA_HIP_HELPER_HPP_
2 changes: 2 additions & 0 deletions benchmark/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -20,13 +20,15 @@ function(ginkgo_benchmark_cusparse_linops type def)
endif()
# make the dependency public to catch issues
target_compile_definitions(cusparse_linops_${type} PUBLIC ${def})
target_compile_definitions(cusparse_linops_${type} PRIVATE GKO_COMPILING_CUDA)
target_link_libraries(cusparse_linops_${type} Ginkgo::ginkgo CUDA::cudart CUDA::cublas CUDA::cusparse)
endfunction()

function(ginkgo_benchmark_hipsparse_linops type def)
add_library(hipsparse_linops_${type} utils/hip_linops.hip.cpp)
set_source_files_properties(utils/hip_linops.hip.cpp PROPERTIES LANGUAGE HIP)
target_compile_definitions(hipsparse_linops_${type} PUBLIC ${def})
target_compile_definitions(hipsparse_linops_${type} PRIVATE GKO_COMPILING_HIP)
target_include_directories(hipsparse_linops_${type} SYSTEM PRIVATE ${HIPBLAS_INCLUDE_DIRS} ${HIPSPARSE_INCLUDE_DIRS})
target_link_libraries(hipsparse_linops_${type} Ginkgo::ginkgo ${HIPSPARSE_LIBRARIES})
endfunction()
Expand Down
30 changes: 15 additions & 15 deletions benchmark/utils/cuda_linops.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ class CusparseCsrmp

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmv_mp(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
Expand All @@ -156,7 +156,7 @@ class CusparseCsrmp
: gko::EnableLinOp<CusparseCsrmp, CusparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down Expand Up @@ -213,7 +213,7 @@ class CusparseCsr

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmv(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
Expand All @@ -230,7 +230,7 @@ class CusparseCsr
: gko::EnableLinOp<CusparseCsr, CusparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down Expand Up @@ -288,7 +288,7 @@ class CusparseCsrmm

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmm(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
this->get_size()[0], dense_b->get_size()[1], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
Expand All @@ -306,7 +306,7 @@ class CusparseCsrmm
: gko::EnableLinOp<CusparseCsrmm, CusparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down Expand Up @@ -376,7 +376,7 @@ class CusparseCsrEx
gko::size_type buffer_size = 0;

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
auto handle = this->get_gpu_exec()->get_cusparse_handle();
auto handle = this->get_gpu_exec()->get_sparselib_handle();
// This function seems to require the pointer mode to be set to HOST.
// Ginkgo use pointer mode DEVICE by default, so we change this
// temporarily.
Expand Down Expand Up @@ -407,7 +407,7 @@ class CusparseCsrEx
: gko::EnableLinOp<CusparseCsrEx, CusparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE),
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE),
buffer_(exec)
{
algmode_ = CUSPARSE_ALG_MERGE_PATH;
Expand Down Expand Up @@ -465,7 +465,7 @@ class CusparseHybrid

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::csr2hyb(
this->get_gpu_exec()->get_cusparse_handle(), this->get_size()[0],
this->get_gpu_exec()->get_sparselib_handle(), this->get_size()[0],
this->get_size()[1], this->get_descr(), t_csr->get_const_values(),
t_csr->get_const_row_ptrs(), t_csr->get_const_col_idxs(), hyb_,
Threshold, Partition);
Expand Down Expand Up @@ -496,7 +496,7 @@ class CusparseHybrid

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::cuda::cusparse::spmv(
this->get_gpu_exec()->get_cusparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
&scalars.get_const_data()[0], this->get_descr(), hyb_, db,
&scalars.get_const_data()[1], dx);
}
Expand All @@ -508,7 +508,7 @@ class CusparseHybrid
CusparseHybrid(std::shared_ptr<const gko::Executor> exec,
const gko::dim<2>& size = gko::dim<2>{})
: gko::EnableLinOp<CusparseHybrid, CusparseBase>(exec, size),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseCreateHybMat(&hyb_));
Expand Down Expand Up @@ -555,13 +555,13 @@ void cusparse_generic_spmv(std::shared_ptr<const gko::CudaExecutor> gpu_exec,

gko::size_type buffer_size = 0;
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV_bufferSize(
gpu_exec->get_cusparse_handle(), trans, &scalars.get_const_data()[0],
gpu_exec->get_sparselib_handle(), trans, &scalars.get_const_data()[0],
mat, vecb, &scalars.get_const_data()[1], vecx, cu_value, alg,
&buffer_size));
gko::array<char> buffer_array(gpu_exec, buffer_size);
auto dbuffer = buffer_array.get_data();
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseSpMV(
gpu_exec->get_cusparse_handle(), trans, &scalars.get_const_data()[0],
gpu_exec->get_sparselib_handle(), trans, &scalars.get_const_data()[0],
mat, vecb, &scalars.get_const_data()[1], vecx, cu_value, alg, dbuffer));
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyDnVec(vecx));
GKO_ASSERT_NO_CUSPARSE_ERRORS(cusparseDestroyDnVec(vecb));
Expand Down Expand Up @@ -654,7 +654,7 @@ class CusparseGenericCsr
: gko::EnableLinOp<CusparseGenericCsr, CusparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down Expand Up @@ -745,7 +745,7 @@ class CusparseGenericCoo
const gko::dim<2>& size = gko::dim<2>{})
: gko::EnableLinOp<CusparseGenericCoo, CusparseBase>(exec, size),
coo_(std::move(coo::create(exec))),
trans_(CUSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down
14 changes: 7 additions & 7 deletions benchmark/utils/hip_linops.hip.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,7 @@ class HipsparseCsr

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::hip::hipsparse::spmv(
this->get_gpu_exec()->get_hipsparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
this->get_size()[0], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
Expand All @@ -143,7 +143,7 @@ class HipsparseCsr
: gko::EnableLinOp<HipsparseCsr, HipsparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down Expand Up @@ -201,7 +201,7 @@ class HipsparseCsrmm

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::hip::hipsparse::spmm(
this->get_gpu_exec()->get_hipsparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
this->get_size()[0], dense_b->get_size()[1], this->get_size()[1],
csr_->get_num_stored_elements(), &scalars.get_const_data()[0],
this->get_descr(), csr_->get_const_values(),
Expand All @@ -219,7 +219,7 @@ class HipsparseCsrmm
: gko::EnableLinOp<HipsparseCsrmm, HipsparseBase>(exec, size),
csr_(std::move(
csr::create(exec, std::make_shared<typename csr::classical>()))),
trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{}

private:
Expand Down Expand Up @@ -269,7 +269,7 @@ class HipsparseHybrid

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::hip::hipsparse::csr2hyb(
this->get_gpu_exec()->get_hipsparse_handle(), this->get_size()[0],
this->get_gpu_exec()->get_sparselib_handle(), this->get_size()[0],
this->get_size()[1], this->get_descr(), t_csr->get_const_values(),
t_csr->get_const_row_ptrs(), t_csr->get_const_col_idxs(), hyb_,
Threshold, Partition);
Expand Down Expand Up @@ -300,7 +300,7 @@ class HipsparseHybrid

auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
gko::kernels::hip::hipsparse::spmv(
this->get_gpu_exec()->get_hipsparse_handle(), trans_,
this->get_gpu_exec()->get_sparselib_handle(), trans_,
&scalars.get_const_data()[0], this->get_descr(), hyb_, db,
&scalars.get_const_data()[1], dx);
}
Expand All @@ -312,7 +312,7 @@ class HipsparseHybrid
HipsparseHybrid(std::shared_ptr<const gko::Executor> exec,
const gko::dim<2>& size = gko::dim<2>{})
: gko::EnableLinOp<HipsparseHybrid, HipsparseBase>(exec, size),
trans_(HIPSPARSE_OPERATION_NON_TRANSPOSE)
trans_(SPARSELIB_OPERATION_NON_TRANSPOSE)
{
auto guard = this->get_gpu_exec()->get_scoped_device_id_guard();
GKO_ASSERT_NO_HIPSPARSE_ERRORS(hipsparseCreateHybMat(&hyb_));
Expand Down
12 changes: 6 additions & 6 deletions cmake/create_test.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -160,7 +160,7 @@ endfunction(ginkgo_create_cuda_test)
## Internal function allowing separate test name, filename and target name
function(ginkgo_create_cuda_test_internal test_name filename test_target_name)
add_executable(${test_target_name} ${filename})
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA)
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_CUDA GKO_DEVICE_NAMESPACE=cuda)
if(MSVC)
target_compile_options(${test_target_name}
PRIVATE
Expand Down Expand Up @@ -188,7 +188,7 @@ endfunction(ginkgo_create_hip_test)
function(ginkgo_create_hip_test_internal test_name filename test_target_name)
set_source_files_properties(${filename} PROPERTIES LANGUAGE HIP)
add_executable(${test_target_name} ${filename})
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_HIP)
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_HIP GKO_DEVICE_NAMESPACE=hip)
ginkgo_set_test_target_properties(${test_target_name} "_hip" ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE hipgpu)
endfunction(ginkgo_create_hip_test_internal)
Expand All @@ -203,7 +203,7 @@ endfunction()
function(ginkgo_create_omp_test_internal test_name filename test_target_name)
ginkgo_build_test_name(${test_name} test_target_name)
add_executable(${test_target_name} ${test_name}.cpp)
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_OMP)
target_compile_definitions(${test_target_name} PRIVATE GKO_COMPILING_OMP GKO_DEVICE_NAMESPACE=omp)
target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX)
ginkgo_set_test_target_properties(${test_target_name} "_omp" ${ARGN})
ginkgo_add_test(${test_name} ${test_target_name} ${ARGN} RESOURCE_TYPE cpu)
Expand Down Expand Up @@ -253,7 +253,7 @@ function(ginkgo_create_common_test_internal test_name exec_type exec)
target_link_libraries(${test_target_name} PRIVATE OpenMP::OpenMP_CXX)
endif ()

target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} EXEC_NAMESPACE=${exec} GKO_COMPILING_${exec_upper})
target_compile_definitions(${test_target_name} PRIVATE EXEC_TYPE=${exec_type} GKO_DEVICE_NAMESPACE=${exec} GKO_COMPILING_${exec_upper})
target_link_libraries(${test_target_name} PRIVATE ${common_test_ADDITIONAL_LIBRARIES})
# use float for DPC++ if necessary
if((exec STREQUAL "dpcpp") AND GINKGO_DPCPP_SINGLE_MODE)
Expand Down Expand Up @@ -285,13 +285,13 @@ function(ginkgo_create_common_device_test test_name)
# need to make a separate file for this, since we can't set conflicting properties on the same file
configure_file(${test_name}.cpp ${test_name}.cu COPYONLY)
ginkgo_create_cuda_test_internal(${test_name}_cuda ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.cu ${test_target_name}_cuda ${ARGN})
target_compile_definitions(${test_target_name}_cuda PRIVATE EXEC_TYPE=CudaExecutor EXEC_NAMESPACE=cuda)
target_compile_definitions(${test_target_name}_cuda PRIVATE EXEC_TYPE=CudaExecutor GKO_DEVICE_NAMESPACE=cuda)
endif()
if(GINKGO_BUILD_HIP)
# need to make a separate file for this, since we can't set conflicting properties on the same file
configure_file(${test_name}.cpp ${test_name}.hip.cpp COPYONLY)
ginkgo_create_hip_test_internal(${test_name}_hip ${CMAKE_CURRENT_BINARY_DIR}/${test_name}.hip.cpp ${test_target_name}_hip ${ARGN})
target_compile_definitions(${test_target_name}_hip PRIVATE EXEC_TYPE=HipExecutor EXEC_NAMESPACE=hip)
target_compile_definitions(${test_target_name}_hip PRIVATE EXEC_TYPE=HipExecutor GKO_DEVICE_NAMESPACE=hip)
endif()
endfunction(ginkgo_create_common_device_test)

Expand Down
18 changes: 18 additions & 0 deletions common/cuda_hip/base/blas_bindings.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_BLAS_BINDINGS_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_BLAS_BINDINGS_HPP_


#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/cublas_bindings.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/hipblas_bindings.hip.hpp"
#else
#error "Executor definition missing"
#endif


#endif // GKO_COMMON_CUDA_HIP_BASE_BLAS_BINDINGS_HPP_
18 changes: 18 additions & 0 deletions common/cuda_hip/base/config.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_CONFIG_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_CONFIG_HPP_


#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/config.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/config.hip.hpp"
#else
#error "Executor definition missing"
#endif


#endif // GKO_COMMON_CUDA_HIP_BASE_CONFIG_HPP_
18 changes: 18 additions & 0 deletions common/cuda_hip/base/pointer_mode_guard.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_POINTER_MODE_GUARD_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_POINTER_MODE_GUARD_HPP_


#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/pointer_mode_guard.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/pointer_mode_guard.hip.hpp"
#else
#error "Executor definition missing"
#endif


#endif // GKO_COMMON_CUDA_HIP_BASE_POINTER_MODE_GUARD_HPP_
18 changes: 18 additions & 0 deletions common/cuda_hip/base/randlib_bindings.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_RANDLIB_BINDINGS_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_RANDLIB_BINDINGS_HPP_


#if defined(GKO_COMPILING_CUDA)
#include "cuda/base/curand_bindings.hpp"
#elif defined(GKO_COMPILING_HIP)
#include "hip/base/hiprand_bindings.hip.hpp"
#else
#error "Executor definition missing"
#endif


#endif // GKO_COMMON_CUDA_HIP_BASE_RANDLIB_BINDINGS_HPP_
18 changes: 18 additions & 0 deletions common/cuda_hip/base/runtime.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_RUNTIME_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_RUNTIME_HPP_


#if defined(GKO_COMPILING_CUDA)
// nothing needed here
MarcelKoch marked this conversation as resolved.
Show resolved Hide resolved
#elif defined(GKO_COMPILING_HIP)
#include <hip/hip_runtime.h>
#else
#error "Executor definition missing"
#endif


#endif // GKO_COMMON_CUDA_HIP_BASE_RUNTIME_HPP_
Loading