Skip to content

Commit

Permalink
apply script
Browse files Browse the repository at this point in the history
  • Loading branch information
upsj committed May 19, 2024
1 parent 127c204 commit 5f4c652
Show file tree
Hide file tree
Showing 167 changed files with 4,029 additions and 8,735 deletions.
1 change: 1 addition & 0 deletions common/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,2 +1,3 @@
add_subdirectory(unified)
set(GKO_UNIFIED_COMMON_SOURCES ${GKO_UNIFIED_COMMON_SOURCES} PARENT_SCOPE)
set(GKO_CUDA_HIP_COMMON_SOURCES ${GKO_CUDA_HIP_COMMON_SOURCES} PARENT_SCOPE)
58 changes: 58 additions & 0 deletions common/cuda_hip/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
set(CUDA_HIP_SOURCES
base/batch_multi_vector_kernels.cpp
base/device_matrix_data_kernels.cpp
base/kernel_launch.hpp
base/kernel_launch_reduction.hpp
base/kernel_launch_solver.hpp
base/math.hpp
components/atomic.hpp
components/diagonal_block_manipulation.hpp
components/intrinsics.hpp
components/merging.hpp
components/prefix_sum.hpp
components/prefix_sum_kernels.cpp
components/reduction.hpp
components/searching.hpp
components/segment_scan.hpp
components/sorting.hpp
components/syncfree.hpp
components/thread_ids.hpp
components/uninitialized_array.hpp
components/warp_blas.hpp
distributed/index_map_kernels.cpp
distributed/matrix_kernels.cpp
distributed/partition_helpers_kernels.cpp
distributed/partition_kernels.cpp
distributed/vector_kernels.cpp
factorization/cholesky_kernels.cpp
factorization/factorization_kernels.cpp
factorization/lu_kernels.cpp
factorization/par_ic_kernels.cpp
factorization/par_ict_kernels.cpp
factorization/par_ilu_kernels.cpp
factorization/par_ilut_filter_kernels.cpp
factorization/par_ilut_select_kernels.cpp
factorization/par_ilut_spgeam_kernels.cpp
factorization/par_ilut_sweep_kernels.cpp
log/batch_logger.hpp
matrix/batch_csr_kernels.cpp
matrix/batch_dense_kernels.cpp
matrix/batch_ell_kernels.cpp
matrix/coo_kernels.cpp
matrix/dense_kernels.cpp
matrix/diagonal_kernels.cpp
matrix/ell_kernels.cpp
matrix/fbcsr_kernels.cpp
matrix/sellp_kernels.cpp
matrix/sparsity_csr_kernels.cpp
multigrid/pgm_kernels.cpp
preconditioner/isai_kernels.cpp
preconditioner/jacobi_kernels.cpp
reorder/rcm_kernels.cpp
solver/cb_gmres_kernels.cpp
solver/idr_kernels.cpp
solver/multigrid_kernels.cpp
stop/batch_criteria.hpp
)
list(TRANSFORM CUDA_HIP_SOURCES PREPEND ${CMAKE_CURRENT_SOURCE_DIR}/)
set(GKO_CUDA_HIP_COMMON_SOURCES ${CUDA_HIP_SOURCES} PARENT_SCOPE)
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,50 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include <common/cuda_hip/base/batch_struct.hpp>


#include <thrust/functional.h>
#include <thrust/transform.h>


#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/base/range_accessors.hpp>


#include "common/cuda_hip/base/blas_bindings.hpp"
#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/pointer_mode_guard.hpp"
#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
#include "common/cuda_hip/components/reduction.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
#include "common/cuda_hip/components/uninitialized_array.hpp"
#include "core/base/batch_multi_vector_kernels.hpp"
#include "core/base/batch_struct.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
/**
* @brief The MultiVector matrix format namespace.
*
* @ingroup batch_multi_vector
*/
namespace batch_multi_vector {


constexpr auto default_block_size = 256;
constexpr int sm_oversubscription = 4;


// clang-format off

// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES


template <typename ValueType, typename Mapping>
__device__ __forceinline__ void scale(
const gko::batch::multi_vector::batch_item<const ValueType>& alpha,
Expand Down Expand Up @@ -299,3 +343,14 @@ __launch_bounds__(default_block_size, sm_oversubscription) void copy_kernel(
copy(src_b, dst_b);
}
}


#include "common/cuda_hip/base/batch_multi_vector_kernel_launcher.hpp.inc"

// clang-format on


} // namespace batch_multi_vector
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,26 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#include <thrust/copy.h>
#include <thrust/count.h>
#include <thrust/device_ptr.h>
#include <thrust/execution_policy.h>
#include <thrust/iterator/zip_iterator.h>
#include <thrust/sort.h>
#include <thrust/tuple.h>


#include "common/cuda_hip/base/thrust.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "core/base/device_matrix_data_kernels.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace components {


template <typename ValueType, typename IndexType>
void remove_zeros(std::shared_ptr<const DefaultExecutor> exec,
array<ValueType>& values, array<IndexType>& row_idxs,
Expand Down Expand Up @@ -99,3 +119,9 @@ void sort_row_major(std::shared_ptr<const DefaultExecutor> exec,

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(
GKO_DECLARE_DEVICE_MATRIX_DATA_SORT_ROW_MAJOR_KERNEL);


} // namespace components
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,55 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_HPP_
#error \
"This file can only be used from inside common/unified/base/kernel_launch.hpp"
#endif


#include <accessor/device_helper.hpp>


#include <thrust/tuple.h>


#include "common/cuda_hip/base/runtime.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


template <typename AccessorType>
struct to_device_type_impl<gko::acc::range<AccessorType>&> {
using type = std::decay_t<decltype(gko::acc::as_device_range(
std::declval<gko::acc::range<AccessorType>>()))>;
static type map_to_device(gko::acc::range<AccessorType>& range)
{
return gko::acc::as_device_range(range);
}
};

template <typename AccessorType>
struct to_device_type_impl<const gko::acc::range<AccessorType>&> {
using type = std::decay_t<decltype(gko::acc::as_device_range(
std::declval<gko::acc::range<AccessorType>>()))>;
static type map_to_device(const gko::acc::range<AccessorType>& range)
{
return gko::acc::as_device_range(range);
}
};


namespace device_std = thrust;


constexpr int default_block_size = 512;


template <typename KernelFunction, typename... KernelArgs>
__global__ __launch_bounds__(default_block_size) void generic_kernel_1d(
int64 size, KernelFunction fn, KernelArgs... args)
Expand Down Expand Up @@ -52,3 +101,8 @@ void run_kernel(std::shared_ptr<const DefaultExecutor> exec, KernelFunction fn,
map_to_device(args)...);
}
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_REDUCTION_HPP_
#error \
"This file can only be used from inside common/unified/base/kernel_launch_reduction.hpp"
#endif


#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"
#include "common/cuda_hip/components/reduction.hpp"
#include "common/cuda_hip/components/thread_ids.hpp"
#include "core/synthesizer/implementation_selection.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


template <typename ValueType, typename KernelFunction, typename ReductionOp,
typename FinalizeOp, typename... KernelArgs>
__global__ __launch_bounds__(
Expand Down Expand Up @@ -505,3 +523,8 @@ void run_kernel_col_reduction_cached(
}
}
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,20 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_UNIFIED_BASE_KERNEL_LAUNCH_SOLVER_HPP_
#error \
"This file can only be used from inside common/unified/base/kernel_launch_solver.hpp"
#endif


#include "common/cuda_hip/base/runtime.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


template <typename KernelFunction, typename... KernelArgs>
__global__ __launch_bounds__(default_block_size) void generic_kernel_2d_solver(
int64 rows, int64 cols, int64 default_stride, KernelFunction fn,
Expand Down Expand Up @@ -32,3 +46,8 @@ void run_kernel_solver(std::shared_ptr<const DefaultExecutor> exec,
static_cast<int64>(default_stride), fn, map_to_device(args)...);
}
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,19 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_BASE_MATH_HPP_
#define GKO_COMMON_CUDA_HIP_BASE_MATH_HPP_


#include <thrust/complex.h>


#include <ginkgo/core/base/math.hpp>


namespace gko {


// We need this struct, because otherwise we would call a __host__ function in a
// __device__ function (even though it is constexpr)
template <typename T>
Expand Down Expand Up @@ -37,3 +50,9 @@ struct truncate_type_impl<thrust::complex<T>> {


} // namespace detail


} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_BASE_MATH_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,22 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_COMPONENTS_ATOMIC_HPP_
#define GKO_COMMON_CUDA_HIP_COMPONENTS_ATOMIC_HPP_


#include <type_traits>


#include "common/cuda_hip/base/math.hpp"
#include "common/cuda_hip/base/types.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {


namespace detail {


Expand Down Expand Up @@ -228,3 +244,11 @@ __forceinline__ __device__ thrust::complex<double> atomic_add(
auto imag = atomic_add(addr + 1, val.imag());
return {real, imag};
}


} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_COMPONENTS_ATOMIC_HPP_
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,24 @@
//
// SPDX-License-Identifier: BSD-3-Clause

#ifndef GKO_COMMON_CUDA_HIP_COMPONENTS_DIAGONAL_BLOCK_MANIPULATION_HPP_
#define GKO_COMMON_CUDA_HIP_COMPONENTS_DIAGONAL_BLOCK_MANIPULATION_HPP_


#include <type_traits>


#include "common/cuda_hip/base/config.hpp"
#include "common/cuda_hip/base/types.hpp"
#include "common/cuda_hip/components/cooperative_groups.hpp"


namespace gko {
namespace kernels {
namespace GKO_DEVICE_NAMESPACE {
namespace csr {


/**
* @internal
*
Expand Down Expand Up @@ -63,3 +81,12 @@ __device__ __forceinline__ void extract_transposed_diag_blocks(
}
}
}


} // namespace csr
} // namespace GKO_DEVICE_NAMESPACE
} // namespace kernels
} // namespace gko


#endif // GKO_COMMON_CUDA_HIP_COMPONENTS_DIAGONAL_BLOCK_MANIPULATION_HPP_
Loading

0 comments on commit 5f4c652

Please sign in to comment.