Skip to content

Commit

Permalink
Merge (#1450): Add a CSR batched matrix format, CUDA, HIP and DPCPP k…
Browse files Browse the repository at this point in the history
…ernels

Add a CSR batched matrix format, CUDA, HIP and DPCPP kernels

Related PR: #1450
  • Loading branch information
pratikvn authored Dec 12, 2023
2 parents 310bb4c + c30aae8 commit 830c289
Show file tree
Hide file tree
Showing 39 changed files with 2,407 additions and 1 deletion.
50 changes: 50 additions & 0 deletions common/cuda_hip/matrix/batch_csr_kernel_launcher.hpp.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

template <typename ValueType, typename IndexType>
void simple_apply(std::shared_ptr<const DefaultExecutor> exec,
const batch::matrix::Csr<ValueType, IndexType>* mat,
const batch::MultiVector<ValueType>* b,
batch::MultiVector<ValueType>* x)
{
const auto num_blocks = mat->get_num_batch_items();
const auto b_ub = get_batch_struct(b);
const auto x_ub = get_batch_struct(x);
const auto mat_ub = get_batch_struct(mat);
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
simple_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(mat_ub, b_ub, x_ub);
}


GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL);


template <typename ValueType, typename IndexType>
void advanced_apply(std::shared_ptr<const DefaultExecutor> exec,
const batch::MultiVector<ValueType>* alpha,
const batch::matrix::Csr<ValueType, IndexType>* mat,
const batch::MultiVector<ValueType>* b,
const batch::MultiVector<ValueType>* beta,
batch::MultiVector<ValueType>* x)
{
const auto num_blocks = mat->get_num_batch_items();
const auto b_ub = get_batch_struct(b);
const auto x_ub = get_batch_struct(x);
const auto mat_ub = get_batch_struct(mat);
const auto alpha_ub = get_batch_struct(alpha);
const auto beta_ub = get_batch_struct(beta);
if (b->get_common_size()[1] > 1) {
GKO_NOT_IMPLEMENTED;
}
advanced_apply_kernel<<<num_blocks, default_block_size, 0,
exec->get_stream()>>>(alpha_ub, mat_ub, b_ub,
beta_ub, x_ub);
}

GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE(
GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL);
113 changes: 113 additions & 0 deletions common/cuda_hip/matrix/batch_csr_kernels.hpp.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

template <typename ValueType, typename IndexType>
__device__ __forceinline__ void simple_apply(
const gko::batch::matrix::csr::batch_item<const ValueType, IndexType>& mat,
const ValueType* const __restrict__ b, ValueType* const __restrict__ x)
{
const auto num_rows = mat.num_rows;
const auto val = mat.values;
const auto col = mat.col_idxs;
for (int row = threadIdx.x; row < num_rows; row += blockDim.x) {
auto temp = zero<ValueType>();
for (auto nnz = mat.row_ptrs[row]; nnz < mat.row_ptrs[row + 1]; nnz++) {
const auto col_idx = col[nnz];
temp += val[nnz] * b[col_idx];
}
x[row] = temp;
}
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix::
csr::uniform_batch<
const ValueType,
IndexType>
mat,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
b,
const gko::batch::
multi_vector::
uniform_batch<
ValueType>
x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
const auto mat_b =
gko::batch::matrix::extract_batch_item(mat, batch_id);
const auto b_b = gko::batch::extract_batch_item(b, batch_id);
const auto x_b = gko::batch::extract_batch_item(x, batch_id);
simple_apply(mat_b, b_b.values, x_b.values);
}
}


template <typename ValueType, typename IndexType>
__device__ __forceinline__ void advanced_apply(
const ValueType alpha,
const gko::batch::matrix::csr::batch_item<const ValueType, IndexType>& mat,
const ValueType* const __restrict__ b, const ValueType beta,
ValueType* const __restrict__ x)
{
const auto num_rows = mat.num_rows;
const auto val = mat.values;
const auto col = mat.col_idxs;
for (int row = threadIdx.x; row < num_rows; row += blockDim.x) {
auto temp = zero<ValueType>();
for (auto nnz = mat.row_ptrs[row]; nnz < mat.row_ptrs[row + 1]; nnz++) {
const auto col_idx = col[nnz];
temp += alpha * val[nnz] * b[col_idx];
}
x[row] = temp + beta * x[row];
}
}

template <typename ValueType, typename IndexType>
__global__ __launch_bounds__(
default_block_size,
sm_oversubscription) void advanced_apply_kernel(const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
alpha,
const gko::batch::matrix::
csr::uniform_batch<
const ValueType,
IndexType>
mat,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
b,
const gko::batch::
multi_vector::
uniform_batch<
const ValueType>
beta,
const gko::batch::
multi_vector::
uniform_batch<
ValueType>
x)
{
for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items;
batch_id += gridDim.x) {
const auto mat_b =
gko::batch::matrix::extract_batch_item(mat, batch_id);
const auto b_b = gko::batch::extract_batch_item(b, batch_id);
const auto x_b = gko::batch::extract_batch_item(x, batch_id);
const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id);
const auto beta_b = gko::batch::extract_batch_item(beta, batch_id);
advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0],
x_b.values);
}
}
1 change: 1 addition & 0 deletions core/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ target_sources(ginkgo
log/vtune.cpp
log/record.cpp
log/stream.cpp
matrix/batch_csr.cpp
matrix/batch_dense.cpp
matrix/batch_ell.cpp
matrix/batch_identity.cpp
Expand Down
11 changes: 11 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include "core/factorization/par_ict_kernels.hpp"
#include "core/factorization/par_ilu_kernels.hpp"
#include "core/factorization/par_ilut_kernels.hpp"
#include "core/matrix/batch_csr_kernels.hpp"
#include "core/matrix/batch_dense_kernels.hpp"
#include "core/matrix/batch_ell_kernels.hpp"
#include "core/matrix/coo_kernels.hpp"
Expand Down Expand Up @@ -281,6 +282,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL);
} // namespace batch_multi_vector


namespace batch_csr {


GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL);
GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL);


} // namespace batch_csr


namespace batch_dense {


Expand Down
Loading

0 comments on commit 830c289

Please sign in to comment.