diff --git a/common/cuda_hip/matrix/batch_csr_kernel_launcher.hpp.inc b/common/cuda_hip/matrix/batch_csr_kernel_launcher.hpp.inc new file mode 100644 index 00000000000..9091fb3f040 --- /dev/null +++ b/common/cuda_hip/matrix/batch_csr_kernel_launcher.hpp.inc @@ -0,0 +1,50 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + simple_apply_kernel<<get_stream()>>>(mat_ub, b_ub, x_ub); +} + + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto num_blocks = mat->get_num_batch_items(); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto mat_ub = get_batch_struct(mat); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + if (b->get_common_size()[1] > 1) { + GKO_NOT_IMPLEMENTED; + } + advanced_apply_kernel<<get_stream()>>>(alpha_ub, mat_ub, b_ub, + beta_ub, x_ub); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL); diff --git a/common/cuda_hip/matrix/batch_csr_kernels.hpp.inc b/common/cuda_hip/matrix/batch_csr_kernels.hpp.inc new file mode 100644 index 00000000000..cd025d04436 --- /dev/null +++ b/common/cuda_hip/matrix/batch_csr_kernels.hpp.inc @@ -0,0 +1,113 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +template +__device__ __forceinline__ void simple_apply( + const gko::batch::matrix::csr::batch_item& mat, + const ValueType* const __restrict__ b, ValueType* const __restrict__ x) +{ + const auto num_rows = mat.num_rows; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int row = threadIdx.x; row < num_rows; row += blockDim.x) { + auto temp = zero(); + for (auto nnz = mat.row_ptrs[row]; nnz < mat.row_ptrs[row + 1]; nnz++) { + const auto col_idx = col[nnz]; + temp += val[nnz] * b[col_idx]; + } + x[row] = temp; + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void simple_apply_kernel(const gko::batch::matrix:: + csr::uniform_batch< + const ValueType, + IndexType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + simple_apply(mat_b, b_b.values, x_b.values); + } +} + + +template +__device__ __forceinline__ void advanced_apply( + const ValueType alpha, + const gko::batch::matrix::csr::batch_item& mat, + const ValueType* const __restrict__ b, const ValueType beta, + ValueType* const __restrict__ x) +{ + const auto num_rows = mat.num_rows; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int row = threadIdx.x; row < num_rows; row += blockDim.x) { + auto temp = zero(); + for (auto nnz = mat.row_ptrs[row]; nnz < mat.row_ptrs[row + 1]; nnz++) { + const auto col_idx = col[nnz]; + temp += alpha * val[nnz] * b[col_idx]; + } + x[row] = temp + beta * x[row]; + } +} + +template +__global__ __launch_bounds__( + default_block_size, + sm_oversubscription) void advanced_apply_kernel(const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + alpha, + const gko::batch::matrix:: + csr::uniform_batch< + const ValueType, + IndexType> + mat, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + b, + const gko::batch:: + multi_vector:: + uniform_batch< + const ValueType> + beta, + const gko::batch:: + multi_vector:: + uniform_batch< + ValueType> + x) +{ + for (size_type batch_id = blockIdx.x; batch_id < mat.num_batch_items; + batch_id += gridDim.x) { + const auto mat_b = + gko::batch::matrix::extract_batch_item(mat, batch_id); + const auto b_b = gko::batch::extract_batch_item(b, batch_id); + const auto x_b = gko::batch::extract_batch_item(x, batch_id); + const auto alpha_b = gko::batch::extract_batch_item(alpha, batch_id); + const auto beta_b = gko::batch::extract_batch_item(beta, batch_id); + advanced_apply(alpha_b.values[0], mat_b, b_b.values, beta_b.values[0], + x_b.values); + } +} diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index e74178d2588..21f188ccc5f 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -40,6 +40,7 @@ target_sources(ginkgo log/vtune.cpp log/record.cpp log/stream.cpp + matrix/batch_csr.cpp matrix/batch_dense.cpp matrix/batch_ell.cpp matrix/batch_identity.cpp diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp index 35beec7b3f3..ef79a0978b7 100644 --- a/core/device_hooks/common_kernels.inc.cpp +++ b/core/device_hooks/common_kernels.inc.cpp @@ -29,6 +29,7 @@ #include "core/factorization/par_ict_kernels.hpp" #include "core/factorization/par_ilu_kernels.hpp" #include "core/factorization/par_ilut_kernels.hpp" +#include "core/matrix/batch_csr_kernels.hpp" #include "core/matrix/batch_dense_kernels.hpp" #include "core/matrix/batch_ell_kernels.hpp" #include "core/matrix/coo_kernels.hpp" @@ -281,6 +282,16 @@ GKO_STUB_VALUE_TYPE(GKO_DECLARE_BATCH_MULTI_VECTOR_COPY_KERNEL); } // namespace batch_multi_vector +namespace batch_csr { + + +GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL); +GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_csr + + namespace batch_dense { diff --git a/core/matrix/batch_csr.cpp b/core/matrix/batch_csr.cpp new file mode 100644 index 00000000000..d98b05fadea --- /dev/null +++ b/core/matrix/batch_csr.cpp @@ -0,0 +1,209 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_csr_kernels.hpp" + + +namespace gko { +namespace batch { +namespace matrix { +namespace csr { +namespace { + + +GKO_REGISTER_OPERATION(simple_apply, batch_csr::simple_apply); +GKO_REGISTER_OPERATION(advanced_apply, batch_csr::advanced_apply); + + +} // namespace +} // namespace csr + + +template +std::unique_ptr> +Csr::create_view_for_item(size_type item_id) +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto mat = unbatch_type::create( + exec, this->get_common_size(), + make_array_view(exec, this->get_num_elements_per_item(), + this->get_values_for_item(item_id)), + make_array_view(exec, this->get_num_elements_per_item(), + this->get_col_idxs()), + make_array_view(exec, num_rows + 1, this->get_row_ptrs())); + return mat; +} + + +template +std::unique_ptr> +Csr::create_const_view_for_item(size_type item_id) const +{ + auto exec = this->get_executor(); + auto num_rows = this->get_common_size()[0]; + auto mat = unbatch_type::create_const( + exec, this->get_common_size(), + make_const_array_view(exec, this->get_num_elements_per_item(), + this->get_const_values_for_item(item_id)), + make_const_array_view(exec, this->get_num_elements_per_item(), + this->get_const_col_idxs()), + make_const_array_view(exec, num_rows + 1, this->get_const_row_ptrs())); + return mat; +} + + +template +std::unique_ptr> +Csr::create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs, + gko::detail::const_array_view&& row_ptrs) +{ + // cast const-ness away, but return a const object afterwards, + // so we can ensure that no modifications take place. + return std::unique_ptr( + new Csr{exec, sizes, gko::detail::array_const_cast(std::move(values)), + gko::detail::array_const_cast(std::move(col_idxs)), + gko::detail::array_const_cast(std::move(row_ptrs))}); +} + + +template +Csr::Csr(std::shared_ptr exec, + const batch_dim<2>& size, + size_type num_nnz_per_item) + : EnableBatchLinOp>(exec, size), + values_(exec, num_nnz_per_item * size.get_num_batch_items()), + col_idxs_(exec, num_nnz_per_item), + row_ptrs_(exec, size.get_common_size()[0] + 1) +{ + row_ptrs_.fill(0); +} + + +template +Csr* Csr::apply( + ptr_param> b, + ptr_param> x) +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Csr* Csr::apply( + ptr_param> b, + ptr_param> x) const +{ + this->validate_application_parameters(b.get(), x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +Csr* Csr::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +const Csr* Csr::apply( + ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const +{ + this->validate_application_parameters(alpha.get(), b.get(), beta.get(), + x.get()); + auto exec = this->get_executor(); + this->apply_impl(make_temporary_clone(exec, alpha).get(), + make_temporary_clone(exec, b).get(), + make_temporary_clone(exec, beta).get(), + make_temporary_clone(exec, x).get()); + return this; +} + + +template +void Csr::apply_impl(const MultiVector* b, + MultiVector* x) const +{ + this->get_executor()->run(csr::make_simple_apply(this, b, x)); +} + + +template +void Csr::apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const +{ + this->get_executor()->run( + csr::make_advanced_apply(alpha, this, b, beta, x)); +} + + +template +void Csr::convert_to( + Csr, IndexType>* result) const +{ + result->values_ = this->values_; + result->col_idxs_ = this->col_idxs_; + result->row_ptrs_ = this->row_ptrs_; + result->set_size(this->get_size()); +} + + +template +void Csr::move_to( + Csr, IndexType>* result) +{ + this->convert_to(result); +} + + +#define GKO_DECLARE_BATCH_CSR_MATRIX(ValueType) class Csr +GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_BATCH_CSR_MATRIX); + + +} // namespace matrix +} // namespace batch +} // namespace gko diff --git a/core/matrix/batch_csr_kernels.hpp b/core/matrix/batch_csr_kernels.hpp new file mode 100644 index 00000000000..9b3b07e477f --- /dev/null +++ b/core/matrix/batch_csr_kernels.hpp @@ -0,0 +1,56 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef GKO_CORE_MATRIX_BATCH_CSR_KERNELS_HPP_ +#define GKO_CORE_MATRIX_BATCH_CSR_KERNELS_HPP_ + + +#include + + +#include +#include +#include + + +#include "core/base/kernel_declaration.hpp" + + +namespace gko { +namespace kernels { + + +#define GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL(_vtype, _itype) \ + void simple_apply(std::shared_ptr exec, \ + const batch::matrix::Csr<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype>* b, \ + batch::MultiVector<_vtype>* c) + +#define GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL(_vtype, _itype) \ + void advanced_apply(std::shared_ptr exec, \ + const batch::MultiVector<_vtype>* alpha, \ + const batch::matrix::Csr<_vtype, _itype>* a, \ + const batch::MultiVector<_vtype>* b, \ + const batch::MultiVector<_vtype>* beta, \ + batch::MultiVector<_vtype>* c) + +#define GKO_DECLARE_ALL_AS_TEMPLATES \ + template \ + GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL(ValueType, IndexType); \ + template \ + GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL(ValueType, IndexType) + + +GKO_DECLARE_FOR_ALL_EXECUTOR_NAMESPACES(batch_csr, + GKO_DECLARE_ALL_AS_TEMPLATES); + + +#undef GKO_DECLARE_ALL_AS_TEMPLATES + + +} // namespace kernels +} // namespace gko + + +#endif // GKO_CORE_MATRIX_BATCH_CSR_KERNELS_HPP_ diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp index f0771f0d316..a1136b0fda4 100644 --- a/core/matrix/batch_struct.hpp +++ b/core/matrix/batch_struct.hpp @@ -8,6 +8,7 @@ #include #include +#include #include #include @@ -15,6 +16,52 @@ namespace gko { namespace batch { namespace matrix { +namespace csr { + + +/** + * Encapsulates one matrix from a batch of csr matrices. + */ +template +struct batch_item { + using value_type = ValueType; + using index_type = IndexType; + + ValueType* values; + const index_type* col_idxs; + const index_type* row_ptrs; + index_type num_rows; + index_type num_cols; +}; + + +/** + * A 'simple' structure to store a global uniform batch of csr matrices. + */ +template +struct uniform_batch { + using value_type = ValueType; + using index_type = IndexType; + using entry_type = batch_item; + + ValueType* values; + const index_type* col_idxs; + const index_type* row_ptrs; + size_type num_batch_items; + index_type num_rows; + index_type num_cols; + index_type num_nnz_per_item; + + inline size_type get_single_item_num_nnz() const + { + return static_cast(num_nnz_per_item); + } +}; + + +} // namespace csr + + namespace dense { @@ -102,6 +149,45 @@ struct uniform_batch { } // namespace ell +template +GKO_ATTRIBUTES GKO_INLINE csr::batch_item +to_const(const csr::batch_item& b) +{ + return {b.values, b.col_idxs, b.row_ptrs, b.num_rows, b.num_cols}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE csr::uniform_batch +to_const(const csr::uniform_batch& ub) +{ + return {ub.values, ub.col_idxs, ub.row_ptrs, ub.num_batch_items, + ub.num_rows, ub.num_cols, ub.num_nnz_per_item}; +} + + +template +GKO_ATTRIBUTES GKO_INLINE csr::batch_item +extract_batch_item(const csr::uniform_batch& batch, + const size_type batch_idx) +{ + return {batch.values + batch_idx * batch.num_nnz_per_item, batch.col_idxs, + batch.row_ptrs, batch.num_rows, batch.num_cols}; +} + +template +GKO_ATTRIBUTES GKO_INLINE csr::batch_item +extract_batch_item(ValueType* const batch_values, + IndexType* const batch_col_idxs, + IndexType* const batch_row_ptrs, const int num_rows, + const int num_cols, int num_nnz_per_item, + const size_type batch_idx) +{ + return {batch_values + batch_idx * num_nnz_per_item, batch_col_idxs, + batch_row_ptrs, num_rows, num_cols}; +} + + template GKO_ATTRIBUTES GKO_INLINE dense::batch_item to_const( const dense::batch_item& b) diff --git a/core/solver/batch_dispatch.hpp b/core/solver/batch_dispatch.hpp index b29ae688c7c..0621cabba79 100644 --- a/core/solver/batch_dispatch.hpp +++ b/core/solver/batch_dispatch.hpp @@ -261,6 +261,10 @@ class batch_solver_dispatch { mat_)) { auto mat_item = device::get_batch_struct(batch_mat); dispatch_on_logger(mat_item, b_item, x_item, log_data); + } else if (auto batch_mat = dynamic_cast< + const batch::matrix::Csr*>(mat_)) { + auto mat_item = device::get_batch_struct(batch_mat); + dispatch_on_logger(mat_item, b_item, x_item, log_data); } else { GKO_NOT_SUPPORTED(mat_); } diff --git a/core/test/matrix/CMakeLists.txt b/core/test/matrix/CMakeLists.txt index 43d3b74ff0f..da1178425ff 100644 --- a/core/test/matrix/CMakeLists.txt +++ b/core/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_csr) ginkgo_create_test(batch_dense) ginkgo_create_test(batch_ell) ginkgo_create_test(batch_identity) diff --git a/core/test/matrix/batch_csr.cpp b/core/test/matrix/batch_csr.cpp new file mode 100644 index 00000000000..41e066744f4 --- /dev/null +++ b/core/test/matrix/batch_csr.cpp @@ -0,0 +1,470 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include + + +#include + + +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/batch_helpers.hpp" + + +template +class Csr : public ::testing::Test { +protected: + using value_type = T; + using index_type = gko::int32; + using BatchCsrMtx = gko::batch::matrix::Csr; + using CsrMtx = gko::matrix::Csr; + using size_type = gko::size_type; + Csr() + : exec(gko::ReferenceExecutor::create()), + mtx(gko::batch::initialize( + // clang-format off + {{{-1.0, 2.0, 3.0}, + {-1.5, 2.5, 3.5}}, + {{1.0, 2.5, 3.0}, + {1.0, 2.0, 3.0}}}, + // clang-format on + exec, 6)), + sp_mtx(gko::batch::initialize( + // clang-format off + {{{-1.0, 0.0, 0.0}, + {0.0, 2.5, 3.5}}, + {{1.0, 0.0, 0.0}, + {0.0, 2.0, 3.0}}}, + // clang-format on + exec, 3)), + csr_mtx(gko::initialize( + // clang-format off + {{1.0, 2.5, 3.0}, + {1.0, 2.0, 3.0}}, + // clang-format on + exec, gko::dim<2>(2, 3))), + sp_csr_mtx(gko::initialize( + // clang-format off + {{1.0, 0.0, 0.0}, + {0.0, 2.0, 3.0}}, + // clang-format on + exec, gko::dim<2>(2, 3))) + {} + + static void assert_equal_to_original_sparse_mtx(const BatchCsrMtx* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * 3); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_const_values()[1], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[2], value_type{3.5}); + EXPECT_EQ(m->get_const_values()[3], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[4], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[5], value_type{3.0}); + EXPECT_EQ(m->get_const_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_const_col_idxs()[1], index_type{1}); + EXPECT_EQ(m->get_const_col_idxs()[2], index_type{2}); + EXPECT_EQ(m->get_const_row_ptrs()[0], index_type{0}); + EXPECT_EQ(m->get_const_row_ptrs()[1], index_type{1}); + EXPECT_EQ(m->get_const_row_ptrs()[2], index_type{3}); + } + + static void assert_equal_to_original_mtx(const BatchCsrMtx* m) + { + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * 6); + EXPECT_EQ(m->get_const_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_const_values()[1], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[2], value_type{3.0}); + EXPECT_EQ(m->get_const_values()[3], value_type{-1.5}); + EXPECT_EQ(m->get_const_values()[4], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[5], value_type{3.5}); + EXPECT_EQ(m->get_const_values()[6], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[7], value_type{2.5}); + EXPECT_EQ(m->get_const_values()[8], value_type{3.0}); + EXPECT_EQ(m->get_const_values()[9], value_type{1.0}); + EXPECT_EQ(m->get_const_values()[10], value_type{2.0}); + EXPECT_EQ(m->get_const_values()[11], value_type{3.0}); + EXPECT_EQ(m->get_const_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_const_col_idxs()[1], index_type{1}); + EXPECT_EQ(m->get_const_col_idxs()[2], index_type{2}); + EXPECT_EQ(m->get_const_col_idxs()[3], index_type{0}); + EXPECT_EQ(m->get_const_col_idxs()[4], index_type{1}); + EXPECT_EQ(m->get_const_col_idxs()[5], index_type{2}); + EXPECT_EQ(m->get_const_row_ptrs()[0], index_type{0}); + EXPECT_EQ(m->get_const_row_ptrs()[1], index_type{3}); + EXPECT_EQ(m->get_const_row_ptrs()[2], index_type{6}); + } + + static void assert_empty(BatchCsrMtx* m) + { + ASSERT_EQ(m->get_num_batch_items(), 0); + ASSERT_EQ(m->get_num_stored_elements(), 0); + } + + std::shared_ptr exec; + std::unique_ptr mtx; + std::unique_ptr sp_mtx; + std::unique_ptr csr_mtx; + std::unique_ptr sp_csr_mtx; +}; + +TYPED_TEST_SUITE(Csr, gko::test::ValueTypes, TypenameNameGenerator); + + +TYPED_TEST(Csr, KnowsItsSizeAndValues) +{ + this->assert_equal_to_original_mtx(this->mtx.get()); +} + + +TYPED_TEST(Csr, SparseMtxKnowsItsSizeAndValues) +{ + this->assert_equal_to_original_sparse_mtx(this->sp_mtx.get()); +} + + +TYPED_TEST(Csr, CanBeEmpty) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto empty = BatchCsrMtx::create(this->exec); + + this->assert_empty(empty.get()); + ASSERT_EQ(empty->get_const_values(), nullptr); +} + + +TYPED_TEST(Csr, CanGetValuesForEntry) +{ + using value_type = typename TestFixture::value_type; + + ASSERT_EQ(this->mtx->get_values_for_item(1)[0], value_type{1.0}); +} + + +TYPED_TEST(Csr, CanCreateCsrItemView) +{ + GKO_ASSERT_MTX_NEAR(this->mtx->create_view_for_item(1), this->csr_mtx, 0.0); +} + + +TYPED_TEST(Csr, CanCreateSpCsrItemView) +{ + GKO_ASSERT_MTX_NEAR(this->sp_mtx->create_view_for_item(1), this->sp_csr_mtx, + 0.0); +} + + +TYPED_TEST(Csr, CanBeCopied) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto mtx_copy = BatchCsrMtx::create(this->exec); + + mtx_copy->copy_from(this->mtx.get()); + + this->assert_equal_to_original_mtx(this->mtx.get()); + this->mtx->get_values()[0] = 7; + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Csr, CanBeMoved) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto mtx_copy = BatchCsrMtx::create(this->exec); + + this->mtx->move_to(mtx_copy); + + this->assert_equal_to_original_mtx(mtx_copy.get()); +} + + +TYPED_TEST(Csr, CanBeCloned) +{ + auto mtx_clone = this->mtx->clone(); + + this->assert_equal_to_original_mtx( + dynamic_castmtx.get())>(mtx_clone.get())); +} + + +TYPED_TEST(Csr, CanBeCleared) +{ + this->mtx->clear(); + + this->assert_empty(this->mtx.get()); +} + + +TYPED_TEST(Csr, CanBeConstructedWithSize) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto m = BatchCsrMtx::create(this->exec, + gko::batch_dim<2>(2, gko::dim<2>{5, 3})); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 0); +} + + +TYPED_TEST(Csr, CanBeConstructedWithSizeAndNnz) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto m = BatchCsrMtx::create(this->exec, + gko::batch_dim<2>(2, gko::dim<2>{5, 3}), 5); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(5, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 10); +} + + +TYPED_TEST(Csr, CanBeConstructedFromExistingData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + value_type values[] = {-1.0, 2.5, 3.5, 1.0, 2.0, 3.0}; + index_type col_idxs[] = {0, 1, 2}; + index_type row_ptrs[] = {0, 1, 3}; + + auto m = BatchCsrMtx::create( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), + gko::array::view(this->exec, 6, values), + gko::array::view(this->exec, 3, col_idxs), + gko::array::view(this->exec, 3, row_ptrs)); + + this->assert_equal_to_original_sparse_mtx(m.get()); +} + + +TYPED_TEST(Csr, CanBeConstructedFromExistingConstData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + value_type values[] = {-1.0, 2.5, 3.5, 1.0, 2.0, 3.0}; + index_type col_idxs[] = {0, 1, 2}; + index_type row_ptrs[] = {0, 1, 3}; + + auto m = BatchCsrMtx::create_const( + this->exec, gko::batch_dim<2>(2, gko::dim<2>(2, 3)), + gko::array::const_view(this->exec, 6, values), + gko::array::const_view(this->exec, 3, col_idxs), + gko::array::const_view(this->exec, 3, row_ptrs)); + + this->assert_equal_to_original_sparse_mtx(m.get()); +} + + +TYPED_TEST(Csr, CanBeConstructedFromCsrMatrices) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + using CsrMtx = typename TestFixture::CsrMtx; + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); + + auto m = gko::batch::create_from_item( + this->exec, std::vector{mat1.get(), mat2.get()}, 3); + + this->assert_equal_to_original_sparse_mtx(m.get()); +} + + +TYPED_TEST(Csr, CanBeConstructedFromCsrMatricesByDuplication) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + using CsrMtx = typename TestFixture::CsrMtx; + auto mat1 = + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); + auto bat_m = gko::batch::create_from_item( + this->exec, std::vector{mat1.get(), mat1.get(), mat1.get()}, + 2); + + auto m = + gko::batch::create_from_item(this->exec, 3, mat1.get(), 2); + + GKO_ASSERT_BATCH_MTX_NEAR(bat_m.get(), m.get(), 0.); +} + + +TYPED_TEST(Csr, CanBeConstructedByDuplicatingCsrMatrices) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + using CsrMtx = typename TestFixture::CsrMtx; + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 0.0}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 0.0}}, this->exec); + + auto m = gko::batch::create_from_item( + this->exec, std::vector{mat1.get(), mat2.get()}, 2); + auto m_ref = gko::batch::create_from_item( + this->exec, + std::vector{mat1.get(), mat2.get(), mat1.get(), mat2.get(), + mat1.get(), mat2.get()}, + 2); + + auto m2 = gko::batch::duplicate(this->exec, 3, m.get(), 2); + + GKO_ASSERT_BATCH_MTX_NEAR(m2.get(), m_ref.get(), 0.); +} + + +TYPED_TEST(Csr, CanBeUnbatchedIntoCsrMatrices) +{ + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + using CsrMtx = typename TestFixture::CsrMtx; + auto mat1 = gko::initialize({{-1.0, 0.0, 0.0}, {0.0, 2.5, 3.5}}, + this->exec); + auto mat2 = + gko::initialize({{1.0, 0.0, 0.0}, {0.0, 2.0, 3.0}}, this->exec); + + auto csr_mats = gko::batch::unbatch(this->sp_mtx.get()); + + GKO_ASSERT_MTX_NEAR(csr_mats[0].get(), mat1.get(), 0.); + GKO_ASSERT_MTX_NEAR(csr_mats[1].get(), mat2.get(), 0.); +} + + +TYPED_TEST(Csr, CanBeListConstructed) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto m = gko::batch::initialize({{0.0, -1.0}, {0.0, -5.0}}, + this->exec, 1); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 2); + EXPECT_EQ(m->get_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_values()[1], value_type{-5.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[0], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[1], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[2], index_type{1}); +} + + +TYPED_TEST(Csr, CanBeListConstructedByCopies) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + + auto m = gko::batch::initialize(2, I({0.0, -1.0}), + this->exec, 1); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(2, 1)); + ASSERT_EQ(m->get_num_stored_elements(), 2); + EXPECT_EQ(m->get_values()[0], value_type{-1.0}); + EXPECT_EQ(m->get_values()[1], value_type{-1.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[0], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[1], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[2], index_type{1}); +} + + +TYPED_TEST(Csr, CanBeDoubleListConstructed) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + using T = value_type; + + auto m = gko::batch::initialize( + // clang-format off + {{I{1.0, 0.0, 0.0}, + I{2.0, 0.0, 3.0}, + I{3.0, 6.0, 0.0}}, + {I{1.0, 0.0, 0.0}, + I{3.0, 0.0, -2.0}, + I{5.0, 8.0, 0.0}}}, + // clang-format on + this->exec, 5); + + ASSERT_EQ(m->get_num_batch_items(), 2); + ASSERT_EQ(m->get_common_size(), gko::dim<2>(3, 3)); + ASSERT_EQ(m->get_num_stored_elements(), 2 * 5); + EXPECT_EQ(m->get_values()[0], value_type{1.0}); + EXPECT_EQ(m->get_values()[1], value_type{2.0}); + EXPECT_EQ(m->get_values()[2], value_type{3.0}); + EXPECT_EQ(m->get_values()[3], value_type{3.0}); + EXPECT_EQ(m->get_values()[4], value_type{6.0}); + EXPECT_EQ(m->get_values()[5], value_type{1.0}); + EXPECT_EQ(m->get_values()[6], value_type{3.0}); + EXPECT_EQ(m->get_values()[7], value_type{-2.0}); + EXPECT_EQ(m->get_values()[8], value_type{5.0}); + EXPECT_EQ(m->get_values()[9], value_type{8.0}); + EXPECT_EQ(m->get_col_idxs()[0], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[1], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[2], index_type{2}); + EXPECT_EQ(m->get_col_idxs()[3], index_type{0}); + EXPECT_EQ(m->get_col_idxs()[4], index_type{1}); + EXPECT_EQ(m->get_row_ptrs()[0], index_type{0}); + EXPECT_EQ(m->get_row_ptrs()[1], index_type{1}); + EXPECT_EQ(m->get_row_ptrs()[2], index_type{3}); + EXPECT_EQ(m->get_row_ptrs()[3], index_type{5}); +} + + +TYPED_TEST(Csr, CanBeReadFromMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + auto vec_data = std::vector>{}; + vec_data.emplace_back(gko::matrix_data( + {2, 3}, {{0, 0, -1.0}, {1, 1, 2.5}, {1, 2, 3.5}})); + vec_data.emplace_back(gko::matrix_data( + {2, 3}, {{0, 0, 1.0}, {1, 1, 2.0}, {1, 2, 3.0}})); + + auto m = gko::batch::read(this->exec, + vec_data, 3); + + this->assert_equal_to_original_sparse_mtx(m.get()); +} + + +TYPED_TEST(Csr, GeneratesCorrectMatrixData) +{ + using value_type = typename TestFixture::value_type; + using index_type = typename TestFixture::index_type; + using BatchCsrMtx = typename TestFixture::BatchCsrMtx; + using tpl = typename gko::matrix_data::nonzero_type; + + auto data = gko::batch::write( + this->sp_mtx.get()); + + ASSERT_EQ(data[0].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[0].nonzeros.size(), 3); + EXPECT_EQ(data[0].nonzeros[0], tpl(0, 0, value_type{-1.0})); + EXPECT_EQ(data[0].nonzeros[1], tpl(1, 1, value_type{2.5})); + EXPECT_EQ(data[0].nonzeros[2], tpl(1, 2, value_type{3.5})); + ASSERT_EQ(data[1].size, gko::dim<2>(2, 3)); + ASSERT_EQ(data[1].nonzeros.size(), 3); + EXPECT_EQ(data[1].nonzeros[0], tpl(0, 0, value_type{1.0})); + EXPECT_EQ(data[1].nonzeros[1], tpl(1, 1, value_type{2.0})); + EXPECT_EQ(data[1].nonzeros[2], tpl(1, 2, value_type{3.0})); +} diff --git a/cuda/CMakeLists.txt b/cuda/CMakeLists.txt index 1efa8192aeb..0e0ac23f558 100644 --- a/cuda/CMakeLists.txt +++ b/cuda/CMakeLists.txt @@ -38,6 +38,7 @@ target_sources(ginkgo_cuda factorization/par_ilut_select_kernel.cu factorization/par_ilut_spgeam_kernel.cu factorization/par_ilut_sweep_kernel.cu + matrix/batch_csr_kernels.cu matrix/batch_dense_kernels.cu matrix/batch_ell_kernels.cu matrix/coo_kernels.cu diff --git a/cuda/matrix/batch_csr_kernels.cu b/cuda/matrix/batch_csr_kernels.cu new file mode 100644 index 00000000000..ddf9405c489 --- /dev/null +++ b/cuda/matrix/batch_csr_kernels.cu @@ -0,0 +1,57 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/batch_csr_kernels.hpp" + + +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "cuda/base/batch_struct.hpp" +#include "cuda/base/config.hpp" +#include "cuda/base/thrust.cuh" +#include "cuda/components/cooperative_groups.cuh" +#include "cuda/components/reduction.cuh" +#include "cuda/components/thread_ids.cuh" +#include "cuda/components/uninitialized_array.hpp" +#include "cuda/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace cuda { +/** + * @brief The Csr matrix format namespace. + * @ref Csr + * @ingroup batch_csr + */ +namespace batch_csr { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_csr_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_csr_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_csr +} // namespace cuda +} // namespace kernels +} // namespace gko diff --git a/cuda/matrix/batch_struct.hpp b/cuda/matrix/batch_struct.hpp index de1c33d0897..a1016fca5cc 100644 --- a/cuda/matrix/batch_struct.hpp +++ b/cuda/matrix/batch_struct.hpp @@ -32,6 +32,41 @@ namespace cuda { */ +/** + * Generates an immutable uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch, + const IndexType> +get_batch_struct(const batch::matrix::Csr* const op) +{ + return {as_cuda_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_const_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + +/** + * Generates a uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Csr* const op) +{ + return {as_cuda_type(op->get_values()), + op->get_col_idxs(), + op->get_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + /** * Generates an immutable uniform batch struct from a batch of dense matrices. */ diff --git a/cuda/solver/batch_bicgstab_kernels.cu b/cuda/solver/batch_bicgstab_kernels.cu index 2fe07ad9762..1641c7ea109 100644 --- a/cuda/solver/batch_bicgstab_kernels.cu +++ b/cuda/solver/batch_bicgstab_kernels.cu @@ -48,6 +48,7 @@ namespace batch_bicgstab { #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" #include "common/cuda_hip/components/uninitialized_array.hpp.inc" +#include "common/cuda_hip/matrix/batch_csr_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/solver/batch_bicgstab_kernels.hpp.inc" diff --git a/dpcpp/CMakeLists.txt b/dpcpp/CMakeLists.txt index 7499bca97a5..c3a2eb17548 100644 --- a/dpcpp/CMakeLists.txt +++ b/dpcpp/CMakeLists.txt @@ -36,6 +36,7 @@ target_sources(ginkgo_dpcpp factorization/par_ilut_select_kernel.dp.cpp factorization/par_ilut_spgeam_kernel.dp.cpp factorization/par_ilut_sweep_kernel.dp.cpp + matrix/batch_csr_kernels.dp.cpp matrix/batch_dense_kernels.dp.cpp matrix/batch_ell_kernels.dp.cpp matrix/coo_kernels.dp.cpp diff --git a/dpcpp/matrix/batch_csr_kernels.dp.cpp b/dpcpp/matrix/batch_csr_kernels.dp.cpp new file mode 100644 index 00000000000..3f7b13b7109 --- /dev/null +++ b/dpcpp/matrix/batch_csr_kernels.dp.cpp @@ -0,0 +1,148 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/batch_csr_kernels.hpp" + + +#include + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "dpcpp/base/batch_struct.hpp" +#include "dpcpp/base/dim3.dp.hpp" +#include "dpcpp/base/dpct.hpp" +#include "dpcpp/base/helper.hpp" +#include "dpcpp/components/cooperative_groups.dp.hpp" +#include "dpcpp/components/intrinsics.dp.hpp" +#include "dpcpp/components/reduction.dp.hpp" +#include "dpcpp/components/thread_ids.dp.hpp" +#include "dpcpp/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace dpcpp { +/** + * @brief The Csr matrix format namespace. + * @ref Csr + * @ingroup batch_csr + */ +namespace batch_csr { + + +#include "dpcpp/matrix/batch_csr_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const size_type num_rows = mat->get_common_size()[0]; + const size_type num_cols = mat->get_common_size()[1]; + + const auto num_batch_items = mat->get_num_batch_items(); + auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + const auto x_ub = get_batch_struct(x); + const auto b_ub = get_batch_struct(b); + const auto mat_ub = get_batch_struct(mat); + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + // Launch a kernel that has nbatches blocks, each block has max group size + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + simple_apply_kernel(mat_b, b_b.values, x_b.values, + item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto mat_ub = get_batch_struct(mat); + const auto b_ub = get_batch_struct(b); + const auto x_ub = get_batch_struct(x); + const auto alpha_ub = get_batch_struct(alpha); + const auto beta_ub = get_batch_struct(beta); + + if (b_ub.num_rhs > 1) { + GKO_NOT_IMPLEMENTED; + } + + const auto num_batch_items = mat_ub.num_batch_items; + auto device = exec->get_queue()->get_device(); + // TODO: use runtime selection of group size based on num_rows. + auto group_size = + device.get_info(); + + const dim3 block(group_size); + const dim3 grid(num_batch_items); + + // Launch a kernel that has nbatches blocks, each block has max group size + exec->get_queue()->submit([&](sycl::handler& cgh) { + cgh.parallel_for( + sycl_nd_range(grid, block), + [=](sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + auto group = item_ct1.get_group(); + auto group_id = group.get_group_linear_id(); + const auto mat_b = + batch::matrix::extract_batch_item(mat_ub, group_id); + const auto b_b = batch::extract_batch_item(b_ub, group_id); + const auto x_b = batch::extract_batch_item(x_ub, group_id); + const auto alpha_b = + batch::extract_batch_item(alpha_ub, group_id); + const auto beta_b = + batch::extract_batch_item(beta_ub, group_id); + advanced_apply_kernel(alpha_b.values[0], mat_b, b_b.values, + beta_b.values[0], x_b.values, + item_ct1); + }); + }); +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_csr +} // namespace dpcpp +} // namespace kernels +} // namespace gko diff --git a/dpcpp/matrix/batch_csr_kernels.hpp.inc b/dpcpp/matrix/batch_csr_kernels.hpp.inc new file mode 100644 index 00000000000..773312bb6f5 --- /dev/null +++ b/dpcpp/matrix/batch_csr_kernels.hpp.inc @@ -0,0 +1,44 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +template +__dpct_inline__ void simple_apply_kernel( + const gko::batch::matrix::csr::batch_item& mat, + const ValueType* b, ValueType* x, sycl::nd_item<3>& item_ct1) +{ + const auto num_rows = mat.num_rows; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int row = item_ct1.get_local_linear_id(); row < num_rows; + row += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (auto nnz = mat.row_ptrs[row]; nnz < mat.row_ptrs[row + 1]; nnz++) { + const auto col_idx = col[nnz]; + temp += val[nnz] * b[col_idx]; + } + x[row] = temp; + } +} + + +template +__dpct_inline__ void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::matrix::csr::batch_item& mat, + const ValueType* b, const ValueType beta, ValueType* x, + sycl::nd_item<3>& item_ct1) +{ + const auto num_rows = mat.num_rows; + const auto val = mat.values; + const auto col = mat.col_idxs; + for (int row = item_ct1.get_local_linear_id(); row < num_rows; + row += item_ct1.get_local_range().size()) { + auto temp = zero(); + for (auto nnz = mat.row_ptrs[row]; nnz < mat.row_ptrs[row + 1]; nnz++) { + const auto col_idx = col[nnz]; + temp += val[nnz] * b[col_idx]; + } + x[row] = alpha * temp + beta * x[row]; + } +} diff --git a/dpcpp/matrix/batch_struct.hpp b/dpcpp/matrix/batch_struct.hpp index 0df37a685df..40c63c33827 100644 --- a/dpcpp/matrix/batch_struct.hpp +++ b/dpcpp/matrix/batch_struct.hpp @@ -31,6 +31,40 @@ namespace dpcpp { */ +/** + * Generates an immutable uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch +get_batch_struct(const batch::matrix::Csr* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_const_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + +/** + * Generates a uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch get_batch_struct( + batch::matrix::Csr* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + /** * Generates an immutable uniform batch struct from a batch of dense matrices. */ diff --git a/dpcpp/solver/batch_bicgstab_kernels.dp.cpp b/dpcpp/solver/batch_bicgstab_kernels.dp.cpp index 1d4a8abb4a0..6a4509c8f77 100644 --- a/dpcpp/solver/batch_bicgstab_kernels.dp.cpp +++ b/dpcpp/solver/batch_bicgstab_kernels.dp.cpp @@ -40,6 +40,7 @@ namespace batch_bicgstab { #include "dpcpp/base/batch_multi_vector_kernels.hpp.inc" +#include "dpcpp/matrix/batch_csr_kernels.hpp.inc" #include "dpcpp/matrix/batch_dense_kernels.hpp.inc" #include "dpcpp/matrix/batch_ell_kernels.hpp.inc" #include "dpcpp/solver/batch_bicgstab_kernels.hpp.inc" diff --git a/hip/CMakeLists.txt b/hip/CMakeLists.txt index cb193920edc..6cf4d056e83 100644 --- a/hip/CMakeLists.txt +++ b/hip/CMakeLists.txt @@ -35,6 +35,7 @@ set(GINKGO_HIP_SOURCES factorization/par_ilut_select_kernel.hip.cpp factorization/par_ilut_spgeam_kernel.hip.cpp factorization/par_ilut_sweep_kernel.hip.cpp + matrix/batch_csr_kernels.hip.cpp matrix/batch_dense_kernels.hip.cpp matrix/batch_ell_kernels.hip.cpp matrix/coo_kernels.hip.cpp diff --git a/hip/matrix/batch_csr_kernels.hip.cpp b/hip/matrix/batch_csr_kernels.hip.cpp new file mode 100644 index 00000000000..4b07ea9588e --- /dev/null +++ b/hip/matrix/batch_csr_kernels.hip.cpp @@ -0,0 +1,58 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/batch_csr_kernels.hpp" + + +#include +#include + + +#include +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "hip/base/batch_struct.hip.hpp" +#include "hip/base/config.hip.hpp" +#include "hip/base/thrust.hip.hpp" +#include "hip/components/cooperative_groups.hip.hpp" +#include "hip/components/reduction.hip.hpp" +#include "hip/components/thread_ids.hip.hpp" +#include "hip/components/uninitialized_array.hip.hpp" +#include "hip/matrix/batch_struct.hip.hpp" + + +namespace gko { +namespace kernels { +namespace hip { +/** + * @brief The Csr matrix format namespace. + * @ref Csr + * @ingroup batch_csr + */ +namespace batch_csr { + + +constexpr auto default_block_size = 256; +constexpr int sm_oversubscription = 4; + +// clang-format off + +// NOTE: DO NOT CHANGE THE ORDERING OF THE INCLUDES + +#include "common/cuda_hip/matrix/batch_csr_kernels.hpp.inc" + + +#include "common/cuda_hip/matrix/batch_csr_kernel_launcher.hpp.inc" + +// clang-format on + + +} // namespace batch_csr +} // namespace hip +} // namespace kernels +} // namespace gko diff --git a/hip/matrix/batch_struct.hip.hpp b/hip/matrix/batch_struct.hip.hpp index 63b9b046f7e..f1bba81d4dd 100644 --- a/hip/matrix/batch_struct.hip.hpp +++ b/hip/matrix/batch_struct.hip.hpp @@ -32,6 +32,41 @@ namespace hip { */ +/** + * Generates an immutable uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch, + const IndexType> +get_batch_struct(const batch::matrix::Csr* const op) +{ + return {as_hip_type(op->get_const_values()), + op->get_const_col_idxs(), + op->get_const_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + +/** + * Generates a uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch, IndexType> +get_batch_struct(batch::matrix::Csr* const op) +{ + return {as_hip_type(op->get_values()), + op->get_col_idxs(), + op->get_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + /** * Generates an immutable uniform batch struct from a batch of dense matrices. */ diff --git a/hip/solver/batch_bicgstab_kernels.hip.cpp b/hip/solver/batch_bicgstab_kernels.hip.cpp index 90efccaeace..86f91bb5abd 100644 --- a/hip/solver/batch_bicgstab_kernels.hip.cpp +++ b/hip/solver/batch_bicgstab_kernels.hip.cpp @@ -47,6 +47,7 @@ namespace batch_bicgstab { #include "common/cuda_hip/base/batch_multi_vector_kernels.hpp.inc" #include "common/cuda_hip/components/uninitialized_array.hpp.inc" +#include "common/cuda_hip/matrix/batch_csr_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_dense_kernels.hpp.inc" #include "common/cuda_hip/matrix/batch_ell_kernels.hpp.inc" #include "common/cuda_hip/solver/batch_bicgstab_kernels.hpp.inc" diff --git a/include/ginkgo/core/matrix/batch_csr.hpp b/include/ginkgo/core/matrix/batch_csr.hpp new file mode 100644 index 00000000000..5f77d5653c9 --- /dev/null +++ b/include/ginkgo/core/matrix/batch_csr.hpp @@ -0,0 +1,341 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#ifndef GKO_PUBLIC_CORE_MATRIX_BATCH_CSR_HPP_ +#define GKO_PUBLIC_CORE_MATRIX_BATCH_CSR_HPP_ + + +#include +#include + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace gko { +namespace batch { +namespace matrix { + + +/** + * Csr is a general sparse matrix format that stores the column indices for each + * nonzero entry and a cumulative sum of the number of nonzeros in each row. It + * is one of the most popular sparse matrix formats due to its versatility and + * ability to store a wide range of sparsity patterns in an efficient fashion. + * + * @note It is assumed that the sparsity pattern of all the items in the + * batch is the same and therefore only a single copy of the sparsity pattern is + * stored. + * + * @note Currently only IndexType of int32 is supported. + * + * @tparam ValueType value precision of matrix elements + * @tparam IndexType index precision of matrix elements + * + * @ingroup batch_csr + * @ingroup mat_formats + * @ingroup BatchLinOp + */ +template +class Csr final + : public EnableBatchLinOp>, + public EnableCreateMethod>, + public ConvertibleTo, IndexType>> { + friend class EnableCreateMethod; + friend class EnablePolymorphicObject; + friend class Csr, IndexType>; + friend class Csr, IndexType>; + static_assert(std::is_same::value, + "IndexType must be a 32 bit integer"); + +public: + using EnableBatchLinOp::convert_to; + using EnableBatchLinOp::move_to; + + using value_type = ValueType; + using index_type = IndexType; + using unbatch_type = gko::matrix::Csr; + using absolute_type = remove_complex; + using complex_type = to_complex; + + void convert_to( + Csr, IndexType>* result) const override; + + void move_to(Csr, IndexType>* result) override; + + /** + * Creates a mutable view (of matrix::Csr type) of one item of the + * batch::matrix::Csr object. Does not perform any deep + * copies, but only returns a view of the data. + * + * @param item_id The index of the batch item + * + * @return a batch::matrix::Csr object with the data from the batch item + * at the given index. + */ + std::unique_ptr create_view_for_item(size_type item_id); + + /** + * @copydoc create_view_for_item(size_type) + */ + std::unique_ptr create_const_view_for_item( + size_type item_id) const; + + /** + * Returns a pointer to the array of values of the matrix + * + * @return the pointer to the array of values + */ + value_type* get_values() noexcept { return values_.get_data(); } + + /** + * @copydoc get_values() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values() const noexcept + { + return values_.get_const_data(); + } + + /** + * Returns a pointer to the array of column indices of the matrix + * + * @return the pointer to the array of column indices + */ + index_type* get_col_idxs() noexcept { return col_idxs_.get_data(); } + + /** + * @copydoc get_col_idxs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_col_idxs() const noexcept + { + return col_idxs_.get_const_data(); + } + + /** + * Returns a pointer to the array of row pointers of the matrix + * + * @return the pointer to the array of row pointers + */ + index_type* get_row_ptrs() noexcept { return row_ptrs_.get_data(); } + + /** + * @copydoc get_row_ptrs() + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const index_type* get_const_row_ptrs() const noexcept + { + return row_ptrs_.get_const_data(); + } + + /** + * Returns the number of elements explicitly stored in the batch matrix, + * cumulative across all the batch items. + * + * @return the number of elements explicitly stored in the vector, + * cumulative across all the batch items + */ + size_type get_num_stored_elements() const noexcept + { + return values_.get_size(); + } + + /** + * Returns the number of stored elements in each batch item. + * + * @return the number of stored elements per batch item. + */ + size_type get_num_elements_per_item() const noexcept + { + return this->get_num_stored_elements() / this->get_num_batch_items(); + } + + /** + * Returns a pointer to the array of values of the matrix for a + * specific batch item. + * + * @param batch_id the id of the batch item. + * + * @return the pointer to the array of values + */ + value_type* get_values_for_item(size_type batch_id) noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + GKO_ASSERT(values_.get_size() >= + batch_id * this->get_num_elements_per_item()); + return values_.get_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * @copydoc get_values_for_item(size_type) + * + * @note This is the constant version of the function, which can be + * significantly more memory efficient than the non-constant version, + * so always prefer this version. + */ + const value_type* get_const_values_for_item( + size_type batch_id) const noexcept + { + GKO_ASSERT(batch_id < this->get_num_batch_items()); + GKO_ASSERT(values_.get_size() >= + batch_id * this->get_num_elements_per_item()); + return values_.get_const_data() + + batch_id * this->get_num_elements_per_item(); + } + + /** + * Creates a constant (immutable) batch csr matrix from a constant + * array. Only a single sparsity pattern (column indices and row pointers) + * is stored and hence the user needs to ensure that each batch item has the + * same sparsity pattern. + * + * @param exec the executor to create the matrix on + * @param size the dimensions of the matrix + * @param num_elems_per_row the number of elements to be stored in each row + * @param values the value array of the matrix + * @param col_idxs the col_idxs array of a single batch item of the matrix. + * @param row_ptrs the row_ptrs array of a single batch item of the matrix. + * + * @return A smart pointer to the constant matrix wrapping the input + * array (if it resides on the same executor as the matrix) or a copy of the + * array on the correct executor. + */ + static std::unique_ptr create_const( + std::shared_ptr exec, const batch_dim<2>& sizes, + gko::detail::const_array_view&& values, + gko::detail::const_array_view&& col_idxs, + gko::detail::const_array_view&& row_ptrs); + + /** + * Apply the matrix to a multi-vector. Represents the matrix vector + * multiplication, x = A * b, where x and b are both multi-vectors. + * + * @param b the multi-vector to be applied to + * @param x the output multi-vector + */ + Csr* apply(ptr_param> b, + ptr_param> x); + + /** + * Apply the matrix to a multi-vector with a linear combination of the given + * input vector. Represents the matrix vector multiplication, x = alpha * A + * * b + beta * x, where x and b are both multi-vectors. + * + * @param alpha the scalar to scale the matrix-vector product with + * @param b the multi-vector to be applied to + * @param beta the scalar to scale the x vector with + * @param x the output multi-vector + */ + Csr* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x); + + /** + * @copydoc apply(const MultiVector*, MultiVector*) + */ + const Csr* apply(ptr_param> b, + ptr_param> x) const; + + /** + * @copydoc apply(const MultiVector*, const + * MultiVector*, const MultiVector*, + * MultiVector*) + */ + const Csr* apply(ptr_param> alpha, + ptr_param> b, + ptr_param> beta, + ptr_param> x) const; + +private: + /** + * Creates an uninitialized Csr matrix of the specified size. + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param num_nonzeros_per_item number of nonzeros in each item of the + * batch matrix + * + * @internal It is necessary to pass in the correct nnz_per_item to ensure + * that the arrays are allocated correctly. An incorrect value will result + * in a runtime failure when the user tries to use any batch matrix + * utilities such as create_view_from_item etc. + */ + Csr(std::shared_ptr exec, + const batch_dim<2>& size = batch_dim<2>{}, + size_type num_nonzeros_per_item = {}); + + /** + * Creates a Csr matrix from an already allocated (and initialized) + * array. The column indices array needs to be the same for all batch items. + * + * @tparam ValuesArray type of array of values + * + * @param exec Executor associated to the matrix + * @param size size of the matrix + * @param values array of matrix values + * @param col_idxs the col_idxs array of a single batch item of the matrix. + * @param row_ptrs the row_ptrs array of a single batch item of the matrix. + * + * @note If `values` is not an rvalue, not an array of ValueType, or is on + * the wrong executor, an internal copy will be created, and the + * original array data will not be used in the matrix. + */ + template + Csr(std::shared_ptr exec, const batch_dim<2>& size, + ValuesArray&& values, ColIdxsArray&& col_idxs, RowPtrsArray&& row_ptrs) + : EnableBatchLinOp(exec, size), + values_{exec, std::forward(values)}, + col_idxs_{exec, std::forward(col_idxs)}, + row_ptrs_{exec, std::forward(row_ptrs)} + { + // Ensure that the value and col_idxs arrays have the correct size + auto max_num_elems = this->get_common_size()[0] * + this->get_common_size()[1] * + this->get_num_batch_items(); + GKO_ASSERT(values_.get_size() <= max_num_elems); + GKO_ASSERT_EQ(row_ptrs_.get_size(), this->get_common_size()[0] + 1); + GKO_ASSERT_EQ(this->get_num_elements_per_item(), col_idxs_.get_size()); + } + + void apply_impl(const MultiVector* b, + MultiVector* x) const; + + void apply_impl(const MultiVector* alpha, + const MultiVector* b, + const MultiVector* beta, + MultiVector* x) const; + + array values_; + array col_idxs_; + array row_ptrs_; +}; + + +} // namespace matrix +} // namespace batch +} // namespace gko + + +#endif // GKO_PUBLIC_CORE_MATRIX_BATCH_CSR_HPP_ diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp index 62be8aaa394..ab0829625a6 100644 --- a/include/ginkgo/ginkgo.hpp +++ b/include/ginkgo/ginkgo.hpp @@ -81,6 +81,7 @@ #include #include +#include #include #include #include diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index 557061c5fed..49564dd4c4d 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -23,6 +23,7 @@ target_sources(ginkgo_omp factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_csr_kernels.cpp matrix/batch_dense_kernels.cpp matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp diff --git a/omp/matrix/batch_csr_kernels.cpp b/omp/matrix/batch_csr_kernels.cpp new file mode 100644 index 00000000000..5af8f79c299 --- /dev/null +++ b/omp/matrix/batch_csr_kernels.cpp @@ -0,0 +1,89 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/batch_csr_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace omp { +/** + * @brief The Csr matrix format namespace. + * @ref Csr + * @ingroup batch_csr + */ +namespace batch_csr { + + +#include "reference/matrix/batch_csr_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); +#pragma omp parallel for + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_csr +} // namespace omp +} // namespace kernels +} // namespace gko diff --git a/omp/matrix/batch_ell_kernels.cpp b/omp/matrix/batch_ell_kernels.cpp index 08ac909798f..7139f32bcc0 100644 --- a/omp/matrix/batch_ell_kernels.cpp +++ b/omp/matrix/batch_ell_kernels.cpp @@ -9,7 +9,7 @@ #include -#include +#include #include "core/base/batch_struct.hpp" diff --git a/omp/solver/batch_bicgstab_kernels.cpp b/omp/solver/batch_bicgstab_kernels.cpp index 59d8dd05170..9294a5a286b 100644 --- a/omp/solver/batch_bicgstab_kernels.cpp +++ b/omp/solver/batch_bicgstab_kernels.cpp @@ -26,6 +26,7 @@ constexpr int max_num_rhs = 1; #include "reference/base/batch_multi_vector_kernels.hpp.inc" +#include "reference/matrix/batch_csr_kernels.hpp.inc" #include "reference/matrix/batch_dense_kernels.hpp.inc" #include "reference/matrix/batch_ell_kernels.hpp.inc" #include "reference/solver/batch_bicgstab_kernels.hpp.inc" diff --git a/reference/CMakeLists.txt b/reference/CMakeLists.txt index f8dff69723b..0b5c1aa8ae4 100644 --- a/reference/CMakeLists.txt +++ b/reference/CMakeLists.txt @@ -25,6 +25,7 @@ target_sources(ginkgo_reference factorization/par_ict_kernels.cpp factorization/par_ilu_kernels.cpp factorization/par_ilut_kernels.cpp + matrix/batch_csr_kernels.cpp matrix/batch_dense_kernels.cpp matrix/batch_ell_kernels.cpp matrix/coo_kernels.cpp diff --git a/reference/matrix/batch_csr_kernels.cpp b/reference/matrix/batch_csr_kernels.cpp new file mode 100644 index 00000000000..7b4058cb09e --- /dev/null +++ b/reference/matrix/batch_csr_kernels.cpp @@ -0,0 +1,87 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/batch_csr_kernels.hpp" + + +#include + + +#include +#include + + +#include "core/base/batch_struct.hpp" +#include "core/matrix/batch_struct.hpp" +#include "reference/base/batch_struct.hpp" +#include "reference/matrix/batch_struct.hpp" + + +namespace gko { +namespace kernels { +namespace reference { +/** + * @brief The Csr matrix format namespace. + * @ref Csr + * @ingroup batch_csr + */ +namespace batch_csr { + + +#include "reference/matrix/batch_csr_kernels.hpp.inc" + + +template +void simple_apply(std::shared_ptr exec, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + simple_apply_kernel(mat_item, b_item, x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_SIMPLE_APPLY_KERNEL); + + +template +void advanced_apply(std::shared_ptr exec, + const batch::MultiVector* alpha, + const batch::matrix::Csr* mat, + const batch::MultiVector* b, + const batch::MultiVector* beta, + batch::MultiVector* x) +{ + const auto b_ub = host::get_batch_struct(b); + const auto x_ub = host::get_batch_struct(x); + const auto mat_ub = host::get_batch_struct(mat); + const auto alpha_ub = host::get_batch_struct(alpha); + const auto beta_ub = host::get_batch_struct(beta); + for (size_type batch = 0; batch < x->get_num_batch_items(); ++batch) { + const auto mat_item = batch::matrix::extract_batch_item(mat_ub, batch); + const auto b_item = batch::extract_batch_item(b_ub, batch); + const auto x_item = batch::extract_batch_item(x_ub, batch); + const auto alpha_item = batch::extract_batch_item(alpha_ub, batch); + const auto beta_item = batch::extract_batch_item(beta_ub, batch); + advanced_apply_kernel(alpha_item.values[0], mat_item, b_item, + beta_item.values[0], x_item); + } +} + +GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INT32_TYPE( + GKO_DECLARE_BATCH_CSR_ADVANCED_APPLY_KERNEL); + + +} // namespace batch_csr +} // namespace reference +} // namespace kernels +} // namespace gko diff --git a/reference/matrix/batch_csr_kernels.hpp.inc b/reference/matrix/batch_csr_kernels.hpp.inc new file mode 100644 index 00000000000..8457088eb5a --- /dev/null +++ b/reference/matrix/batch_csr_kernels.hpp.inc @@ -0,0 +1,48 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +template +inline void simple_apply_kernel( + const gko::batch::matrix::csr::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const gko::batch::multi_vector::batch_item& c) +{ + for (int row = 0; row < a.num_rows; ++row) { + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] = zero(); + } + for (auto k = a.row_ptrs[row]; k < a.row_ptrs[row + 1]; ++k) { + auto val = a.values[k]; + auto col = a.col_idxs[k]; + for (int j = 0; j < b.num_rhs; ++j) { + c.values[row * c.stride + j] += + val * b.values[col * b.stride + j]; + } + } + } +} + + +template +inline void advanced_apply_kernel( + const ValueType alpha, + const gko::batch::matrix::csr::batch_item& a, + const gko::batch::multi_vector::batch_item& b, + const ValueType beta, + const gko::batch::multi_vector::batch_item& c) +{ + for (int row = 0; row < a.num_rows; ++row) { + for (int j = 0; j < c.num_rhs; ++j) { + c.values[row * c.stride + j] *= beta; + } + for (int k = a.row_ptrs[row]; k < a.row_ptrs[row + 1]; ++k) { + const auto val = a.values[k]; + const auto col = a.col_idxs[k]; + for (int j = 0; j < c.num_rhs; ++j) { + c.values[row * c.stride + j] += + alpha * val * b.values[col * b.stride + j]; + } + } + } +} diff --git a/reference/matrix/batch_struct.hpp b/reference/matrix/batch_struct.hpp index 2b6d08b37b3..3062b819405 100644 --- a/reference/matrix/batch_struct.hpp +++ b/reference/matrix/batch_struct.hpp @@ -10,6 +10,7 @@ #include +#include #include #include @@ -35,6 +36,40 @@ namespace host { */ +/** + * Generates an immutable uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch +get_batch_struct(const batch::matrix::Csr* const op) +{ + return {op->get_const_values(), + op->get_const_col_idxs(), + op->get_const_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + +/** + * Generates a uniform batch struct from a batch of csr matrices. + */ +template +inline batch::matrix::csr::uniform_batch get_batch_struct( + batch::matrix::Csr* const op) +{ + return {op->get_values(), + op->get_col_idxs(), + op->get_row_ptrs(), + op->get_num_batch_items(), + static_cast(op->get_common_size()[0]), + static_cast(op->get_common_size()[1]), + static_cast(op->get_num_elements_per_item())}; +} + + /** * Generates an immutable uniform batch struct from a batch of dense matrices. */ diff --git a/reference/solver/batch_bicgstab_kernels.cpp b/reference/solver/batch_bicgstab_kernels.cpp index f653ae99400..57a378ab69f 100644 --- a/reference/solver/batch_bicgstab_kernels.cpp +++ b/reference/solver/batch_bicgstab_kernels.cpp @@ -28,6 +28,7 @@ constexpr int max_num_rhs = 1; #include "reference/base/batch_multi_vector_kernels.hpp.inc" +#include "reference/matrix/batch_csr_kernels.hpp.inc" #include "reference/matrix/batch_dense_kernels.hpp.inc" #include "reference/matrix/batch_ell_kernels.hpp.inc" #include "reference/solver/batch_bicgstab_kernels.hpp.inc" diff --git a/reference/test/matrix/CMakeLists.txt b/reference/test/matrix/CMakeLists.txt index 6f3348da432..82dd108d0d8 100644 --- a/reference/test/matrix/CMakeLists.txt +++ b/reference/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_test(batch_csr_kernels) ginkgo_create_test(batch_dense_kernels) ginkgo_create_test(batch_ell_kernels) ginkgo_create_test(coo_kernels) diff --git a/reference/test/matrix/batch_csr_kernels.cpp b/reference/test/matrix/batch_csr_kernels.cpp new file mode 100644 index 00000000000..c3298741d2f --- /dev/null +++ b/reference/test/matrix/batch_csr_kernels.cpp @@ -0,0 +1,228 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include + + +#include +#include +#include + + +#include + + +#include +#include +#include +#include +#include +#include + + +#include "core/matrix/batch_csr_kernels.hpp" +#include "core/test/utils.hpp" + + +template +class Csr : public ::testing::Test { +protected: + using value_type = T; + using size_type = gko::size_type; + using BMtx = gko::batch::matrix::Csr; + using BMVec = gko::batch::MultiVector; + using CsrMtx = gko::matrix::Csr; + using DenseMtx = gko::matrix::Dense; + Csr() + : exec(gko::ReferenceExecutor::create()), + mtx_0(gko::batch::initialize( + {{I({1.0, -1.0, 0.0}), I({-2.0, 2.0, 3.0})}, + {{1.0, -2.0, 0.0}, {1.0, -2.5, 4.0}}}, + exec, 5)), + mtx_00(gko::initialize( + {I({1.0, -1.0, 0.0}), I({-2.0, 2.0, 3.0})}, exec)), + mtx_01(gko::initialize( + {I({1.0, -2.0, 0.0}), I({1.0, -2.5, 4.0})}, exec)), + b_0(gko::batch::initialize( + {{I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}}, + exec)), + b_00(gko::initialize( + {I({1.0, 0.0, 1.0}), I({2.0, 0.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + b_01(gko::initialize( + {I({-1.0, 1.0, 1.0}), I({1.0, -1.0, 1.0}), + I({1.0, 0.0, 2.0})}, + exec)), + x_0(gko::batch::initialize( + {{I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}}, + exec)), + x_00(gko::initialize( + {I({2.0, 0.0, 1.0}), I({2.0, 0.0, 2.0})}, exec)), + x_01(gko::initialize( + {I({-2.0, 1.0, 1.0}), I({1.0, -1.0, -1.0})}, exec)) + {} + + std::shared_ptr exec; + std::unique_ptr mtx_0; + std::unique_ptr mtx_00; + std::unique_ptr mtx_01; + std::unique_ptr b_0; + std::unique_ptr b_00; + std::unique_ptr b_01; + std::unique_ptr x_0; + std::unique_ptr x_00; + std::unique_ptr x_01; + + std::ranlux48 rand_engine; +}; + +TYPED_TEST_SUITE(Csr, gko::test::ValueTypes, TypenameNameGenerator); + + +TYPED_TEST(Csr, AppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + + this->mtx_0->apply(this->b_0.get(), this->x_0.get()); + + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); +} + + +TYPED_TEST(Csr, ConstAppliesToBatchMultiVector) +{ + using T = typename TestFixture::value_type; + using BMtx = typename TestFixture::BMtx; + + static_cast(this->mtx_0.get())->apply(this->b_0, this->x_0); + + this->mtx_00->apply(this->b_00.get(), this->x_00.get()); + this->mtx_01->apply(this->b_01.get(), this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); +} + + +TYPED_TEST(Csr, AppliesLinearCombinationToBatchMultiVector) +{ + using BMVec = typename TestFixture::BMVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + this->mtx_0->apply(alpha.get(), this->b_0.get(), beta.get(), + this->x_0.get()); + + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); +} + + +TYPED_TEST(Csr, ConstAppliesLinearCombinationToBatchMultiVector) +{ + using BMtx = typename TestFixture::BMtx; + using BMVec = typename TestFixture::BMVec; + using DenseMtx = typename TestFixture::DenseMtx; + using T = typename TestFixture::value_type; + auto alpha = gko::batch::initialize({{1.5}, {-1.0}}, this->exec); + auto beta = gko::batch::initialize({{2.5}, {-4.0}}, this->exec); + auto alpha0 = gko::initialize({1.5}, this->exec); + auto alpha1 = gko::initialize({-1.0}, this->exec); + auto beta0 = gko::initialize({2.5}, this->exec); + auto beta1 = gko::initialize({-4.0}, this->exec); + + static_cast(this->mtx_0.get()) + ->apply(alpha.get(), this->b_0.get(), beta.get(), this->x_0.get()); + + this->mtx_00->apply(alpha0.get(), this->b_00.get(), beta0.get(), + this->x_00.get()); + this->mtx_01->apply(alpha1.get(), this->b_01.get(), beta1.get(), + this->x_01.get()); + auto res = gko::batch::unbatch>(this->x_0.get()); + GKO_ASSERT_MTX_NEAR(res[0].get(), this->x_00.get(), r::value); + GKO_ASSERT_MTX_NEAR(res[1].get(), this->x_01.get(), r::value); +} + + +TYPED_TEST(Csr, ApplyFailsOnWrongNumberOfResultCols) +{ + using BMVec = typename TestFixture::BMVec; + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Csr, ApplyFailsOnWrongNumberOfResultRows) +{ + using BMVec = typename TestFixture::BMVec; + auto res = BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3}}); + + ASSERT_THROW(this->mtx_0->apply(this->b_0.get(), res.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Csr, ApplyFailsOnWrongInnerDimension) +{ + using BMVec = typename TestFixture::BMVec; + auto res = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + + ASSERT_THROW(this->mtx_0->apply(res.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Csr, AdvancedApplyFailsOnWrongInnerDimension) +{ + using BMVec = typename TestFixture::BMVec; + auto res = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 3}}); + auto alpha = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + auto beta = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} + + +TYPED_TEST(Csr, AdvancedApplyFailsOnWrongAlphaDimension) +{ + using BMVec = typename TestFixture::BMVec; + auto res = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{3, 3}}); + auto alpha = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{2, 1}}); + auto beta = + BMVec::create(this->exec, gko::batch_dim<2>{2, gko::dim<2>{1, 1}}); + + ASSERT_THROW( + this->mtx_0->apply(alpha.get(), res.get(), beta.get(), this->x_0.get()), + gko::DimensionMismatch); +} diff --git a/reference/test/solver/batch_bicgstab_kernels.cpp b/reference/test/solver/batch_bicgstab_kernels.cpp index 4a957a9f0fb..a9efa7dd9ad 100644 --- a/reference/test/solver/batch_bicgstab_kernels.cpp +++ b/reference/test/solver/batch_bicgstab_kernels.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #include @@ -33,6 +34,7 @@ class BatchBicgstab : public ::testing::Test { using solver_type = gko::batch::solver::Bicgstab; using Mtx = gko::batch::matrix::Dense; using EllMtx = gko::batch::matrix::Ell; + using CsrMtx = gko::batch::matrix::Csr; using MVec = gko::batch::MultiVector; using RealMVec = gko::batch::MultiVector; using Settings = gko::kernels::batch_bicgstab::settings; @@ -247,6 +249,42 @@ TYPED_TEST(BatchBicgstab, CanSolveEllSystem) } +TYPED_TEST(BatchBicgstab, CanSolveCsrSystem) +{ + using value_type = typename TestFixture::value_type; + using real_type = gko::remove_complex; + using Solver = typename TestFixture::solver_type; + using Mtx = typename TestFixture::CsrMtx; + const real_type tol = 1e-5; + const int max_iters = 1000; + auto solver_factory = + Solver::build() + .with_max_iterations(max_iters) + .with_tolerance(tol) + .with_tolerance_type(gko::batch::stop::tolerance_type::relative) + .on(this->exec); + const int num_rows = 13; + const size_t num_batch_items = 2; + const int num_rhs = 1; + auto stencil_mat = + gko::share(gko::test::generate_3pt_stencil_batch_matrix( + this->exec, num_batch_items, num_rows, (num_rows * 3 - 2))); + auto linear_system = + gko::test::generate_batch_linear_system(stencil_mat, num_rhs); + auto solver = gko::share(solver_factory->generate(linear_system.matrix)); + + auto res = + gko::test::solve_linear_system(this->exec, linear_system, solver); + + GKO_ASSERT_BATCH_MTX_NEAR(res.x, linear_system.exact_sol, tol * 10); + for (size_t i = 0; i < num_batch_items; i++) { + ASSERT_LE(res.host_res_norm->get_const_values()[i] / + linear_system.host_rhs_norm->get_const_values()[i], + tol * 10); + } +} + + TYPED_TEST(BatchBicgstab, CanSolveDenseHpdSystem) { using value_type = typename TestFixture::value_type; diff --git a/test/matrix/CMakeLists.txt b/test/matrix/CMakeLists.txt index d49373811dc..c8eaa54eb76 100644 --- a/test/matrix/CMakeLists.txt +++ b/test/matrix/CMakeLists.txt @@ -1,3 +1,4 @@ +ginkgo_create_common_test(batch_csr_kernels) ginkgo_create_common_test(batch_dense_kernels) ginkgo_create_common_test(batch_ell_kernels) ginkgo_create_common_device_test(csr_kernels) diff --git a/test/matrix/batch_csr_kernels.cpp b/test/matrix/batch_csr_kernels.cpp new file mode 100644 index 00000000000..8b9dbdab598 --- /dev/null +++ b/test/matrix/batch_csr_kernels.cpp @@ -0,0 +1,115 @@ +// SPDX-FileCopyrightText: 2017-2023 The Ginkgo authors +// +// SPDX-License-Identifier: BSD-3-Clause + +#include "core/matrix/batch_csr_kernels.hpp" + + +#include +#include + + +#include + + +#include +#include +#include +#include + + +#include "core/base/batch_utilities.hpp" +#include "core/test/utils.hpp" +#include "core/test/utils/assertions.hpp" +#include "core/test/utils/batch_helpers.hpp" +#include "test/utils/executor.hpp" + + +class Csr : public CommonTestFixture { +protected: + using BMtx = gko::batch::matrix::Csr; + using BMVec = gko::batch::MultiVector; + + Csr() : rand_engine(15) {} + + template + std::unique_ptr gen_mtx(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols, + int num_elems_per_row) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_elems_per_row, + num_elems_per_row), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref, + num_elems_per_row * num_rows); + } + + std::unique_ptr gen_mvec(const gko::size_type num_batch_items, + gko::size_type num_rows, + gko::size_type num_cols) + { + return gko::test::generate_random_batch_matrix( + num_batch_items, num_rows, num_cols, + std::uniform_int_distribution<>(num_cols, num_cols), + std::normal_distribution<>(-1.0, 1.0), rand_engine, ref); + } + + void set_up_apply_data(gko::size_type num_vecs = 1, + int num_elems_per_row = 5) + { + const gko::size_type num_rows = 252; + const gko::size_type num_cols = 32; + GKO_ASSERT(num_elems_per_row <= num_cols); + mat = gen_mtx(batch_size, num_rows, num_cols, num_elems_per_row); + y = gen_mvec(batch_size, num_cols, num_vecs); + alpha = gen_mvec(batch_size, 1, 1); + beta = gen_mvec(batch_size, 1, 1); + dmat = gko::clone(exec, mat); + dy = gko::clone(exec, y); + dalpha = gko::clone(exec, alpha); + dbeta = gko::clone(exec, beta); + expected = BMVec::create( + ref, + gko::batch_dim<2>(batch_size, gko::dim<2>{num_rows, num_vecs})); + expected->fill(gko::one()); + dresult = gko::clone(exec, expected); + } + + std::default_random_engine rand_engine; + + const gko::size_type batch_size = 11; + std::unique_ptr mat; + std::unique_ptr y; + std::unique_ptr alpha; + std::unique_ptr beta; + std::unique_ptr expected; + std::unique_ptr dresult; + std::unique_ptr dmat; + std::unique_ptr dy; + std::unique_ptr dalpha; + std::unique_ptr dbeta; +}; + + +TEST_F(Csr, SingleVectorApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + mat->apply(y.get(), expected.get()); + dmat->apply(dy.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +} + + +TEST_F(Csr, SingleVectorAdvancedApplyIsEquivalentToRef) +{ + set_up_apply_data(1); + + mat->apply(alpha.get(), y.get(), beta.get(), expected.get()); + dmat->apply(dalpha.get(), dy.get(), dbeta.get(), dresult.get()); + + GKO_ASSERT_BATCH_MTX_NEAR(dresult, expected, r::value); +}