Add a batched block Jacobi preconditioner #1542

Merged: 23 commits, May 9, 2024
Changes from all commits
80 changes: 80 additions & 0 deletions common/cuda_hip/preconditioner/batch_block_jacobi.hpp.inc
@@ -0,0 +1,80 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

/**
 * BlockJacobi preconditioner for batch solvers.
 */
template <typename ValueType>
class BlockJacobi final {
public:
    using value_type = ValueType;
    using index_type = int;

    /**
     * Constructs the block-Jacobi preconditioner from precomputed block data.
     *
     * @param max_block_size Maximum block size
     * @param num_blocks Number of diagonal blocks in a matrix
     * @param blocks_cumulative_offsets the cumulative block storage array
     * @param blocks_arr_batch array of diagonal blocks for the batch
     * @param block_ptrs_arr array of block pointers
     * @param row_block_map array containing block indices of the blocks that
     *                      the individual rows of the matrix are a part of
     */
    BlockJacobi(const uint32 max_block_size, const size_type num_blocks,
                const int* const blocks_cumulative_offsets,
                const value_type* const blocks_arr_batch,
                const int* const block_ptrs_arr, const int* const row_block_map)
        : max_block_size_{max_block_size},
          num_blocks_{num_blocks},
          blocks_cumulative_offsets_{blocks_cumulative_offsets},
          blocks_arr_batch_{blocks_arr_batch},
          block_ptrs_arr_{block_ptrs_arr},
          row_block_map_{row_block_map}
    {}

    /**
     * The size of the work vector required in case of dynamic allocation.
     */
    __host__ __device__ static constexpr int dynamic_work_size(
        const int num_rows, int)
    {
        // The blocks are stored externally in blocks_arr_batch_, so no
        // additional work vector is needed.
        return 0;
    }

    __device__ __forceinline__ void generate(
        size_type batch_id,
        const gko::batch::matrix::ell::batch_item<const value_type,
                                                  const index_type>&,
        value_type* const __restrict__)
    {}

    __device__ __forceinline__ void generate(
        size_type batch_id,
        const gko::batch::matrix::csr::batch_item<const value_type,
                                                  const index_type>&,
        value_type* const __restrict__)
    {}

    __device__ __forceinline__ void generate(
        size_type batch_id,
        const gko::batch::matrix::dense::batch_item<const value_type>&,
        value_type* const __restrict__)
    {}

    __device__ __forceinline__ void apply(const int num_rows,
                                          const value_type* const r,
                                          value_type* const z) const
    {}

private:
    const uint32 max_block_size_;
    const size_type num_blocks_;
    const int* __restrict__ const blocks_cumulative_offsets_;
    const value_type* const blocks_arr_batch_;
    const value_type* __restrict__ blocks_arr_entry_;
    const int* __restrict__ const block_ptrs_arr_;
    const int* __restrict__ const row_block_map_;
};
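
For orientation, the following is a minimal host-side sketch of how the arrays handed to the BlockJacobi constructor fit together when applying z = D^{-1} * r block by block. This is not the PR's device kernel: the dense row-major block layout and the use of blocks_cumulative_offsets as per-block storage offsets are assumptions made for illustration.

// Hedged host-side sketch, not the PR's device code.
template <typename ValueType>
void block_jacobi_apply_sketch(
    const int num_rows,
    const int* const blocks_cumulative_offsets,  // storage offset per block
    const ValueType* const blocks_arr_entry,     // blocks of one batch item
    const int* const block_ptrs_arr,  // block b covers rows [ptrs[b], ptrs[b+1])
    const int* const row_block_map,   // row -> index of the block containing it
    const ValueType* const r, ValueType* const z)
{
    for (int row = 0; row < num_rows; ++row) {
        const int block = row_block_map[row];
        const int block_start = block_ptrs_arr[block];
        const int block_size = block_ptrs_arr[block + 1] - block_start;
        // Assumption: the (already inverted) diagonal block is stored dense
        // and row-major at its cumulative offset within this batch item.
        const ValueType* const inv_block =
            blocks_arr_entry + blocks_cumulative_offsets[block];
        ValueType sum{};
        for (int col = 0; col < block_size; ++col) {
            sum += inv_block[(row - block_start) * block_size + col] *
                   r[block_start + col];
        }
        z[row] = sum;
    }
}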
77 changes: 77 additions & 0 deletions common/cuda_hip/preconditioner/batch_scalar_jacobi.hpp.inc
@@ -0,0 +1,77 @@
// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
//
// SPDX-License-Identifier: BSD-3-Clause

/**
 * (Scalar) Jacobi preconditioner for batch solvers.
 */
template <typename ValueType>
class ScalarJacobi final {
public:
    using value_type = ValueType;
    using index_type = int;

    /**
     * The size of the work vector required in case of dynamic allocation.
     */
    __host__ __device__ static constexpr int dynamic_work_size(
        const int num_rows, int)
    {
        return num_rows;
        // [Review comment, Member] should be in another pr?
    }

    /**
     * Sets the input and generates the preconditioner by storing the inverse
     * diagonal entries in the work vector.
     *
     * @param mat Matrix for which to build a Jacobi preconditioner.
     * @param work A 'work-vector', used here to store the inverse diagonal
     *             entries. It must be allocated with at least the amount
     *             of memory given by dynamic_work_size.
     */
    __device__ __forceinline__ void generate(
        size_type,
        const gko::batch::matrix::ell::batch_item<const value_type,
                                                  const index_type>& mat,
        value_type* const __restrict__ work)
    {}

    /**
     * Sets the input and generates the preconditioner by storing the inverse
     * diagonal entries in the work vector.
     *
     * @param mat Matrix for which to build a Jacobi preconditioner.
     * @param work A 'work-vector', used here to store the inverse diagonal
     *             entries. It must be allocated with at least the amount
     *             of memory given by dynamic_work_size.
     */
    __device__ __forceinline__ void generate(
        size_type,
        const gko::batch::matrix::csr::batch_item<const value_type,
                                                  const index_type>& mat,
        value_type* const __restrict__ work)
    {}

    /**
     * Sets the input and generates the preconditioner by storing the inverse
     * diagonal entries in the work vector.
     *
     * @param mat Matrix for which to build a Jacobi preconditioner.
     * @param work A 'work-vector', used here to store the inverse diagonal
     *             entries. It must be allocated with at least the amount
     *             of memory given by dynamic_work_size.
     */
    __device__ __forceinline__ void generate(
        size_type,
        const gko::batch::matrix::dense::batch_item<const value_type>& mat,
        value_type* const __restrict__ work)
    {}

    __device__ __forceinline__ void apply(const int num_rows,
                                          const value_type* const r,
                                          value_type* const z) const
    {}

private:
    value_type* __restrict__ work_;
};
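
The doc comments above pin down the ScalarJacobi contract: generate() inverts the diagonal of the current batch item into the work vector, and apply() then scales the residual elementwise. A minimal plain-C++ sketch of that contract follows; the device bodies are not shown in this diff, and the pre-extracted diag array is an assumption made to keep the sketch format-independent.

// Hedged sketch of the ScalarJacobi generate/apply contract.
template <typename ValueType>
struct scalar_jacobi_sketch {
    ValueType* work_;  // inverse diagonal entries, filled by generate()

    // generate(): store 1 / A_ii for each row of the current batch item.
    void generate(const int num_rows, const ValueType* const diag,
                  ValueType* const work)
    {
        for (int i = 0; i < num_rows; ++i) {
            work[i] = ValueType{1} / diag[i];
        }
        work_ = work;
    }

    // apply(): z = D^{-1} * r, an elementwise scaling by the stored inverses.
    void apply(const int num_rows, const ValueType* const r,
               ValueType* const z) const
    {
        for (int i = 0; i < num_rows; ++i) {
            z[i] = work_[i] * r[i];
        }
    }
};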
1 change: 1 addition & 0 deletions core/CMakeLists.txt
@@ -63,6 +63,7 @@ target_sources(ginkgo
    matrix/sparsity_csr.cpp
    multigrid/pgm.cpp
    multigrid/fixed_coarsening.cpp
    preconditioner/batch_jacobi.cpp
    preconditioner/isai.cpp
    preconditioner/jacobi.cpp
    reorder/amd.cpp
15 changes: 15 additions & 0 deletions core/device_hooks/common_kernels.inc.cpp
@@ -45,6 +45,7 @@
#include "core/matrix/sellp_kernels.hpp"
#include "core/matrix/sparsity_csr_kernels.hpp"
#include "core/multigrid/pgm_kernels.hpp"
#include "core/preconditioner/batch_jacobi_kernels.hpp"
#include "core/preconditioner/isai_kernels.hpp"
#include "core/preconditioner/jacobi_kernels.hpp"
#include "core/reorder/rcm_kernels.hpp"
@@ -772,6 +773,20 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_EXTRACT_DIAGONAL_KERNEL);
} // namespace sellp


namespace batch_jacobi {


GKO_STUB_INDEX_TYPE(
    GKO_DECLARE_BATCH_BLOCK_JACOBI_COMPUTE_CUMULATIVE_BLOCK_STORAGE);
GKO_STUB_INDEX_TYPE(GKO_DECLARE_BATCH_BLOCK_JACOBI_FIND_ROW_BLOCK_MAP);
GKO_STUB_VALUE_AND_INT32_TYPE(
    GKO_DECLARE_BATCH_BLOCK_JACOBI_EXTRACT_PATTERN_KERNEL);
GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_BLOCK_JACOBI_COMPUTE_KERNEL);


} // namespace batch_jacobi


namespace jacobi {


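The batch_jacobi stubs above name the preconditioner's setup pipeline: computing the cumulative block storage, mapping rows to their blocks, extracting the block pattern, and computing the blocks themselves. As a hedged illustration, host versions of the two index kernels might look as follows; the dense block_size^2 storage scheme and all function names are inferred from the macro names, not taken from this diff.

#include <vector>

// Hypothetical host version of compute_cumulative_block_storage: an exclusive
// prefix sum of block_size^2, so offsets[b] is where block b's dense storage
// begins.
inline std::vector<int> compute_cumulative_block_storage_sketch(
    const std::vector<int>& block_ptrs)  // block b covers [ptrs[b], ptrs[b+1])
{
    const int num_blocks = static_cast<int>(block_ptrs.size()) - 1;
    std::vector<int> offsets(num_blocks + 1, 0);
    for (int b = 0; b < num_blocks; ++b) {
        const int block_size = block_ptrs[b + 1] - block_ptrs[b];
        offsets[b + 1] = offsets[b] + block_size * block_size;
    }
    return offsets;
}

// Hypothetical host version of find_row_block_map: each row records the index
// of the diagonal block containing it.
inline std::vector<int> find_row_block_map_sketch(
    const std::vector<int>& block_ptrs)
{
    std::vector<int> row_block_map(block_ptrs.back());
    for (int b = 0; b + 1 < static_cast<int>(block_ptrs.size()); ++b) {
        for (int row = block_ptrs[b]; row < block_ptrs[b + 1]; ++row) {
            row_block_map[row] = b;
        }
    }
    return row_block_map;
}
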
35 changes: 30 additions & 5 deletions core/matrix/batch_struct.hpp
@@ -32,6 +32,12 @@ struct batch_item {
    const index_type* row_ptrs;
    index_type num_rows;
    index_type num_cols;
    index_type num_nnz_per_item;

    inline size_type get_single_item_num_nnz() const
    {
        return static_cast<size_type>(num_nnz_per_item);
    }
};


@@ -75,6 +81,11 @@ struct batch_item {
    int32 stride;
    int32 num_rows;
    int32 num_cols;

    inline size_type get_single_item_num_nnz() const
    {
        return static_cast<size_type>(stride * num_rows);
    }
};


@@ -119,6 +130,11 @@ struct batch_item {
    index_type num_rows;
    index_type num_cols;
    index_type num_stored_elems_per_row;

    inline size_type get_single_item_num_nnz() const
    // [Review comment, Member] I find it a bit odd that both uniform_batch and
    // batch_item have the function get_single_item_num_nnz, but TBH I'm not
    // sure what to do about that. Maybe only keep the batch_item one and use
    // extract_batch_item?
    {
        return static_cast<size_type>(stride * num_stored_elems_per_row);
    }
};


@@ -153,7 +169,8 @@ template <typename ValueType, typename IndexType>
GKO_ATTRIBUTES GKO_INLINE csr::batch_item<const ValueType, const IndexType>
to_const(const csr::batch_item<ValueType, IndexType>& b)
{
-    return {b.values, b.col_idxs, b.row_ptrs, b.num_rows, b.num_cols};
+    return {b.values, b.col_idxs, b.row_ptrs,
+            b.num_rows, b.num_cols, b.num_nnz_per_item};
}


@@ -171,8 +188,12 @@ GKO_ATTRIBUTES GKO_INLINE csr::batch_item<ValueType, IndexType>
extract_batch_item(const csr::uniform_batch<ValueType, IndexType>& batch,
                   const size_type batch_idx)
{
-    return {batch.values + batch_idx * batch.num_nnz_per_item, batch.col_idxs,
-            batch.row_ptrs, batch.num_rows, batch.num_cols};
+    return {batch.values + batch_idx * batch.num_nnz_per_item,
+            batch.col_idxs,
+            batch.row_ptrs,
+            batch.num_rows,
+            batch.num_cols,
+            batch.num_nnz_per_item};
}

template <typename ValueType, typename IndexType>
@@ -183,8 +204,12 @@ extract_batch_item(ValueType* const batch_values,
                   const int num_cols, int num_nnz_per_item,
                   const size_type batch_idx)
{
-    return {batch_values + batch_idx * num_nnz_per_item, batch_col_idxs,
-            batch_row_ptrs, num_rows, num_cols};
+    return {batch_values + batch_idx * num_nnz_per_item,
+            batch_col_idxs,
+            batch_row_ptrs,
+            num_rows,
+            num_cols,
+            num_nnz_per_item};
}
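
The new num_nnz_per_item field and the get_single_item_num_nnz() helpers give every batch format a uniform answer to how many value entries one batch item owns (CSR: num_nnz_per_item; dense: stride * num_rows; ELL: stride * num_stored_elems_per_row). Because all items of a uniform batch share the same sparsity pattern, this count is exactly the stride between consecutive items' values, which is what extract_batch_item exploits above. A hedged usage sketch; the helper name is illustrative, not part of this PR:

#include <cstddef>

// Hypothetical helper: step from batch item 0 to the values of item k,
// uniformly for csr/dense/ell batch_item types that carry a `values` pointer
// and get_single_item_num_nnz().
template <typename BatchItem>
auto item_values_at(const BatchItem& item_zero, const std::size_t batch_idx)
    -> decltype(item_zero.values)
{
    return item_zero.values + batch_idx * item_zero.get_single_item_num_nnz();
}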

