ginkgo-project · pratikvn · May 9, 2024 · Feb 13, 2024 · Feb 13, 2024 · Feb 13, 2024
diff --git a/common/cuda_hip/preconditioner/batch_block_jacobi.hpp.inc b/common/cuda_hip/preconditioner/batch_block_jacobi.hpp.inc
@@ -0,0 +1,80 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+/**
+ * BlockJacobi preconditioner for batch solvers.
+ *
+ * The object stores the sizes and raw pointers passed to the constructor; the
+ * arrays themselves are neither copied nor freed by this class.
+ */
+template <typename ValueType>
+class BlockJacobi final {
+public:
+    using value_type = ValueType;
+    using index_type = int;
+
+
+    /**
+     * Stores the description of the block storage. The pointer to the blocks
+     * of a single entry (blocks_arr_entry_) is not a constructor argument and
+     * is initialized to nullptr.
+     *
+     * @param max_block_size Maximum block size
+     * @param num_blocks  Number of diagonal blocks in a matrix
+     * @param blocks_cumulative_offsets the cumulative block storage array
+     * @param blocks_arr_batch array of diagonal blocks for the batch
+     * @param block_ptrs_arr array of block pointers
+     * @param row_block_map array containing block indices of the
+     * blocks that the individual rows of the matrix are a part of
+     */
+    BlockJacobi(const uint32 max_block_size, const size_type num_blocks,
+                const int* const blocks_cumulative_offsets,
+                const value_type* const blocks_arr_batch,
+                const int* const block_ptrs_arr, const int* const row_block_map)
+        : max_block_size_{max_block_size},
+          num_blocks_{num_blocks},
+          blocks_cumulative_offsets_{blocks_cumulative_offsets},
+          blocks_arr_batch_{blocks_arr_batch},
+          // Listed in member declaration order. Without this initializer the
+          // pointer would hold an indeterminate value after construction.
+          blocks_arr_entry_{nullptr},
+          block_ptrs_arr_{block_ptrs_arr},
+          row_block_map_{row_block_map}
+    {}
+
+    /**
+     * The size of the work vector required in case of dynamic allocation.
+     *
+     * Always returns 0; both arguments are ignored.
+     */
+    __host__ __device__ static constexpr int dynamic_work_size(
+        const int num_rows, int)
+    {
+        return 0;
+    }
+
+    /**
+     * Generation hook for ELL batch items.
+     *
+     * The body is empty in this file: batch_id, the matrix item and the work
+     * pointer are all unused, and no member is modified.
+     */
+    __device__ __forceinline__ void generate(
+        size_type batch_id,
+        const gko::batch::matrix::ell::batch_item<const value_type,
+                                                  const index_type>&,
+        value_type* const __restrict__)
+    {}
+
+    /**
+     * Generation hook for CSR batch items.
+     *
+     * The body is empty in this file: batch_id, the matrix item and the work
+     * pointer are all unused, and no member is modified.
+     */
+    __device__ __forceinline__ void generate(
+        size_type batch_id,
+        const gko::batch::matrix::csr::batch_item<const value_type,
+                                                  const index_type>&,
+        value_type* const __restrict__)
+    {}
+
+    /**
+     * Generation hook for dense batch items.
+     *
+     * The body is empty in this file: batch_id, the matrix item and the work
+     * pointer are all unused, and no member is modified.
+     */
+    __device__ __forceinline__ void generate(
+        size_type batch_id,
+        const gko::batch::matrix::dense::batch_item<const value_type>&,
+        value_type* const __restrict__)
+    {}
+
+    /**
+     * Application hook of the preconditioner.
+     *
+     * The body is empty in this file: `r` is not read and `z` is not written.
+     *
+     * @param num_rows  currently unused
+     * @param r  input vector (currently unused)
+     * @param z  output vector (currently unused)
+     */
+    __device__ __forceinline__ void apply(const int num_rows,
+                                          const value_type* const r,
+                                          value_type* const z) const
+    {}
+
+private:
+    const uint32 max_block_size_;
+    const size_type num_blocks_;
+    const int* __restrict__ const blocks_cumulative_offsets_;
+    const value_type* const blocks_arr_batch_;
+    // Not a constructor argument: starts as nullptr and is not assigned by any
+    // member function in this file.
+    const value_type* __restrict__ blocks_arr_entry_;
+    const int* __restrict__ const block_ptrs_arr_;
+    const int* __restrict__ const row_block_map_;
+};
diff --git a/common/cuda_hip/preconditioner/batch_scalar_jacobi.hpp.inc b/common/cuda_hip/preconditioner/batch_scalar_jacobi.hpp.inc
@@ -0,0 +1,77 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+/**
+ * (Scalar) Jacobi preconditioner for batch solvers.
+ *
+ * NOTE(review): in this file every generate() overload and apply() has an
+ * empty body, and nothing assigns the work_ member.
+ */
+template <typename ValueType>
+class ScalarJacobi final {
+public:
+    using value_type = ValueType;
+    using index_type = int;
+
+    /**
+     * The size of the work vector required in case of dynamic allocation.
+     *
+     * Returns num_rows; the second (unnamed) argument is ignored.
+     */
+    __host__ __device__ static constexpr int dynamic_work_size(
+        const int num_rows, int)
+    {
+        return num_rows;
+    }
+
+    /**
+     * Sets the input and generates the preconditioner by storing the inverse
+     * diagonal entries in the work vector (ELL batch item overload).
+     *
+     * NOTE(review): the body is currently empty, so `mat` is not read and
+     * nothing is written to `work` yet.
+     *
+     * @param mat  Matrix for which to build a Jacobi preconditioner.
+     * @param work  A 'work-vector', used here to store the inverse diagonal
+     *              entries. It must be allocated with at least the amount
+     *              of memory given by dynamic_work_size.
+     */
+    __device__ __forceinline__ void generate(
+        size_type,
+        const gko::batch::matrix::ell::batch_item<const value_type,
+                                                  const index_type>& mat,
+        value_type* const __restrict__ work)
+    {}
+
+    /**
+     * Sets the input and generates the preconditioner by storing the inverse
+     * diagonal entries in the work vector (CSR batch item overload).
+     *
+     * NOTE(review): the body is currently empty, so `mat` is not read and
+     * nothing is written to `work` yet.
+     *
+     * @param mat  Matrix for which to build a Jacobi preconditioner.
+     * @param work  A 'work-vector', used here to store the inverse diagonal
+     *              entries. It must be allocated with at least the amount
+     *              of memory given by dynamic_work_size.
+     */
+    __device__ __forceinline__ void generate(
+        size_type,
+        const gko::batch::matrix::csr::batch_item<const value_type,
+                                                  const index_type>& mat,
+        value_type* const __restrict__ work)
+    {}
+
+    /**
+     * Sets the input and generates the preconditioner by storing the inverse
+     * diagonal entries in the work vector (dense batch item overload).
+     *
+     * NOTE(review): the body is currently empty, so `mat` is not read and
+     * nothing is written to `work` yet.
+     *
+     * @param mat  Matrix for which to build a Jacobi preconditioner.
+     * @param work  A 'work-vector', used here to store the inverse diagonal
+     *              entries. It must be allocated with at least the amount
+     *              of memory given by dynamic_work_size.
+     */
+    __device__ __forceinline__ void generate(
+        size_type,
+        const gko::batch::matrix::dense::batch_item<const value_type>& mat,
+        value_type* const __restrict__ work)
+    {}
+
+    /**
+     * Application hook of the preconditioner.
+     *
+     * NOTE(review): the body is currently empty, so `r` is not read and `z`
+     * is not written.
+     *
+     * @param num_rows  currently unused
+     * @param r  input vector (currently unused)
+     * @param z  output vector (currently unused)
+     */
+    __device__ __forceinline__ void apply(const int num_rows,
+                                          const value_type* const r,
+                                          value_type* const z) const
+    {}
+
+private:
+    // NOTE(review): has no initializer and is not assigned anywhere in this
+    // class as shown; presumably generate() is meant to store `work` here --
+    // confirm once the bodies are filled in.
+    value_type* __restrict__ work_;
+};
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
@@ -63,6 +63,7 @@ target_sources(ginkgo
     matrix/sparsity_csr.cpp
     multigrid/pgm.cpp
     multigrid/fixed_coarsening.cpp
+    preconditioner/batch_jacobi.cpp
     preconditioner/isai.cpp
     preconditioner/jacobi.cpp
     reorder/amd.cpp

diff --git a/core/device_hooks/common_kernels.inc.cpp b/core/device_hooks/common_kernels.inc.cpp
@@ -45,6 +45,7 @@
 #include "core/matrix/sellp_kernels.hpp"
 #include "core/matrix/sparsity_csr_kernels.hpp"
 #include "core/multigrid/pgm_kernels.hpp"
+#include "core/preconditioner/batch_jacobi_kernels.hpp"
 #include "core/preconditioner/isai_kernels.hpp"
 #include "core/preconditioner/jacobi_kernels.hpp"
 #include "core/reorder/rcm_kernels.hpp"
@@ -772,6 +773,20 @@ GKO_STUB_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_EXTRACT_DIAGONAL_KERNEL);
 }  // namespace sellp
 
 
+namespace batch_jacobi {
+
+
+// Stub instantiations for the batch (block) Jacobi kernel declaration macros.
+// Going by the macro names, the first two are stubbed per index type and the
+// last two per value type combined with int32 -- confirm against the
+// GKO_STUB_* definitions, which are not visible here.
+GKO_STUB_INDEX_TYPE(
+    GKO_DECLARE_BATCH_BLOCK_JACOBI_COMPUTE_CUMULATIVE_BLOCK_STORAGE);
+GKO_STUB_INDEX_TYPE(GKO_DECLARE_BATCH_BLOCK_JACOBI_FIND_ROW_BLOCK_MAP);
+GKO_STUB_VALUE_AND_INT32_TYPE(
+    GKO_DECLARE_BATCH_BLOCK_JACOBI_EXTRACT_PATTERN_KERNEL);
+GKO_STUB_VALUE_AND_INT32_TYPE(GKO_DECLARE_BATCH_BLOCK_JACOBI_COMPUTE_KERNEL);
+
+
+}  // namespace batch_jacobi
+
+
 namespace jacobi {
 
 

diff --git a/core/matrix/batch_struct.hpp b/core/matrix/batch_struct.hpp
@@ -32,6 +32,12 @@ struct batch_item {
     const index_type* row_ptrs;
     index_type num_rows;
     index_type num_cols;
+    index_type num_nnz_per_item;
+
+    /**
+     * Returns the num_nnz_per_item member converted to size_type.
+     */
+    inline size_type get_single_item_num_nnz() const
+    {
+        const auto nnz_of_item = static_cast<size_type>(num_nnz_per_item);
+        return nnz_of_item;
+    }
 };
 
 
@@ -75,6 +81,11 @@ struct batch_item {
     int32 stride;
     int32 num_rows;
     int32 num_cols;
+
+    /**
+     * Returns stride * num_rows as a size_type.
+     *
+     * Each factor is converted to size_type before the multiplication, so the
+     * product is formed in size_type instead of int32. Multiplying the two
+     * int32 members first overflows (undefined behaviour) once the product
+     * exceeds the int32 range, and the cast would then come too late.
+     */
+    inline size_type get_single_item_num_nnz() const
+    {
+        return static_cast<size_type>(stride) *
+               static_cast<size_type>(num_rows);
+    }
 };
 
 
@@ -119,6 +130,11 @@ struct batch_item {
     index_type num_rows;
     index_type num_cols;
     index_type num_stored_elems_per_row;
+
+    /**
+     * Returns stride * num_stored_elems_per_row as a size_type.
+     *
+     * Each factor is converted to size_type before the multiplication, so the
+     * product is formed in size_type rather than in the members' own type
+     * (presumably a signed 32-bit index type -- confirm), where a large
+     * product would overflow before the cast is applied.
+     */
+    inline size_type get_single_item_num_nnz() const
+    {
+        return static_cast<size_type>(stride) *
+               static_cast<size_type>(num_stored_elems_per_row);
+    }
 };
 
 
@@ -153,7 +169,8 @@ template <typename ValueType, typename IndexType>
 GKO_ATTRIBUTES GKO_INLINE csr::batch_item<const ValueType, const IndexType>
 to_const(const csr::batch_item<ValueType, IndexType>& b)
 {
-    return {b.values, b.col_idxs, b.row_ptrs, b.num_rows, b.num_cols};
+    // Positional initialization of the const-qualified item: the listed
+    // members of `b` are forwarded unchanged, now including num_nnz_per_item
+    // as the last entry. The order has to match the member order of
+    // csr::batch_item.
+    return {b.values,   b.col_idxs, b.row_ptrs,
+            b.num_rows, b.num_cols, b.num_nnz_per_item};
 }
 
 
@@ -171,8 +188,12 @@ GKO_ATTRIBUTES GKO_INLINE csr::batch_item<ValueType, IndexType>
 extract_batch_item(const csr::uniform_batch<ValueType, IndexType>& batch,
                    const size_type batch_idx)
 {
-    return {batch.values + batch_idx * batch.num_nnz_per_item, batch.col_idxs,
-            batch.row_ptrs, batch.num_rows, batch.num_cols};
+    // Only the values pointer is offset (by batch_idx * num_nnz_per_item);
+    // col_idxs and row_ptrs are passed through without an offset, so the same
+    // index arrays are used for every batch_idx. The per-item non-zero count
+    // is forwarded as the last entry of the positional initializer.
+    return {batch.values + batch_idx * batch.num_nnz_per_item,
+            batch.col_idxs,
+            batch.row_ptrs,
+            batch.num_rows,
+            batch.num_cols,
+            batch.num_nnz_per_item};
 }
 
 template <typename ValueType, typename IndexType>
@@ -183,8 +204,12 @@ extract_batch_item(ValueType* const batch_values,
                    const int num_cols, int num_nnz_per_item,
                    const size_type batch_idx)
 {
-    return {batch_values + batch_idx * num_nnz_per_item, batch_col_idxs,
-            batch_row_ptrs, num_rows, num_cols};
+    return {batch_values + batch_idx * num_nnz_per_item,
+            batch_col_idxs,
+            batch_row_ptrs,
+            num_rows,
+            num_cols,
+            num_nnz_per_item};
 }