Fix tests

ginkgo-project · Apr 24, 2024 · 45ae7d0 · 45ae7d0
1 parent 73c817c
commit 45ae7d0
Show file tree

Hide file tree

Showing 2 changed files with 32 additions and 68 deletions.
diff --git a/common/cuda_hip/preconditioner/batch_jacobi_kernels.hpp.inc b/common/cuda_hip/preconditioner/batch_jacobi_kernels.hpp.inc
@@ -2,17 +2,16 @@
 //
 // SPDX-License-Identifier: BSD-3-Clause
 
-
 __global__ void compute_block_storage_kernel(
     const gko::size_type num_blocks,
     const int* const __restrict__ block_pointers,
-    int* const __restrict__ blocks_cumulative_storage)
+    int* const __restrict__ blocks_cumulative_offsets)
 {
     const auto gid = threadIdx.x + blockIdx.x * blockDim.x;
 
     for (int i = gid; i < num_blocks; i += blockDim.x * gridDim.x) {
         const auto bsize = block_pointers[i + 1] - block_pointers[i];
-        blocks_cumulative_storage[i] = bsize * bsize;
+        blocks_cumulative_offsets[i] = bsize * bsize;
     }
 }
 
@@ -39,7 +38,7 @@ __global__
 __launch_bounds__(default_block_size) void extract_common_block_pattern_kernel(
     const int nrows, const int* const __restrict__ sys_row_ptrs,
     const int* const __restrict__ sys_col_idxs, const gko::size_type num_blocks,
-    const int* const __restrict__ blocks_cumulative_storage,
+    const int* const __restrict__ blocks_cumulative_offsets,
     const int* const __restrict__ block_pointers,
     const int* const __restrict__ map_block_to_row, int* const blocks_pattern)
 {
@@ -60,7 +59,7 @@ __launch_bounds__(default_block_size) void extract_common_block_pattern_kernel(
         const int idx_end = block_pointers[block_idx + 1];
         int* __restrict__ pattern_ptr =
             blocks_pattern + gko::detail::batch_jacobi::get_block_offset(
-                                 block_idx, blocks_cumulative_storage);
+                                 block_idx, blocks_cumulative_offsets);
         const auto stride =
             gko::detail::batch_jacobi::get_stride(block_idx, block_pointers);
 
@@ -92,15 +91,11 @@ __device__ __forceinline__ int choose_pivot(
     if (perm > -1) {
         my_abs_ele = -1;
     }
-
     if (subwarp_grp.thread_rank() >= block_size) {
         my_abs_ele = -1;
     }
-
     subwarp_grp.sync();
-
     int my_piv_idx = subwarp_grp.thread_rank();
-
     for (int a = subwarp_grp.size() / 2; a > 0; a /= 2) {
         const auto abs_ele_other = subwarp_grp.shfl_down(my_abs_ele, a);
         const int piv_idx_other = subwarp_grp.shfl_down(my_piv_idx, a);
@@ -110,11 +105,8 @@ __device__ __forceinline__ int choose_pivot(
             my_piv_idx = piv_idx_other;
         }
     }
-
     subwarp_grp.sync();
-
     const int ipiv = subwarp_grp.shfl(my_piv_idx, 0);
-
     return ipiv;
 }
 
@@ -128,23 +120,19 @@ __device__ __forceinline__ void invert_dense_block(Group subwarp_grp,
     // Gauss Jordan Elimination with implicit pivoting
     for (int k = 0; k < block_size; k++) {
         // implicit pivoting
-
         const int ipiv = choose_pivot(subwarp_grp, block_size, block_row, perm,
                                       k);  // pivot index
-
         if (subwarp_grp.thread_rank() == ipiv) {
             perm = k;
         }
-
         const ValueType d = subwarp_grp.shfl(block_row[k], ipiv);
         // scale kth col
         block_row[k] /= -d;
         if (subwarp_grp.thread_rank() == ipiv) {
             block_row[k] = zero<ValueType>();
         }
-
         const ValueType row_val = block_row[k];
-        // GER
+        // rank-1 update
         for (int col = 0; col < block_size; col++) {
             const ValueType col_val = subwarp_grp.shfl(block_row[col], ipiv);
             block_row[col] += row_val * col_val;
@@ -157,7 +145,6 @@ __device__ __forceinline__ void invert_dense_block(Group subwarp_grp,
             for (int i = 0; i < block_size; i++) {
                 block_row[i] /= d;
             }
-
             block_row[k] = one<ValueType>() / d;
         }
     }
@@ -169,7 +156,7 @@ __global__
 __launch_bounds__(default_block_size) void compute_block_jacobi_kernel(
     const gko::size_type nbatch, const int nnz, const ValueType* const A_vals,
     const gko::size_type num_blocks,
-    const int* const __restrict__ blocks_cumulative_storage,
+    const int* const __restrict__ blocks_cumulative_offsets,
     const int* const __restrict__ block_pointers,
     const int* const blocks_pattern, ValueType* const blocks)
 {
@@ -194,11 +181,11 @@ __launch_bounds__(default_block_size) void compute_block_jacobi_kernel(
 
         const int* __restrict__ current_block_pattern =
             blocks_pattern + gko::detail::batch_jacobi::get_block_offset(
-                                 block_idx, blocks_cumulative_storage);
+                                 block_idx, blocks_cumulative_offsets);
         ValueType* __restrict__ current_block_data =
             blocks +
             gko::detail::batch_jacobi::get_global_block_offset(
-                batch_idx, num_blocks, block_idx, blocks_cumulative_storage);
+                batch_idx, num_blocks, block_idx, blocks_cumulative_offsets);
         const auto stride =
             gko::detail::batch_jacobi::get_stride(block_idx, block_pointers);
 

diff --git a/test/preconditioner/batch_jacobi_kernels.cpp b/test/preconditioner/batch_jacobi_kernels.cpp
@@ -18,6 +18,7 @@
 #include <ginkgo/core/base/math.hpp>
 #include <ginkgo/core/matrix/batch_csr.hpp>
 #include <ginkgo/core/preconditioner/batch_jacobi.hpp>
+#include <ginkgo/core/preconditioner/jacobi.hpp>
 #include <ginkgo/core/solver/batch_bicgstab.hpp>
 
 
@@ -39,21 +40,33 @@ void is_equivalent_to_ref(
     auto ref = ref_prec->get_executor();
     auto exec = d_prec->get_executor();
     const auto nbatch = ref_prec->get_num_batch_items();
+    const auto num_rows = ref_prec->get_common_size()[0];
     const auto num_blocks = ref_prec->get_num_blocks();
     const auto cumul_block_size =
         ref_prec->get_const_blocks_cumulative_offsets()[num_blocks];
-    const auto block_pointers_ref = ref_prec->get_const_block_pointers();
 
     const auto tol = 10 * r<ValueType>::value;
 
-    GKO_ASSERT_ARRAY_EQ(
+    GKO_EXPECT_ARRAY_EQ(gko::array<int>::const_view(
+                            exec, num_blocks + 1,
+                            d_prec->get_const_blocks_cumulative_offsets()),
+                        gko::array<int>::const_view(
+                            ref, num_blocks + 1,
+                            ref_prec->get_const_blocks_cumulative_offsets()));
+    GKO_EXPECT_ARRAY_EQ(
         gko::array<int>::const_view(exec, num_blocks + 1,
                                     d_prec->get_const_block_pointers()),
-        gko::array<int>::const_view(exec, num_blocks + 1, block_pointers_ref));
+        gko::array<int>::const_view(ref, num_blocks + 1,
+                                    ref_prec->get_const_block_pointers()));
+    GKO_EXPECT_ARRAY_EQ(
+        gko::array<int>::const_view(exec, num_rows,
+                                    d_prec->get_const_map_block_to_row()),
+        gko::array<int>::const_view(ref, num_rows,
+                                    ref_prec->get_const_map_block_to_row()));
     GKO_EXPECT_ARRAY_NEAR(
         gko::array<ValueType>::const_view(exec, nbatch * cumul_block_size,
                                           d_prec->get_const_blocks()),
-        gko::array<ValueType>::const_view(exec, nbatch * cumul_block_size,
+        gko::array<ValueType>::const_view(ref, nbatch * cumul_block_size,
                                           ref_prec->get_const_blocks()),
         tol);
 }
@@ -79,50 +92,20 @@ class BatchJacobi : public CommonTestFixture {
     using Logger = gko::batch::log::BatchConvergence<real_type>;
 
     BatchJacobi()
-        : ref_mtx(
-              gko::share(gko::test::generate_diag_dominant_batch_matrix<Mtx>(
-                  ref, nbatch, nrows, false, 4 * nrows - 3))),
-          d_mtx(gko::share(Mtx::create(exec))),
-          ref_b(gko::test::generate_random_batch_matrix<BMVec>(
-              nbatch, nrows, 1, std::uniform_int_distribution<>(nrows, nrows),
-              std::normal_distribution<real_type>(),
-              std::default_random_engine(34), ref)),
-          d_b(BMVec::create(exec,
-                            gko::batch_dim<2>(nbatch, gko::dim<2>(nrows, 1)))),
-          ref_x(BMVec::create(
-              ref, gko::batch_dim<2>(nbatch, gko::dim<2>(nrows, 1)))),
-          d_x(BMVec::create(exec,
-                            gko::batch_dim<2>(nbatch, gko::dim<2>(nrows, 1))))
+        : ref_mtx(gko::share(gko::test::generate_3pt_stencil_batch_matrix<Mtx>(
+              ref, nbatch, nrows, 3 * nrows - 2))),
+          d_mtx(gko::share(Mtx::create(exec)))
     {
         d_mtx->copy_from(ref_mtx.get());
-        d_b->copy_from(ref_b.get());
-        ref_scalar_jacobi_prec =
-            BJ::build().with_max_block_size(1u).on(ref)->generate(ref_mtx);
-        d_scalar_jacobi_prec =
-            BJ::build().with_max_block_size(1u).on(exec)->generate(d_mtx);
         ref_block_jacobi_prec = BJ::build()
                                     .with_max_block_size(max_blk_sz)
                                     .on(ref)
                                     ->generate(ref_mtx);
 
-        // TODO (before merging device kernels): Check if it is the same for
-        // other device kernels
-        // so that the block pointers are exactly the same for ref and device
-        const int* block_pointers_generated_by_ref =
-            ref_block_jacobi_prec->get_const_block_pointers();
-        const auto num_blocks_generated_by_ref =
-            ref_block_jacobi_prec->get_num_blocks();
-
-        gko::array<int> block_pointers_for_device(
-            this->exec, block_pointers_generated_by_ref,
-            block_pointers_generated_by_ref + num_blocks_generated_by_ref + 1);
-
-        d_block_jacobi_prec =
-            BJ::build()
-                .with_max_block_size(max_blk_sz)
-                // .with_block_pointers(block_pointers_for_device)
-                .on(exec)
-                ->generate(d_mtx);
+        d_block_jacobi_prec = BJ::build()
+                                  .with_max_block_size(max_blk_sz)
+                                  .on(exec)
+                                  ->generate(d_mtx);
     }
 
     template <typename MatrixType>
@@ -178,13 +161,7 @@ class BatchJacobi : public CommonTestFixture {
     const int nrows = 300;
     std::shared_ptr<Mtx> ref_mtx;
     std::shared_ptr<Mtx> d_mtx;
-    std::unique_ptr<BMVec> ref_b;
-    std::unique_ptr<BMVec> d_b;
-    std::unique_ptr<BMVec> ref_x;
-    std::unique_ptr<BMVec> d_x;
     const gko::uint32 max_blk_sz = 6u;
-    std::unique_ptr<BJ> ref_scalar_jacobi_prec;
-    std::unique_ptr<BJ> d_scalar_jacobi_prec;
     std::unique_ptr<BJ> ref_block_jacobi_prec;
     std::unique_ptr<BJ> d_block_jacobi_prec;
 };