Skip to content

Commit

Permalink
Fix tests
Browse files Browse the repository at this point in the history
  • Loading branch information
pratikvn committed Apr 24, 2024
1 parent 73c817c commit 45ae7d0
Show file tree
Hide file tree
Showing 2 changed files with 32 additions and 68 deletions.
29 changes: 8 additions & 21 deletions common/cuda_hip/preconditioner/batch_jacobi_kernels.hpp.inc
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,16 @@
//
// SPDX-License-Identifier: BSD-3-Clause


__global__ void compute_block_storage_kernel(
const gko::size_type num_blocks,
const int* const __restrict__ block_pointers,
int* const __restrict__ blocks_cumulative_storage)
int* const __restrict__ blocks_cumulative_offsets)
{
const auto gid = threadIdx.x + blockIdx.x * blockDim.x;

for (int i = gid; i < num_blocks; i += blockDim.x * gridDim.x) {
const auto bsize = block_pointers[i + 1] - block_pointers[i];
blocks_cumulative_storage[i] = bsize * bsize;
blocks_cumulative_offsets[i] = bsize * bsize;
}
}

Expand All @@ -39,7 +38,7 @@ __global__
__launch_bounds__(default_block_size) void extract_common_block_pattern_kernel(
const int nrows, const int* const __restrict__ sys_row_ptrs,
const int* const __restrict__ sys_col_idxs, const gko::size_type num_blocks,
const int* const __restrict__ blocks_cumulative_storage,
const int* const __restrict__ blocks_cumulative_offsets,
const int* const __restrict__ block_pointers,
const int* const __restrict__ map_block_to_row, int* const blocks_pattern)
{
Expand All @@ -60,7 +59,7 @@ __launch_bounds__(default_block_size) void extract_common_block_pattern_kernel(
const int idx_end = block_pointers[block_idx + 1];
int* __restrict__ pattern_ptr =
blocks_pattern + gko::detail::batch_jacobi::get_block_offset(
block_idx, blocks_cumulative_storage);
block_idx, blocks_cumulative_offsets);
const auto stride =
gko::detail::batch_jacobi::get_stride(block_idx, block_pointers);

Expand Down Expand Up @@ -92,15 +91,11 @@ __device__ __forceinline__ int choose_pivot(
if (perm > -1) {
my_abs_ele = -1;
}

if (subwarp_grp.thread_rank() >= block_size) {
my_abs_ele = -1;
}

subwarp_grp.sync();

int my_piv_idx = subwarp_grp.thread_rank();

for (int a = subwarp_grp.size() / 2; a > 0; a /= 2) {
const auto abs_ele_other = subwarp_grp.shfl_down(my_abs_ele, a);
const int piv_idx_other = subwarp_grp.shfl_down(my_piv_idx, a);
Expand All @@ -110,11 +105,8 @@ __device__ __forceinline__ int choose_pivot(
my_piv_idx = piv_idx_other;
}
}

subwarp_grp.sync();

const int ipiv = subwarp_grp.shfl(my_piv_idx, 0);

return ipiv;
}

Expand All @@ -128,23 +120,19 @@ __device__ __forceinline__ void invert_dense_block(Group subwarp_grp,
// Gauss Jordan Elimination with implicit pivoting
for (int k = 0; k < block_size; k++) {
// implicit pivoting

const int ipiv = choose_pivot(subwarp_grp, block_size, block_row, perm,
k); // pivot index

if (subwarp_grp.thread_rank() == ipiv) {
perm = k;
}

const ValueType d = subwarp_grp.shfl(block_row[k], ipiv);
// scale kth col
block_row[k] /= -d;
if (subwarp_grp.thread_rank() == ipiv) {
block_row[k] = zero<ValueType>();
}

const ValueType row_val = block_row[k];
// GER
// rank-1 update
for (int col = 0; col < block_size; col++) {
const ValueType col_val = subwarp_grp.shfl(block_row[col], ipiv);
block_row[col] += row_val * col_val;
Expand All @@ -157,7 +145,6 @@ __device__ __forceinline__ void invert_dense_block(Group subwarp_grp,
for (int i = 0; i < block_size; i++) {
block_row[i] /= d;
}

block_row[k] = one<ValueType>() / d;
}
}
Expand All @@ -169,7 +156,7 @@ __global__
__launch_bounds__(default_block_size) void compute_block_jacobi_kernel(
const gko::size_type nbatch, const int nnz, const ValueType* const A_vals,
const gko::size_type num_blocks,
const int* const __restrict__ blocks_cumulative_storage,
const int* const __restrict__ blocks_cumulative_offsets,
const int* const __restrict__ block_pointers,
const int* const blocks_pattern, ValueType* const blocks)
{
Expand All @@ -194,11 +181,11 @@ __launch_bounds__(default_block_size) void compute_block_jacobi_kernel(

const int* __restrict__ current_block_pattern =
blocks_pattern + gko::detail::batch_jacobi::get_block_offset(
block_idx, blocks_cumulative_storage);
block_idx, blocks_cumulative_offsets);
ValueType* __restrict__ current_block_data =
blocks +
gko::detail::batch_jacobi::get_global_block_offset(
batch_idx, num_blocks, block_idx, blocks_cumulative_storage);
batch_idx, num_blocks, block_idx, blocks_cumulative_offsets);
const auto stride =
gko::detail::batch_jacobi::get_stride(block_idx, block_pointers);

Expand Down
71 changes: 24 additions & 47 deletions test/preconditioner/batch_jacobi_kernels.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include <ginkgo/core/base/math.hpp>
#include <ginkgo/core/matrix/batch_csr.hpp>
#include <ginkgo/core/preconditioner/batch_jacobi.hpp>
#include <ginkgo/core/preconditioner/jacobi.hpp>
#include <ginkgo/core/solver/batch_bicgstab.hpp>


Expand All @@ -39,21 +40,33 @@ void is_equivalent_to_ref(
auto ref = ref_prec->get_executor();
auto exec = d_prec->get_executor();
const auto nbatch = ref_prec->get_num_batch_items();
const auto num_rows = ref_prec->get_common_size()[0];
const auto num_blocks = ref_prec->get_num_blocks();
const auto cumul_block_size =
ref_prec->get_const_blocks_cumulative_offsets()[num_blocks];
const auto block_pointers_ref = ref_prec->get_const_block_pointers();

const auto tol = 10 * r<ValueType>::value;

GKO_ASSERT_ARRAY_EQ(
GKO_EXPECT_ARRAY_EQ(gko::array<int>::const_view(
exec, num_blocks + 1,
d_prec->get_const_blocks_cumulative_offsets()),
gko::array<int>::const_view(
ref, num_blocks + 1,
ref_prec->get_const_blocks_cumulative_offsets()));
GKO_EXPECT_ARRAY_EQ(
gko::array<int>::const_view(exec, num_blocks + 1,
d_prec->get_const_block_pointers()),
gko::array<int>::const_view(exec, num_blocks + 1, block_pointers_ref));
gko::array<int>::const_view(ref, num_blocks + 1,
ref_prec->get_const_block_pointers()));
GKO_EXPECT_ARRAY_EQ(
gko::array<int>::const_view(exec, num_rows,
d_prec->get_const_map_block_to_row()),
gko::array<int>::const_view(ref, num_rows,
ref_prec->get_const_map_block_to_row()));
GKO_EXPECT_ARRAY_NEAR(
gko::array<ValueType>::const_view(exec, nbatch * cumul_block_size,
d_prec->get_const_blocks()),
gko::array<ValueType>::const_view(exec, nbatch * cumul_block_size,
gko::array<ValueType>::const_view(ref, nbatch * cumul_block_size,
ref_prec->get_const_blocks()),
tol);
}
Expand All @@ -79,50 +92,20 @@ class BatchJacobi : public CommonTestFixture {
using Logger = gko::batch::log::BatchConvergence<real_type>;

BatchJacobi()
: ref_mtx(
gko::share(gko::test::generate_diag_dominant_batch_matrix<Mtx>(
ref, nbatch, nrows, false, 4 * nrows - 3))),
d_mtx(gko::share(Mtx::create(exec))),
ref_b(gko::test::generate_random_batch_matrix<BMVec>(
nbatch, nrows, 1, std::uniform_int_distribution<>(nrows, nrows),
std::normal_distribution<real_type>(),
std::default_random_engine(34), ref)),
d_b(BMVec::create(exec,
gko::batch_dim<2>(nbatch, gko::dim<2>(nrows, 1)))),
ref_x(BMVec::create(
ref, gko::batch_dim<2>(nbatch, gko::dim<2>(nrows, 1)))),
d_x(BMVec::create(exec,
gko::batch_dim<2>(nbatch, gko::dim<2>(nrows, 1))))
: ref_mtx(gko::share(gko::test::generate_3pt_stencil_batch_matrix<Mtx>(
ref, nbatch, nrows, 3 * nrows - 2))),
d_mtx(gko::share(Mtx::create(exec)))
{
d_mtx->copy_from(ref_mtx.get());
d_b->copy_from(ref_b.get());
ref_scalar_jacobi_prec =
BJ::build().with_max_block_size(1u).on(ref)->generate(ref_mtx);
d_scalar_jacobi_prec =
BJ::build().with_max_block_size(1u).on(exec)->generate(d_mtx);
ref_block_jacobi_prec = BJ::build()
.with_max_block_size(max_blk_sz)
.on(ref)
->generate(ref_mtx);

// TODO (before merging device kernels): Check if it is the same for
// other device kernels
// so that the block pointers are exactly the same for ref and device
const int* block_pointers_generated_by_ref =
ref_block_jacobi_prec->get_const_block_pointers();
const auto num_blocks_generated_by_ref =
ref_block_jacobi_prec->get_num_blocks();

gko::array<int> block_pointers_for_device(
this->exec, block_pointers_generated_by_ref,
block_pointers_generated_by_ref + num_blocks_generated_by_ref + 1);

d_block_jacobi_prec =
BJ::build()
.with_max_block_size(max_blk_sz)
// .with_block_pointers(block_pointers_for_device)
.on(exec)
->generate(d_mtx);
d_block_jacobi_prec = BJ::build()
.with_max_block_size(max_blk_sz)
.on(exec)
->generate(d_mtx);
}

template <typename MatrixType>
Expand Down Expand Up @@ -178,13 +161,7 @@ class BatchJacobi : public CommonTestFixture {
const int nrows = 300;
std::shared_ptr<Mtx> ref_mtx;
std::shared_ptr<Mtx> d_mtx;
std::unique_ptr<BMVec> ref_b;
std::unique_ptr<BMVec> d_b;
std::unique_ptr<BMVec> ref_x;
std::unique_ptr<BMVec> d_x;
const gko::uint32 max_blk_sz = 6u;
std::unique_ptr<BJ> ref_scalar_jacobi_prec;
std::unique_ptr<BJ> d_scalar_jacobi_prec;
std::unique_ptr<BJ> ref_block_jacobi_prec;
std::unique_ptr<BJ> d_block_jacobi_prec;
};
Expand Down

0 comments on commit 45ae7d0

Please sign in to comment.