diff --git a/common/cuda_hip/factorization/cholesky_kernels.hpp.inc b/common/cuda_hip/factorization/cholesky_kernels.hpp.inc index f87969a7ad0..eb90127a8ca 100644 --- a/common/cuda_hip/factorization/cholesky_kernels.hpp.inc +++ b/common/cuda_hip/factorization/cholesky_kernels.hpp.inc @@ -149,8 +149,6 @@ __global__ __launch_bounds__(default_block_size) void symbolic_factorize( template __global__ __launch_bounds__(default_block_size) void factorize( const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ cols, - const IndexType* __restrict__ elim_tree_child_ptrs, - const IndexType* __restrict__ elim_tree_children, const IndexType* __restrict__ storage_offsets, const int32* __restrict__ storage, const int64* __restrict__ row_descs, const IndexType* __restrict__ diag_idxs, @@ -171,32 +169,21 @@ __global__ __launch_bounds__(default_block_size) void factorize( const auto row_begin = row_ptrs[row]; const auto row_diag = diag_idxs[row]; const auto row_end = row_ptrs[row + 1]; - const auto child_begin = elim_tree_child_ptrs[row]; - const auto child_end = elim_tree_child_ptrs[row + 1]; gko::matrix::csr::device_sparsity_lookup lookup{ row_ptrs, cols, storage_offsets, storage, row_descs, static_cast(row)}; - for (auto child = child_begin; child < child_end; child++) { - const auto dep = elim_tree_children[child]; - scheduler.wait(dep); - // TODO evaluate parallel waiting with __all_sync - } - // for each lower triangular entry: eliminate with corresponding row + // for each lower triangular entry: eliminate with corresponding column for (auto lower_nz = row_begin; lower_nz < row_diag; lower_nz++) { const auto dep = cols[lower_nz]; - auto val = vals[lower_nz]; + scheduler.wait(dep); + const auto scale = vals[lower_nz]; const auto diag_idx = diag_idxs[dep]; const auto dep_end = row_ptrs[dep + 1]; - const auto diag = vals[diag_idx]; - const auto scale = val / diag; - if (lane == 0) { - vals[lower_nz] = scale; - } - // subtract all entries past the diagonal - for (auto upper_nz = diag_idx + 1 + lane; upper_nz < dep_end; + // subtract column dep from current column + for (auto upper_nz = diag_idx + lane; upper_nz < dep_end; upper_nz += config::warp_size) { const auto upper_col = cols[upper_nz]; - if (upper_col < row) { + if (upper_col >= row) { const auto upper_val = vals[upper_nz]; const auto output_pos = lookup.lookup_unsafe(upper_col) + row_begin; @@ -204,17 +191,16 @@ __global__ __launch_bounds__(default_block_size) void factorize( } } } - ValueType sum{}; - for (auto lower_nz = row_begin + lane; lower_nz < row_diag; - lower_nz += config::warp_size) { - sum += squared_norm(vals[lower_nz]); - // copy the lower triangular entries to the transpose - vals[transpose_idxs[lower_nz]] = conj(vals[lower_nz]); + auto diag_val = sqrt(vals[row_diag]); + for (auto upper_nz = row_diag + 1 + lane; upper_nz < row_end; + upper_nz += config::warp_size) { + vals[upper_nz] /= diag_val; + // copy the upper triangular entries to the transpose + vals[transpose_idxs[upper_nz]] = conj(vals[upper_nz]); } - sum = reduce(warp, sum, thrust::plus{}); if (lane == 0) { // store computed diagonal - vals[row_diag] = sqrt(vals[row_diag] - sum); + vals[row_diag] = diag_val; } scheduler.mark_ready(); } @@ -365,10 +351,9 @@ void factorize(std::shared_ptr exec, kernel::factorize<<get_stream()>>>( factors->get_const_row_ptrs(), factors->get_const_col_idxs(), - forest.child_ptrs.get_const_data(), - forest.children.get_const_data(), lookup_offsets, lookup_storage, - lookup_descs, diag_idxs, transpose_idxs, - as_device_type(factors->get_values()), storage, num_rows); + lookup_offsets, lookup_storage, lookup_descs, diag_idxs, + transpose_idxs, as_device_type(factors->get_values()), storage, + num_rows); } } diff --git a/reference/test/factorization/cholesky_kernels.cpp b/reference/test/factorization/cholesky_kernels.cpp index d5fae12a2e9..36bbd7e176e 100644 --- a/reference/test/factorization/cholesky_kernels.cpp +++ b/reference/test/factorization/cholesky_kernels.cpp @@ -34,6 +34,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include +#include #include @@ -67,12 +68,15 @@ class Cholesky : public ::testing::Test { using index_type = typename std::tuple_element<1, decltype(ValueIndexType())>::type; using matrix_type = gko::matrix::Csr; + using sparsity_matrix_type = + gko::matrix::SparsityCsr; using elimination_forest = gko::factorization::elimination_forest; Cholesky() : ref(gko::ReferenceExecutor::create()), tmp{ref}, + ref_row_nnz{ref}, storage_offsets{ref}, storage{ref}, row_descs{ref} @@ -98,13 +102,27 @@ class Cholesky : public ::testing::Test { return result; } + void setup( + std::initializer_list> mtx_list, + std::initializer_list> factor_list) + { + mtx = gko::initialize(mtx_list, ref); + l_factor_ref = gko::initialize(factor_list, ref); + setup_impl(); + } + void setup(const char* name_mtx, const char* name_factor) { std::ifstream stream{name_mtx}; std::ifstream ref_stream{name_factor}; mtx = gko::read(stream, this->ref); - num_rows = mtx->get_size()[0]; l_factor_ref = gko::read(ref_stream, this->ref); + setup_impl(); + } + + void setup_impl() + { + num_rows = mtx->get_size()[0]; combined_ref = combined_factor(l_factor_ref.get()); l_factor = matrix_type::create(ref, l_factor_ref->get_size(), l_factor_ref->get_num_stored_elements()); @@ -123,6 +141,13 @@ class Cholesky : public ::testing::Test { storage_offsets.resize_and_reset(num_rows + 1); row_descs.resize_and_reset(num_rows); + ref_row_nnz.resize_and_reset(num_rows); + const auto ref_row_ptrs = l_factor_ref->get_const_row_ptrs(); + for (gko::size_type row = 0; row < num_rows; row++) { + ref_row_nnz.get_data()[row] = + ref_row_ptrs[row + 1] - ref_row_ptrs[row]; + } + const auto allowed = gko::matrix::csr::sparsity_type::bitmap | gko::matrix::csr::sparsity_type::full | gko::matrix::csr::sparsity_type::hash; @@ -149,7 +174,7 @@ class Cholesky : public ::testing::Test { } } - void forall_matrices(std::function fn) + void forall_matrices(std::function fn, bool non_spd) { { SCOPED_TRACE("ani1"); @@ -163,11 +188,87 @@ class Cholesky : public ::testing::Test { gko::matrices::location_ani1_amd_chol_mtx); fn(); } + { + SCOPED_TRACE("example"); + this->setup( + {{4, 0, 1, 0, 0, 0, 0, 1, 0, 0}, + {0, 4, 0, 0, 1, 0, 0, 0, 0, 1}, + {1, 0, 4.25, 0, 0, 0, 1, 0, 0, 0}, + {0, 0, 0, 4, 0, 0, 0, 0, 1, 1}, + {0, 1, 0, 0, 4.25, 0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 4, 2, 4, 0, 0}, + {0, 0, 1, 0, 0, 2, 5.25, 0, 0, 0}, + {1, 0, 0, 0, 0, 4, 0, 8, 1, 1}, + {0, 0, 0, 1, 1, 0, 0, 1, 4, 0}, + {0, 1, 0, 1, 1, 0, 0, 1, 0, 4}}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 0, 0, 0, 0, 0, 0, 0, 0}, + {0.5, 0, 2, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 0, 0, 0, 0, 0, 0}, + {0, 0.5, 0, 0, 2, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 2, 0, 0, 0, 0}, + {0, 0, 0.5, 0, 0, 1, 2, 0, 0, 0}, + {0.5, 0, -0.125, 0, 0, 2, -0.96875, 1.67209402770897, 0, 0}, + {0, 0, 0, 0.5, 0.5, 0, 0, 0.598052491922453, 1.7726627476498, + 0}, + {0, 0.5, 0, 0.5, 0.375, 0, 0, 0.598052491922453, + -0.448571948696326, 1.67346688755653}}); + fn(); + } + { + SCOPED_TRACE("separable"); + this->setup({{4, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 4, 2, 0, 0, 0, 0, 0, 0, 0}, + {1, 2, 5.25, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 4, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 4.25, 1, 0, 0, 0, 4}, + {0, 0, 0, 0, 1, 4.25, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 4, 1, 0, 4}, + {0, 0, 0, 0, 0, 0, 1, 4.25, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 4, 1}, + {0, 0, 0, 0, 4, 0, 4, 0, 1, 17.75}}, + {{2, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 2, 0, 0, 0, 0, 0, 0, 0, 0}, + {0.5, 1, 2, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 2, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0.5, 2, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0.5, 2, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 2, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0.5, 2, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 2, 0}, + {0, 0, 0, 0, 2, -0.5, 2, -0.5, 0.5, 3}}); + fn(); + } + if (non_spd) { + SCOPED_TRACE("missing diagonal"); + this->setup({{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, + {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, + {1, 1, 0, 1, 0, 0, 0, 0, 0, 0}, + {0, 0, 1, 1, 1, 0, 0, 0, 0, 0}, + {0, 0, 0, 1, 0, 1, 0, 0, 0, 0}, + {0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, + {0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, + {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, + {0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, + {{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, + {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, + {1., 1., 1., 0., 0., 0., 0., 0., 0., 0.}, + {0., 0., 1., 1., 0., 0., 0., 0., 0., 0.}, + {0., 0., 0., 1., 1., 0., 0., 0., 0., 0.}, + {0., 0., 0., 0., 1., 1., 0., 0., 0., 0.}, + {0., 0., 0., 0., 0., 1., 1., 0., 0., 0.}, + {0., 0., 0., 0., 0., 0., 1., 1., 0., 0.}, + {0., 0., 0., 0., 0., 0., 0., 0., 1., 0.}, + {0., 0., 0., 0., 0., 0., 1., 1., 1., 1.}}); + fn(); + } } std::shared_ptr ref; gko::size_type num_rows; gko::array tmp; + gko::array ref_row_nnz; gko::array storage_offsets; gko::array storage; gko::array row_descs; @@ -183,255 +284,51 @@ TYPED_TEST_SUITE(Cholesky, gko::test::ValueIndexTypes, PairTypenameNameGenerator); -TYPED_TEST(Cholesky, KernelSymbolicCountExample) -{ - using matrix_type = typename TestFixture::matrix_type; - using elimination_forest = typename TestFixture::elimination_forest; - using index_type = typename TestFixture::index_type; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 1, 0, 0}, - {0, 1, 0, 1, 0, 0, 0, 0, 0, 1}, - {1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 0, 0, 1, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 1, 0, 1, 0, 0}, - {0, 0, 1, 0, 0, 1, 1, 0, 0, 0}, - {1, 0, 0, 0, 0, 1, 0, 1, 1, 1}, - {0, 0, 0, 1, 1, 0, 0, 1, 1, 0}, - {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - gko::array row_nnz{this->ref, 10}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 2, 1, 2, 1, 3, 5, 4, 6})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicFactorizeExample) -{ - using matrix_type = typename TestFixture::matrix_type; - using elimination_forest = typename TestFixture::elimination_forest; - using index_type = typename TestFixture::index_type; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 1, 0, 0}, - {0, 1, 0, 1, 0, 0, 0, 0, 0, 1}, - {1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 0, 0, 0, 1, 1}, - {0, 1, 0, 0, 1, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 1, 0, 1, 0, 0}, - {0, 0, 1, 0, 0, 1, 1, 0, 0, 0}, - {1, 0, 0, 0, 0, 1, 0, 1, 1, 1}, - {0, 0, 0, 1, 1, 0, 0, 1, 1, 0}, - {0, 1, 0, 1, 1, 0, 0, 1, 0, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 26); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, l_factor->get_row_ptrs(), 11); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, - l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, - {1., 0., 1., 0., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 1., 0., 0., 0., 0., 0.}, - {0., 0., 0., 0., 0., 1., 0., 0., 0., 0.}, - {0., 0., 1., 0., 0., 1., 1., 0., 0., 0.}, - {1., 0., 1., 0., 0., 1., 1., 1., 0., 0.}, - {0., 0., 0., 1., 1., 0., 0., 1., 1., 0.}, - {0., 1., 0., 1., 1., 0., 0., 1., 1., 1.}})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicCountSeparable) -{ - using matrix_type = typename TestFixture::matrix_type; - using elimination_forest = typename TestFixture::elimination_forest; - using index_type = typename TestFixture::index_type; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 1}, - {0, 0, 0, 0, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 1, 0, 1, 0, 1, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - gko::array row_nnz{this->ref, 10}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 3, 1, 2, 2, 1, 2, 1, 6})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicFactorizeSeparable) +TYPED_TEST(Cholesky, KernelSymbolicCount) { using matrix_type = typename TestFixture::matrix_type; - using index_type = typename TestFixture::index_type; + using sparsity_matrix_type = typename TestFixture::sparsity_matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 1, 1, 0, 0, 0, 1}, - {0, 0, 0, 0, 1, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 1, 0, 1, 0, 1, 1}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 26); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, l_factor->get_row_ptrs(), 11); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, - l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, - {1., 1., 1., 0., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 1., 0., 0., 0., 0., 0.}, - {0., 0., 0., 0., 1., 1., 0., 0., 0., 0.}, - {0., 0., 0., 0., 0., 0., 1., 0., 0., 0.}, - {0., 0., 0., 0., 0., 0., 1., 1., 0., 0.}, - {0., 0., 0., 0., 0., 0., 0., 0., 1., 0.}, - {0., 0., 0., 0., 1., 1., 1., 1., 1., 1.}})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicCountMissingDiagonal) -{ - using matrix_type = typename TestFixture::matrix_type; using index_type = typename TestFixture::index_type; - using elimination_forest = typename TestFixture::elimination_forest; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - gko::array row_nnz{this->ref, 10}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ(row_nnz, I({1, 1, 3, 2, 2, 2, 2, 2, 1, 4})); + this->forall_matrices( + [this] { + gko::factorization::compute_elim_forest(this->mtx.get(), + this->forest); + gko::array row_nnz{this->ref, this->num_rows}; + + gko::kernels::reference::cholesky::symbolic_count( + this->ref, this->mtx.get(), *this->forest, row_nnz.get_data(), + this->tmp); + + GKO_ASSERT_ARRAY_EQ(row_nnz, this->ref_row_nnz); + }, + true); } -TYPED_TEST(Cholesky, KernelSymbolicFactorizeMissingDiagonal) +TYPED_TEST(Cholesky, KernelSymbolicFactorize) { using matrix_type = typename TestFixture::matrix_type; - using index_type = typename TestFixture::index_type; + using sparsity_matrix_type = typename TestFixture::sparsity_matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - auto mtx = gko::initialize( - {{1, 0, 1, 0, 0, 0, 0, 0, 0, 0}, - {0, 1, 1, 0, 0, 0, 0, 0, 0, 0}, - {1, 1, 0, 1, 0, 0, 0, 0, 0, 0}, - {0, 0, 1, 1, 1, 0, 0, 0, 0, 0}, - {0, 0, 0, 1, 0, 1, 0, 0, 0, 0}, - {0, 0, 0, 0, 1, 1, 1, 0, 0, 0}, - {0, 0, 0, 0, 0, 1, 1, 1, 0, 1}, - {0, 0, 0, 0, 0, 0, 1, 1, 0, 0}, - {0, 0, 0, 0, 0, 0, 0, 0, 1, 1}, - {0, 0, 0, 0, 0, 0, 1, 0, 1, 0}}, - this->ref); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(mtx.get(), forest); - auto l_factor = matrix_type::create(this->ref, gko::dim<2>{10, 10}, 20); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, mtx.get(), *forest, l_factor->get_row_ptrs(), this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, l_factor->get_row_ptrs(), 11); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, mtx.get(), *forest, l_factor.get(), this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, - l({{1., 0., 0., 0., 0., 0., 0., 0., 0., 0.}, - {0., 1., 0., 0., 0., 0., 0., 0., 0., 0.}, - {1., 1., 1., 0., 0., 0., 0., 0., 0., 0.}, - {0., 0., 1., 1., 0., 0., 0., 0., 0., 0.}, - {0., 0., 0., 1., 1., 0., 0., 0., 0., 0.}, - {0., 0., 0., 0., 1., 1., 0., 0., 0., 0.}, - {0., 0., 0., 0., 0., 1., 1., 0., 0., 0.}, - {0., 0., 0., 0., 0., 0., 1., 1., 0., 0.}, - {0., 0., 0., 0., 0., 0., 0., 0., 1., 0.}, - {0., 0., 0., 0., 0., 0., 1., 1., 1., 1.}})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicCountAni1) -{ using index_type = typename TestFixture::index_type; - using elimination_forest = typename TestFixture::elimination_forest; - this->setup(gko::matrices::location_ani1_mtx, - gko::matrices::location_ani1_chol_mtx); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(this->mtx.get(), forest); - gko::array row_nnz{this->ref, this->mtx->get_size()[0]}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, this->mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ( - row_nnz, I({1, 2, 3, 3, 2, 2, 7, 7, 7, 8, 8, 7, - 8, 8, 8, 8, 2, 10, 10, 10, 10, 9, 8, 8, - 8, 7, 8, 2, 8, 8, 7, 5, 8, 6, 4, 4})); -} - - -TYPED_TEST(Cholesky, KernelSymbolicFactorize) -{ - using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr forest; - gko::factorization::compute_elim_forest(this->mtx.get(), forest); - gko::kernels::reference::cholesky::symbolic_count( - this->ref, this->mtx.get(), *forest, this->l_factor->get_row_ptrs(), - this->tmp); - gko::kernels::reference::components::prefix_sum_nonnegative( - this->ref, this->l_factor->get_row_ptrs(), - this->mtx->get_size()[0] + 1); - - gko::kernels::reference::cholesky::symbolic_factorize( - this->ref, this->mtx.get(), *forest, this->l_factor.get(), - this->tmp); - - GKO_ASSERT_MTX_EQ_SPARSITY(this->l_factor, this->l_factor_ref); - }); + this->forall_matrices( + [this] { + gko::factorization::compute_elim_forest(this->mtx.get(), + this->forest); + gko::kernels::reference::cholesky::symbolic_count( + this->ref, this->mtx.get(), *this->forest, + this->l_factor->get_row_ptrs(), this->tmp); + gko::kernels::reference::components::prefix_sum_nonnegative( + this->ref, this->l_factor->get_row_ptrs(), this->num_rows + 1); + + gko::kernels::reference::cholesky::symbolic_factorize( + this->ref, this->mtx.get(), *this->forest, this->l_factor.get(), + this->tmp); + + GKO_ASSERT_MTX_EQ_SPARSITY(this->l_factor, this->l_factor_ref); + }, + true); } @@ -439,14 +336,16 @@ TYPED_TEST(Cholesky, SymbolicFactorize) { using matrix_type = typename TestFixture::matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr combined_factor; - std::unique_ptr forest; - gko::factorization::symbolic_cholesky(this->mtx.get(), true, - combined_factor, forest); - - GKO_ASSERT_MTX_EQ_SPARSITY(combined_factor, this->combined_ref); - }); + this->forall_matrices( + [this] { + std::unique_ptr combined_factor; + std::unique_ptr forest; + gko::factorization::symbolic_cholesky(this->mtx.get(), true, + combined_factor, forest); + + GKO_ASSERT_MTX_EQ_SPARSITY(combined_factor, this->combined_ref); + }, + true); } @@ -454,55 +353,39 @@ TYPED_TEST(Cholesky, SymbolicFactorizeOnlyLower) { using matrix_type = typename TestFixture::matrix_type; using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr l_factor; - std::unique_ptr forest; - gko::factorization::symbolic_cholesky(this->mtx.get(), false, l_factor, - forest); - - GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, this->l_factor_ref); - }); + this->forall_matrices( + [this] { + std::unique_ptr l_factor; + std::unique_ptr forest; + gko::factorization::symbolic_cholesky(this->mtx.get(), false, + l_factor, forest); + + GKO_ASSERT_MTX_EQ_SPARSITY(l_factor, this->l_factor_ref); + }, + true); } -TYPED_TEST(Cholesky, KernelSymbolicCountAni1Amd) -{ - using index_type = typename TestFixture::index_type; - using elimination_forest = typename TestFixture::elimination_forest; - this->setup(gko::matrices::location_ani1_amd_mtx, - gko::matrices::location_ani1_amd_chol_mtx); - std::unique_ptr forest; - gko::factorization::compute_elim_forest(this->mtx.get(), forest); - gko::array row_nnz{this->ref, this->mtx->get_size()[0]}; - - gko::kernels::reference::cholesky::symbolic_count( - this->ref, this->mtx.get(), *forest, row_nnz.get_data(), this->tmp); - - GKO_ASSERT_ARRAY_EQ( - row_nnz, I({1, 1, 2, 3, 5, 4, 1, 2, 3, 4, 1, 2, - 2, 2, 5, 1, 4, 4, 4, 1, 2, 3, 4, 3, - 8, 10, 4, 8, 10, 7, 7, 13, 21, 6, 11, 14})); -} - - -TYPED_TEST(Cholesky, KernelForestFromFactor) +TYPED_TEST(Cholesky, KernelForestFromFactorPlusPostprocessing) { using matrix_type = typename TestFixture::matrix_type; using index_type = typename TestFixture::index_type; using elimination_forest = typename TestFixture::elimination_forest; - this->forall_matrices([this] { - std::unique_ptr combined_factor; - std::unique_ptr forest_ref; - gko::factorization::symbolic_cholesky(this->mtx.get(), true, - combined_factor, forest_ref); - elimination_forest forest{this->ref, - static_cast(this->num_rows)}; - - gko::kernels::reference::cholesky::forest_from_factor( - this->ref, combined_factor.get(), forest); - - this->assert_equal_forests(forest, *forest_ref); - }); + this->forall_matrices( + [this] { + std::unique_ptr combined_factor; + std::unique_ptr forest_ref; + gko::factorization::symbolic_cholesky(this->mtx.get(), true, + combined_factor, forest_ref); + elimination_forest forest{this->ref, + static_cast(this->num_rows)}; + + gko::kernels::reference::cholesky::forest_from_factor( + this->ref, combined_factor.get(), forest); + + this->assert_equal_forests(forest, *forest_ref); + }, + true); } @@ -510,39 +393,46 @@ TYPED_TEST(Cholesky, KernelInitializeWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - std::fill_n(this->combined->get_values(), - this->combined->get_num_stored_elements(), - gko::zero()); - gko::array diag_idxs{this->ref, this->num_rows}; - gko::array transpose_idxs{ - this->ref, this->combined->get_num_stored_elements()}; - - gko::kernels::reference::cholesky::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), - this->combined.get()); - - GKO_ASSERT_MTX_NEAR(this->mtx, this->combined, 0.0); - for (gko::size_type row = 0; row < this->num_rows; row++) { - const auto diag_pos = diag_idxs.get_const_data()[row]; - const auto begin_pos = this->combined->get_const_row_ptrs()[row]; - const auto end_pos = this->combined->get_const_row_ptrs()[row + 1]; - ASSERT_GE(diag_pos, begin_pos); - ASSERT_LT(diag_pos, end_pos); - ASSERT_EQ(this->combined->get_const_col_idxs()[diag_pos], row); - for (auto nz = begin_pos; nz < end_pos; nz++) { - const auto trans_pos = transpose_idxs.get_const_data()[nz]; - const auto col = this->combined->get_const_col_idxs()[nz]; - ASSERT_GE(trans_pos, this->combined->get_const_row_ptrs()[col]); - ASSERT_LT(trans_pos, - this->combined->get_const_row_ptrs()[col + 1]); - ASSERT_EQ(this->combined->get_const_col_idxs()[trans_pos], row); - ASSERT_EQ(transpose_idxs.get_const_data()[trans_pos], nz); + this->forall_matrices( + [this] { + std::fill_n(this->combined->get_values(), + this->combined->get_num_stored_elements(), + gko::zero()); + gko::array diag_idxs{this->ref, this->num_rows}; + gko::array transpose_idxs{ + this->ref, this->combined->get_num_stored_elements()}; + + gko::kernels::reference::cholesky::initialize( + this->ref, this->mtx.get(), + this->storage_offsets.get_const_data(), + this->row_descs.get_const_data(), + this->storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), this->combined.get()); + + GKO_ASSERT_MTX_NEAR(this->mtx, this->combined, 0.0); + for (gko::size_type row = 0; row < this->num_rows; row++) { + const auto diag_pos = diag_idxs.get_const_data()[row]; + const auto begin_pos = + this->combined->get_const_row_ptrs()[row]; + const auto end_pos = + this->combined->get_const_row_ptrs()[row + 1]; + ASSERT_GE(diag_pos, begin_pos); + ASSERT_LT(diag_pos, end_pos); + ASSERT_EQ(this->combined->get_const_col_idxs()[diag_pos], row); + for (auto nz = begin_pos; nz < end_pos; nz++) { + const auto trans_pos = transpose_idxs.get_const_data()[nz]; + const auto col = this->combined->get_const_col_idxs()[nz]; + ASSERT_GE(trans_pos, + this->combined->get_const_row_ptrs()[col]); + ASSERT_LT(trans_pos, + this->combined->get_const_row_ptrs()[col + 1]); + ASSERT_EQ(this->combined->get_const_col_idxs()[trans_pos], + row); + ASSERT_EQ(transpose_idxs.get_const_data()[trans_pos], nz); + } } - } - }); + }, + true); } @@ -550,26 +440,30 @@ TYPED_TEST(Cholesky, KernelFactorizeWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - gko::array diag_idxs{this->ref, this->num_rows}; - gko::array transpose_idxs{ - this->ref, this->combined->get_num_stored_elements()}; - gko::array tmp{this->ref}; - gko::kernels::reference::cholesky::initialize( - this->ref, this->mtx.get(), this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), - this->combined.get()); - - gko::kernels::reference::cholesky::factorize( - this->ref, this->storage_offsets.get_const_data(), - this->row_descs.get_const_data(), this->storage.get_const_data(), - diag_idxs.get_data(), transpose_idxs.get_data(), *this->forest, - this->combined.get(), tmp); - - GKO_ASSERT_MTX_NEAR(this->combined, this->combined_ref, - r::value); - }); + this->forall_matrices( + [this] { + gko::array diag_idxs{this->ref, this->num_rows}; + gko::array transpose_idxs{ + this->ref, this->combined->get_num_stored_elements()}; + gko::array tmp{this->ref}; + gko::kernels::reference::cholesky::initialize( + this->ref, this->mtx.get(), + this->storage_offsets.get_const_data(), + this->row_descs.get_const_data(), + this->storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), this->combined.get()); + + gko::kernels::reference::cholesky::factorize( + this->ref, this->storage_offsets.get_const_data(), + this->row_descs.get_const_data(), + this->storage.get_const_data(), diag_idxs.get_data(), + transpose_idxs.get_data(), *this->forest, this->combined.get(), + tmp); + + GKO_ASSERT_MTX_NEAR(this->combined, this->combined_ref, + r::value); + }, + false); } @@ -577,23 +471,25 @@ TYPED_TEST(Cholesky, FactorizeWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - auto factory = - gko::experimental::factorization::Cholesky::build() - .on(this->ref); - - auto cholesky = factory->generate(this->mtx); - - GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, - r::value); - ASSERT_EQ(cholesky->get_storage_type(), - gko::experimental::factorization::storage_type:: - symm_combined_cholesky); - ASSERT_EQ(cholesky->get_lower_factor(), nullptr); - ASSERT_EQ(cholesky->get_upper_factor(), nullptr); - ASSERT_EQ(cholesky->get_diagonal(), nullptr); - }); + this->forall_matrices( + [this] { + auto factory = + gko::experimental::factorization::Cholesky::build() + .on(this->ref); + + auto cholesky = factory->generate(this->mtx); + + GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, + r::value); + ASSERT_EQ(cholesky->get_storage_type(), + gko::experimental::factorization::storage_type:: + symm_combined_cholesky); + ASSERT_EQ(cholesky->get_lower_factor(), nullptr); + ASSERT_EQ(cholesky->get_upper_factor(), nullptr); + ASSERT_EQ(cholesky->get_diagonal(), nullptr); + }, + false); } @@ -601,28 +497,30 @@ TYPED_TEST(Cholesky, FactorizeWithKnownSparsityWorks) { using value_type = typename TestFixture::value_type; using index_type = typename TestFixture::index_type; - this->forall_matrices([this] { - auto pattern = - gko::share(gko::matrix::SparsityCsr::create( - this->ref)); - pattern->copy_from(this->combined_ref.get()); - auto factory = - gko::experimental::factorization::Cholesky::build() - .with_symbolic_factorization(pattern) - .on(this->ref); - - auto cholesky = factory->generate(this->mtx); - - GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, - r::value); - ASSERT_EQ(cholesky->get_storage_type(), - gko::experimental::factorization::storage_type:: - symm_combined_cholesky); - ASSERT_EQ(cholesky->get_lower_factor(), nullptr); - ASSERT_EQ(cholesky->get_upper_factor(), nullptr); - ASSERT_EQ(cholesky->get_diagonal(), nullptr); - }); + this->forall_matrices( + [this] { + auto pattern = gko::share( + gko::matrix::SparsityCsr::create( + this->ref)); + pattern->copy_from(this->combined_ref.get()); + auto factory = + gko::experimental::factorization::Cholesky::build() + .with_symbolic_factorization(pattern) + .on(this->ref); + + auto cholesky = factory->generate(this->mtx); + + GKO_ASSERT_MTX_NEAR(cholesky->get_combined(), this->combined_ref, + r::value); + ASSERT_EQ(cholesky->get_storage_type(), + gko::experimental::factorization::storage_type:: + symm_combined_cholesky); + ASSERT_EQ(cholesky->get_lower_factor(), nullptr); + ASSERT_EQ(cholesky->get_upper_factor(), nullptr); + ASSERT_EQ(cholesky->get_diagonal(), nullptr); + }, + false); }