From 3f095188d5f3f36ddd84ad22abb425812c522d1c Mon Sep 17 00:00:00 2001
From: "Yu-Hsiang M. Tsai"
Date: Wed, 17 Jan 2024 05:49:04 +0800
Subject: [PATCH] enable distributed multigrid

Co-authored-by: Pratik Nayak
---
 core/solver/multigrid.cpp                  | 193 ++++++++++++++++-----
 include/ginkgo/core/distributed/matrix.hpp |  20 ++-
 2 files changed, 166 insertions(+), 47 deletions(-)

diff --git a/core/solver/multigrid.cpp b/core/solver/multigrid.cpp
index ea3099cf185..1467aab3fba 100644
--- a/core/solver/multigrid.cpp
+++ b/core/solver/multigrid.cpp
@@ -15,6 +15,8 @@
 #include
 #include
 #include
+#include <ginkgo/core/distributed/matrix.hpp>
+#include <ginkgo/core/distributed/vector.hpp>
 #include
 #include
 #include
@@ -27,6 +29,7 @@
 
 #include "core/base/dispatch_helper.hpp"
 #include "core/components/fill_array_kernels.hpp"
+#include "core/distributed/helpers.hpp"
 #include "core/solver/ir_kernels.hpp"
 #include "core/solver/multigrid_kernels.hpp"
 #include "core/solver/solver_base.hpp"
@@ -80,26 +83,6 @@ casting(const T& x)
     return static_cast<ValueType>(real(x));
 }
 
 
-/**
- * as_vec gives a shortcut for casting pointer to dense.
- */
-template <typename ValueType>
-auto as_vec(std::shared_ptr<LinOp> x)
-{
-    return std::static_pointer_cast<matrix::Dense<ValueType>>(x);
-}
-
-
-/**
- * as_real_vec gives a shortcut for casting pointer to dense with real type.
- */
-template <typename ValueType>
-auto as_real_vec(std::shared_ptr<LinOp> x)
-{
-    return std::static_pointer_cast<matrix::Dense<remove_complex<ValueType>>>(
-        x);
-}
-
-
 /**
  * handle_list generate the smoother for each MultigridLevel
@@ -212,17 +195,43 @@ struct MultigridState {
     /**
      * allocate_memory is a helper function to allocate the memory of one level
      *
-     * @tparam ValueType the value type of memory
+     * @tparam VectorType the vector type
      *
      * @param level the current level index
      * @param cycle the multigrid cycle
      * @param current_nrows the number of rows of current fine matrix
      * @param next_nrows the number of rows of next coarse matrix
      */
-    template <typename ValueType>
+    template <typename VectorType>
     void allocate_memory(int level, multigrid::cycle cycle,
                          size_type current_nrows, size_type next_nrows);
 
+#if GINKGO_BUILD_MPI
+    /**
+     * allocate_memory is a helper function to allocate the memory of one level
+     *
+     * @tparam VectorType the vector type
+     *
+     * @param level the current level index
+     * @param cycle the multigrid cycle
+     * @param current_comm the communicator of the current fine matrix
+     * @param next_comm the communicator of the next coarse matrix
+     * @param current_nrows the number of rows of the current fine matrix
+     * @param next_nrows the number of rows of the next coarse matrix
+     * @param current_local_nrows the number of rows of the local operator of
+     *        the current fine matrix
+     * @param next_local_nrows the number of rows of the local operator of the
+     *        next coarse matrix
+     */
+    template <typename VectorType>
+    void allocate_memory(int level, multigrid::cycle cycle,
+                         experimental::mpi::communicator& current_comm,
+                         experimental::mpi::communicator& next_comm,
+                         size_type current_nrows, size_type next_nrows,
+                         size_type current_local_nrows,
+                         size_type next_local_nrows);
+#endif
+
     /**
      * run the cycle of the level
     *
@@ -240,11 +249,11 @@ struct MultigridState {
     /**
     * @copydoc run_cycle
     *
-    * @tparam ValueType the value type
+    * @tparam VectorType the vector type
     *
    * @note it is the version with known ValueType
    */
-    template <typename ValueType>
+    template <typename VectorType>
    void run_cycle(multigrid::cycle cycle, size_type level,
                   const std::shared_ptr<const LinOp>& matrix, const LinOp* b,
                   LinOp* x, cycle_mode mode);
@@ -291,13 +300,44 @@ void MultigridState::generate(const LinOp* system_matrix_in,
         mg_level,
         [&, this](auto mg_level, auto i, auto cycle, auto current_nrows,
                   auto next_nrows) {
-            using value_type =
-                typename std::decay_t<decltype(*mg_level)>::value_type;
-            using vec = matrix::Dense<value_type>;
-            this->allocate_memory<value_type>(i, cycle, current_nrows,
-                                              next_nrows);
-            auto exec = as<LinOp>(multigrid->get_mg_level_list().at(i))
-                            ->get_executor();
+#if GINKGO_BUILD_MPI
+            if (gko::detail::is_distributed(system_matrix_in)) {
+                using value_type =
+                    typename std::decay_t<decltype(*mg_level)>::value_type;
+                using VectorType =
+                    experimental::distributed::Vector<value_type>;
+                auto fine = mg_level->get_fine_op().get();
+                auto coarse = mg_level->get_coarse_op().get();
+                auto current_comm =
+                    dynamic_cast<
+                        const experimental::distributed::DistributedBase*>(
+                        fine)
+                        ->get_communicator();
+                auto next_comm =
+                    dynamic_cast<
+                        const experimental::distributed::DistributedBase*>(
+                        coarse)
+                        ->get_communicator();
+                auto current_local_nrows =
+                    dynamic_cast<const experimental::distributed::DistributedLocalSize*>(fine)
+                        ->get_local_size()[0];
+                auto next_local_nrows =
+                    dynamic_cast<const experimental::distributed::DistributedLocalSize*>(coarse)
+                        ->get_local_size()[0];
+                this->allocate_memory<VectorType>(
+                    i, cycle, current_comm, next_comm, current_nrows,
+                    next_nrows, current_local_nrows, next_local_nrows);
+            } else
+#endif
+            {
+                using value_type =
+                    typename std::decay_t<decltype(*mg_level)>::value_type;
+                using VectorType = matrix::Dense<value_type>;
+                this->allocate_memory<VectorType>(i, cycle, current_nrows,
+                                                  next_nrows);
+            }
         },
         i, cycle, current_nrows, next_nrows);
@@ -306,13 +346,13 @@
 }
 
 
-template <typename ValueType>
+template <typename VectorType>
 void MultigridState::allocate_memory(int level, multigrid::cycle cycle,
                                      size_type current_nrows,
                                      size_type next_nrows)
 {
-    using vec = matrix::Dense<ValueType>;
-    using norm_vec = matrix::Dense<remove_complex<ValueType>>;
+    using value_type = typename VectorType::value_type;
+    using vec = matrix::Dense<value_type>;
 
     auto exec =
         as<LinOp>(multigrid->get_mg_level_list().at(level))->get_executor();
@@ -321,19 +361,71 @@ void MultigridState::allocate_memory(int level, multigrid::cycle cycle,
         // allocate the previous level
         g_list.emplace_back(vec::create(exec, dim<2>{current_nrows, nrhs}));
         e_list.emplace_back(vec::create(exec, dim<2>{current_nrows, nrhs}));
-        next_one_list.emplace_back(initialize<vec>({one<ValueType>()}, exec));
+        next_one_list.emplace_back(initialize<vec>({one<value_type>()}, exec));
     }
     if (level + 1 == multigrid->get_mg_level_list().size()) {
         // the last level allocate the g, e for coarsest solver
         g_list.emplace_back(vec::create(exec, dim<2>{next_nrows, nrhs}));
         e_list.emplace_back(vec::create(exec, dim<2>{next_nrows, nrhs}));
-        next_one_list.emplace_back(initialize<vec>({one<ValueType>()}, exec));
+        next_one_list.emplace_back(initialize<vec>({one<value_type>()}, exec));
+    }
+    one_list.emplace_back(initialize<vec>({one<value_type>()}, exec));
+    neg_one_list.emplace_back(initialize<vec>({-one<value_type>()}, exec));
+}
+
+
+#if GINKGO_BUILD_MPI
+
+
+template <typename VectorType>
+void MultigridState::allocate_memory(
+    int level, multigrid::cycle cycle,
+    experimental::mpi::communicator& current_comm,
+    experimental::mpi::communicator& next_comm, size_type current_nrows,
+    size_type next_nrows, size_type current_local_nrows,
+    size_type next_local_nrows)
+{
+    using value_type = typename VectorType::value_type;
+    using vec = VectorType;
+    using dense_vec = matrix::Dense<value_type>;
+
+    auto exec =
+        as<LinOp>(multigrid->get_mg_level_list().at(level))->get_executor();
+    r_list.emplace_back(vec::create(exec, current_comm,
+                                    dim<2>{current_nrows, nrhs},
+                                    dim<2>{current_local_nrows, nrhs}));
+    if (level != 0) {
+        // allocate the previous level
+        g_list.emplace_back(vec::create(exec, current_comm,
+                                        dim<2>{current_nrows, nrhs},
+                                        dim<2>{current_local_nrows, nrhs}));
+        e_list.emplace_back(vec::create(exec, current_comm,
+                                        dim<2>{current_nrows, nrhs},
+                                        dim<2>{current_local_nrows, nrhs}));
+        next_one_list.emplace_back(
+            initialize<dense_vec>({one<value_type>()}, exec));
     }
-    one_list.emplace_back(initialize<vec>({one<ValueType>()}, exec));
-    neg_one_list.emplace_back(initialize<vec>({-one<ValueType>()}, exec));
+    if (level + 1 == multigrid->get_mg_level_list().size()) {
+        // the last level allocate the g, e for coarsest solver
+        g_list.emplace_back(vec::create(exec, next_comm,
+                                        dim<2>{next_nrows, nrhs},
+                                        dim<2>{next_local_nrows, nrhs}));
+        e_list.emplace_back(vec::create(exec, next_comm,
+                                        dim<2>{next_nrows, nrhs},
+                                        dim<2>{next_local_nrows, nrhs}));
+        next_one_list.emplace_back(
+            initialize<dense_vec>({one<value_type>()}, exec));
+    }
+    one_list.emplace_back(initialize<dense_vec>({one<value_type>()}, exec));
+    neg_one_list.emplace_back(
+        initialize<dense_vec>({-one<value_type>()}, exec));
 }
 
 
+#endif
+
+
 void MultigridState::run_mg_cycle(multigrid::cycle cycle, size_type level,
                                   const std::shared_ptr<const LinOp>& matrix,
                                   const LinOp* b, LinOp* x, cycle_mode mode)
@@ -346,23 +438,33 @@ void MultigridState::run_mg_cycle(multigrid::cycle cycle, size_type level,
     run<gko::multigrid::EnableMultigridLevel<float>,
         gko::multigrid::EnableMultigridLevel<double>,
         gko::multigrid::EnableMultigridLevel<std::complex<float>>,
        gko::multigrid::EnableMultigridLevel<std::complex<double>>>(
         mg_level, [&, this](auto mg_level) {
-            using value_type =
-                typename std::decay_t<decltype(*mg_level)>::value_type;
-            this->run_cycle<value_type>(cycle, level, matrix, b, x, mode);
+#if GINKGO_BUILD_MPI
+            if (gko::detail::is_distributed(matrix.get())) {
+                using value_type =
+                    typename std::decay_t<decltype(*mg_level)>::value_type;
+                this->run_cycle<
+                    experimental::distributed::Vector<value_type>>(
+                    cycle, level, matrix, b, x, mode);
+            } else
+#endif
+            {
+                using value_type =
+                    typename std::decay_t<decltype(*mg_level)>::value_type;
+                this->run_cycle<matrix::Dense<value_type>>(cycle, level,
+                                                           matrix, b, x, mode);
+            }
         });
 }
 
 
-template <typename ValueType>
+template <typename VectorType>
 void MultigridState::run_cycle(multigrid::cycle cycle, size_type level,
                                const std::shared_ptr<const LinOp>& matrix,
                                const LinOp* b, LinOp* x, cycle_mode mode)
 {
+    using value_type = typename VectorType::value_type;
     auto total_level = multigrid->get_mg_level_list().size();
     auto r = r_list.at(level);
     auto g = g_list.at(level);
-    auto e = e_list.at(level);
+    auto e = as<VectorType>(e_list.at(level));
     // get mg_level
     auto mg_level = multigrid->get_mg_level_list().at(level);
     // get the pre_smoother
@@ -392,8 +494,7 @@ void MultigridState::run_cycle(multigrid::cycle cycle, size_type level,
     } else {
         // x in first level is already filled by zero outside.
         if (level != 0) {
-            dynamic_cast<matrix::Dense<ValueType>*>(x)->fill(
-                zero<ValueType>());
+            dynamic_cast<VectorType*>(x)->fill(zero<value_type>());
         }
         pre_smoother->apply(b, x);
     }
@@ -414,7 +515,7 @@ void MultigridState::run_cycle(multigrid::cycle cycle, size_type level,
     // next level
     if (level + 1 == total_level) {
         // the coarsest solver use the last level valuetype
-        as_vec<ValueType>(e)->fill(zero<ValueType>());
+        e->fill(zero<value_type>());
     }
     auto next_level_matrix =
         (level + 1 < total_level)
diff --git a/include/ginkgo/core/distributed/matrix.hpp b/include/ginkgo/core/distributed/matrix.hpp
index 4bede9e456d..ba16b427d82 100644
--- a/include/ginkgo/core/distributed/matrix.hpp
+++ b/include/ginkgo/core/distributed/matrix.hpp
@@ -127,6 +127,21 @@ template <typename ValueType>
 class Vector;
 
 
+/**
+ * DistributedLocalSize is a feature class providing `get_local_size`, which
+ * returns the size of the local operator.
+ */
+class DistributedLocalSize {
+public:
+    /**
+     * get the size of the local operator
+     *
+     * @return the size of the local operator
+     */
+    virtual dim<2> get_local_size() const = 0;
+};
+
+
 /**
  * The Matrix class defines a (MPI-)distributed matrix.
 *
@@ -240,7 +255,8 @@ class Matrix
           Matrix<ValueType, LocalIndexType, GlobalIndexType>>,
       public ConvertibleTo<
           Matrix<next_precision<ValueType>, LocalIndexType, GlobalIndexType>>,
-      public DistributedBase {
+      public DistributedBase,
+      public DistributedLocalSize {
     friend class EnableCreateMethod<Matrix>;
     friend class EnableDistributedPolymorphicObject<Matrix, LinOp>;
     friend class Matrix<to_complex<ValueType>, LocalIndexType,
@@ -339,6 +355,8 @@ class Matrix
         ptr_param<const Partition<local_index_type, global_index_type>>
             col_partition);
 
+    dim<2> get_local_size() const override { return local_mtx_->get_size(); }
+
     /**
      * Get read access to the stored local matrix.
     *
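
Usage sketch (illustrative, not part of the patch): the program below shows how the distributed multigrid enabled above might be driven from user code, following the style of Ginkgo's distributed-solver example. It assembles a 1D Laplacian into a gko::experimental::distributed::Matrix, coarsens it with gko::multigrid::Pgm, and applies the resulting gko::solver::Multigrid to distributed vectors. Everything outside the patch is an assumption: the problem, the uniform partition, the PGM/Ir/Cg choices, and all sizes are placeholders; Ginkgo must be built with GINKGO_BUILD_MPI=ON; and the chosen mg_level, smoother, and coarsest solver must themselves support distributed operators in the Ginkgo version at hand.

#include <mpi.h>

#include <ginkgo/ginkgo.hpp>

int main(int argc, char* argv[])
{
    using ValueType = double;
    using LocalIndexType = gko::int32;
    using GlobalIndexType = gko::int64;
    using dist_mtx = gko::experimental::distributed::Matrix<
        ValueType, LocalIndexType, GlobalIndexType>;
    using dist_vec = gko::experimental::distributed::Vector<ValueType>;
    using part_type =
        gko::experimental::distributed::Partition<LocalIndexType,
                                                  GlobalIndexType>;

    // MPI setup: one communicator, one executor per rank.
    gko::experimental::mpi::environment env(argc, argv);
    gko::experimental::mpi::communicator comm(MPI_COMM_WORLD);
    auto exec = gko::ReferenceExecutor::create();

    // Uniform row partition; for brevity this assumes comm.size() divides
    // global_size evenly.
    const GlobalIndexType global_size = 8192;
    auto partition = gko::share(part_type::build_from_global_size_uniform(
        exec, comm.size(), global_size));

    // Each rank assembles only the 1D Laplacian rows it owns plus a constant
    // right-hand side and a zero initial guess.
    const auto rows_per_rank = global_size / comm.size();
    const GlobalIndexType begin = comm.rank() * rows_per_rank;
    const GlobalIndexType end = begin + rows_per_rank;
    const auto gsize = static_cast<gko::size_type>(global_size);
    gko::matrix_data<ValueType, GlobalIndexType> A_data{
        gko::dim<2>{gsize, gsize}};
    gko::matrix_data<ValueType, GlobalIndexType> b_data{gko::dim<2>{gsize, 1}};
    gko::matrix_data<ValueType, GlobalIndexType> x_data{gko::dim<2>{gsize, 1}};
    for (auto row = begin; row < end; ++row) {
        if (row > 0) {
            A_data.nonzeros.emplace_back(row, row - 1, -1.0);
        }
        A_data.nonzeros.emplace_back(row, row, 2.0);
        if (row < global_size - 1) {
            A_data.nonzeros.emplace_back(row, row + 1, -1.0);
        }
        b_data.nonzeros.emplace_back(row, 0, 1.0);
        x_data.nonzeros.emplace_back(row, 0, 0.0);
    }

    // Read the data into distributed operators using the row partition.
    auto A = gko::share(dist_mtx::create(exec, comm));
    auto b = dist_vec::create(exec, comm);
    auto x = dist_vec::create(exec, comm);
    A->read_distributed(A_data, partition);
    b->read_distributed(b_data, partition);
    x->read_distributed(x_data, partition);

    // Multigrid with PGM coarsening; with this patch the solver can be
    // generated directly from a distributed matrix.
    auto solver =
        gko::solver::Multigrid::build()
            .with_mg_level(gko::multigrid::Pgm<ValueType, LocalIndexType>::
                               build()
                                   .with_deterministic(true))
            .with_pre_smoother(gko::solver::Ir<ValueType>::build().with_criteria(
                gko::stop::Iteration::build().with_max_iters(2u)))
            .with_coarsest_solver(
                gko::solver::Cg<ValueType>::build().with_criteria(
                    gko::stop::Iteration::build().with_max_iters(16u)))
            .with_max_levels(9u)
            .with_min_coarse_rows(64u)
            .with_criteria(gko::stop::Iteration::build().with_max_iters(100u),
                           gko::stop::ResidualNorm<ValueType>::build()
                               .with_reduction_factor(1e-8))
            .on(exec)
            ->generate(A);

    // Every rank participates in the V-cycle; halo exchanges happen inside
    // the distributed matrix apply.
    solver->apply(b, x);
}

The sketch deliberately picks a smoother (Ir without an inner solver) and a coarsest solver (Cg) that only rely on generic LinOp::apply and distributed vector operations; local preconditioners such as Jacobi would need a distributed-aware wrapper (for example the experimental Schwarz preconditioner) instead.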