diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 68d32639e81..6310ee8fd26 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -108,6 +108,7 @@ if(GINKGO_BUILD_MPI)
         mpi/exception.cpp
         distributed/matrix.cpp
         distributed/partition_helpers.cpp
+        distributed/neighborhood_communicator.cpp
         distributed/vector.cpp
         distributed/preconditioner/schwarz.cpp)
 endif()
diff --git a/core/distributed/neighborhood_communicator.cpp b/core/distributed/neighborhood_communicator.cpp
new file mode 100644
index 00000000000..bfd4b2b0358
--- /dev/null
+++ b/core/distributed/neighborhood_communicator.cpp
@@ -0,0 +1,228 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#include <ginkgo/core/distributed/neighborhood_communicator.hpp>
+
+
+#include <algorithm>
+#include <numeric>
+
+
+namespace gko {
+namespace experimental {
+namespace mpi {
+
+
+/**
+ * \brief Computes the inverse envelope (target-ids, sizes) for a given
+ * one-sided communication pattern.
+ *
+ * \param exec the executor; its master (host) executor is used for the
+ *             communication
+ * \param comm communicator
+ * \param ids target ids of the one-sided operation
+ * \param sizes number of elements sent to each id
+ *
+ * \return the inverse envelope consisting of the target-ids and the sizes
+ */
+std::tuple<std::vector<comm_index_type>, std::vector<comm_index_type>>
+communicate_inverse_envelope(std::shared_ptr<const Executor> exec,
+                             mpi::communicator comm,
+                             const std::vector<comm_index_type>& ids,
+                             const std::vector<comm_index_type>& sizes)
+{
+    auto host_exec = exec->get_master();
+    std::vector<comm_index_type> inverse_sizes_full(comm.size());
+    mpi::window<comm_index_type> window(host_exec, inverse_sizes_full.data(),
+                                        inverse_sizes_full.size(), comm,
+                                        sizeof(comm_index_type), MPI_INFO_ENV);
+    window.fence();
+    for (int i = 0; i < ids.size(); ++i) {
+        window.put(host_exec, sizes.data() + i, 1, ids[i], comm.rank(), 1);
+    }
+    window.fence();
+
+    std::vector<comm_index_type> inverse_sizes;
+    std::vector<comm_index_type> inverse_ids;
+    for (int i = 0; i < inverse_sizes_full.size(); ++i) {
+        if (inverse_sizes_full[i] > 0) {
+            inverse_ids.push_back(i);
+            inverse_sizes.push_back(inverse_sizes_full[i]);
+        }
+    }
+
+    return std::make_tuple(std::move(inverse_ids), std::move(inverse_sizes));
+}
+
+
+/**
+ * Creates a distributed graph communicator based on the input sources and
+ * destinations.
+ *
+ * The graph is unweighted and has the same rank ordering as the input
+ * communicator.
+ */
+mpi::communicator create_neighborhood_comm(
+    mpi::communicator base, const std::vector<comm_index_type>& sources,
+    const std::vector<comm_index_type>& destinations)
+{
+    auto in_degree = static_cast<comm_index_type>(sources.size());
+    auto out_degree = static_cast<comm_index_type>(destinations.size());
+
+    // the adjacent constructor guarantees that querying sources/destinations
+    // will result in the arrays having the same order as defined here
+    MPI_Comm graph_comm;
+    MPI_Info info;
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_dup(MPI_INFO_ENV, &info));
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Dist_graph_create_adjacent(
+        base.get(), in_degree, sources.data(),
+        in_degree ? MPI_UNWEIGHTED : MPI_WEIGHTS_EMPTY, out_degree,
+        destinations.data(), out_degree ? MPI_UNWEIGHTED : MPI_WEIGHTS_EMPTY,
+        info, false, &graph_comm));
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Info_free(&info));
+
+    return mpi::communicator::create_owning(graph_comm,
+                                            base.force_host_buffer());
+}
+
+
+std::unique_ptr<collective_communicator>
+neighborhood_communicator::create_inverse() const
+{
+    auto base_comm = this->get_base_communicator();
+    distributed::comm_index_type num_sources;
+    distributed::comm_index_type num_destinations;
+    distributed::comm_index_type weighted;
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Dist_graph_neighbors_count(
+        comm_.get(), &num_sources, &num_destinations, &weighted));
+
+    std::vector<distributed::comm_index_type> sources(num_sources);
+    std::vector<distributed::comm_index_type> destinations(num_destinations);
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Dist_graph_neighbors(
+        comm_.get(), num_sources, sources.data(), MPI_UNWEIGHTED,
+        num_destinations, destinations.data(), MPI_UNWEIGHTED));
+
+    return std::unique_ptr<collective_communicator>{
+        new neighborhood_communicator(base_comm, destinations, send_sizes_,
+                                      send_offsets_, sources, recv_sizes_,
+                                      recv_offsets_)};
+}
+
+
+comm_index_type neighborhood_communicator::get_recv_size() const
+{
+    return recv_offsets_.back();
+}
+
+
+comm_index_type neighborhood_communicator::get_send_size() const
+{
+    return send_offsets_.back();
+}
+
+
+neighborhood_communicator::neighborhood_communicator(
+    communicator base, const std::vector<comm_index_type>& sources,
+    const std::vector<comm_index_type>& recv_sizes,
+    const std::vector<comm_index_type>& recv_offsets,
+    const std::vector<comm_index_type>& destinations,
+    const std::vector<comm_index_type>& send_sizes,
+    const std::vector<comm_index_type>& send_offsets)
+    : collective_communicator(base), comm_(MPI_COMM_NULL)
+{
+    comm_ = create_neighborhood_comm(base, sources, destinations);
+    send_sizes_ = send_sizes;
+    send_offsets_ = send_offsets;
+    recv_sizes_ = recv_sizes;
+    recv_offsets_ = recv_offsets;
+}
+
+
+neighborhood_communicator::neighborhood_communicator(communicator base)
+    : collective_communicator(std::move(base)),
+      comm_(MPI_COMM_SELF),
+      send_sizes_(),
+      send_offsets_(1),
+      recv_sizes_(),
+      recv_offsets_(1)
+{
+    // ensure that comm_ always has the correct topology
+    std::vector<comm_index_type> non_nullptr(1);
+    non_nullptr.resize(0);
+    comm_ = create_neighborhood_comm(this->get_base_communicator(),
+                                     non_nullptr, non_nullptr);
+}
+
+
+request neighborhood_communicator::i_all_to_all_v(
+    std::shared_ptr<const Executor> exec, const void* send_buffer,
+    MPI_Datatype send_type, void* recv_buffer, MPI_Datatype recv_type) const
+{
+    auto guard = exec->get_scoped_device_id_guard();
+    request req;
+    GKO_ASSERT_NO_MPI_ERRORS(MPI_Ineighbor_alltoallv(
+        send_buffer, send_sizes_.data(), send_offsets_.data(), send_type,
+        recv_buffer, recv_sizes_.data(), recv_offsets_.data(), recv_type,
+        comm_.get(), req.get()));
+    return req;
+}
+
+
+template <typename LocalIndexType, typename GlobalIndexType>
+neighborhood_communicator::neighborhood_communicator(
+    communicator base,
+    const distributed::index_map<LocalIndexType, GlobalIndexType>& imap)
+    : collective_communicator(base),
+      comm_(MPI_COMM_SELF),
+      recv_sizes_(imap.get_remote_target_ids().get_size()),
+      recv_offsets_(recv_sizes_.size() + 1),
+      send_offsets_(1)
+{
+    auto exec = imap.get_executor();
+    if (!exec) {
+        return;
+    }
+    auto host_exec = exec->get_master();
+
+    auto recv_target_ids_arr =
+        make_temporary_clone(host_exec, &imap.get_remote_target_ids());
+    auto remote_idx_offsets_arr = make_temporary_clone(
+        host_exec, &imap.get_remote_global_idxs().get_offsets());
+    std::vector<comm_index_type> recv_target_ids(
+        recv_target_ids_arr->get_size());
+    std::copy_n(recv_target_ids_arr->get_const_data(),
+                recv_target_ids_arr->get_size(), recv_target_ids.begin());
+    for (size_type seg_id = 0;
+         seg_id < imap.get_remote_global_idxs().get_segment_count();
+         ++seg_id) {
+        recv_sizes_[seg_id] =
+            remote_idx_offsets_arr->get_const_data()[seg_id + 1] -
+            remote_idx_offsets_arr->get_const_data()[seg_id];
+    }
+    auto send_envelope =
+        communicate_inverse_envelope(exec, base, recv_target_ids, recv_sizes_);
+    const auto& send_target_ids = std::get<0>(send_envelope);
+    send_sizes_ = std::move(std::get<1>(send_envelope));
+
+    send_offsets_.resize(send_sizes_.size() + 1);
+    std::partial_sum(send_sizes_.begin(), send_sizes_.end(),
+                     send_offsets_.begin() + 1);
+    std::partial_sum(recv_sizes_.begin(), recv_sizes_.end(),
+                     recv_offsets_.begin() + 1);
+
+    comm_ = create_neighborhood_comm(base, recv_target_ids, send_target_ids);
+}
+
+
+#define GKO_DECLARE_NEIGHBORHOOD_CONSTRUCTOR(LocalIndexType, GlobalIndexType) \
+    neighborhood_communicator::neighborhood_communicator(                     \
+        communicator base,                                                    \
+        const distributed::index_map<LocalIndexType, GlobalIndexType>& imap)
+
+GKO_INSTANTIATE_FOR_EACH_LOCAL_GLOBAL_INDEX_TYPE(
+    GKO_DECLARE_NEIGHBORHOOD_CONSTRUCTOR);
+
+#undef GKO_DECLARE_NEIGHBORHOOD_CONSTRUCTOR
+
+
+}  // namespace mpi
+}  // namespace experimental
+}  // namespace gko
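For context, the inverse envelope that `communicate_inverse_envelope` computes with a one-sided `MPI_Put` window above can equivalently be obtained with a dense `MPI_Alltoall`. The following standalone sketch (plain MPI, not part of this patch; the function name `inverse_envelope` and its `std::pair` return type are illustrative only) shows the same bookkeeping: each rank scatters its per-target sizes into a dense table, and every non-zero entry of the received table identifies a rank that will send to it.

```cpp
#include <mpi.h>

#include <cstddef>
#include <utility>
#include <vector>

// ids[i] is a rank this process sends to, sizes[i] the element count for it.
// Returns the ranks that send to this process and their element counts.
std::pair<std::vector<int>, std::vector<int>> inverse_envelope(
    MPI_Comm comm, const std::vector<int>& ids, const std::vector<int>& sizes)
{
    int num_ranks;
    MPI_Comm_size(comm, &num_ranks);
    // scatter the sparse (ids, sizes) pairs into a dense per-rank table
    std::vector<int> send_table(num_ranks, 0);
    for (std::size_t i = 0; i < ids.size(); ++i) {
        send_table[ids[i]] = sizes[i];
    }
    // after the all-to-all, recv_table[r] holds how much rank r sends to us
    std::vector<int> recv_table(num_ranks, 0);
    MPI_Alltoall(send_table.data(), 1, MPI_INT, recv_table.data(), 1, MPI_INT,
                 comm);
    // compress the dense table back into (ranks, sizes) pairs
    std::vector<int> inverse_ids;
    std::vector<int> inverse_sizes;
    for (int r = 0; r < num_ranks; ++r) {
        if (recv_table[r] > 0) {
            inverse_ids.push_back(r);
            inverse_sizes.push_back(recv_table[r]);
        }
    }
    return {std::move(inverse_ids), std::move(inverse_sizes)};
}
```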
diff --git a/include/ginkgo/core/distributed/neighborhood_communicator.hpp b/include/ginkgo/core/distributed/neighborhood_communicator.hpp
new file mode 100644
index 00000000000..451ec395216
--- /dev/null
+++ b/include/ginkgo/core/distributed/neighborhood_communicator.hpp
@@ -0,0 +1,131 @@
+// SPDX-FileCopyrightText: 2017 - 2024 The Ginkgo authors
+//
+// SPDX-License-Identifier: BSD-3-Clause
+
+#ifndef GKO_PUBLIC_CORE_DISTRIBUTED_NEIGHBORHOOD_COMMUNICATOR_HPP_
+#define GKO_PUBLIC_CORE_DISTRIBUTED_NEIGHBORHOOD_COMMUNICATOR_HPP_
+
+
+#include <ginkgo/config.hpp>
+
+
+#if GINKGO_BUILD_MPI
+
+#include <ginkgo/core/base/mpi.hpp>
+#include <ginkgo/core/distributed/collective_communicator.hpp>
+#include <ginkgo/core/distributed/index_map.hpp>
+
+
+namespace gko {
+namespace experimental {
+namespace mpi {
+
+
+/**
+ * A collective_communicator that uses a neighborhood topology.
+ *
+ * The neighborhood communicator is defined by a list of neighbors this
+ * rank sends data to and a list of neighbors this rank receives data from.
+ * No communication will take place with any rank that is not in one of
+ * those lists.
+ */
+class neighborhood_communicator final : public collective_communicator {
+public:
+    using collective_communicator::i_all_to_all_v;
+
+    /**
+     * Default constructor with an empty communication pattern.
+     *
+     * @param base the base communicator
+     */
+    explicit neighborhood_communicator(communicator base);
+
+    /**
+     * Create a neighborhood_communicator from an index map.
+     *
+     * The receive neighbors are defined by the remote indices and their
+     * owning ranks of the index map. The send neighbors are deduced
+     * from that through collective communication.
+     *
+     * @tparam LocalIndexType the local index type of the map
+     * @tparam GlobalIndexType the global index type of the map
+     *
+     * @param base the base communicator
+     * @param imap the index map that defines the communication pattern
+     */
+    template <typename LocalIndexType, typename GlobalIndexType>
+    neighborhood_communicator(
+        communicator base,
+        const distributed::index_map<LocalIndexType, GlobalIndexType>& imap);
+
+    /**
+     * Create a neighborhood_communicator by explicitly defining the
+     * neighborhood lists and sizes/offsets.
+     *
+     * @param base the base communicator
+     * @param sources the ranks to receive from
+     * @param recv_sizes the number of elements to receive from each source
+     * @param recv_offsets the offset for each source
+     * @param destinations the ranks to send to
+     * @param send_sizes the number of elements to send to each destination
+     * @param send_offsets the offset for each destination
+     */
+    neighborhood_communicator(
+        communicator base,
+        const std::vector<comm_index_type>& sources,
+        const std::vector<comm_index_type>& recv_sizes,
+        const std::vector<comm_index_type>& recv_offsets,
+        const std::vector<comm_index_type>& destinations,
+        const std::vector<comm_index_type>& send_sizes,
+        const std::vector<comm_index_type>& send_offsets);
+
+    /**
+     * Communicate data from all ranks to all other ranks using the
+     * neighborhood communication MPI_Ineighbor_alltoallv. See the MPI
+     * documentation for more details.
+     *
+     * @param exec the executor on which the message buffers are located
+     * @param send_buffer the buffer to send
+     * @param send_type the MPI_Datatype for the send buffer
+     * @param recv_buffer the buffer to gather into
+     * @param recv_type the MPI_Datatype for the recv buffer
+     *
+     * @return the request handle for the call
+     */
+    request i_all_to_all_v(std::shared_ptr<const Executor> exec,
+                           const void* send_buffer, MPI_Datatype send_type,
+                           void* recv_buffer,
+                           MPI_Datatype recv_type) const override;
+
+    /**
+     * Creates the inverse neighborhood_communicator by switching sources
+     * and destinations.
+     *
+     * @return a collective_communicator with the inverse communication
+     *         pattern
+     */
+    std::unique_ptr<collective_communicator> create_inverse() const override;
+
+    /**
+     * @copydoc collective_communicator::get_recv_size
+     */
+    comm_index_type get_recv_size() const override;
+
+    /**
+     * @copydoc collective_communicator::get_send_size
+     */
+    comm_index_type get_send_size() const override;
+
+private:
+    communicator comm_;
+
+    std::vector<comm_index_type> send_sizes_;
+    std::vector<comm_index_type> send_offsets_;
+    std::vector<comm_index_type> recv_sizes_;
+    std::vector<comm_index_type> recv_offsets_;
+};
+
+
+}  // namespace mpi
+}  // namespace experimental
+}  // namespace gko
+
+
+#endif  // GINKGO_BUILD_MPI
+#endif  // GKO_PUBLIC_CORE_DISTRIBUTED_NEIGHBORHOOD_COMMUNICATOR_HPP_
diff --git a/include/ginkgo/ginkgo.hpp b/include/ginkgo/ginkgo.hpp
index 373732012e7..6dc2aa45433 100644
--- a/include/ginkgo/ginkgo.hpp
+++ b/include/ginkgo/ginkgo.hpp
@@ -60,6 +60,7 @@
 #include <ginkgo/core/distributed/index_map.hpp>
 #include <ginkgo/core/distributed/lin_op.hpp>
 #include <ginkgo/core/distributed/matrix.hpp>
+#include <ginkgo/core/distributed/neighborhood_communicator.hpp>
 #include <ginkgo/core/distributed/partition.hpp>
 #include <ginkgo/core/distributed/partition_helpers.hpp>
 #include <ginkgo/core/distributed/preconditioner/schwarz.hpp>
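A minimal usage sketch of the new class, assuming an index map `imap` and an executor `exec` created elsewhere; the buffer packing, the `MPI_DOUBLE` payload type, and the helper name `exchange` are illustrative and not part of the patch:

```cpp
#include <ginkgo/ginkgo.hpp>

#include <memory>
#include <vector>

namespace gko_mpi = gko::experimental::mpi;
namespace gko_dist = gko::experimental::distributed;

void exchange(std::shared_ptr<const gko::Executor> exec,
              gko_mpi::communicator comm,
              const gko_dist::index_map<gko::int32, gko::int64>& imap)
{
    // derive the send/receive neighborhoods from the index map
    gko_mpi::neighborhood_communicator nhcomm(comm, imap);

    // buffers sized according to the communication pattern
    std::vector<double> send_buf(nhcomm.get_send_size());
    std::vector<double> recv_buf(nhcomm.get_recv_size());
    // ... pack the values to be sent into send_buf ...

    // non-blocking neighborhood all-to-all, completed by wait()
    auto req = nhcomm.i_all_to_all_v(exec, send_buf.data(), MPI_DOUBLE,
                                     recv_buf.data(), MPI_DOUBLE);
    req.wait();

    // the reverse direction uses the inverse pattern
    auto inverse = nhcomm.create_inverse();
}
```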