Add parallel gmres implementation #662

Draft: wants to merge 25 commits into base: develop
1 change: 1 addition & 0 deletions CMakeLists.txt
@@ -480,6 +480,7 @@ if (ASGARD_BUILD_TESTS)
set (mpi_test_components
adapt
distribution
solver
time_advance
asgard_discretization
)
24 changes: 24 additions & 0 deletions src/asgard_matrix.hpp
@@ -1,6 +1,7 @@
#pragma once
#include "build_info.hpp"

#include "asgard_mpi.h"
#include "asgard_resources.hpp"
#include "lib_dispatch.hpp"
#include "tools.hpp"
@@ -11,6 +12,8 @@
#include <string>
#include <vector>

#include <unistd.h>

namespace asgard
{
// resource arguments allow developers to select host (CPU only) or device
@@ -1203,6 +1206,24 @@ template<typename P, mem_type mem, resource resrc>
template<resource, typename>
void fk::matrix<P, mem, resrc>::print(std::string label) const
{
#ifdef ASGARD_USE_MPI
static int const rank = []() {
int my_rank;
auto const status =
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
expect(status == 0);
return my_rank;
}();
static int const num_ranks = []() {
int output;
auto const status = MPI_Comm_size(MPI_COMM_WORLD, &output);
expect(status == 0);
return output;
}();
sleep(rank);
std::cout << "rank: " << rank << '\n';
#endif

if constexpr (mem == mem_type::owner)
std::cout << label << "(owner)" << '\n';

@@ -1233,6 +1254,9 @@ void fk::matrix<P, mem, resrc>::print(std::string label) const
}
std::cout << '\n';
}
#ifdef ASGARD_USE_MPI
sleep(num_ranks - rank - 1);
#endif
}

//
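For reference, a standalone sketch (not part of the PR diff) of the staggered-printing pattern the new MPI block uses: each rank sleeps rank seconds before printing and num_ranks - rank - 1 seconds afterwards, so output appears roughly in rank order and all ranks leave print() at about the same time.

#include <mpi.h>
#include <unistd.h>
#include <iostream>

int main(int argc, char **argv)
{
  MPI_Init(&argc, &argv);
  int rank = 0, num_ranks = 1;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &num_ranks);

  sleep(rank);                 // rank 0 prints first, rank 1 one second later, ...
  std::cout << "rank: " << rank << " prints its local data here\n";
  sleep(num_ranks - rank - 1); // wait out the remaining ranks before continuing

  MPI_Finalize();
  return 0;
}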
4 changes: 2 additions & 2 deletions src/asgard_resources.hpp
@@ -14,7 +14,7 @@
namespace asgard
{
// used to suppress warnings in unused variables
auto const ignore = [](auto ignored) { (void)ignored; };
auto const ignore = [](auto const &ignored) { (void)ignored; };

/*!
* \brief Default precision to use, double if enabled and float otherwise.
@@ -115,4 +115,4 @@ void memcpy_2d(P *dest, int const dest_stride, P const *const source,
#include "asgard_resources_cuda.tpp"
#else
#include "asgard_resources_host.tpp"
#endif
#endif
25 changes: 25 additions & 0 deletions src/asgard_vector.hpp
@@ -1,6 +1,7 @@
#pragma once
#include "build_info.hpp"

#include "asgard_mpi.h"
#include "asgard_resources.hpp"
#include "lib_dispatch.hpp"
#include "tools.hpp"
@@ -11,6 +12,8 @@
#include <string>
#include <vector>

#include <unistd.h>

namespace asgard
{
// resource arguments allow developers to select host (CPU only) or device
@@ -978,6 +981,24 @@ template<typename P, mem_type mem, resource resrc>
template<resource, typename>
void fk::vector<P, mem, resrc>::print(std::string_view const label) const
{
#ifdef ASGARD_USE_MPI
static int const rank = []() {
int my_rank;
auto const status =
MPI_Comm_rank(MPI_COMM_WORLD, &my_rank);
expect(status == 0);
return my_rank;
}();
static int const num_ranks = []() {
int output;
auto const status = MPI_Comm_size(MPI_COMM_WORLD, &output);
expect(status == 0);
return output;
}();
sleep(rank);
std::cout << "rank: " << rank << '\n';
#endif

if constexpr (mem == mem_type::owner)
std::cout << label << "(owner)" << '\n';
else if constexpr (mem == mem_type::view)
@@ -999,6 +1020,10 @@ void fk::vector<P, mem, resrc>::print(std::string_view const label) const
std::cout << std::right << (*this)(i) << " ";
}
std::cout << '\n';

#ifdef ASGARD_USE_MPI
sleep(num_ranks - rank - 1);
#endif
}

//
69 changes: 69 additions & 0 deletions src/distribution.cpp
@@ -330,6 +330,49 @@ void reduce_results(fk::vector<P, src_mem, resrc> const &source,
#endif
}

template<typename P, std::enable_if_t<std::is_floating_point<P>::value, bool>>
void reduce_results(P const source,
P &dest,
distribution_plan const &plan, int const my_rank)
{
expect(my_rank >= 0);
expect(my_rank < static_cast<int>(plan.size()));

#ifdef ASGARD_USE_MPI
if (plan.size() == 1)
{
dest = source;
return;
}

dest = 0.;
int const num_cols = get_num_subgrid_cols(plan.size());

int const my_row = my_rank / num_cols;
int const my_col = my_rank % num_cols;

MPI_Comm row_communicator;
MPI_Comm const global_communicator = distro_handle.get_global_comm();

auto success =
MPI_Comm_split(global_communicator, my_row, my_col, &row_communicator);
Collaborator review comment:

The MPI_Comm_split() call causes global communication; see the algorithm description here:

https://www.mpich.org/static/docs/v3.1.3/www3/MPI_Comm_split.html

It's OK to use for prototype purposes, but eventually this step should be done only once. You can have a reducer class, where you initialize the object once and keep the communicator, then just call MPI_Allreduce() for every iteration.
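
A minimal sketch of such a reducer, assuming the existing get_num_subgrid_cols() helper; the class name row_reducer and its interface are illustrative, not part of this PR:

#include <mpi.h>
#include <type_traits>

class row_reducer
{
public:
  row_reducer(MPI_Comm const global, int const my_rank, int const num_ranks)
  {
    int const num_cols = get_num_subgrid_cols(num_ranks); // existing asgard helper
    // the collective MPI_Comm_split happens once, at construction
    MPI_Comm_split(global, my_rank / num_cols, my_rank % num_cols, &row_comm_);
  }
  ~row_reducer() { MPI_Comm_free(&row_comm_); }

  // sum one scalar across the subgrid row; called every iteration
  template<typename P>
  P allreduce(P const local) const
  {
    P sum = 0;
    MPI_Datatype const type = std::is_same_v<P, double> ? MPI_DOUBLE : MPI_FLOAT;
    MPI_Allreduce(&local, &sum, 1, type, MPI_SUM, row_comm_);
    return sum;
  }

private:
  MPI_Comm row_comm_;
};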

expect(success == 0);

MPI_Datatype const mpi_type =
std::is_same_v<P, double> ? MPI_DOUBLE : MPI_FLOAT;
success = MPI_Allreduce((void *)&source, (void *)&dest,
1, mpi_type, MPI_SUM, row_communicator);
expect(success == 0);

success = MPI_Comm_free(&row_communicator);
expect(success == 0);

#else
dest = source;
return;
#endif
}

//
// -- below functionality for exchanging solution vector data across subgrid
// rows via point-to-point messages.
@@ -1170,16 +1213,34 @@

#ifdef ASGARD_ENABLE_DOUBLE

template void reduce_results(double const source, double &dest,
distribution_plan const &plan, int const my_rank);

template void reduce_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::owner, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void reduce_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::view, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::owner, resource::host> &dest,
int const segment_size, distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<double, mem_type::view, resource::host> const &source,
fk::vector<double, mem_type::owner, resource::host> &dest,
int const segment_size, distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<double, mem_type::owner, resource::host> const &source,
fk::vector<double, mem_type::view, resource::host> &dest,
int const segment_size, distribution_plan const &plan, int const my_rank);

#ifdef ASGARD_USE_CUDA
template void reduce_results(
fk::vector<double, mem_type::owner, resource::device> const &source,
@@ -1220,11 +1281,19 @@ template void scatter_matrix<double>(double const *A, int const *descA,

#ifdef ASGARD_ENABLE_FLOAT

template void reduce_results(float const source, float &dest,
distribution_plan const &plan, int const my_rank);

template void
reduce_results(fk::vector<float, mem_type::owner, resource::host> const &source,
fk::vector<float, mem_type::owner, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void
reduce_results(fk::vector<float, mem_type::owner, resource::host> const &source,
fk::vector<float, mem_type::view, resource::host> &dest,
distribution_plan const &plan, int const my_rank);

template void exchange_results(
fk::vector<float, mem_type::owner, resource::host> const &source,
fk::vector<float, mem_type::owner, resource::host> &dest,
4 changes: 4 additions & 0 deletions src/distribution.hpp
@@ -238,6 +238,10 @@ template<typename P, mem_type src_mem, mem_type dst_mem, resource resrc>
void reduce_results(fk::vector<P, src_mem, resrc> const &source,
fk::vector<P, dst_mem, resrc> &dest,
distribution_plan const &plan, int const my_rank);
template<typename P, std::enable_if_t<std::is_floating_point<P>::value, bool> = true>
void reduce_results(P const source,
P &dest,
distribution_plan const &plan, int const my_rank);

// generate a message list for each rank for exchange_results function;
// conceptually an internal component function, exposed for testing
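For context, a hedged sketch of how the new scalar overload might back a row-wise inner product in the parallel GMRES iteration; distributed_dot and its arguments are illustrative only, and assume each rank's local segment covers a distinct slice of the vector within its subgrid row:

// Illustrative only: sum local partial dot products across the subgrid row.
template<typename P>
P distributed_dot(fk::vector<P, mem_type::owner, resource::host> const &x,
                  fk::vector<P, mem_type::owner, resource::host> const &y,
                  distribution_plan const &plan, int const my_rank)
{
  P local = 0.;
  for (int i = 0; i < x.size(); ++i)
    local += x(i) * y(i);

  P global = 0.;
  reduce_results(local, global, plan, my_rank); // the new scalar overload
  return global;
}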
77 changes: 77 additions & 0 deletions src/distribution_tests.cpp
@@ -453,6 +453,83 @@ TEMPLATE_TEST_CASE("allreduce across row of subgrids", "[distribution]",
}
}

TEMPLATE_TEST_CASE("allreduce element across row", "[distribution]",
test_precs)
{
if (!is_active())
{
return;
}

SECTION("1 rank")
{
auto const num_ranks = 1;
auto const my_rank = 0;
int const degree = 4;
int const level = 2;

options const o = make_options(
{"-l", std::to_string(level), "-d", std::to_string(degree)});
auto const pde =
make_PDE<default_precision>(PDE_opts::continuity_2, level, degree);
elements::table const table(o, *pde);

auto const plan = get_plan(num_ranks, table);

TestType const gold{42};
TestType const x = gold;
TestType fx = 0.;
reduce_results(x, fx, plan, my_rank);
REQUIRE(fx == gold);
}

SECTION("multiple ranks")
{
#ifdef ASGARD_USE_MPI

int const my_rank = distrib_test_info.get_my_rank();
int const num_ranks = distrib_test_info.get_num_ranks();

if (my_rank < num_ranks)
{
int const degree = 5;
int const level = 4;

options const o = make_options(
{"-l", std::to_string(level), "-d", std::to_string(degree)});

auto const pde =
make_PDE<default_precision>(PDE_opts::continuity_3, level, degree);
elements::table const table(o, *pde);

auto const plan = get_plan(num_ranks, table);

std::vector<TestType> rank_outputs;
for (int i = 0; i < static_cast<int>(plan.size()); ++i)
{
rank_outputs.push_back(i);
}
int const my_row = my_rank / get_num_subgrid_cols(num_ranks);
TestType gold{0};
for (int i = 0; i < static_cast<int>(rank_outputs.size()); ++i)
{
if (i / get_num_subgrid_cols(num_ranks) == my_row)
{
gold = gold + rank_outputs[i];
}
}

auto const &x = rank_outputs[std::min(my_rank, static_cast<int>(plan.size()) - 1)];
TestType fx = 0.;
reduce_results(x, fx, plan, my_rank);
REQUIRE_THAT(fx, Catch::Matchers::WithinAbs(gold, std::numeric_limits<TestType>::epsilon()));
}
#else
REQUIRE(true);
#endif
}
}

void generate_messages_test(int const num_ranks, elements::table const &table)
{
auto const plan = get_plan(num_ranks, table);
2 changes: 1 addition & 1 deletion src/program_options.cpp
@@ -308,7 +308,7 @@ parser::parser(int argc, char const *const *argv)
if ((use_implicit_stepping || use_imex_stepping) && get_num_ranks() > 1)
{
auto const choice = solver_mapping.at(solver_str);
if (choice != solve_opts::scalapack)
if (choice == solve_opts::direct)
{
std::cerr << "Distribution not implemented for implicit stepping\n";
valid = false;