Add new timing categories for linear solver preconditioning, multigri…

…d coarse solve, wave ports, and clarify adaptive driven timing categories
awslabs · Aug 22, 2023 · de42147 · de42147
1 parent 014665f
commit de42147
Show file tree

Hide file tree

Showing 12 changed files with 144 additions and 133 deletions.
diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp
@@ -548,6 +548,7 @@ void BaseSolver::PostprocessProbes(const PostOperator &postop, const std::string
 void BaseSolver::PostprocessFields(const PostOperator &postop, int step, double time) const
 {
   // Save the computed fields in parallel in format for viewing with ParaView.
+  BlockTimer bt(Timer::IO);
   if (post_dir.length() == 0)
   {
     Mpi::Warning("No file specified under [\"Problem\"][\"Output\"]!\nSkipping saving of "

diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp
@@ -144,11 +144,11 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in
   auto t0 = Timer::Now();
   while (step < nstep)
   {
-    // Assemble the linear system.
-    BlockTimer bt1(Timer::CONSTRUCT);
     const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega);
     Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1,
                nstep, freq, Timer::Duration(Timer::Now() - t0).count());
+
+    // Assemble the linear system.
     if (step > step0)
     {
       // Update frequency-dependent excitation and operators.
@@ -163,13 +163,13 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in
     spaceop.GetExcitationVector(omega, RHS);
 
     // Solve the linear system.
-    BlockTimer bt2(Timer::SOLVE);
+    BlockTimer bt1(Timer::SOLVE);
     Mpi::Print("\n");
     ksp.Mult(RHS, E);
 
     // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in
     // PostOperator for all postprocessing operations.
-    BlockTimer bt3(Timer::POSTPRO);
+    BlockTimer bt2(Timer::POSTPRO);
     double E_elec = 0.0, E_mag = 0.0;
     Curl->Mult(E, B);
     B *= -1.0 / (1i * omega);
@@ -240,10 +240,7 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i
 
   // Configure the PROM operator which performs the parameter space sampling and basis
   // construction during the offline phase as well as the PROM solution during the online
-  // phase. Initialize the basis with samples from the top and bottom of the frequency
-  // range of interest. Each call for an HDM solution adds the frequency sample to P_S and
-  // removes it from P \ P_S.
-  BlockTimer bt1(Timer::FREQSAMPLESELECTION);
+  // phase.
   auto t0 = Timer::Now();
   const double f0 = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, 1.0);
   Mpi::Print("\nBeginning PROM construction offline phase:\n"
@@ -252,15 +249,15 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i
   RomOperator prom(iodata, spaceop);
   prom.Initialize(omega0, delta_omega, nstep - step0, nmax);
   spaceop.GetWavePortOp().SetSuppressOutput(true);  // Suppress wave port output for offline
-  {
-    BlockTimer bt2(Timer::HDMSOLVE);
-    prom.SolveHDM(omega0, E);  // Print matrix stats at first HDM solve
-  }
+
+  // Initialize the basis with samples from the top and bottom of the frequency
+  // range of interest. Each call for an HDM solution adds the frequency sample to P_S and
+  // removes it from P \ P_S. Timing for the HDM construction and solve is handled inside
+  // of the RomOperator.
+  BlockTimer bt1(Timer::CONSTRUCTPROM);
+  prom.SolveHDM(omega0, E);  // Print matrix stats at first HDM solve
   prom.AddHDMSample(omega0, E);
-  {
-    BlockTimer bt2(Timer::HDMSOLVE);
-    prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E);
-  }
+  prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E);
   prom.AddHDMSample(omega0 + (nstep - step0 - 1) * delta_omega, E);
 
   // Greedy procedure for basis construction (offline phase). Basis is initialized with
@@ -282,10 +279,7 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i
         "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n",
         iter - iter0 + 1, prom.GetReducedDimension(), omega_star * f0, omega_star,
         max_error);
-    {
-      BlockTimer bt2(Timer::HDMSOLVE);
-      prom.SolveHDM(omega_star, E);
-    }
+    prom.SolveHDM(omega_star, E);
     prom.AddHDMSample(omega_star, E);
     iter++;
   }
@@ -299,21 +293,22 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i
   SaveMetadata(prom.GetLinearSolver());
 
   // Main fast frequency sweep loop (online phase).
+  BlockTimer bt2(Timer::CONSTRUCT);
   Mpi::Print("\nBeginning fast frequency sweep online phase\n");
   spaceop.GetWavePortOp().SetSuppressOutput(false);  // Disable output suppression
   int step = step0;
   double omega = omega0;
   while (step < nstep)
   {
-    // Assemble the linear system.
-    BlockTimer bt2(Timer::CONSTRUCT);
     const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega);
     Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1,
                nstep, freq, Timer::Duration(Timer::Now() - t0).count());
+
+    // Assemble the linear system.
     prom.AssemblePROM(omega);
 
     // Solve the linear system.
-    BlockTimer bt3(Timer::SOLVE);
+    BlockTimer bt3(Timer::SOLVEPROM);
     Mpi::Print("\n");
     prom.SolvePROM(E);
 
@@ -383,7 +378,6 @@ void DrivenSolver::Postprocess(const PostOperator &postop,
   }
   if (iodata.solver.driven.delta_post > 0 && step % iodata.solver.driven.delta_post == 0)
   {
-    BlockTimer bt(Timer::IO);
     Mpi::Print("\n");
     PostprocessFields(postop, step / iodata.solver.driven.delta_post, freq);
     Mpi::Print(" Wrote fields to disk at step {:d}\n", step + 1);

diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp
@@ -314,7 +314,6 @@ void EigenSolver::Postprocess(const PostOperator &postop,
   PostprocessProbes(postop, "m", i, i + 1);
   if (i < iodata.solver.eigenmode.n_post)
   {
-    BlockTimer bt(Timer::IO);
     PostprocessFields(postop, i, i + 1);
     Mpi::Print(" Wrote mode {:d} to disk\n", i + 1);
   }

diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp
@@ -47,18 +47,18 @@ void ElectrostaticSolver::Solve(std::vector<std::unique_ptr<mfem::ParMesh>> &mes
   auto t0 = Timer::Now();
   for (const auto &[idx, data] : laplaceop.GetSources())
   {
-    // Form and solve the linear system for a prescribed nonzero voltage on the specified
-    // terminal.
-    BlockTimer bt1(Timer::CONSTRUCT);
     Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep,
                idx, Timer::Duration(Timer::Now() - t0).count());
+
+    // Form and solve the linear system for a prescribed nonzero voltage on the specified
+    // terminal.
     Mpi::Print("\n");
     laplaceop.GetExcitationVector(idx, *K, V[step], RHS);
 
-    BlockTimer bt2(Timer::SOLVE);
+    BlockTimer bt1(Timer::SOLVE);
     ksp.Mult(RHS, V[step]);
 
-    BlockTimer bt3(Timer::POSTPRO);
+    BlockTimer bt2(Timer::POSTPRO);
     Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n",
                linalg::Norml2(laplaceop.GetComm(), V[step]),
                linalg::Norml2(laplaceop.GetComm(), RHS));
@@ -106,7 +106,6 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator &
     PostprocessProbes(postop, "i", i, idx);
     if (i < iodata.solver.electrostatic.n_post)
     {
-      BlockTimer bt(Timer::IO);
       PostprocessFields(postop, i, idx);
       Mpi::Print(" Wrote fields to disk for terminal {:d}\n", idx);
     }

diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp
@@ -47,19 +47,19 @@ void MagnetostaticSolver::Solve(std::vector<std::unique_ptr<mfem::ParMesh>> &mes
   auto t0 = Timer::Now();
   for (const auto &[idx, data] : curlcurlop.GetSurfaceCurrentOp())
   {
-    // Form and solve the linear system for a prescribed current on the specified source.
-    BlockTimer bt1(Timer::CONSTRUCT);
     Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep,
                idx, Timer::Duration(Timer::Now() - t0).count());
+
+    // Form and solve the linear system for a prescribed current on the specified source.
     Mpi::Print("\n");
     A[step].SetSize(RHS.Size());
     A[step] = 0.0;
     curlcurlop.GetExcitationVector(idx, RHS);
 
-    BlockTimer bt2(Timer::SOLVE);
+    BlockTimer bt1(Timer::SOLVE);
     ksp.Mult(RHS, A[step]);
 
-    BlockTimer bt3(Timer::POSTPRO);
+    BlockTimer bt2(Timer::POSTPRO);
     Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n",
                linalg::Norml2(curlcurlop.GetComm(), A[step]),
                linalg::Norml2(curlcurlop.GetComm(), RHS));
@@ -113,7 +113,6 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator
     PostprocessProbes(postop, "i", i, idx);
     if (i < iodata.solver.magnetostatic.n_post)
     {
-      BlockTimer bt(Timer::IO);
       PostprocessFields(postop, i, idx);
       Mpi::Print(" Wrote fields to disk for terminal {:d}\n", idx);
     }

diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp
@@ -80,11 +80,12 @@ void TransientSolver::Solve(std::vector<std::unique_ptr<mfem::ParMesh>> &mesh) c
   auto t0 = Timer::Now();
   while (step < nstep)
   {
-    // Single time step t -> t + dt.
-    BlockTimer bt1(Timer::SOLVE);
     const double ts = iodata.DimensionalizeValue(IoData::ValueType::TIME, t + delta_t);
     Mpi::Print("\nIt {:d}/{:d}: t = {:e} ns (elapsed time = {:.2e} s)\n", step, nstep - 1,
                ts, Timer::Duration(Timer::Now() - t0).count());
+
+    // Single time step t -> t + dt.
+    BlockTimer bt1(Timer::SOLVE);
     if (step == 0)
     {
       Mpi::Print("\n");
@@ -254,7 +255,6 @@ void TransientSolver::Postprocess(const PostOperator &postop,
   if (iodata.solver.transient.delta_post > 0 &&
       step % iodata.solver.transient.delta_post == 0)
   {
-    BlockTimer bt(Timer::IO);
     Mpi::Print("\n");
     PostprocessFields(postop, step / iodata.solver.transient.delta_post, ts);
     Mpi::Print(" Wrote fields to disk at step {:d}\n", step);

diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp
@@ -7,6 +7,7 @@
 #include "linalg/chebyshev.hpp"
 #include "linalg/distrelaxation.hpp"
 #include "linalg/rap.hpp"
+#include "utils/timer.hpp"
 
 namespace palace
 {
@@ -161,11 +162,13 @@ void GeometricMultigridSolver<OperType>::VCycle(int l, bool initial_guess) const
   // level 0. Important to note that the smoothers must respect the initial guess flag
   // correctly (given X, Y, compute Y <- Y + B (X - A Y)) .
   B[l]->SetInitialGuess(initial_guess);
-  B[l]->Mult(X[l], Y[l]);
   if (l == 0)
   {
+    BlockTimer bt(Timer::COARSESOLVE);
+    B[l]->Mult(X[l], Y[l]);
     return;
   }
+  B[l]->Mult(X[l], Y[l]);
 
   // Compute residual.
   A[l]->Mult(Y[l], R[l]);