diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp index 953a15647..a5bb64273 100644 --- a/palace/drivers/basesolver.cpp +++ b/palace/drivers/basesolver.cpp @@ -554,6 +554,7 @@ void BaseSolver::PostprocessProbes(const PostOperator &postop, const std::string void BaseSolver::PostprocessFields(const PostOperator &postop, int step, double time) const { // Save the computed fields in parallel in format for viewing with ParaView. + BlockTimer bt(Timer::IO); if (post_dir.length() == 0) { Mpi::Warning("No file specified under [\"Problem\"][\"Output\"]!\nSkipping saving of " diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 2fac11a38..7ced1e76a 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -144,11 +144,11 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in auto t0 = Timer::Now(); while (step < nstep) { - // Assemble the linear system. - BlockTimer bt1(Timer::CONSTRUCT); const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, nstep, freq, Timer::Duration(Timer::Now() - t0).count()); + + // Assemble the linear system. if (step > step0) { // Update frequency-dependent excitation and operators. @@ -163,13 +163,13 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in spaceop.GetExcitationVector(omega, RHS); // Solve the linear system. - BlockTimer bt2(Timer::SOLVE); + BlockTimer bt1(Timer::SOLVE); Mpi::Print("\n"); ksp.Mult(RHS, E); // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in // PostOperator for all postprocessing operations. - BlockTimer bt3(Timer::POSTPRO); + BlockTimer bt2(Timer::POSTPRO); double E_elec = 0.0, E_mag = 0.0; Curl->Mult(E, B); B *= -1.0 / (1i * omega); @@ -240,10 +240,7 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // Configure the PROM operator which performs the parameter space sampling and basis // construction during the offline phase as well as the PROM solution during the online - // phase. Initialize the basis with samples from the top and bottom of the frequency - // range of interest. Each call for an HDM solution adds the frequency sample to P_S and - // removes it from P \ P_S. - BlockTimer bt1(Timer::FREQSAMPLESELECTION); + // phase. auto t0 = Timer::Now(); const double f0 = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, 1.0); Mpi::Print("\nBeginning PROM construction offline phase:\n" @@ -252,15 +249,15 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i RomOperator prom(iodata, spaceop); prom.Initialize(omega0, delta_omega, nstep - step0, nmax); spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port output for offline - { - BlockTimer bt2(Timer::HDMSOLVE); - prom.SolveHDM(omega0, E); // Print matrix stats at first HDM solve - } + + // Initialize the basis with samples from the top and bottom of the frequency + // range of interest. Each call for an HDM solution adds the frequency sample to P_S and + // removes it from P \ P_S. Timing for the HDM construction and solve is handled inside + // of the RomOperator. + BlockTimer bt1(Timer::CONSTRUCTPROM); + prom.SolveHDM(omega0, E); // Print matrix stats at first HDM solve prom.AddHDMSample(omega0, E); - { - BlockTimer bt2(Timer::HDMSOLVE); - prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E); - } + prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E); prom.AddHDMSample(omega0 + (nstep - step0 - 1) * delta_omega, E); // Greedy procedure for basis construction (offline phase). Basis is initialized with @@ -282,10 +279,7 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", iter - iter0 + 1, prom.GetReducedDimension(), omega_star * f0, omega_star, max_error); - { - BlockTimer bt2(Timer::HDMSOLVE); - prom.SolveHDM(omega_star, E); - } + prom.SolveHDM(omega_star, E); prom.AddHDMSample(omega_star, E); iter++; } @@ -299,21 +293,22 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i SaveMetadata(prom.GetLinearSolver()); // Main fast frequency sweep loop (online phase). + BlockTimer bt2(Timer::CONSTRUCT); Mpi::Print("\nBeginning fast frequency sweep online phase\n"); spaceop.GetWavePortOp().SetSuppressOutput(false); // Disable output suppression int step = step0; double omega = omega0; while (step < nstep) { - // Assemble the linear system. - BlockTimer bt2(Timer::CONSTRUCT); const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); Mpi::Print("\nIt {:d}/{:d}: ω/2π = {:.3e} GHz (elapsed time = {:.2e} s)\n", step + 1, nstep, freq, Timer::Duration(Timer::Now() - t0).count()); + + // Assemble the linear system. prom.AssemblePROM(omega); // Solve the linear system. - BlockTimer bt3(Timer::SOLVE); + BlockTimer bt3(Timer::SOLVEPROM); Mpi::Print("\n"); prom.SolvePROM(E); @@ -383,7 +378,6 @@ void DrivenSolver::Postprocess(const PostOperator &postop, } if (iodata.solver.driven.delta_post > 0 && step % iodata.solver.driven.delta_post == 0) { - BlockTimer bt(Timer::IO); Mpi::Print("\n"); PostprocessFields(postop, step / iodata.solver.driven.delta_post, freq); Mpi::Print(" Wrote fields to disk at step {:d}\n", step + 1); diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index d4b3e61ad..31711d2d4 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -314,7 +314,6 @@ void EigenSolver::Postprocess(const PostOperator &postop, PostprocessProbes(postop, "m", i, i + 1); if (i < iodata.solver.eigenmode.n_post) { - BlockTimer bt(Timer::IO); PostprocessFields(postop, i, i + 1); Mpi::Print(" Wrote mode {:d} to disk\n", i + 1); } diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index 08463328f..9e9dc0d5f 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -47,18 +47,18 @@ void ElectrostaticSolver::Solve(std::vector> &mes auto t0 = Timer::Now(); for (const auto &[idx, data] : laplaceop.GetSources()) { - // Form and solve the linear system for a prescribed nonzero voltage on the specified - // terminal. - BlockTimer bt1(Timer::CONSTRUCT); Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep, idx, Timer::Duration(Timer::Now() - t0).count()); + + // Form and solve the linear system for a prescribed nonzero voltage on the specified + // terminal. Mpi::Print("\n"); laplaceop.GetExcitationVector(idx, *K, V[step], RHS); - BlockTimer bt2(Timer::SOLVE); + BlockTimer bt1(Timer::SOLVE); ksp.Mult(RHS, V[step]); - BlockTimer bt3(Timer::POSTPRO); + BlockTimer bt2(Timer::POSTPRO); Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n", linalg::Norml2(laplaceop.GetComm(), V[step]), linalg::Norml2(laplaceop.GetComm(), RHS)); @@ -106,7 +106,6 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & PostprocessProbes(postop, "i", i, idx); if (i < iodata.solver.electrostatic.n_post) { - BlockTimer bt(Timer::IO); PostprocessFields(postop, i, idx); Mpi::Print(" Wrote fields to disk for terminal {:d}\n", idx); } diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index 66bee0825..ac2996df0 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -47,19 +47,19 @@ void MagnetostaticSolver::Solve(std::vector> &mes auto t0 = Timer::Now(); for (const auto &[idx, data] : curlcurlop.GetSurfaceCurrentOp()) { - // Form and solve the linear system for a prescribed current on the specified source. - BlockTimer bt1(Timer::CONSTRUCT); Mpi::Print("\nIt {:d}/{:d}: Index = {:d} (elapsed time = {:.2e} s)\n", step + 1, nstep, idx, Timer::Duration(Timer::Now() - t0).count()); + + // Form and solve the linear system for a prescribed current on the specified source. Mpi::Print("\n"); A[step].SetSize(RHS.Size()); A[step] = 0.0; curlcurlop.GetExcitationVector(idx, RHS); - BlockTimer bt2(Timer::SOLVE); + BlockTimer bt1(Timer::SOLVE); ksp.Mult(RHS, A[step]); - BlockTimer bt3(Timer::POSTPRO); + BlockTimer bt2(Timer::POSTPRO); Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", linalg::Norml2(curlcurlop.GetComm(), A[step]), linalg::Norml2(curlcurlop.GetComm(), RHS)); @@ -113,7 +113,6 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator PostprocessProbes(postop, "i", i, idx); if (i < iodata.solver.magnetostatic.n_post) { - BlockTimer bt(Timer::IO); PostprocessFields(postop, i, idx); Mpi::Print(" Wrote fields to disk for terminal {:d}\n", idx); } diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp index 09a4fa89d..910ad1030 100644 --- a/palace/drivers/transientsolver.cpp +++ b/palace/drivers/transientsolver.cpp @@ -80,11 +80,12 @@ void TransientSolver::Solve(std::vector> &mesh) c auto t0 = Timer::Now(); while (step < nstep) { - // Single time step t -> t + dt. - BlockTimer bt1(Timer::SOLVE); const double ts = iodata.DimensionalizeValue(IoData::ValueType::TIME, t + delta_t); Mpi::Print("\nIt {:d}/{:d}: t = {:e} ns (elapsed time = {:.2e} s)\n", step, nstep - 1, ts, Timer::Duration(Timer::Now() - t0).count()); + + // Single time step t -> t + dt. + BlockTimer bt1(Timer::SOLVE); if (step == 0) { Mpi::Print("\n"); @@ -252,7 +253,6 @@ void TransientSolver::Postprocess(const PostOperator &postop, if (iodata.solver.transient.delta_post > 0 && step % iodata.solver.transient.delta_post == 0) { - BlockTimer bt(Timer::IO); Mpi::Print("\n"); PostprocessFields(postop, step / iodata.solver.transient.delta_post, ts); Mpi::Print(" Wrote fields to disk at step {:d}\n", step); diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index b2ad28151..2b2b80434 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -7,6 +7,7 @@ #include "linalg/chebyshev.hpp" #include "linalg/distrelaxation.hpp" #include "linalg/rap.hpp" +#include "utils/timer.hpp" namespace palace { @@ -176,11 +177,13 @@ void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const // level 0. Important to note that the smoothers must respect the initial guess flag // correctly (given X, Y, compute Y <- Y + B (X - A Y)) . B[l]->SetInitialGuess(initial_guess); - B[l]->Mult(X[l], Y[l]); if (l == 0) { + BlockTimer bt(Timer::COARSESOLVE); + B[l]->Mult(X[l], Y[l]); return; } + B[l]->Mult(X[l], Y[l]); // Compute residual. A[l]->Mult(Y[l], R[l]); diff --git a/palace/linalg/iterative.cpp b/palace/linalg/iterative.cpp index 531af33f9..0806b165f 100644 --- a/palace/linalg/iterative.cpp +++ b/palace/linalg/iterative.cpp @@ -9,6 +9,7 @@ #include #include "linalg/orthog.hpp" #include "utils/communication.hpp" +#include "utils/timer.hpp" namespace palace { @@ -238,6 +239,91 @@ inline void ApplyPlaneRotation(std::complex &dx, std::complex &dy, const T dx = t; } +template +inline void ApplyB(const Solver *B, const VecType &x, VecType &y) +{ + BlockTimer bt(Timer::PRECONDITIONER); + MFEM_ASSERT(B, "Missing preconditioner in ApplyB!"); + B->Mult(x, y); +} + +template +inline void InitialResidual(PrecSide side, const OperType *A, const Solver *B, + const VecType &b, VecType &x, VecType &r, VecType &z, + bool initial_guess) +{ + if (B && side == GmresSolver::PrecSide::LEFT) + { + if (initial_guess) + { + A->Mult(x, z); + linalg::AXPBY(1.0, b, -1.0, z); + ApplyB(B, z, r); + } + else + { + ApplyB(B, b, r); + x = 0.0; + } + } + else // !B || side == PrecSide::RIGHT + { + if (initial_guess) + { + A->Mult(x, r); + linalg::AXPBY(1.0, b, -1.0, r); + } + else + { + r = b; + x = 0.0; + } + } +} + +template +inline void ApplyBA(PrecSide side, const OperType *A, const Solver *B, + const VecType &x, VecType &y, VecType &z) +{ + if (B && side == GmresSolver::PrecSide::LEFT) + { + A->Mult(x, z); + ApplyB(B, z, y); + } + else if (B && side == GmresSolver::PrecSide::RIGHT) + { + ApplyB(B, x, z); + A->Mult(z, y); + } + else + { + A->Mult(x, y); + } +} + +template +inline void OrthogonalizeIteration(OrthogType type, MPI_Comm comm, + const std::vector &V, VecType &w, + ScalarType *Hj, int j) +{ + using OperType = typename std::conditional::value, + ComplexOperator, Operator>::type; + + // Orthogonalize w against the leading j + 1 columns of V. + switch (type) + { + case GmresSolver::OrthogType::MGS: + linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); + break; + case GmresSolver::OrthogType::CGS: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); + break; + case GmresSolver::OrthogType::CGS2: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); + break; + } +} + } // namespace template @@ -295,7 +381,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const } if (B) { - B->Mult(r, z); + ApplyB(B, r, z); } else { @@ -306,7 +392,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const res = std::sqrt(std::abs(beta)); if (this->initial_guess && B) { - B->Mult(b, p); + ApplyB(B, b, p); auto beta_rhs = linalg::Dot(comm, p, b); CheckDot(beta_rhs, "PCG preconditioner is not positive definite: (Bb, b) = "); initial_res = std::sqrt(std::abs(beta_rhs)); @@ -352,7 +438,7 @@ void CgSolver::Mult(const VecType &b, VecType &x) const beta_prev = beta; if (B) { - B->Mult(r, z); + ApplyB(B, r, z); } else { @@ -386,88 +472,6 @@ void CgSolver::Mult(const VecType &b, VecType &x) const final_it = it; } -namespace -{ - -template -inline void InitialResidual(PrecSide side, const OperType *A, const Solver *B, - const VecType &b, VecType &x, VecType &r, VecType &z, - bool initial_guess) -{ - if (B && side == GmresSolver::PrecSide::LEFT) - { - if (initial_guess) - { - A->Mult(x, z); - linalg::AXPBY(1.0, b, -1.0, z); - B->Mult(z, r); - } - else - { - B->Mult(b, r); - x = 0.0; - } - } - else // !B || side == PrecSide::RIGHT - { - if (initial_guess) - { - A->Mult(x, r); - linalg::AXPBY(1.0, b, -1.0, r); - } - else - { - r = b; - x = 0.0; - } - } -} - -template -inline void ApplyBA(PrecSide side, const OperType *A, const Solver *B, - const VecType &x, VecType &y, VecType &z) -{ - if (B && side == GmresSolver::PrecSide::LEFT) - { - A->Mult(x, z); - B->Mult(z, y); - } - else if (B && side == GmresSolver::PrecSide::RIGHT) - { - B->Mult(x, z); - A->Mult(z, y); - } - else - { - A->Mult(x, y); - } -} - -template -inline void OrthogonalizeIteration(OrthogType type, MPI_Comm comm, - const std::vector &V, VecType &w, - ScalarType *Hj, int j) -{ - using OperType = typename std::conditional::value, - ComplexOperator, Operator>::type; - - // Orthogonalize w against the leading j + 1 columns of V. - switch (type) - { - case GmresSolver::OrthogType::MGS: - linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); - break; - case GmresSolver::OrthogType::CGS: - linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); - break; - case GmresSolver::OrthogType::CGS2: - linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); - break; - } -} - -} // namespace - template void GmresSolver::Initialize() const { @@ -538,7 +542,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const RealType beta_rhs; if (B && pc_side == PrecSide::LEFT) { - B->Mult(b, V[0]); + ApplyB(B, b, V[0]); beta_rhs = linalg::Norml2(comm, V[0]); } else // !B || pc_side == PrecSide::RIGHT @@ -637,7 +641,7 @@ void GmresSolver::Mult(const VecType &b, VecType &x) const { r.Add(s[k], V[k]); } - B->Mult(r, V[0]); + ApplyB(B, r, V[0]); x += V[0]; } if (converged) diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp index 37fbdf6c2..78c9f5ea3 100644 --- a/palace/models/romoperator.cpp +++ b/palace/models/romoperator.cpp @@ -10,6 +10,7 @@ #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" +#include "utils/timer.hpp" namespace palace { @@ -158,6 +159,7 @@ void RomOperator::SolveHDM(double omega, ComplexVector &e) { // Compute HDM solution at the given frequency. The system matrix, A = K + iω C - ω² M + // A2(ω) is built by summing the underlying operator contributions. + BlockTimer bt0(Timer::CONSTRUCT); A2 = spaceop.GetExtraSystemMatrix(omega, Operator::DIAG_ZERO); has_A2 = (A2 != nullptr); auto A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega, @@ -181,6 +183,9 @@ void RomOperator::SolveHDM(double omega, ComplexVector &e) { r.Add(1i * omega, RHS1); } + + // Solve the linear system. + BlockTimer bt1(Timer::SOLVE); ksp->Mult(r, e); } diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index f1aef5e91..668780dfa 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -19,6 +19,7 @@ #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" +#include "utils/timer.hpp" namespace palace { @@ -1105,6 +1106,7 @@ void WavePortOperator::Initialize(double omega) { return; } + BlockTimer bt(Timer::WAVEPORT); if (!suppress_output) { const double freq = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega); diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp index f13354336..593c8e3ab 100644 --- a/palace/utils/geodata.cpp +++ b/palace/utils/geodata.cpp @@ -70,7 +70,7 @@ std::unique_ptr ReadMesh(MPI_Comm comm, const IoData &iodata, boo // I/O time separately for the mesh read from file. std::unique_ptr smesh; { - BlockTimer b(Timer::IO); + BlockTimer bt(Timer::IO); if (Mpi::Root(comm)) { // Optionally reorder elements (and vertices) based on spatial location after loading diff --git a/palace/utils/timer.hpp b/palace/utils/timer.hpp index aa6d1ad50..1da1654a7 100644 --- a/palace/utils/timer.hpp +++ b/palace/utils/timer.hpp @@ -29,9 +29,12 @@ class Timer { INIT = 0, CONSTRUCT, - FREQSAMPLESELECTION, - HDMSOLVE, + WAVEPORT, // Wave port solver SOLVE, + PRECONDITIONER, // Linear solver + COARSESOLVE, // Linear solver + CONSTRUCTPROM, // Adaptive frequency sweep + SOLVEPROM, // Adaptive frequency sweep POSTPRO, IO, TOTAL, @@ -40,9 +43,12 @@ class Timer inline static const std::vector descriptions{"Initialization", "Operator Construction", - "Frequency Sample Selection", - "HDM Solve", + " Wave Ports", "Solve", + " Preconditioner", + " Coarse Solve", + "PROM Construction", + "PROM Solve", "Postprocessing", "Disk IO", "Total"};