diff --git a/.github/workflows/build-and-test-macos.yml b/.github/workflows/build-and-test-macos.yml index a24e58a26..7700d1058 100644 --- a/.github/workflows/build-and-test-macos.yml +++ b/.github/workflows/build-and-test-macos.yml @@ -16,14 +16,14 @@ jobs: fail-fast: false matrix: include: # Pairwise testing - # - compiler: clang - # mpi: openmpi - # math-libs: openblas - # build-shared: shared - # with-64bit-int: int32 - # with-openmp: serial - # with-solver: superlu - # with-eigensolver: slepc + - compiler: clang + mpi: openmpi + math-libs: openblas + build-shared: shared + with-64bit-int: int32 + with-openmp: serial + with-solver: superlu + with-eigensolver: slepc - compiler: gcc mpi: openmpi diff --git a/CHANGELOG.md b/CHANGELOG.md index 464f421f8..a8bbf1a30 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,17 @@ The format of this changelog is based on ## In progress + - Changed implementation of complex-valued linear algebra to use new `ComplexVector` and + `ComplexOperator` types, which are based on the underlying `mfem::Vector` and + `mfem::Operator` classes, instead of PETSc. PETSc is now fully optional and only + required when SLEPc eigenvalue solver support is requested. Krylov solvers for real- and + complex-valued linear systems are implemented via the built-in `IterativeSolver` + classes. + - Changed implementation of PROMs for adaptive fast frequency sweep to use the Eigen + library for sequential dense linear algebra. + - Changed implementation of numeric wave ports to use MFEM's `SubMesh` functionality. As + of [#3379](https://github.com/mfem/mfem/pull/3379) in MFEM, this has full ND and RT + basis support. For now, support for nonconforming mesh boundaries is limited. - Added Apptainer/Singularity container build definition for Palace. 
- Added build dependencies on [libCEED](https://github.com/CEED/libCEED) and [LIBXSMM](https://github.com/libxsmm/libxsmm) to support operator partial assembly (CPU- @@ -20,9 +31,9 @@ The format of this changelog is based on ## [0.11.2] - 2023-07-14 - - Changed layout and names of `palace/` source directory for better organization. - Fixed a regression bug affecting meshes which have domain elements which are not assigned material properties in the configuration file. + - Changed layout and names of `palace/` source directory for better organization. - Added many updates to build system: Removed use of Git submodules to download dependencies relying instead directly on CMake's ExternalProject, patch GSLIB dependency for shared library builds, add CI tests with ARPACK-NG instead of SLEPc, update all diff --git a/CMakeLists.txt b/CMakeLists.txt index d2b906315..a3c865adc 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -57,12 +57,6 @@ if(NOT DEFINED BUILD_SHARED_LIBS) set(BUILD_SHARED_LIBS OFF CACHE BOOL "Global flag to cause add_library() to create shared libraries if ON") endif() -# For now, SLEPc is always required -if(NOT PALACE_WITH_SLEPC) - message(STATUS "Building with SLEPc eigenvalue solver as it is required") - set(PALACE_WITH_SLEPC ON CACHE BOOL "Build with SLEPc eigenvalue solver" FORCE) -endif() - # Add extra CMake modules list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake") diff --git a/docs/src/config/solver.md b/docs/src/config/solver.md index f306d05d4..7df1143ea 100644 --- a/docs/src/config/solver.md +++ b/docs/src/config/solver.md @@ -299,11 +299,16 @@ directory specified by [`config["Problem"]["Output"]`] "Tol": , "MaxIts": , "MaxSize": , - "UseGMG": , - "UsePCShifted": , + "UsePCMatShifted": , + "PCSide": , + "UseMultigrid": , + "MGAuxiliarySmoother": , "MGCycleIts": , "MGSmoothIts": , - "MGSmoothOrder": + "MGSmoothOrder": , + "DivFreeTol": , + "DivFreeMaxIts": , + "GSOrthogonalization": } ``` @@ -353,46 +358,66 @@ equations arising for 
each simulation type. The available options are: definite (SPD) and the preconditioned conjugate gradient method (`"CG"`) is used as the Krylov solver. -`"Tol" [1.0e-6]` : Relative (preconditioned) residual convergence tolerance for the -iterative linear solver. +`"Tol" [1.0e-6]` : Relative residual convergence tolerance for the iterative linear solver. `"MaxIts" [100]` : Maximum number of iterations for the iterative linear solver. `"MaxSize" [0]` : Maximum Krylov space size for the GMRES and FGMRES solvers. A value less than 1 defaults to the value specified by `"MaxIts"`. -`"UseGMG" [true]` : Enable or not [geometric multigrid solver] -(https://en.wikipedia.org/wiki/Multigrid_method) which uses h- and p-multigrid coarsening as -available to construct the multigrid hierarchy. The solver specified by `"Type"` is used on -the coarsest level. A Hiptmair smoother is applied to all other levels. - -`"UsePCShifted" [false]` : When set to `true`, constructs the preconditioner for frequency +`"UsePCMatShifted" [false]` : When set to `true`, constructs the preconditioner for frequency domain problems using a real SPD approximation of the system matrix, which can help performance at high frequencies (relative to the lowest nonzero eigenfrequencies of the model). +`"PCSide" ["Default"]` : Side for preconditioning. Not all options are available for all +iterative solver choices, and the default choice depends on the iterative solver used. + + - `"Left"` + - `"Right"` + - `"Default"` + +`"UseMultigrid" [true]` : Choose whether to enable [geometric multigrid preconditioning] +(https://en.wikipedia.org/wiki/Multigrid_method) which uses p- and h-multigrid coarsening as +available to construct the multigrid hierarchy. The solver specified by `"Type"` is used on +the coarsest level. Relaxation on the fine levels is performed with Chebyshev smoothing. + +`"MGAuxiliarySmoother"` : Activate hybrid smoothing from Hiptmair for multigrid levels when +`"UseMultigrid"` is `true`. 
For non-singular problems involving curl-curl operators, this +option is `true` by default. + `"MGCycleIts" [1]` : Number of V-cycle iterations per preconditioner application for -multigrid preconditioners (when `"UseGMG"` is `true` or `"Type"` is `"AMS"` or +multigrid preconditioners (when `"UseMultigrid"` is `true` or `"Type"` is `"AMS"` or `"BoomerAMG"`). `"MGSmoothIts" [1]` : Number of pre- and post-smooth iterations used for multigrid -preconditioners (when `"UseGMG"` is `true` or `"Type"` is `"AMS"` or `"BoomerAMG"`). +preconditioners (when `"UseMultigrid"` is `true` or `"Type"` is `"AMS"` or `"BoomerAMG"`). `"MGSmoothOrder" [3]` : Order of polynomial smoothing for geometric multigrid -preconditioning (when `"UseGMG"` is `true`). +preconditioning (when `"UseMultigrid"` is `true`). + +`"DivFreeTol" [1.0e-12]` : Relative tolerance for divergence-free cleaning used in the +eigenmode simulation type. + +`"DivFreeMaxIts" [100]` : Maximum number of iterations for divergence-free cleaning use in +the eigenmode simulation type. + +`"GSOrthogonalization" ["MGS"]` : Gram-Schmidt variant used to explicitly orthogonalize +vectors in Krylov subspace methods or other parts of the code. 
+ + - `"MGS"` : Modified Gram-Schmidt + - `"CGS"` : Classical Gram-Schmidt + - `"CGS2"` : Two-step classical Gram-Schmidt with reorthogonalization ### Advanced linear solver options - - `"Type"`: `"STRUMPACK-MP"` - - `"KSPType"`: `"MINRES"`, `"CGSYM"`, `"FCG"`, `"BCGS"`, `"BCGSL"`, `"FBCGS"`, `"QMRCGS"`, - `"TFQMR"` - - `"UseMGS" [false]` - - `"UseCGS2" [false]` - - `"UseKSPPiped" [false]` - - `"UseLOR" [false]` - - `"PrecondSide" ["Default"]`: `"Left"`, `"Right"`, `"Default"` - - `"Reordering" ["Default"]`: `"METIS"`, `"ParMETIS"`, `"Default"` - - `"STRUMPACKCompressionType" ["None"]`: `"None"`, `"BLR"`, `"HSS"`, `"HODLR"` + - `"UseInitialGuess" [true]` + - `"UsePartialAssembly" [false]` + - `"UseLowOrderRefined" [false]` + - `"Reordering" ["Default"]` : `"METIS"`, `"ParMETIS"`,`"Scotch"`, `"PTScotch"`, + `"Default"` + - `"STRUMPACKCompressionType" ["None"]` : `"None"`, `"BLR"`, `"HSS"`, `"HODLR"`, `"ZFP"`, + `"BLR-HODLR"`, `"ZFP-BLR-HODLR"` - `"STRUMPACKCompressionTol" [1.0e-3]` - `"STRUMPACKLossyPrecision" [16]` - `"STRUMPACKButterflyLevels" [1]` diff --git a/docs/src/guide/model.md b/docs/src/guide/model.md index 6f5716998..0fd1da6f5 100644 --- a/docs/src/guide/model.md +++ b/docs/src/guide/model.md @@ -40,7 +40,7 @@ or region-based refinement, specified using the [`config["Model"]["Refinement"]` uniform refinement levels as well as local refinement regions which refines the elements inside of a certain box or sphere-shaped region. For simplex meshes, the refinement maintains a conforming mesh but meshes containing hexahedra, prism, or pyramid elements -will be non-conforming after local refinement (this is not supported at this time). +will be nonconforming after local refinement (this is not supported at this time). 
[Adaptive mesh refinement (AMR)](https://en.wikipedia.org/wiki/Adaptive_mesh_refinement) according to error estimates in the computed solution is a work in progress for all diff --git a/palace/drivers/basesolver.cpp b/palace/drivers/basesolver.cpp index 07b98f7f3..89fa8433c 100644 --- a/palace/drivers/basesolver.cpp +++ b/palace/drivers/basesolver.cpp @@ -6,6 +6,7 @@ #include #include #include +#include "linalg/ksp.hpp" #include "models/domainpostoperator.hpp" #include "models/postoperator.hpp" #include "models/surfacepostoperator.hpp" @@ -51,10 +52,9 @@ void WriteMetadata(const std::string &post_dir, const json &meta) } // namespace -BaseSolver::BaseSolver(const IoData &iodata_, bool root_, int size, int num_thread, +BaseSolver::BaseSolver(const IoData &iodata, bool root, int size, int num_thread, const char *git_tag) - : iodata(iodata_), post_dir(GetPostDir(iodata_.problem.output)), root(root_), - table(8, 9, 6) + : iodata(iodata), post_dir(GetPostDir(iodata.problem.output)), root(root), table(8, 9, 6) { // Create directory for output. 
if (root && !std::filesystem::exists(post_dir)) @@ -100,7 +100,8 @@ void BaseSolver::SaveMetadata(const mfem::ParFiniteElementSpace &fespace) const } } -void BaseSolver::SaveMetadata(int ksp_mult, int ksp_it) const +template +void BaseSolver::SaveMetadata(const SolverType &ksp) const { if (post_dir.length() == 0) { @@ -109,8 +110,8 @@ void BaseSolver::SaveMetadata(int ksp_mult, int ksp_it) const if (root) { json meta = LoadMetadata(post_dir); - meta["LinearSolver"]["TotalSolves"] = ksp_mult; - meta["LinearSolver"]["TotalIts"] = ksp_it; + meta["LinearSolver"]["TotalSolves"] = ksp.NumTotalMult(); + meta["LinearSolver"]["TotalIts"] = ksp.NumTotalMultIterations(); WriteMetadata(post_dir, meta); } } @@ -556,4 +557,7 @@ void BaseSolver::PostprocessFields(const PostOperator &postop, int step, double Mpi::Barrier(); } +template void BaseSolver::SaveMetadata(const KspSolver &) const; +template void BaseSolver::SaveMetadata(const ComplexKspSolver &) const; + } // namespace palace diff --git a/palace/drivers/basesolver.hpp b/palace/drivers/basesolver.hpp index 11efabd93..2bbbdfa03 100644 --- a/palace/drivers/basesolver.hpp +++ b/palace/drivers/basesolver.hpp @@ -45,9 +45,7 @@ class BaseSolver int p; // Floating point precision for data int w1; // First column width = precision + 7 extra int p1; // Floating point precision for first column - Table(int sp_, int p_, int p1_) : w(sp_ + p_ + 7), sp(sp_), p(p_), w1(p1_ + 7), p1(p1_) - { - } + Table(int sp, int p, int p1) : w(sp + p + 7), sp(sp), p(p), w1(p1 + 7), p1(p1) {} }; const Table table; @@ -71,7 +69,7 @@ class BaseSolver void PostprocessFields(const PostOperator &postop, int step, double time) const; public: - BaseSolver(const IoData &iodata_, bool root_, int size = 0, int num_thread = 0, + BaseSolver(const IoData &iodata, bool root, int size = 0, int num_thread = 0, const char *git_tag = nullptr); virtual ~BaseSolver() = default; @@ -80,7 +78,8 @@ class BaseSolver // These methods write different simulation metadata to a 
JSON file in post_dir. void SaveMetadata(const mfem::ParFiniteElementSpace &fespace) const; - void SaveMetadata(int ksp_mult, int ksp_it) const; + template + void SaveMetadata(const SolverType &ksp) const; void SaveMetadata(const Timer &timer) const; }; diff --git a/palace/drivers/drivensolver.cpp b/palace/drivers/drivensolver.cpp index 680a38fab..ae6d3b341 100644 --- a/palace/drivers/drivensolver.cpp +++ b/palace/drivers/drivensolver.cpp @@ -6,8 +6,8 @@ #include #include #include "linalg/ksp.hpp" -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/romoperator.hpp" @@ -22,6 +22,8 @@ namespace palace { +using namespace std::complex_literals; + void DrivenSolver::Solve(std::vector> &mesh, Timer &timer) const { @@ -112,30 +114,30 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in // simply by setting diagonal entries of the system matrix for the corresponding dofs. // Because the Dirichlet BC is always homogenous, no special elimination is required on // the RHS. Assemble the linear system for the initial frequency (so we can call - // KspSolver:: SetOperators). Compute everything at the first frequency step. - std::unique_ptr A = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::COMPLETE, omega0, mfem::Operator::DIAG_ONE); - std::unique_ptr NegCurl = spaceop.GetNegCurlMatrixPetsc(); + // KspSolver::SetOperators). Compute everything at the first frequency step. + auto K = spaceop.GetComplexStiffnessMatrix(Operator::DIAG_ONE); + auto C = spaceop.GetComplexDampingMatrix(Operator::DIAG_ZERO); + auto M = spaceop.GetComplexMassMatrix(Operator::DIAG_ZERO); + auto A2 = spaceop.GetComplexExtraSystemMatrix(omega0, Operator::DIAG_ZERO); + auto Curl = spaceop.GetComplexCurlMatrix(); // Set up the linear solver and set operators for the first frequency step. 
The // preconditioner for the complex linear system is constructed from a real approximation // to the complex system matrix. - std::vector> P, AuxP; - spaceop.GetPreconditionerMatrix(omega0, P, AuxP); - - KspPreconditioner pc(iodata, spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - &spaceop.GetH1Spaces()); - pc.SetOperator(P, &AuxP); + auto A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega0, + std::complex(-omega0 * omega0, 0.0), K.get(), + C.get(), M.get(), A2.get()); + auto P = spaceop.GetPreconditionerMatrix(1.0, omega0, -omega0 * omega0, + omega0); - KspSolver ksp(A->GetComm(), iodata, "ksp_"); - ksp.SetPreconditioner(pc); - ksp.SetOperator(*A); + ComplexKspSolver ksp(iodata, spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); + ksp.SetOperators(*A, *P); // Set up RHS vector for the incident field at port boundaries, and the vector for the // first frequency step. - petsc::PetscParVector RHS(*NegCurl), E(*NegCurl), B(*NegCurl, true); - E.SetZero(); - B.SetZero(); + ComplexVector RHS(Curl->Width()), E(Curl->Width()), B(Curl->Height()); + E = 0.0; + B = 0.0; timer.construct_time += timer.Lap(); // Main frequency sweep loop. @@ -152,26 +154,32 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in if (step > step0) { // Update frequency-dependent excitation and operators. 
- A = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::COMPLETE, omega, - mfem::Operator::DIAG_ONE, false); - spaceop.GetPreconditionerMatrix(omega, P, AuxP, false); - pc.SetOperator(P, &AuxP); - ksp.SetOperator(*A); + A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); + A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega, + std::complex(-omega * omega, 0.0), K.get(), + C.get(), M.get(), A2.get()); + P = spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, + omega); + ksp.SetOperators(*A, *P); } - spaceop.GetFreqDomainExcitationVector(omega, RHS); + spaceop.GetExcitationVector(omega, RHS); timer.construct_time += timer.Lap(); Mpi::Print("\n"); ksp.Mult(RHS, E); timer.solve_time += timer.Lap(); + // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. double E_elec = 0.0, E_mag = 0.0; - PostOperator::GetBField(omega, *NegCurl, E, B); + Curl->Mult(E, B); + B *= -1.0 / (1i * omega); postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - // E.Print(); - Mpi::Print(" Sol. ||E|| = {:.6e} (||RHS|| = {:.6e})\n", E.Norml2(), RHS.Norml2()); + Mpi::Print(" Sol. 
||E|| = {:.6e} (||RHS|| = {:.6e})\n", + linalg::Norml2(spaceop.GetComm(), E), + linalg::Norml2(spaceop.GetComm(), RHS)); if (!iodata.solver.driven.only_port_post) { E_elec = postop.GetEFieldEnergy(); @@ -191,7 +199,7 @@ void DrivenSolver::SweepUniform(SpaceOperator &spaceop, PostOperator &postop, in step++; omega += delta_omega; } - SaveMetadata(ksp.GetTotalNumMult(), ksp.GetTotalNumIter()); + SaveMetadata(ksp); } void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, int nstep, @@ -228,10 +236,10 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // Allocate negative curl matrix for postprocessing the B-field and vectors for the // high-dimensional field solution. - std::unique_ptr NegCurl = spaceop.GetNegCurlMatrixPetsc(); - petsc::PetscParVector E(*NegCurl), B(*NegCurl, true); - E.SetZero(); - B.SetZero(); + auto Curl = spaceop.GetComplexCurlMatrix(); + ComplexVector E(Curl->Width()), B(Curl->Height()); + E = 0.0; + B = 0.0; // Configure the PROM operator which performs the parameter space sampling and basis // construction during the offline phase as well as the PROM solution during the online @@ -240,25 +248,29 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // removes it from P \ P_S. 
timer.construct_time += timer.Lap(); Timer local_timer; + const double f0 = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, 1.0); + Mpi::Print("\nBeginning PROM construction offline phase:\n" " {:d} points for frequency sweep over [{:.3e}, {:.3e}] GHz\n", - nstep - step0, - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega0), - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, - omega0 + (nstep - step0 - 1) * delta_omega)); - spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port stuff for offline - RomOperator prom(iodata, spaceop, nmax); - prom.Initialize(nstep - step0, omega0, delta_omega); + nstep - step0, omega0 * f0, (omega0 + (nstep - step0 - 1) * delta_omega) * f0); + RomOperator prom(iodata, spaceop); + prom.Initialize(omega0, delta_omega, nstep - step0, nmax); + spaceop.GetWavePortOp().SetSuppressOutput(true); // Suppress wave port output for offline local_timer.construct_time += local_timer.Lap(); - prom.SolveHDM(omega0, E, true); // Print matrix stats at first HDM solve - prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E, false); + prom.SolveHDM(omega0, E); // Print matrix stats at first HDM solve local_timer.solve_time += local_timer.Lap(); + prom.AddHDMSample(omega0, E); + local_timer.construct_time += local_timer.Lap(); + prom.SolveHDM(omega0 + (nstep - step0 - 1) * delta_omega, E); + local_timer.solve_time += local_timer.Lap(); + prom.AddHDMSample(omega0 + (nstep - step0 - 1) * delta_omega, E); + local_timer.construct_time += local_timer.Lap(); // Greedy procedure for basis construction (offline phase). Basis is initialized with // solutions at frequency sweep endpoints. int iter = static_cast(prom.GetSampleFrequencies().size()), iter0 = iter; - double max_error = 1.0; + double max_error; while (true) { // Compute maximum error in parameter domain with current PROM. 
@@ -273,31 +285,25 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i // Sample HDM and add solution to basis. Mpi::Print( "\nGreedy iteration {:d} (n = {:d}): ω* = {:.3e} GHz ({:.3e}), error = {:.3e}\n", - iter - iter0 + 1, prom.GetReducedDimension(), - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, omega_star), omega_star, + iter - iter0 + 1, prom.GetReducedDimension(), omega_star * f0, omega_star, max_error); prom.SolveHDM(omega_star, E); local_timer.solve_time += local_timer.Lap(); + prom.AddHDMSample(omega_star, E); + local_timer.construct_time += local_timer.Lap(); iter++; } - { - std::vector samples(prom.GetSampleFrequencies()); - // samples.Sort(); - for (auto &sample : samples) - { - sample = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, sample); - } - Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" - " n = {:d}, error = {:.3e}, tol = {:.3e}\n", - (iter == nmax) ? " reached maximum" : " converged with", iter, - prom.GetReducedDimension(), max_error, offline_tol); - utils::PrettyPrint(samples, " Sampled frequencies (GHz):"); - } - SaveMetadata(prom.GetTotalKspMult(), prom.GetTotalKspIter()); + Mpi::Print("\nAdaptive sampling{} {:d} frequency samples:\n" + " n = {:d}, error = {:.3e}, tol = {:.3e}\n", + (iter == nmax) ? 
" reached maximum" : " converged with", iter, + prom.GetReducedDimension(), max_error, offline_tol); + utils::PrettyPrint(prom.GetSampleFrequencies(), f0, " Sampled frequencies (GHz):"); + SaveMetadata(prom.GetLinearSolver()); + const auto local_construction_time = timer.Lap(); timer.construct_time += local_construction_time; Mpi::Print(" Total offline phase elapsed time: {:.2e} s\n" - " Parameter space sampling: {:.2e} s, HDM solves: {:.2e} s\n", + " Sampling and PROM construction: {:.2e} s, HDM solves: {:.2e} s\n", Timer::Duration(local_construction_time).count(), Timer::Duration(local_timer.construct_time).count(), Timer::Duration(local_timer.solve_time).count()); // Timings on rank 0 @@ -322,13 +328,15 @@ void DrivenSolver::SweepAdaptive(SpaceOperator &spaceop, PostOperator &postop, i prom.SolvePROM(E); timer.solve_time += timer.Lap(); + // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. double E_elec = 0.0, E_mag = 0.0; - PostOperator::GetBField(omega, *NegCurl, E, B); + Curl->Mult(E, B); + B *= -1.0 / (1i * omega); postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), spaceop.GetWavePortOp(), omega); - // E.Print(); - Mpi::Print(" Sol. ||E|| = {:.6e}\n", E.Norml2()); + Mpi::Print(" Sol. 
||E|| = {:.6e}\n", linalg::Norml2(spaceop.GetComm(), E)); if (!iodata.solver.driven.only_port_post) { E_elec = postop.GetEFieldEnergy(); diff --git a/palace/drivers/eigensolver.cpp b/palace/drivers/eigensolver.cpp index 11369c158..95e2eaa4f 100644 --- a/palace/drivers/eigensolver.cpp +++ b/palace/drivers/eigensolver.cpp @@ -4,15 +4,12 @@ #include "eigensolver.hpp" #include -#include "fem/freqdomain.hpp" -#include "fem/operator.hpp" #include "linalg/arpack.hpp" #include "linalg/divfree.hpp" -#include "linalg/feast.hpp" #include "linalg/ksp.hpp" -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/operator.hpp" #include "linalg/slepc.hpp" +#include "linalg/vector.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/spaceoperator.hpp" @@ -33,25 +30,20 @@ void EigenSolver::Solve(std::vector> &mesh, // computational range. The damping matrix may be nullptr. timer.Lap(); SpaceOperator spaceop(iodata, mesh); - std::unique_ptr K = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::STIFFNESS, mfem::Operator::DIAG_ONE); - std::unique_ptr M = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::MASS, mfem::Operator::DIAG_ZERO); - std::unique_ptr C = spaceop.GetSystemMatrixPetsc( - SpaceOperator::OperatorType::DAMPING, mfem::Operator::DIAG_ZERO); - std::unique_ptr NegCurl = spaceop.GetNegCurlMatrixPetsc(); + auto K = spaceop.GetComplexStiffnessMatrix(Operator::DIAG_ONE); + auto C = spaceop.GetComplexDampingMatrix(Operator::DIAG_ZERO); + auto M = spaceop.GetComplexMassMatrix(Operator::DIAG_ZERO); + auto Curl = spaceop.GetComplexCurlMatrix(); SaveMetadata(spaceop.GetNDSpace()); // Configure objects for postprocessing. 
PostOperator postop(iodata, spaceop, "eigenmode"); - petsc::PetscParVector E(*NegCurl), B(*NegCurl, true); + ComplexVector E(Curl->Width()), B(Curl->Height()); // Define and configure the eigensolver to solve the eigenvalue problem: // (K + λ C + λ² M) u = 0 or K u = -λ² M u - // with λ = iω. A shift-and-invert strategy is employed to solve for the eigenvalues - // closest to the specified target, σ. In general, the system matrices are complex and - // symmetric. - std::unique_ptr eigen; + // with λ = iω. In general, the system matrices are complex and symmetric. + std::unique_ptr eigen; config::EigenSolverData::Type type = iodata.solver.eigenmode.type; #if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) if (type == config::EigenSolverData::Type::DEFAULT) @@ -79,70 +71,60 @@ void EigenSolver::Solve(std::vector> &mesh, #endif if (type == config::EigenSolverData::Type::FEAST) { - Mpi::Print("\nConfiguring FEAST eigenvalue solver\n"); -#if defined(PALACE_WITH_SLEPC) - if (C) - { - eigen = std::make_unique( - K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - iodata.problem.verbose); - } - else - { - eigen = std::make_unique( - K->GetComm(), iodata, spaceop, iodata.solver.eigenmode.feast_contour_np, - iodata.problem.verbose); - } -#endif + MFEM_ABORT("FEAST eigenvalue solver is currently not supported!"); } else if (type == config::EigenSolverData::Type::ARPACK) { - Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); #if defined(PALACE_WITH_ARPACK) + Mpi::Print("\nConfiguring ARPACK eigenvalue solver\n"); if (C) { - eigen = std::make_unique(iodata.problem.verbose); + eigen = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); } else { - eigen = std::make_unique(iodata.problem.verbose); + eigen = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); } #endif } else // config::EigenSolverData::Type::SLEPC { - Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); #if defined(PALACE_WITH_SLEPC) - std::unique_ptr 
slepc; + Mpi::Print("\nConfiguring SLEPc eigenvalue solver\n"); + std::unique_ptr slepc; if (C) { if (!iodata.solver.eigenmode.pep_linear) { - slepc = - std::make_unique(K->GetComm(), iodata.problem.verbose); - slepc->SetType(slepc::SlepcEigenSolver::Type::TOAR); + slepc = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); + slepc->SetType(slepc::SlepcEigenvalueSolver::Type::TOAR); } else { - slepc = std::make_unique(K->GetComm(), + slepc = std::make_unique(spaceop.GetComm(), iodata.problem.verbose); - slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + slepc->SetType(slepc::SlepcEigenvalueSolver::Type::KRYLOVSCHUR); } } else { - slepc = std::make_unique(K->GetComm(), iodata.problem.verbose); - slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + slepc = std::make_unique(spaceop.GetComm(), + iodata.problem.verbose); + slepc->SetType(slepc::SlepcEigenvalueSolver::Type::KRYLOVSCHUR); } - slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - slepc->SetOrthogonalization(iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); + slepc->SetProblemType(slepc::SlepcEigenvalueSolver::ProblemType::GEN_NON_HERMITIAN); + slepc->SetOrthogonalization( + iodata.solver.linear.gs_orthog_type == config::LinearSolverData::OrthogType::MGS, + iodata.solver.linear.gs_orthog_type == config::LinearSolverData::OrthogType::CGS2); eigen = std::move(slepc); #endif } - EigenSolverBase::ScaleType scale = iodata.solver.eigenmode.scale - ? EigenSolverBase::ScaleType::NORM_2 - : EigenSolverBase::ScaleType::NONE; + EigenvalueSolver::ScaleType scale = iodata.solver.eigenmode.scale + ? 
EigenvalueSolver::ScaleType::NORM_2 + : EigenvalueSolver::ScaleType::NONE; if (C) { eigen->SetOperators(*K, *C, *M, scale); @@ -157,136 +139,19 @@ void EigenSolver::Solve(std::vector> &mesh, Mpi::Print(" Scaling γ = {:.3e}, δ = {:.3e}\n", eigen->GetScalingGamma(), eigen->GetScalingDelta()); - const double target = iodata.solver.eigenmode.target; - const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, target); - std::unique_ptr A; - std::vector> P, AuxP; - std::unique_ptr ksp; - std::unique_ptr pc; -#if defined(PALACE_WITH_SLEPC) - auto *feast = dynamic_cast(eigen.get()); - if (feast) - { - // Configure the FEAST integration contour. The linear solvers are set up inside the - // solver. - if (iodata.solver.eigenmode.feast_contour_np > 1) - { - double contour_ub = iodata.solver.eigenmode.feast_contour_ub; - double f_contour_ub = - iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, contour_ub); - double contour_ar = iodata.solver.eigenmode.feast_contour_ar; - MFEM_VERIFY(contour_ub > target, - "FEAST eigensolver requires a specified upper frequency target!"); - MFEM_VERIFY( - contour_ar >= 0.0 && contour_ar <= 1.0, - "Contour aspect ratio for FEAST eigenvalue solver must be in range [0.0, 1.0]!"); - Mpi::Print(" FEAST search contour: σ_lower = {:.3e} GHz ({:.3e})\n" - " σ_upper = {:.3e} GHz ({:.3e})\n" - " AR = {:.1e}\n", - f_target, target, f_contour_ub, contour_ub, contour_ar); - if (C) - { - // Search for eigenvalues in the range λ = iσₗₒ to iσₕᵢ. - double h = (contour_ub - target) * contour_ar; - feast->SetContour(-0.5 * h, target, 0.5 * h, contour_ub, false, true); - } - else - { - // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues from μ = σₗₒ² to - // σₕᵢ². 
- double h = (contour_ub * contour_ub - target * target) * contour_ar; - feast->SetContour(target * target, -0.5 * h, contour_ub * contour_ub, 0.5 * h); - } - } - else - { - Mpi::Print(" FEAST search target: σ = {:.3e} GHz ({:.3e})\n", f_target, target); - if (C) - { - feast->SetContour(0.0, target, 0.0, target, false, true); - } - else - { - feast->SetContour(target * target, 0.0, target * target, 0.0); - } - } - } - else -#endif - { - Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); - if (C) - { - // Search for eigenvalues closest to λ = iσ. - eigen->SetShiftInvert(0.0, target); - if (type == config::EigenSolverData::Type::ARPACK) - { - // ARPACK searches based on eigenvalues of the transformed problem. The eigenvalue - // 1/(λ-σ) will be a large-magnitude negative imaginary number for an eigenvalue λ - // with frequency close to but not below the target σ. - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::SMALLEST_IMAGINARY); - } - else - { - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_IMAGINARY); - } - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - } - else - { - // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = σ². - eigen->SetShiftInvert(target * target, 0.0); - if (type == config::EigenSolverData::Type::ARPACK) - { - // ARPACK searches based on eigenvalues of the transformed problem. 1/(μ-σ²) will be - // a large-magnitude positive real number for an eigenvalue μ with frequency close - // to but below the target σ². - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_REAL); - } - else - { - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_REAL); - } - // eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::TARGET_MAGNITUDE); - } - - // Set up the linear solver required for solving systems involving the shifted operator - // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. 
The - // preconditioner for complex linear systems is constructed from a real approximation - // to the complex system matrix. - A = utils::GetSystemMatrixShell(target, *K, *M, C.get()); - spaceop.GetPreconditionerMatrix(target, P, AuxP); - - pc = std::make_unique(iodata, spaceop.GetDbcMarker(), - spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - pc->SetOperator(P, &AuxP); - - ksp = std::make_unique(A->GetComm(), iodata, "ksp_"); - ksp->SetPreconditioner(*pc); - ksp->SetOperator(*A); - ksp->SetTabLevel(1); - eigen->SetLinearSolver(*ksp); - } - // If desired, use an M-inner product for orthogonalizing the eigenvalue subspace. The // constructed matrix just references the real SPD part of the mass matrix (no copy is - // performed). - std::unique_ptr Mr; + // performed). Boundary conditions don't need to be eliminated here. + std::unique_ptr KM; if (iodata.solver.eigenmode.mass_orthog) { // Mpi::Print(" Basis uses M-inner product\n"); - // Mr = std::make_unique( - // mesh.back()->GetComm(), - // std::make_unique(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL))); + // KM = spaceop.GetInnerProductMatrix(0.0, 1.0, nullptr, M.get()); + // eigen->SetBMat(*KM); Mpi::Print(" Basis uses (K + M)-inner product\n"); - auto KM = std::make_unique(K->GetNumRows(), K->GetNumCols()); - KM->AddOperator(*K->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - KM->AddOperator(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - Mr = std::make_unique(mesh.back()->GetComm(), std::move(KM)); - - Mr->SetRealSymmetric(); - eigen->SetBMat(*Mr); + KM = spaceop.GetInnerProductMatrix(1.0, 1.0, K.get(), M.get()); + eigen->SetBMat(*KM); } // Construct a divergence-free projector so the eigenvalue solve is performed in the space @@ -296,57 +161,98 @@ void EigenSolver::Solve(std::vector> &mesh, { constexpr int divfree_verbose = 0; divfree = std::make_unique( - spaceop.GetMaterialOp(), spaceop.GetAuxBdrMarker(), spaceop.GetNDSpace(), - spaceop.GetH1Spaces(), 
iodata.solver.linear.divfree_tol, + spaceop.GetMaterialOp(), spaceop.GetNDSpace(), spaceop.GetH1Spaces(), + spaceop.GetAuxBdrTDofLists(), iodata.solver.linear.divfree_tol, iodata.solver.linear.divfree_max_it, divfree_verbose); - eigen->SetProjector(*divfree); + eigen->SetDivFreeProjector(*divfree); } // Set up the initial space for the eigenvalue solve. Satisfies boundary conditions and is // projected appropriately. if (iodata.solver.eigenmode.init_v0) { - petsc::PetscParVector v0(*K); + ComplexVector v0; if (iodata.solver.eigenmode.init_v0_const) { Mpi::Print(" Using constant starting vector\n"); - v0 = 1.0; + spaceop.GetConstantInitialVector(v0); } else { Mpi::Print(" Using random starting vector\n"); - v0.SetRandom(); + spaceop.GetRandomInitialVector(v0); } - v0.ZeroRows(spaceop.GetDbcTDofList()); if (divfree) { divfree->Mult(v0); } eigen->SetInitialSpace(v0); // Copies the vector - // { - // std::unique_ptr Grad = spaceop.GetGradMatrixPetsc(); - // petsc::PetscParVector r0(*Grad, false); - // Grad->MultTranspose(v0, r0); - // r0.Print(); - // } + + // Debug + // auto Grad = spaceop.GetComplexGradMatrix(); + // ComplexVector r0(Grad->Width()); + // Grad->MultTranspose(v0, r0); + // r0.Print(); } - timer.construct_time += timer.Lap(); - // Eigenvalue problem solve. - Mpi::Print("\n"); - int num_conv = 0; - num_conv = eigen->Solve(); -#if defined(PALACE_WITH_SLEPC) - if (!ksp) + // Configure the shift-and-invert strategy employed to solve for the eigenvalues + // closest to the specified target, σ. + const double target = iodata.solver.eigenmode.target; + const double f_target = iodata.DimensionalizeValue(IoData::ValueType::FREQUENCY, target); + Mpi::Print(" Shift-and-invert σ = {:.3e} GHz ({:.3e})\n", f_target, target); + if (C) { - const auto &feast = dynamic_cast(*eigen); - SaveMetadata(feast.GetTotalKspMult(), feast.GetTotalKspIter()); + // Search for eigenvalues closest to λ = iσ.
+ eigen->SetShiftInvert(1i * target); + if (type == config::EigenSolverData::Type::ARPACK) + { + // ARPACK searches based on eigenvalues of the transformed problem. The eigenvalue + // 1 / (λ - σ) will be a large-magnitude negative imaginary number for an eigenvalue + // λ with frequency close to but not below the target σ. + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::SMALLEST_IMAGINARY); + } + else + { + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::TARGET_IMAGINARY); + } } else -#endif { - SaveMetadata(ksp->GetTotalNumMult(), ksp->GetTotalNumIter()); + // Linear EVP has eigenvalues μ = -λ² = ω². Search for eigenvalues closest to μ = σ². + eigen->SetShiftInvert(target * target); + if (type == config::EigenSolverData::Type::ARPACK) + { + // ARPACK searches based on eigenvalues of the transformed problem. 1 / (μ - σ²) + // will be a large-magnitude positive real number for an eigenvalue μ with frequency + // close to but below the target σ². + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::LARGEST_REAL); + } + else + { + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::TARGET_REAL); + } } + + // Set up the linear solver required for solving systems involving the shifted operator + // (K - σ² M) or P(iσ) = (K + iσ C - σ² M) during the eigenvalue solve. The + // preconditioner for complex linear systems is constructed from a real approximation + // to the complex system matrix. + auto A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * target, + std::complex(-target * target, 0.0), K.get(), + C.get(), M.get()); + auto P = spaceop.GetPreconditionerMatrix(1.0, target, -target * target, + target); + + auto ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); + ksp->SetOperators(*A, *P); + eigen->SetLinearSolver(*ksp); + timer.construct_time += timer.Lap(); + + // Eigenvalue problem solve. 
+ Mpi::Print("\n"); + int num_conv = eigen->Solve(); + SaveMetadata(*ksp); timer.solve_time += timer.Lap(); // Postprocess the results. @@ -354,13 +260,9 @@ void EigenSolver::Solve(std::vector> &mesh, for (int i = 0; i < num_conv; i++) { // Get the eigenvalue and relative error. - double real, imag, error1, error2; - std::complex omega; - eigen->GetEigenvalue(i, real, imag); - eigen->GetError(i, EigenSolverBase::ErrorType::BACKWARD, error1); - eigen->GetError(i, EigenSolverBase::ErrorType::ABSOLUTE, error2); - omega.real(real); - omega.imag(imag); + std::complex omega = eigen->GetEigenvalue(i); + double error1 = eigen->GetError(i, EigenvalueSolver::ErrorType::BACKWARD); + double error2 = eigen->GetError(i, EigenvalueSolver::ErrorType::ABSOLUTE); if (!C) { // Linear EVP has eigenvalue μ = -λ² = ω². @@ -374,13 +276,15 @@ void EigenSolver::Solve(std::vector> &mesh, if (i == 0) { Mpi::Print(" Found {:d} converged eigenvalue{} (first = {:.3e}{:+.3e}i)\n", num_conv, - (num_conv > 1) ? "s" : "", real, imag); + (num_conv > 1) ? "s" : "", omega.real(), omega.imag()); Mpi::Print("\n"); } - // Set the internal GridFunctions in PostOperator for all postprocessing operations. + // Compute B = -1/(iω) ∇ x E on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. 
eigen->GetEigenvector(i, E); - PostOperator::GetBField(omega, *NegCurl, E, B); + Curl->Mult(E, B); + B *= -1.0 / (1i * omega); postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp(), omega.real()); diff --git a/palace/drivers/electrostaticsolver.cpp b/palace/drivers/electrostaticsolver.cpp index 855eab7a0..fd4888f0e 100644 --- a/palace/drivers/electrostaticsolver.cpp +++ b/palace/drivers/electrostaticsolver.cpp @@ -4,8 +4,8 @@ #include "electrostaticsolver.hpp" #include -#include "linalg/gmg.hpp" -#include "linalg/pc.hpp" +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" #include "models/laplaceoperator.hpp" #include "models/postoperator.hpp" #include "utils/communication.hpp" @@ -23,49 +23,13 @@ void ElectrostaticSolver::Solve(std::vector> &mes // dofs. The eliminated matrix is stored in order to construct the RHS vector for nonzero // prescribed BC values. timer.Lap(); - std::vector> K, Ke; LaplaceOperator laplaceop(iodata, mesh); - laplaceop.GetStiffnessMatrix(K, Ke); + auto K = laplaceop.GetStiffnessMatrix(); SaveMetadata(laplaceop.GetH1Space()); // Set up the linear solver. 
- std::unique_ptr pc = - ConfigurePreconditioner(iodata, laplaceop.GetDbcMarker(), laplaceop.GetH1Spaces()); - auto *gmg = dynamic_cast(pc.get()); - if (gmg) - { - gmg->SetOperator(K); - } - else - { - pc->SetOperator(*K.back()); - } - - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) - { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - mfem::CGSolver pcg(mesh.back()->GetComm()); - pcg.SetRelTol(iodata.solver.linear.tol); - pcg.SetMaxIter(iodata.solver.linear.max_it); - pcg.SetPrintLevel(print); - pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set separately - pcg.SetPreconditioner(*pc); - if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && - iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - { - Mpi::Warning("Electrostatic problem type always uses CG as the Krylov solver!\n"); - } + KspSolver ksp(iodata, laplaceop.GetH1Spaces()); + ksp.SetOperators(*K, *K); // Terminal indices are the set of boundaries over which to compute the capacitance // matrix. Terminal boundaries are aliases for ports. @@ -74,14 +38,14 @@ void ElectrostaticSolver::Solve(std::vector> &mes MFEM_VERIFY(nstep > 0, "No terminal boundaries specified for electrostatic simulation!"); // Right-hand side term and solution vector storage. - mfem::Vector RHS(K.back()->Height()); - std::vector V(nstep); + Vector RHS(K->Height()); + std::vector V(nstep); timer.construct_time += timer.Lap(); // Main loop over terminal boundaries. Mpi::Print("\nComputing electrostatic fields for {:d} terminal boundar{}\n", nstep, (nstep > 1) ? 
"ies" : "y"); - int step = 0, ksp_it = 0; + int step = 0; auto t0 = timer.Now(); for (const auto &[idx, data] : laplaceop.GetSources()) { @@ -91,23 +55,15 @@ void ElectrostaticSolver::Solve(std::vector> &mes // Form and solve the linear system for a prescribed nonzero voltage on the specified // terminal. Mpi::Print("\n"); - V[step].SetSize(RHS.Size()); - laplaceop.GetExcitationVector(idx, *K.back(), *Ke.back(), V[step], RHS); + laplaceop.GetExcitationVector(idx, *K, V[step], RHS); timer.construct_time += timer.Lap(); - pcg.Mult(RHS, V[step]); - if (!pcg.GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - pcg.GetNumIterations()); - } - ksp_it += pcg.GetNumIterations(); + ksp.Mult(RHS, V[step]); timer.solve_time += timer.Lap(); - // V[step]->Print(); Mpi::Print(" Sol. ||V|| = {:.6e} (||RHS|| = {:.6e})\n", - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), V[step], V[step])), - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); + linalg::Norml2(laplaceop.GetComm(), V[step]), + linalg::Norml2(laplaceop.GetComm(), RHS)); timer.postpro_time += timer.Lap(); // Next terminal. @@ -116,14 +72,13 @@ void ElectrostaticSolver::Solve(std::vector> &mes // Postprocess the capacitance matrix from the computed field solutions. const auto io_time_prev = timer.io_time; - SaveMetadata(nstep, ksp_it); + SaveMetadata(ksp); Postprocess(laplaceop, postop, V, timer); timer.postpro_time += timer.Lap() - (timer.io_time - io_time_prev); } void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator &postop, - const std::vector &V, - Timer &timer) const + const std::vector &V, Timer &timer) const { // Postprocess the Maxwell capacitance matrix. See p. 
97 of the COMSOL AC/DC Module manual // for the associated formulas based on the electric field energy based on a unit voltage @@ -131,11 +86,11 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & // charges from the prescribed voltage to get C directly as: // Q_i = ∫ ρ dV = ∫ ∇ ⋅ (ε E) dV = ∫ (ε E) ⋅ n dS // and C_ij = Q_i/V_j. The energy formulation avoids having to locally integrate E = -∇V. - std::unique_ptr NegGrad = laplaceop.GetNegGradMatrix(); + auto Grad = laplaceop.GetGradMatrix(); const std::map> &terminal_sources = laplaceop.GetSources(); int nstep = static_cast(terminal_sources.size()); mfem::DenseMatrix C(nstep), Cm(nstep); - mfem::Vector E(NegGrad->Height()), Vij(NegGrad->Width()); + Vector E(Grad->Height()), Vij(Grad->Width()); if (iodata.solver.electrostatic.n_post > 0) { Mpi::Print("\n"); @@ -143,8 +98,10 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & int i = 0; for (const auto &[idx, data] : terminal_sources) { - // Set the internal GridFunctions in PostOperator for all postprocessing operations. - PostOperator::GetEField(*NegGrad, V[i], E); + // Compute E = -∇V on the true dofs, and set the internal GridFunctions in PostOperator + // for all postprocessing operations. 
+ E = 0.0; + Grad->AddMult(V[i], E, -1.0); postop.SetEGridFunction(E); postop.SetVGridFunction(V[i]); double Ue = postop.GetEFieldEnergy(); @@ -178,8 +135,9 @@ void ElectrostaticSolver::Postprocess(LaplaceOperator &laplaceop, PostOperator & } else if (j > i) { - add(V[i], V[j], Vij); - PostOperator::GetEField(*NegGrad, Vij, E); + linalg::AXPBYPCZ(1.0, V[i], 1.0, V[j], 0.0, Vij); + E = 0.0; + Grad->AddMult(Vij, E, -1.0); postop.SetEGridFunction(E); double Ue = postop.GetEFieldEnergy(); C(i, j) = Ue - 0.5 * (C(i, i) + C(j, j)); diff --git a/palace/drivers/electrostaticsolver.hpp b/palace/drivers/electrostaticsolver.hpp index 58af068c6..d74a8812d 100644 --- a/palace/drivers/electrostaticsolver.hpp +++ b/palace/drivers/electrostaticsolver.hpp @@ -8,6 +8,7 @@ #include #include #include "drivers/basesolver.hpp" +#include "linalg/vector.hpp" namespace mfem { @@ -16,7 +17,6 @@ template class Array; class DenseMatrix; class ParMesh; -class Vector; } // namespace mfem @@ -35,7 +35,7 @@ class ElectrostaticSolver : public BaseSolver { private: void Postprocess(LaplaceOperator &laplaceop, PostOperator &postop, - const std::vector &V, Timer &timer) const; + const std::vector &V, Timer &timer) const; void PostprocessTerminals(const std::map> &terminal_sources, const mfem::DenseMatrix &C, const mfem::DenseMatrix &Cinv, diff --git a/palace/drivers/magnetostaticsolver.cpp b/palace/drivers/magnetostaticsolver.cpp index b00666ddc..38be4ae47 100644 --- a/palace/drivers/magnetostaticsolver.cpp +++ b/palace/drivers/magnetostaticsolver.cpp @@ -4,8 +4,8 @@ #include "magnetostaticsolver.hpp" #include -#include "linalg/gmg.hpp" -#include "linalg/pc.hpp" +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" #include "models/curlcurloperator.hpp" #include "models/postoperator.hpp" #include "models/surfacecurrentoperator.hpp" @@ -23,49 +23,13 @@ void MagnetostaticSolver::Solve(std::vector> &mes // handled eliminating the rows and columns of the system matrix for the corresponding // dofs. 
timer.Lap(); - std::vector> K; CurlCurlOperator curlcurlop(iodata, mesh); - curlcurlop.GetStiffnessMatrix(K); + auto K = curlcurlop.GetStiffnessMatrix(); SaveMetadata(curlcurlop.GetNDSpace()); // Set up the linear solver. - std::unique_ptr pc = - ConfigurePreconditioner(iodata, curlcurlop.GetDbcMarker(), curlcurlop.GetNDSpaces()); - auto *gmg = dynamic_cast(pc.get()); - if (gmg) - { - gmg->SetOperator(K); - } - else - { - pc->SetOperator(*K.back()); - } - - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) - { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - mfem::CGSolver pcg(mesh.back()->GetComm()); - pcg.SetRelTol(iodata.solver.linear.tol); - pcg.SetMaxIter(iodata.solver.linear.max_it); - pcg.SetPrintLevel(print); - pcg.SetOperator(*K.back()); // Call before SetPreconditioner, PC operator set separately - pcg.SetPreconditioner(*pc); - if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && - iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) - { - Mpi::Warning("Magnetostatic problem type always uses CG as the Krylov solver!\n"); - } + KspSolver ksp(iodata, curlcurlop.GetNDSpaces(), &curlcurlop.GetH1Spaces()); + ksp.SetOperators(*K, *K); // Terminal indices are the set of boundaries over which to compute the inductance matrix. PostOperator postop(iodata, curlcurlop, "magnetostatic"); @@ -74,14 +38,14 @@ void MagnetostaticSolver::Solve(std::vector> &mes "No surface current boundaries specified for magnetostatic simulation!"); // Source term and solution vector storage. - mfem::Vector RHS(K.back()->Height()); - std::vector A(nstep); + Vector RHS(K->Height()); + std::vector A(nstep); timer.construct_time += timer.Lap(); // Main loop over current source boundaries. 
Mpi::Print("\nComputing magnetostatic fields for {:d} source boundar{}\n", nstep, (nstep > 1) ? "ies" : "y"); - int step = 0, ksp_it = 0; + int step = 0; auto t0 = timer.Now(); for (const auto &[idx, data] : curlcurlop.GetSurfaceCurrentOp()) { @@ -95,19 +59,12 @@ void MagnetostaticSolver::Solve(std::vector> &mes curlcurlop.GetExcitationVector(idx, RHS); timer.construct_time += timer.Lap(); - pcg.Mult(RHS, A[step]); - if (!pcg.GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - pcg.GetNumIterations()); - } - ksp_it += pcg.GetNumIterations(); + ksp.Mult(RHS, A[step]); timer.solve_time += timer.Lap(); - // A[step]->Print(); Mpi::Print(" Sol. ||A|| = {:.6e} (||RHS|| = {:.6e})\n", - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), A[step], A[step])), - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), RHS, RHS))); + linalg::Norml2(curlcurlop.GetComm(), A[step]), + linalg::Norml2(curlcurlop.GetComm(), RHS)); timer.postpro_time += timer.Lap(); // Next source. @@ -116,14 +73,13 @@ void MagnetostaticSolver::Solve(std::vector> &mes // Postprocess the capacitance matrix from the computed field solutions. const auto io_time_prev = timer.io_time; - SaveMetadata(nstep, ksp_it); + SaveMetadata(ksp); Postprocess(curlcurlop, postop, A, timer); timer.postpro_time += timer.Lap() - (timer.io_time - io_time_prev); } void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator &postop, - const std::vector &A, - Timer &timer) const + const std::vector &A, Timer &timer) const { // Postprocess the Maxwell inductance matrix. See p. 97 of the COMSOL AC/DC Module manual // for the associated formulas based on the magnetic field energy based on a current @@ -132,12 +88,12 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator // Φ_i = ∫ B ⋅ n_j dS // and M_ij = Φ_i/I_j. The energy formulation avoids having to locally integrate B = // ∇ x A. 
- std::unique_ptr Curl = curlcurlop.GetCurlMatrix(); + auto Curl = curlcurlop.GetCurlMatrix(); const SurfaceCurrentOperator &surf_j_op = curlcurlop.GetSurfaceCurrentOp(); int nstep = static_cast(surf_j_op.Size()); mfem::DenseMatrix M(nstep), Mm(nstep); - mfem::Vector B(Curl->Height()), Aij(Curl->Width()); - mfem::Vector Iinc(nstep); + Vector B(Curl->Height()), Aij(Curl->Width()); + Vector Iinc(nstep); if (iodata.solver.magnetostatic.n_post > 0) { Mpi::Print("\n"); @@ -150,8 +106,9 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator Iinc(i) = data.GetExcitationCurrent(); MFEM_VERIFY(Iinc(i) > 0.0, "Zero current excitation for magnetostatic solver!"); - // Set the internal GridFunctions in PostOperator for all postprocessing operations. - PostOperator::GetBField(*Curl, A[i], B); + // Compute B = ∇ x A on the true dofs, and set the internal GridFunctions in + // PostOperator for all postprocessing operations. + Curl->Mult(A[i], B); postop.SetBGridFunction(B); postop.SetAGridFunction(A[i]); double Um = postop.GetHFieldEnergy(); @@ -185,8 +142,8 @@ void MagnetostaticSolver::Postprocess(CurlCurlOperator &curlcurlop, PostOperator } else if (j > i) { - add(A[i], A[j], Aij); - PostOperator::GetBField(*Curl, Aij, B); + linalg::AXPBYPCZ(1.0, A[i], 1.0, A[j], 0.0, Aij); + Curl->Mult(Aij, B); postop.SetBGridFunction(B); double Um = postop.GetHFieldEnergy(); M(i, j) = Um / (Iinc(i) * Iinc(j)) - diff --git a/palace/drivers/magnetostaticsolver.hpp b/palace/drivers/magnetostaticsolver.hpp index eb0fcd009..aab9e9668 100644 --- a/palace/drivers/magnetostaticsolver.hpp +++ b/palace/drivers/magnetostaticsolver.hpp @@ -7,13 +7,13 @@ #include #include #include "drivers/basesolver.hpp" +#include "linalg/vector.hpp" namespace mfem { class DenseMatrix; class ParMesh; -class Vector; } // namespace mfem @@ -33,7 +33,7 @@ class MagnetostaticSolver : public BaseSolver { private: void Postprocess(CurlCurlOperator &curlcurlop, PostOperator &postop, - const 
std::vector &A, Timer &timer) const; + const std::vector &A, Timer &timer) const; void PostprocessTerminals(const SurfaceCurrentOperator &surf_j_op, const mfem::DenseMatrix &M, const mfem::DenseMatrix &Minv, diff --git a/palace/drivers/transientsolver.cpp b/palace/drivers/transientsolver.cpp index 22e07f798..9370c2551 100644 --- a/palace/drivers/transientsolver.cpp +++ b/palace/drivers/transientsolver.cpp @@ -4,6 +4,7 @@ #include "transientsolver.hpp" #include +#include "linalg/vector.hpp" #include "models/lumpedportoperator.hpp" #include "models/postoperator.hpp" #include "models/spaceoperator.hpp" @@ -85,9 +86,10 @@ void TransientSolver::Solve(std::vector> &mesh, Mpi::Print("\nIt {:d}/{:d}: t = {:e} ns (elapsed time = {:.2e} s)\n", step, nstep - 1, ts, Timer::Duration(timer.Now() - t0).count()); - // Single time step t => t + dt. + // Single time step t -> t + dt. if (step == 0) { + Mpi::Print("\n"); t += delta_t; timeop.Init(); // Initial conditions } @@ -98,15 +100,13 @@ void TransientSolver::Solve(std::vector> &mesh, timer.solve_time += timer.Lap(); double E_elec = 0.0, E_mag = 0.0; - const mfem::Vector &E = timeop.GetE(); - const mfem::Vector &B = timeop.GetB(); + const Vector &E = timeop.GetE(); + const Vector &B = timeop.GetB(); postop.SetEGridFunction(E); postop.SetBGridFunction(B); postop.UpdatePorts(spaceop.GetLumpedPortOp()); - // E.Print(); Mpi::Print(" Sol. ||E|| = {:.6e}, ||B|| = {:.6e}\n", - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), E, E)), - std::sqrt(mfem::InnerProduct(mesh.back()->GetComm(), B, B))); + linalg::Norml2(spaceop.GetComm(), E), linalg::Norml2(spaceop.GetComm(), B)); if (!iodata.solver.transient.only_port_post) { E_elec = postop.GetEFieldEnergy(); @@ -124,7 +124,7 @@ void TransientSolver::Solve(std::vector> &mesh, // Increment time step. 
step++; } - SaveMetadata(timeop.GetTotalKspMult(), timeop.GetTotalKspIter()); + SaveMetadata(timeop.GetLinearSolver()); } std::function TransientSolver::GetTimeExcitation(bool dot) const diff --git a/palace/fem/coefficient.hpp b/palace/fem/coefficient.hpp index 18b53de55..95c0d3f86 100644 --- a/palace/fem/coefficient.hpp +++ b/palace/fem/coefficient.hpp @@ -22,6 +22,157 @@ namespace palace // comm on shared faces after a call to ExchangeFaceNbrData. // +enum class MaterialPropertyType +{ + INV_PERMEABILITY, + PERMITTIVITY_REAL, + PERMITTIVITY_IMAG, + PERMITTIVITY_ABS, + CONDUCTIVITY, + INV_LONDON_DEPTH, + INV_Z0, + INV_PERMEABILITY_C0 +}; + +enum class MeshElementType +{ + ELEMENT, + BDR_ELEMENT, + SUBMESH, + BDR_SUBMESH +}; + +// Returns the property value of the material for the given index. Two separate classes for +// domain element access and boundary element access, which returns the material property of +// the neighboring domain element. +template +class MaterialPropertyCoefficient : public mfem::MatrixCoefficient +{ +private: + const MaterialOperator &mat_op; + const double coef; + + static int GetAttribute(mfem::ElementTransformation &T) + { + if constexpr (ElemType == MeshElementType::SUBMESH || + ElemType == MeshElementType::BDR_SUBMESH) + { + MFEM_ASSERT( + T.ElementType == mfem::ElementTransformation::ELEMENT, + "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!"); + const mfem::ParSubMesh &submesh = *static_cast(T.mesh); + const mfem::ParMesh &mesh = *submesh.GetParent(); + if constexpr (ElemType == MeshElementType::SUBMESH) + { + MFEM_ASSERT( + const_cast(submesh).GetFrom() == + mfem::SubMesh::From::Domain, + "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!"); + return mesh.GetAttribute(submesh.GetParentElementIDMap()[T.ElementNo]); + } + else if constexpr (ElemType == MeshElementType::BDR_SUBMESH) + { + MFEM_ASSERT( + const_cast(submesh).GetFrom() == + mfem::SubMesh::From::Boundary, + "Invalid usage of 
MaterialPropertyCoefficient for given MeshElementType!"); + int i, o, iel1, iel2; + mesh.GetBdrElementFace(submesh.GetParentElementIDMap()[T.ElementNo], &i, &o); + mesh.GetFaceElements(i, &iel1, &iel2); +#ifdef MFEM_DEBUG + int info1, info2, nc; + mesh.GetFaceInfos(i, &info1, &info2, &nc); + MFEM_VERIFY(nc == -1 && iel2 < 0 && info2 < 0, + "MaterialPropertyCoefficient should only be used for exterior " + "(single-sided) boundaries!"); +#endif + return mesh.GetAttribute(iel1); + } + } + else if constexpr (ElemType == MeshElementType::ELEMENT) + { + MFEM_ASSERT( + T.ElementType == mfem::ElementTransformation::ELEMENT, + "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!"); + return T.Attribute; + } + else if constexpr (ElemType == MeshElementType::BDR_ELEMENT) + { + MFEM_ASSERT( + T.ElementType == mfem::ElementTransformation::BDR_ELEMENT, + "Invalid usage of MaterialPropertyCoefficient for given MeshElementType!"); + int i, o, iel1, iel2; + const mfem::Mesh &mesh = *T.mesh; + mesh.GetBdrElementFace(T.ElementNo, &i, &o); + mesh.GetFaceElements(i, &iel1, &iel2); +#ifdef MFEM_DEBUG + int info1, info2, nc; + mesh.GetFaceInfos(i, &info1, &info2, &nc); + MFEM_VERIFY(nc == -1 && iel2 < 0 && info2 < 0, + "MaterialPropertyCoefficient should only be used for exterior " + "(single-sided) boundaries!"); +#endif + return mesh.GetAttribute(iel1); + } + MFEM_ABORT("Unsupported element type in MaterialPropertyCoefficient!"); + return 0; + } + +public: + MaterialPropertyCoefficient(const MaterialOperator &op, double c = 1.0) + : mfem::MatrixCoefficient(op.SpaceDimension()), mat_op(op), coef(c) + { + } + + void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T, + const mfem::IntegrationPoint &ip) override + { + if constexpr (MatType == MaterialPropertyType::INV_PERMEABILITY) + { + K = mat_op.GetInvPermeability(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::PERMITTIVITY_REAL) + { + K = 
mat_op.GetPermittivityReal(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::PERMITTIVITY_IMAG) + { + K = mat_op.GetPermittivityImag(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::PERMITTIVITY_ABS) + { + K = mat_op.GetPermittivityAbs(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::CONDUCTIVITY) + { + K = mat_op.GetConductivity(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::INV_LONDON_DEPTH) + { + K = mat_op.GetInvLondonDepth(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::INV_Z0) + { + K = mat_op.GetInvImpedance(GetAttribute(T)); + } + else if constexpr (MatType == MaterialPropertyType::INV_PERMEABILITY_C0) + { + const int attr = GetAttribute(T); + K.SetSize(height, width); + Mult(mat_op.GetInvPermeability(attr), mat_op.GetLightSpeed(attr), K); + } + else + { + MFEM_ABORT("MaterialPropertyCoefficient::Eval() is not implemented for this " + "material property type!"); + } + K *= coef; + } +}; + +// Base class for coefficients which need to evaluate a GridFunction in a domain element +// attached to a boundary element, or both domain elements on either side for internal +// boundaries. 
class BdrGridFunctionCoefficient { private: @@ -40,8 +191,8 @@ class BdrGridFunctionCoefficient mfem::Vector *C1 = nullptr); public: - BdrGridFunctionCoefficient(mfem::ParMesh &msh, const std::map &l2s) - : mesh(msh), local_to_shared(l2s) + BdrGridFunctionCoefficient(mfem::ParMesh &mesh, const std::map &local_to_shared) + : mesh(mesh), local_to_shared(local_to_shared) { } @@ -61,12 +212,12 @@ class BdrCurrentVectorCoefficient : public mfem::VectorCoefficient, mutable mfem::Vector C1, W, VU, VL, nor; public: - BdrCurrentVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op, - const std::map &l2s) + BdrCurrentVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op) : mfem::VectorCoefficient(gf.ParFESpace()->GetParMesh()->SpaceDimension()), - BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), B(gf), mat_op(op), - C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()), VL(gf.VectorDim()), - nor(gf.VectorDim()) + BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), + op.GetLocalToSharedFaceMap()), + B(gf), mat_op(op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()), + VL(gf.VectorDim()), nor(gf.VectorDim()) { } @@ -74,7 +225,7 @@ class BdrCurrentVectorCoefficient : public mfem::VectorCoefficient, const mfem::IntegrationPoint &ip) override { // Get neighboring elements. 
- MFEM_VERIFY(vdim == 3, "BdrJVectorCoefficient expects a mesh in 3D space!"); + MFEM_ASSERT(vdim == 3, "BdrJVectorCoefficient expects a mesh in 3D space!"); mfem::ElementTransformation *T1, *T2; GetElementTransformations(T, ip, T1, T2, &C1); @@ -119,9 +270,9 @@ class BdrChargeCoefficient : public mfem::Coefficient, public BdrGridFunctionCoe mutable mfem::Vector C1, W, VU, VL, nor; public: - BdrChargeCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op, - const std::map &l2s) - : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), + BdrChargeCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op) + : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), + op.GetLocalToSharedFaceMap()), E(gf), mat_op(op), C1(gf.VectorDim()), W(gf.VectorDim()), VU(gf.VectorDim()), VL(gf.VectorDim()), nor(gf.VectorDim()) { @@ -162,9 +313,10 @@ class BdrFluxCoefficient : public mfem::Coefficient, public BdrGridFunctionCoeff public: BdrFluxCoefficient(const mfem::ParGridFunction &gf, mfem::Vector d, - const std::map &l2s) - : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), - B(gf), dir(std::move(d)), V(gf.VectorDim()), VL(gf.VectorDim()), nor(gf.VectorDim()) + const std::map &local_to_shared) + : mfem::Coefficient(), + BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), local_to_shared), B(gf), + dir(std::move(d)), V(gf.VectorDim()), VL(gf.VectorDim()), nor(gf.VectorDim()) { } @@ -191,13 +343,6 @@ class BdrFluxCoefficient : public mfem::Coefficient, public BdrGridFunctionCoeff } }; -// Computes a single-valued α Eᵀ E on boundaries from E given as a vector grid function. -// Uses the neighbor element on a user specified side to compute a single-sided value for -// potentially discontinuous solutions for an interior boundary element. The four cases -// correspond to a generic interface vs. 
specializations for metal-air, metal-substrate, -// and subtrate-air interfaces following: -// J. Wenner et al., Surface loss simulations of superconducting coplanar waveguide -// resonators, Appl. Phys. Lett. (2011). enum class DielectricInterfaceType { DEFAULT, @@ -206,6 +351,13 @@ enum class DielectricInterfaceType SA }; +// Computes a single-valued α Eᵀ E on boundaries from E given as a vector grid function. +// Uses the neighbor element on a user specified side to compute a single-sided value for +// potentially discontinuous solutions for an interior boundary element. The four cases +// correspond to a generic interface vs. specializations for metal-air, metal-substrate, +// and substrate-air interfaces following: +// J. Wenner et al., Surface loss simulations of superconducting coplanar waveguide +// resonators, Appl. Phys. Lett. (2011). template class DielectricInterfaceCoefficient : public mfem::Coefficient, public BdrGridFunctionCoefficient @@ -257,8 +409,9 @@ class DielectricInterfaceCoefficient : public mfem::Coefficient, public: DielectricInterfaceCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op, double ti, double ei, - mfem::Vector s, const std::map &l2s) - : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), + mfem::Vector s) + : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), + op.GetLocalToSharedFaceMap()), E(gf), mat_op(op), ts(ti), epsilon(ei), side(std::move(s)), C1(gf.VectorDim()), V(gf.VectorDim()), nor(gf.VectorDim()) { @@ -309,7 +462,7 @@ inline double DielectricInterfaceCoefficient::Eval( // Substrate-air interface: 0.5 * t * (ϵ_SA * |E_t|² + 1 / ϵ_MS * |E_n|²) .
double Vn = V * nor; - add(V, -Vn, nor, V); + V.Add(-Vn, nor); return 0.5 * ts * (epsilon * (V * V) + (Vn * Vn) / epsilon); } @@ -324,170 +477,30 @@ inline double DielectricInterfaceCoefficient:: return 0.5 * ts * epsilon * (V * V); } -// Returns the property value of the material for the given index. When called on a -// boundary element (external only), uses the material property of the neighboring domain -// element. -enum class MaterialPropertyType -{ - INV_PERMEABILITY, - PERMITTIVITY_REAL, - PERMITTIVITY_IMAG, - PERMITTIVITY_ABS, - CONDUCTIVITY, - INV_LONDON_DEPTH, - INV_Z0, - INV_PERMEABILITY_C0 -}; - -template -class MaterialPropertyCoefficient : public mfem::MatrixCoefficient -{ -private: - const MaterialOperator &mat_op; - const double coef; - - static int GetAttribute(mfem::ElementTransformation &T) - { - if (T.ElementType == mfem::ElementTransformation::ELEMENT) - { - return T.mesh->GetAttribute(T.ElementNo); - } - if (T.ElementType == mfem::ElementTransformation::BDR_ELEMENT) - { - int i, o; - int iel1, iel2, info1, info2; - const mfem::Mesh &mesh = *T.mesh; - mesh.GetBdrElementFace(T.ElementNo, &i, &o); - mesh.GetFaceElements(i, &iel1, &iel2); - mesh.GetFaceInfos(i, &info1, &info2); - MFEM_VERIFY(info2 < 0, "MaterialPropertyCoefficient should only be used for exterior " - "(single-sided) boundaries!"); - return mesh.GetAttribute(iel1); - } - MFEM_ABORT("Unsupported element type in MaterialPropertyCoefficient!"); - return 0; - } - -public: - MaterialPropertyCoefficient(const MaterialOperator &op, double c = 1.0) - : mfem::MatrixCoefficient(op.SpaceDimension()), mat_op(op), coef(c) - { - } - - void Eval(mfem::DenseMatrix &K, mfem::ElementTransformation &T, - const mfem::IntegrationPoint &ip) override - { - MFEM_ABORT("MaterialPropertyCoefficient::Eval() is not implemented for this " - "material property type!"); - } -}; - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const 
mfem::IntegrationPoint &ip) -{ - K = mat_op.GetInvPermeability(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - K = mat_op.GetPermittivityReal(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - K = mat_op.GetPermittivityImag(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - K = mat_op.GetPermittivityAbs(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - K = mat_op.GetConductivity(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - K = mat_op.GetInvLondonDepth(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - K = mat_op.GetInvImpedance(GetAttribute(T)); - K *= coef; -} - -template <> -inline void MaterialPropertyCoefficient::Eval( - mfem::DenseMatrix &K, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) -{ - const int attr = GetAttribute(T); - K.SetSize(height, width); - Mult(mat_op.GetInvPermeability(attr), mat_op.GetLightSpeed(attr), K); - K *= coef; -} - -// Returns the local energy density evaluated as 1/2 Dᴴ E or 1/2 Bᴴ H for real-valued -// material coefficients. 
For internal boundary elements, the solution is taken on the side -// of the element with the larger-valued material property (permittivity or permeability). enum class EnergyDensityType { ELECTRIC, - ELECTRIC_LOSS, MAGNETIC }; -enum class EnergyDensityValueType -{ - COMPLEX, - REAL -}; -template +// Returns the local energy density evaluated as 1/2 Dᴴ E or 1/2 Bᴴ H for real-valued +// material coefficients. For internal boundary elements, the solution is taken on the side +// of the element with the larger-valued material property (permittivity or permeability). +template class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctionCoefficient { private: - typedef - typename std::conditional::type - GridFunctionType; const GridFunctionType &U; const MaterialOperator &mat_op; mutable mfem::Vector V; double GetLocalEnergyDensity(mfem::ElementTransformation &T, - const mfem::IntegrationPoint &ip, int attr) - { - MFEM_ABORT( - "EnergyDensityCoefficient::GetLocalEnergyDensity() is not implemented for this " - "value type!"); - return 0.0; - } + const mfem::IntegrationPoint &ip, int attr); public: - EnergyDensityCoefficient(const GridFunctionType &gf, const MaterialOperator &op, - const std::map &l2s) - : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), + EnergyDensityCoefficient(const GridFunctionType &gf, const MaterialOperator &op) + : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), + op.GetLocalToSharedFaceMap()), U(gf), mat_op(op), V(gf.ParFESpace()->GetParMesh()->SpaceDimension()) { } @@ -497,7 +510,7 @@ class EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio if (T.ElementType == mfem::ElementTransformation::ELEMENT) { T.SetIntPoint(&ip); - return GetLocalEnergyDensity(T, ip, mesh.GetAttribute(T.ElementNo)); + return GetLocalEnergyDensity(T, ip, T.Attribute); } if (T.ElementType == mfem::ElementTransformation::BDR_ELEMENT) { @@ -524,7 +537,7 @@ class 
EnergyDensityCoefficient : public mfem::Coefficient, public BdrGridFunctio template <> inline double -EnergyDensityCoefficient:: +EnergyDensityCoefficient:: GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) { @@ -538,8 +551,7 @@ EnergyDensityCoefficient -inline double -EnergyDensityCoefficient:: +inline double EnergyDensityCoefficient:: GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) { @@ -547,32 +559,9 @@ EnergyDensityCoefficient -inline double EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) -{ - U.real().GetVectorValue(T, ip, V); - double res = mat_op.GetPermittivityImag(attr).InnerProduct(V, V); - U.imag().GetVectorValue(T, ip, V); - res += mat_op.GetPermittivityImag(attr).InnerProduct(V, V); - return -0.5 * res; -} - template <> inline double -EnergyDensityCoefficient:: - GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - int attr) -{ - U.GetVectorValue(T, ip, V); - return -0.5 * mat_op.GetPermittivityImag(attr).InnerProduct(V, V); -} - -template <> -inline double -EnergyDensityCoefficient:: +EnergyDensityCoefficient:: GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) { @@ -584,8 +573,7 @@ EnergyDensityCoefficient -inline double -EnergyDensityCoefficient:: +inline double EnergyDensityCoefficient:: GetLocalEnergyDensity(mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, int attr) { @@ -604,10 +592,11 @@ class BdrFieldVectorCoefficient : public mfem::VectorCoefficient, const MaterialOperator &mat_op; public: - BdrFieldVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op, - const std::map &l2s) + BdrFieldVectorCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op) : mfem::VectorCoefficient(gf.ParFESpace()->GetParMesh()->SpaceDimension()), - 
BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), U(gf), mat_op(op) + BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), + op.GetLocalToSharedFaceMap()), + U(gf), mat_op(op) { } @@ -639,9 +628,9 @@ class BdrFieldCoefficient : public mfem::Coefficient, public BdrGridFunctionCoef const MaterialOperator &mat_op; public: - BdrFieldCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op, - const std::map &l2s) - : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), l2s), + BdrFieldCoefficient(const mfem::ParGridFunction &gf, const MaterialOperator &op) + : mfem::Coefficient(), BdrGridFunctionCoefficient(*gf.ParFESpace()->GetParMesh(), + op.GetLocalToSharedFaceMap()), U(gf), mat_op(op) { } @@ -666,120 +655,6 @@ class BdrFieldCoefficient : public mfem::Coefficient, public BdrGridFunctionCoef } }; -// From mfem::GridFunction::GetVectorValue. -inline mfem::IntegrationPoint -BdrGridFunctionCoefficient::be_to_bfe(mfem::Geometry::Type geom, int o, - const mfem::IntegrationPoint &ip) -{ - mfem::IntegrationPoint fip = {}; - if (geom == mfem::Geometry::TRIANGLE) - { - if (o == 2) - { - fip.x = 1.0 - ip.x - ip.y; - fip.y = ip.x; - } - else if (o == 4) - { - fip.x = ip.y; - fip.y = 1.0 - ip.x - ip.y; - } - else - { - fip.x = ip.x; - fip.y = ip.y; - } - fip.z = ip.z; - } - else - { - if (o == 2) - { - fip.x = ip.y; - fip.y = 1.0 - ip.x; - } - else if (o == 4) - { - fip.x = 1.0 - ip.x; - fip.y = 1.0 - ip.y; - } - else if (o == 6) - { - fip.x = 1.0 - ip.y; - fip.y = ip.x; - } - else - { - fip.x = ip.x; - fip.y = ip.y; - } - fip.z = ip.z; - } - fip.weight = ip.weight; - fip.index = ip.index; - return fip; -} - -inline void BdrGridFunctionCoefficient::GetElementTransformations( - mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, - mfem::ElementTransformation *&T1, mfem::ElementTransformation *&T2, mfem::Vector *C1) -{ - // Return transformations for elements attached to boundary element T. 
T1 always exists - // but T2 may not if the element is truly a single-sided boundary. - MFEM_VERIFY(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT, - "Unexpected element type in BdrGridFunctionCoefficient!"); - MFEM_VERIFY(&mesh == T.mesh, "Invalid mesh for BdrGridFunctionCoefficient!"); - int i, o; - int iel1, iel2, info1, info2; - mesh.GetBdrElementFace(T.ElementNo, &i, &o); - mesh.GetFaceElements(i, &iel1, &iel2); - mesh.GetFaceInfos(i, &info1, &info2); - - mfem::FaceElementTransformations *FET; - if (info2 >= 0 && iel2 < 0) - { - // Face is shared with another subdomain. - const int &ishared = local_to_shared.at(i); - FET = mesh.GetSharedFaceTransformations(ishared); - } - else - { - // Face is either internal to the subdomain, or a true one-sided boundary. - FET = mesh.GetFaceElementTransformations(i); - } - - // Boundary elements and boundary faces may have different orientations so adjust the - // integration point if necessary. See mfem::GridFunction::GetValue and GetVectorValue. - mfem::IntegrationPoint fip = be_to_bfe(FET->GetGeometryType(), o, ip); - FET->SetAllIntPoints(&fip); - T1 = &FET->GetElement1Transformation(); - T2 = (info2 >= 0) ? &FET->GetElement2Transformation() : nullptr; - - // If desired, get vector pointing from center of boundary element into element 1 for - // orientations. - if (C1) - { - mfem::Vector CF(T.GetSpaceDim()); - mfem::ElementTransformation &TF = *mesh.GetFaceTransformation(i); - TF.Transform(mfem::Geometries.GetCenter(mesh.GetFaceGeometry(i)), CF); - - C1->SetSize(T.GetSpaceDim()); - T1->Transform(mfem::Geometries.GetCenter(T1->GetGeometryType()), *C1); - *C1 -= CF; // Points into element 1 from the face - } -} - -inline void BdrGridFunctionCoefficient::GetNormal(mfem::ElementTransformation &T, - const mfem::IntegrationPoint &ip, - mfem::Vector &normal) -{ - // Return normal vector to the boundary element at the provided integration point. 
- normal.SetSize(T.GetSpaceDim()); - T.SetIntPoint(&ip); - mfem::CalcOrtho(T.Jacobian(), normal); - normal /= normal.Norml2(); -} - // Wraps a mfem::MatrixCoefficient to compute a scalar coefficient as nᵀ M n. Only works // for square matrix coefficients of size equal to the spatial dimension. class NormalProjectedCoefficient : public mfem::Coefficient @@ -1053,6 +928,120 @@ class SumMatrixCoefficient : public mfem::MatrixCoefficient } }; +// From mfem::GridFunction::GetVectorValue. +inline mfem::IntegrationPoint +BdrGridFunctionCoefficient::be_to_bfe(mfem::Geometry::Type geom, int o, + const mfem::IntegrationPoint &ip) +{ + mfem::IntegrationPoint fip = {}; + if (geom == mfem::Geometry::TRIANGLE) + { + if (o == 2) + { + fip.x = 1.0 - ip.x - ip.y; + fip.y = ip.x; + } + else if (o == 4) + { + fip.x = ip.y; + fip.y = 1.0 - ip.x - ip.y; + } + else + { + fip.x = ip.x; + fip.y = ip.y; + } + fip.z = ip.z; + } + else + { + if (o == 2) + { + fip.x = ip.y; + fip.y = 1.0 - ip.x; + } + else if (o == 4) + { + fip.x = 1.0 - ip.x; + fip.y = 1.0 - ip.y; + } + else if (o == 6) + { + fip.x = 1.0 - ip.y; + fip.y = ip.x; + } + else + { + fip.x = ip.x; + fip.y = ip.y; + } + fip.z = ip.z; + } + fip.weight = ip.weight; + fip.index = ip.index; + return fip; +} + +inline void BdrGridFunctionCoefficient::GetElementTransformations( + mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip, + mfem::ElementTransformation *&T1, mfem::ElementTransformation *&T2, mfem::Vector *C1) +{ + // Return transformations for elements attached to boundary element T. T1 always exists + // but T2 may not if the element is truly a single-sided boundary. 
+ MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT, + "Unexpected element type in BdrGridFunctionCoefficient!"); + MFEM_ASSERT(&mesh == T.mesh, "Invalid mesh for BdrGridFunctionCoefficient!"); + int i, o; + int iel1, iel2, info1, info2; + mesh.GetBdrElementFace(T.ElementNo, &i, &o); + mesh.GetFaceElements(i, &iel1, &iel2); + mesh.GetFaceInfos(i, &info1, &info2); // XX TODO: Nonconforming support + + mfem::FaceElementTransformations *FET; + if (info2 >= 0 && iel2 < 0) + { + // Face is shared with another subdomain. + const int &ishared = local_to_shared.at(i); + FET = mesh.GetSharedFaceTransformations(ishared); + } + else + { + // Face is either internal to the subdomain, or a true one-sided boundary. + FET = mesh.GetFaceElementTransformations(i); + } + + // Boundary elements and boundary faces may have different orientations so adjust the + // integration point if necessary. See mfem::GridFunction::GetValue and GetVectorValue. + mfem::IntegrationPoint fip = be_to_bfe(FET->GetGeometryType(), o, ip); + FET->SetAllIntPoints(&fip); + T1 = &FET->GetElement1Transformation(); + T2 = (info2 >= 0) ? &FET->GetElement2Transformation() : nullptr; + + // If desired, get vector pointing from center of boundary element into element 1 for + // orientations. + if (C1) + { + mfem::Vector CF(T.GetSpaceDim()); + mfem::ElementTransformation &TF = *mesh.GetFaceTransformation(i); + TF.Transform(mfem::Geometries.GetCenter(mesh.GetFaceGeometry(i)), CF); + + C1->SetSize(T.GetSpaceDim()); + T1->Transform(mfem::Geometries.GetCenter(T1->GetGeometryType()), *C1); + *C1 -= CF; // Points into element 1 from the face + } +} + +inline void BdrGridFunctionCoefficient::GetNormal(mfem::ElementTransformation &T, + const mfem::IntegrationPoint &ip, + mfem::Vector &normal) +{ + // Return normal vector to the boundary element at the provided integration point. 
+ normal.SetSize(T.GetSpaceDim()); + T.SetIntPoint(&ip); + mfem::CalcOrtho(T.Jacobian(), normal); + normal /= normal.Norml2(); +} + } // namespace palace #endif // PALACE_FEM_COEFFICIENT_HPP diff --git a/palace/fem/freqdomain.hpp b/palace/fem/freqdomain.hpp deleted file mode 100644 index 7328a2d88..000000000 --- a/palace/fem/freqdomain.hpp +++ /dev/null @@ -1,74 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_FEM_FREQ_DOMAIN_HPP -#define PALACE_FEM_FREQ_DOMAIN_HPP - -#include -#include "fem/operator.hpp" -#include "linalg/petsc.hpp" - -namespace palace::utils -{ - -// -// Some utility methods for frequency domain problems. -// - -// Convinience method for constructing a the frequency domain matrix-vector product with the -// operator K + iω C - ω² M + A2(ω). -inline std::unique_ptr GetSystemMatrixShell( - double omega, const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, - const petsc::PetscParMatrix *C = nullptr, const petsc::PetscParMatrix *A2 = nullptr) -{ - constexpr auto ExtractReal = petsc::PetscParMatrix::ExtractStructure::REAL; - constexpr auto ExtractImag = petsc::PetscParMatrix::ExtractStructure::IMAGINARY; - auto Ar = std::make_unique(K.GetNumRows(), K.GetNumCols()); - auto Ai = std::make_unique(K.GetNumRows(), K.GetNumCols()); - if (K.HasReal()) - { - Ar->AddOperator(*K.GetOperator(ExtractReal)); - } - if (K.HasImag()) - { - Ai->AddOperator(*K.GetOperator(ExtractImag)); - } - if (M.HasReal()) - { - Ar->AddOperator(*M.GetOperator(ExtractReal), -omega * omega); - } - if (M.HasImag()) - { - Ai->AddOperator(*M.GetOperator(ExtractImag), -omega * omega); - } - if (C) - { - if (C->HasReal()) - { - Ai->AddOperator(*C->GetOperator(ExtractReal), omega); - } - if (C->HasImag()) - { - Ar->AddOperator(*C->GetOperator(ExtractImag), -omega); - } - } - if (A2) - { - if (A2->HasReal()) - { - Ar->AddOperator(*A2->GetOperator(ExtractReal)); - } - if (A2->HasImag()) - { - 
Ai->AddOperator(*A2->GetOperator(ExtractImag)); - } - } - auto A = - std::make_unique(K.GetComm(), std::move(Ar), std::move(Ai)); - A->SetSymmetric(); - return A; -} - -} // namespace palace::utils - -#endif // PALACE_FEM_FREQ_DOMAIN_HPP diff --git a/palace/fem/integrator.hpp b/palace/fem/integrator.hpp index 78b87c9ca..b96938d9f 100644 --- a/palace/fem/integrator.hpp +++ b/palace/fem/integrator.hpp @@ -104,7 +104,7 @@ class BoundaryLFIntegrator : public mfem::LinearFormIntegrator, fe.CalcShape(ip, shape); double val = ip.weight * Tr.Weight() * Q.Eval(Tr, ip); - add(elvect, val, shape, elvect); + elvect.Add(val, shape); } } }; diff --git a/palace/fem/interpolation.hpp b/palace/fem/interpolator.hpp similarity index 97% rename from palace/fem/interpolation.hpp rename to palace/fem/interpolator.hpp index f0a9f7fbb..e3d736950 100644 --- a/palace/fem/interpolation.hpp +++ b/palace/fem/interpolator.hpp @@ -1,8 +1,8 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -#ifndef PALACE_FEM_INTERPOLATION_HPP -#define PALACE_FEM_INTERPOLATION_HPP +#ifndef PALACE_FEM_INTERPOLATOR_HPP +#define PALACE_FEM_INTERPOLATOR_HPP #include #include @@ -126,4 +126,4 @@ class InterpolationOperator } // namespace palace -#endif // PALACE_FEM_INTERPOLATION_HPP +#endif // PALACE_FEM_INTERPOLATOR_HPP diff --git a/palace/fem/lumpedelement.hpp b/palace/fem/lumpedelement.hpp index 4e98a6e81..188406809 100644 --- a/palace/fem/lumpedelement.hpp +++ b/palace/fem/lumpedelement.hpp @@ -8,6 +8,7 @@ #include #include #include "fem/integrator.hpp" +#include "utils/communication.hpp" #include "utils/geodata.hpp" namespace palace @@ -28,14 +29,17 @@ class LumpedElementData double GetArea(mfem::ParFiniteElementSpace &fespace) { - mfem::ParGridFunction ones(&fespace); - ones.mfem::Vector::operator=(1.0); - mfem::ParLinearForm s(&fespace); mfem::ConstantCoefficient one_func(1.0); + mfem::LinearForm s(&fespace); s.AddBoundaryIntegrator(new 
BoundaryLFIntegrator(one_func), attr_marker); - s.UseFastAssembly(true); + s.UseFastAssembly(false); s.Assemble(); - return s(ones); + + mfem::GridFunction ones(&fespace); + ones = 1.0; + double dot = s * ones; + Mpi::GlobalSum(1, &dot, fespace.GetComm()); + return dot; } public: diff --git a/palace/fem/multigrid.hpp b/palace/fem/multigrid.hpp index fa361a819..78edd63c8 100644 --- a/palace/fem/multigrid.hpp +++ b/palace/fem/multigrid.hpp @@ -7,7 +7,8 @@ #include #include #include -#include "fem/operator.hpp" +#include "linalg/operator.hpp" +#include "linalg/rap.hpp" namespace palace::utils { @@ -66,33 +67,53 @@ std::vector> ConstructFECollections(bool pc_pmg, b return fecs; } -// Construct a heirarchy of finite element spaces given a sequence of meshes and +// Construct a hierarchy of finite element spaces given a sequence of meshes and // finite element collections. Dirichlet boundary conditions are additionally // marked. template mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy( const std::vector> &mesh, const std::vector> &fecs, - const mfem::Array &dbc_marker) + const mfem::Array *dbc_marker = nullptr, + std::vector> *dbc_tdof_lists = nullptr) { - MFEM_VERIFY(!mesh.empty() && !fecs.empty(), + MFEM_VERIFY(!mesh.empty() && !fecs.empty() && + (!dbc_tdof_lists || dbc_tdof_lists->empty()), "Empty mesh or FE collection for FE space construction!"); auto *fespace = new mfem::ParFiniteElementSpace(mesh[0].get(), fecs[0].get()); + if (dbc_marker && dbc_tdof_lists) + { + fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back()); + } mfem::ParFiniteElementSpaceHierarchy fespaces(mesh[0].get(), fespace, false, true); + + // XX TODO: LibCEED transfer operators! 
+ // h-refinement for (std::size_t l = 1; l < mesh.size(); l++) { fespace = new mfem::ParFiniteElementSpace(mesh[l].get(), fecs[0].get()); - auto *P = - new ZeroWrapTransferOperator(fespaces.GetFinestFESpace(), *fespace, dbc_marker); + if (dbc_marker && dbc_tdof_lists) + { + fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back()); + } + auto *P = new ParOperator( + std::make_unique(fespaces.GetFinestFESpace(), *fespace), + fespaces.GetFinestFESpace(), *fespace, true); fespaces.AddLevel(mesh[l].get(), fespace, P, false, true, true); } + // p-refinement for (std::size_t l = 1; l < fecs.size(); l++) { fespace = new mfem::ParFiniteElementSpace(mesh.back().get(), fecs[l].get()); - auto *P = - new ZeroWrapTransferOperator(fespaces.GetFinestFESpace(), *fespace, dbc_marker); + if (dbc_marker && dbc_tdof_lists) + { + fespace->GetEssentialTrueDofs(*dbc_marker, dbc_tdof_lists->emplace_back()); + } + auto *P = new ParOperator( + std::make_unique(fespaces.GetFinestFESpace(), *fespace), + fespaces.GetFinestFESpace(), *fespace, true); fespaces.AddLevel(mesh.back().get(), fespace, P, false, true, true); } return fespaces; @@ -103,12 +124,18 @@ mfem::ParFiniteElementSpaceHierarchy ConstructFiniteElementSpaceHierarchy( // conditions as they need not be incorporated in any inter-space projectors. 
template mfem::ParFiniteElementSpaceHierarchy -ConstructFiniteElementSpaceHierarchy(mfem::ParMesh &mesh, const FECollection &fec) +ConstructFiniteElementSpaceHierarchy(mfem::ParMesh &mesh, const FECollection &fec, + const mfem::Array *dbc_marker = nullptr, + mfem::Array *dbc_tdof_list = nullptr) { auto *fespace = new mfem::ParFiniteElementSpace(&mesh, &fec); + if (dbc_marker && dbc_tdof_list) + { + fespace->GetEssentialTrueDofs(*dbc_marker, *dbc_tdof_list); + } return mfem::ParFiniteElementSpaceHierarchy(&mesh, fespace, false, true); } } // namespace palace::utils -#endif // PALACE_FEM_MULTIGRID_HPP \ No newline at end of file +#endif // PALACE_FEM_MULTIGRID_HPP diff --git a/palace/fem/operator.hpp b/palace/fem/operator.hpp deleted file mode 100644 index 589def462..000000000 --- a/palace/fem/operator.hpp +++ /dev/null @@ -1,105 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_FEM_OPERATOR_HPP -#define PALACE_FEM_OPERATOR_HPP - -#include -#include -#include -#include - -namespace palace -{ - -// -// Derived operator classes extending those already in MFEM. -// - -// Wraps a transfer operator between finite element spaces to account for eliminated -// essential BC. 
-class ZeroWrapTransferOperator : public mfem::Operator -{ -private: - const mfem::TrueTransferOperator P; - mfem::Array coarse_dbc_tdof_list; - -public: - ZeroWrapTransferOperator(mfem::ParFiniteElementSpace &coarse_fespace, - mfem::ParFiniteElementSpace &fine_fespace, - const mfem::Array &dbc_marker) - : P(coarse_fespace, fine_fespace) - { - height = P.Height(); - width = P.Width(); - coarse_fespace.GetEssentialTrueDofs(dbc_marker, coarse_dbc_tdof_list); - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { P.Mult(x, y); } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - P.MultTranspose(x, y); - y.SetSubVector(coarse_dbc_tdof_list, 0.0); - } -}; - -// Wraps a reference to an existing, externally owned operator. -class ReferenceOperator : public mfem::Operator -{ -private: - const mfem::Operator &op; - -public: - ReferenceOperator(const mfem::Operator &oper) - : mfem::Operator(oper.Height(), oper.Width()), op(oper) - { - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { op.Mult(x, y); } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - op.MultTranspose(x, y); - } -}; - -// Wrap a sequence of operators of the same dimensions and optional coefficients. 
-class SumOperator : public mfem::Operator -{ -private: - std::vector, double>> op; - -public: - SumOperator(int s) : mfem::Operator(s) {} - SumOperator(int h, int w) : mfem::Operator(h, w) {} - - void AddOperator(const mfem::Operator &oper, double c = 1.0) - { - MFEM_VERIFY(oper.Height() == height && oper.Width() == width, - "Invalid Operator dimensions for SumOperator!"); - op.emplace_back(std::cref(oper), c); - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - y = 0.0; - for (const auto &[oper, c] : op) - { - oper.get().AddMult(x, y, c); - } - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - y = 0.0; - for (const auto &[oper, c] : op) - { - oper.get().AddMultTranspose(x, y, c); - } - } -}; - -} // namespace palace - -#endif // PALACE_FEM_OPERATOR_HPP diff --git a/palace/linalg/CMakeLists.txt b/palace/linalg/CMakeLists.txt index e2f45b969..8a4647129 100644 --- a/palace/linalg/CMakeLists.txt +++ b/palace/linalg/CMakeLists.txt @@ -14,14 +14,16 @@ target_sources(${TARGET_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/curlcurl.cpp ${CMAKE_CURRENT_SOURCE_DIR}/distrelaxation.cpp ${CMAKE_CURRENT_SOURCE_DIR}/divfree.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/feast.cpp ${CMAKE_CURRENT_SOURCE_DIR}/gmg.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/hypre.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/jacobi.cpp ${CMAKE_CURRENT_SOURCE_DIR}/ksp.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/iterative.cpp ${CMAKE_CURRENT_SOURCE_DIR}/mumps.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/pc.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/petsc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/operator.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/rap.cpp ${CMAKE_CURRENT_SOURCE_DIR}/slepc.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/solver.cpp ${CMAKE_CURRENT_SOURCE_DIR}/strumpack.cpp ${CMAKE_CURRENT_SOURCE_DIR}/superlu.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/vector.cpp ) diff --git a/palace/linalg/amg.cpp b/palace/linalg/amg.cpp index 9d5facd89..f34ef25b7 100644 --- a/palace/linalg/amg.cpp +++ b/palace/linalg/amg.cpp @@ -3,6 +3,8 @@ #include 
"amg.hpp" +#include "linalg/rap.hpp" + namespace palace { @@ -12,26 +14,30 @@ BoomerAmgSolver::BoomerAmgSolver(int cycle_it, int smooth_it, int print) SetPrintLevel((print > 1) ? print - 1 : 0); SetMaxIter(cycle_it); SetTol(0.0); - SetNumSweeps(smooth_it); - Init(); -} -void BoomerAmgSolver::Init() -{ + // Set additional BoomerAMG options. double theta = 0.5; // AMG strength parameter = 0.25 is 2D optimal (0.5-0.8 for 3D) int agg_levels = 1; // Number of aggressive coarsening levels + SetStrengthThresh(theta); SetAggressiveCoarsening(agg_levels); -} + HYPRE_BoomerAMGSetNumSweeps(*this, smooth_it); -void BoomerAmgSolver::SetNumSweeps(int relax_sweeps) -{ - HYPRE_BoomerAMGSetNumSweeps(*this, relax_sweeps); + // int coarse_relax_type = 8; // l1-symm. GS (inexact coarse solve) + // HYPRE_BoomerAMGSetCycleRelaxType(*this, coarse_relax_type, 3); } -void BoomerAmgSolver::SetCoarseRelaxType(int relax_type) +void BoomerAmgSolver::SetOperator(const Operator &op) { - HYPRE_BoomerAMGSetCycleRelaxType(*this, relax_type, 3); + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + mfem::HypreBoomerAMG::SetOperator(PtAP->ParallelAssemble()); + } + else + { + mfem::HypreBoomerAMG::SetOperator(op); + } } } // namespace palace diff --git a/palace/linalg/amg.hpp b/palace/linalg/amg.hpp index 30c7f8c36..975930fb4 100644 --- a/palace/linalg/amg.hpp +++ b/palace/linalg/amg.hpp @@ -5,6 +5,7 @@ #define PALACE_LINALG_AMG_HPP #include +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -15,25 +16,15 @@ namespace palace // class BoomerAmgSolver : public mfem::HypreBoomerAMG { -private: - // Helper function for setting common settings. - void Init(); - public: BoomerAmgSolver(int cycle_it = 1, int smooth_it = 1, int print = 0); BoomerAmgSolver(const IoData &iodata, int print) - : BoomerAmgSolver((iodata.solver.linear.mat_gmg) ? 1 : iodata.solver.linear.mg_cycle_it, + : BoomerAmgSolver(iodata.solver.linear.pc_mg ? 
1 : iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, print) { } - // Set the number of smoothing iterations to be performed at each level. - void SetNumSweeps(int relax_sweeps); - - // Set the relaxation type on the coarsest level. Useful for specifying to not use a - // direct solve when the coarse matrix may be singular(relax_type = 8 is the AMS - // default). - void SetCoarseRelaxType(int relax_type); + void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/ams.cpp b/palace/linalg/ams.cpp index 400abf987..1feb6a34b 100644 --- a/palace/linalg/ams.cpp +++ b/palace/linalg/ams.cpp @@ -3,39 +3,40 @@ #include "ams.hpp" -#include "linalg/hypre.hpp" +#include "linalg/rap.hpp" namespace palace { HypreAmsSolver::HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace, int cycle_it, + mfem::ParFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it, int agg_coarsen, bool vector_interp, - bool op_singular, int print_lvl) + bool op_singular, int print) : mfem::HypreSolver(), // From the Hypre docs for AMS: cycles 1, 5, 8, 11, 13 are fastest, 7 yields fewest its // (MFEM default is 13). 14 is similar to 11/13 but is cheaper in that is uses additive // scalar Pi-space corrections. cycle_type(vector_interp ? 1 : 14), - // Control levels of aggressive coarsening based on problem type: SPD/ semi-definite - // curl-curl operators are easier than indefinite frequency domain problems. When used - // as the coarse solver of geometric multigrid, always do only a single V-cycle. - sdim(nd_fespace.GetParMesh()->SpaceDimension()), ams_it(cycle_it), - ams_smooth_it(smooth_it), + space_dim(nd_fespace.GetParMesh()->SpaceDimension()), + // When used as the coarse solver of geometric multigrid, always do only a single + // V-cycle. + ams_it(cycle_it), ams_smooth_it(smooth_it), // Use no aggressive coarsening for frequency domain problems when the preconditioner // matrix is not SPD. 
- agg_levels(agg_coarsen), ams_singular(op_singular), - print((print_lvl > 1) ? print_lvl - 1 : 0) + amg_agg_levels(agg_coarsen), + // If we know the operator is singular (no mass matrix, for magnetostatic problems), + // internally the AMS solver will avoid G-space corrections. + ams_singular(op_singular), print((print > 1) ? print - 1 : 0) { // From MFEM: The AMS preconditioner may sometimes require inverting singular matrices - // with BoomerAMG, which are handled correctly in hypre's Solve method, but can produce - // hypre errors in the Setup (specifically in the l1 row norm computation). See the + // with BoomerAMG, which are handled correctly in Hypre's Solve method, but can produce + // Hypre errors in the Setup (specifically in the row l1-norm computation). See the // documentation of MFEM's SetErrorMode() for more details. error_mode = IGNORE_HYPRE_ERRORS; // Set up the AMS solver. - Initialize(); ConstructAuxiliaryMatrices(nd_fespace, h1_fespace); + InitializeSolver(); } HypreAmsSolver::~HypreAmsSolver() @@ -43,13 +44,123 @@ HypreAmsSolver::~HypreAmsSolver() HYPRE_AMSDestroy(ams); } -void HypreAmsSolver::Initialize() +void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace) +{ + // Set up the auxiliary space objects for the preconditioner. Mostly the same as MFEM's + // HypreAMS:Init. Start with the discrete gradient matrix. + { + // XX TODO: Partial assembly option? + auto grad = std::make_unique(&h1_fespace, &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + grad->Assemble(); + grad->Finalize(); + ParOperator RAP_G(std::move(grad), h1_fespace, nd_fespace, true); + G = RAP_G.StealParallelAssemble(); + } + + // Vertex coordinates for the lowest order case, or Nedelec interpolation matrix or + // matrices for order > 1. 
+ mfem::ParMesh &mesh = *h1_fespace.GetParMesh(); + if (h1_fespace.GetMaxElementOrder() == 1) + { + mfem::ParGridFunction x_coord(&h1_fespace), y_coord(&h1_fespace), z_coord(&h1_fespace); + if (mesh.GetNodes()) + { + mesh.GetNodes()->GetNodalValues(x_coord, 1); + MFEM_VERIFY(x_coord.Size() == h1_fespace.GetVSize(), + "Unexpected size for vertex coordinates in AMS setup!"); + if (space_dim > 1) + { + mesh.GetNodes()->GetNodalValues(y_coord, 2); + } + if (space_dim > 2) + { + mesh.GetNodes()->GetNodalValues(z_coord, 3); + } + } + else + { + MFEM_VERIFY(x_coord.Size() == mesh.GetNV(), + "Unexpected size for vertex coordinates in AMS setup!"); + for (int i = 0; i < mesh.GetNV(); i++) + { + x_coord(i) = mesh.GetVertex(i)[0]; + if (space_dim > 1) + { + y_coord(i) = mesh.GetVertex(i)[1]; + } + if (space_dim > 2) + { + z_coord(i) = mesh.GetVertex(i)[2]; + } + } + } + x.reset(x_coord.ParallelProject()); + x->HypreReadWrite(); + if (space_dim > 1) + { + y.reset(y_coord.ParallelProject()); + y->HypreReadWrite(); + } + if (space_dim > 2) + { + z.reset(z_coord.ParallelProject()); + z->HypreReadWrite(); + } + } + else + { + { + // XX TODO: Partial assembly option? + mfem::ParFiniteElementSpace h1d_fespace(&mesh, h1_fespace.FEColl(), space_dim, + mfem::Ordering::byVDIM); + auto pi = std::make_unique(&h1d_fespace, &nd_fespace); + pi->AddDomainInterpolator(new mfem::IdentityInterpolator); + pi->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + pi->Assemble(); + pi->Finalize(); + ParOperator RAP_Pi(std::move(pi), h1d_fespace, nd_fespace, true); + Pi = RAP_Pi.StealParallelAssemble(); + } + if (cycle_type >= 10) + { + // Get blocks of Pi corresponding to each component, and free Pi. 
+ mfem::Array2D Pi_blocks(1, space_dim); + Pi->GetBlocks(Pi_blocks, false, true); + Pix.reset(Pi_blocks(0, 0)); + if (space_dim > 1) + { + Piy.reset(Pi_blocks(0, 1)); + } + if (space_dim > 2) + { + Piz.reset(Pi_blocks(0, 2)); + } + Pi.reset(); + } + } +} + +void HypreAmsSolver::InitializeSolver() { // Create the Hypre solver object. HYPRE_AMSCreate(&ams); - HYPRE_AMSSetDimension(ams, sdim); + HYPRE_AMSSetDimension(ams, space_dim); HYPRE_AMSSetCycleType(ams, cycle_type); + // Control printing and number of iterations for use as a preconditioner. + HYPRE_AMSSetPrintLevel(ams, print); + HYPRE_AMSSetMaxIter(ams, ams_it); + // HYPRE_AMSSetTol(ams, 1.0e-16); // Avoid issues with zero RHS + + // Set this option when solving a curl-curl problem with zero mass term. + if (ams_singular) + { + HYPRE_AMSSetBetaPoissonMatrix(ams, nullptr); + } + // Set additional AMS options. int coarsen_type = 10; // 10 = HMIS, 8 = PMIS, 6 = Falgout, 0 = CLJP double theta = 0.5; // AMG strength parameter = 0.25 is 2D optimal (0.5-0.8 for 3D) @@ -57,108 +168,61 @@ void HypreAmsSolver::Initialize() // 18 = l1-Jacobi, 16 = Chebyshev int interp_type = 6; // 6 = Extended+i, 0 = Classical, 13 = FF1 int Pmax = 4; // Interpolation width - int relax_type = 2; // 2 = l1-SSOR, 4 = trunc. l1-SSOR, - // 1 = l1-Jacobi, 16 = Chebyshev - int relax_sweeps = ams_smooth_it; + int relax_type = 2; // 2 = l1-SSOR, 4 = trunc. 
l1-SSOR, 1 = l1-Jacobi, 16 = Chebyshev double weight = 1.0; double omega = 1.0; - HYPRE_AMSSetSmoothingOptions(ams, relax_type, relax_sweeps, weight, omega); - HYPRE_AMSSetAlphaAMGOptions(ams, coarsen_type, agg_levels, amg_relax_type, theta, + HYPRE_AMSSetSmoothingOptions(ams, relax_type, ams_smooth_it, weight, omega); + HYPRE_AMSSetAlphaAMGOptions(ams, coarsen_type, amg_agg_levels, amg_relax_type, theta, interp_type, Pmax); - HYPRE_AMSSetBetaAMGOptions(ams, coarsen_type, agg_levels, amg_relax_type, theta, + HYPRE_AMSSetBetaAMGOptions(ams, coarsen_type, amg_agg_levels, amg_relax_type, theta, interp_type, Pmax); // int coarse_relax_type = 8; // Default, l1-symm. GS // HYPRE_AMSSetAlphaAMGCoarseRelaxType(ams, coarse_relax_type); // HYPRE_AMSSetBetaAMGCoarseRelaxType(ams, coarse_relax_type); - // Control printing and number of iterations for use as a preconditioner. - HYPRE_AMSSetPrintLevel(ams, print); - HYPRE_AMSSetMaxIter(ams, ams_it); - HYPRE_AMSSetTol(ams, 0.0); - // HYPRE_AMSSetTol(ams, 1.0e-16); // Avoid issues with zero RHS + // Set the discrete gradient matrix. + HYPRE_AMSSetDiscreteGradient(ams, (HYPRE_ParCSRMatrix)*G); - // Set this option when solving a curl-curl problem with zero mass term. - if (ams_singular) - { - HYPRE_AMSSetBetaPoissonMatrix(ams, nullptr); - } -} + // Set the mesh vertex coordinates or Nedelec interpolation matrix or matrices. + HYPRE_ParVector HY_X = (x) ? (HYPRE_ParVector)*x : nullptr; + HYPRE_ParVector HY_Y = (y) ? (HYPRE_ParVector)*y : nullptr; + HYPRE_ParVector HY_Z = (z) ? (HYPRE_ParVector)*z : nullptr; + HYPRE_AMSSetCoordinateVectors(ams, HY_X, HY_Y, HY_Z); -void HypreAmsSolver::ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace) -{ - // Set up the auxiliary spaces for the preconditioner. Mostly the same as MFEM's - // HypreAMS:Init. 
- mfem::FiniteElementCollection *h1_fec = nullptr; - if (!h1_fespace) - { - h1_fec = new mfem::H1_FECollection(nd_fespace.GetMaxElementOrder(), - nd_fespace.GetParMesh()->Dimension()); - h1_fespace = new mfem::ParFiniteElementSpace(nd_fespace.GetParMesh(), h1_fec); - } - { - mfem::ParDiscreteLinearOperator grad(h1_fespace, &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - G.reset(grad.ParallelAssemble()); - } - { - mfem::ParFiniteElementSpace h1d_fespace(h1_fespace->GetParMesh(), h1_fespace->FEColl(), - sdim, mfem::Ordering::byVDIM); - mfem::ParDiscreteLinearOperator id_ND(&h1d_fespace, &nd_fespace); - id_ND.AddDomainInterpolator(new mfem::IdentityInterpolator); - // id_ND.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - id_ND.Assemble(); - id_ND.Finalize(); - if (cycle_type < 10) - { - Pi.reset(id_ND.ParallelAssemble()); - } - else - { - mfem::Array2D Pi_blocks; - id_ND.GetParBlocks(Pi_blocks); - Pix.reset(Pi_blocks(0, 0)); - if (sdim >= 2) - { - Piy.reset(Pi_blocks(0, 1)); - } - if (sdim == 3) - { - Piz.reset(Pi_blocks(0, 2)); - } - } - } - if (h1_fec) - { - delete h1_fespace; - delete h1_fec; - } + HYPRE_ParCSRMatrix HY_Pi = (Pi) ? (HYPRE_ParCSRMatrix)*Pi : nullptr; + HYPRE_ParCSRMatrix HY_Pix = (Pix) ? (HYPRE_ParCSRMatrix)*Pix : nullptr; + HYPRE_ParCSRMatrix HY_Piy = (Piy) ? (HYPRE_ParCSRMatrix)*Piy : nullptr; + HYPRE_ParCSRMatrix HY_Piz = (Piz) ? (HYPRE_ParCSRMatrix)*Piz : nullptr; + HYPRE_AMSSetInterpolations(ams, HY_Pi, HY_Pix, HY_Piy, HY_Piz); } -void HypreAmsSolver::SetOperator(const mfem::Operator &op) +void HypreAmsSolver::SetOperator(const Operator &op) { // When the operator changes, we need to rebuild the AMS solver but can use the unchanged // auxiliary space matrices. 
- bool first = true; - if (setup_called) + if (A) { HYPRE_AMSDestroy(ams); - Initialize(); - first = false; + InitializeSolver(); } - setup_called = 0; + + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + A = &PtAP->ParallelAssemble(); + } + else + { + A = dynamic_cast(const_cast(&op)); + } + MFEM_VERIFY(A, "HypreAmsSolver requires a HypreParMatrix operator!"); + height = A->Height(); + width = A->Width(); // From mfem::HypreAMS: Update HypreSolver base class. - height = op.Height(); - width = op.Width(); - auto *new_A = dynamic_cast(&op); - MFEM_VERIFY(new_A, "AMS solver requires a HypreParMatrix operator!"); - A = const_cast(new_A); + setup_called = 0; delete X; delete B; B = X = nullptr; @@ -166,51 +230,6 @@ void HypreAmsSolver::SetOperator(const mfem::Operator &op) auxB.Reset(); auxX.Delete(); auxX.Reset(); - - // Eliminate Dirichlet BCs in G, Pi matrices before RAP. - if (first) - { - constexpr double tol = 1.0e-9; - mfem::Array dbc_rows; - mfem::Vector diag(A->Height()), rowsums(A->Height()); - A->AssembleDiagonal(diag); - hypre::hypreParCSRRowSums(*A, rowsums); - for (int i = 0; i < A->Height(); i++) - { - if (std::abs(rowsums(i) - diag(i)) < tol * std::abs(diag(i))) - { - dbc_rows.Append(i); - } - } - if (G) - { - G->EliminateRows(dbc_rows); - } - if (Pi) - { - Pi->EliminateRows(dbc_rows); - } - if (Pix) - { - Pix->EliminateRows(dbc_rows); - } - if (Piy) - { - Piy->EliminateRows(dbc_rows); - } - if (Piz) - { - Piz->EliminateRows(dbc_rows); - } - } - - // Set the discrete gradient and Nedelec interpolation matrices. - HYPRE_ParCSRMatrix HY_Pi = (Pi) ? (HYPRE_ParCSRMatrix)*Pi : nullptr; - HYPRE_ParCSRMatrix HY_Pix = (Pix) ? (HYPRE_ParCSRMatrix)*Pix : nullptr; - HYPRE_ParCSRMatrix HY_Piy = (Piy) ? (HYPRE_ParCSRMatrix)*Piy : nullptr; - HYPRE_ParCSRMatrix HY_Piz = (Piz) ? 
(HYPRE_ParCSRMatrix)*Piz : nullptr; - HYPRE_AMSSetDiscreteGradient(ams, *G); - HYPRE_AMSSetInterpolations(ams, HY_Pi, HY_Pix, HY_Piy, HY_Piz); } } // namespace palace diff --git a/palace/linalg/ams.hpp b/palace/linalg/ams.hpp index 69a276c43..8e9810b69 100644 --- a/palace/linalg/ams.hpp +++ b/palace/linalg/ams.hpp @@ -4,7 +4,9 @@ #ifndef PALACE_LINALG_AMS_HPP #define PALACE_LINALG_AMS_HPP +#include #include +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -19,34 +21,38 @@ class HypreAmsSolver : public mfem::HypreSolver // The Hypre solver object. HYPRE_Solver ams; - // Discrete gradient matrix. - std::unique_ptr G; - - // Nedelec interpolation matrix and its components (used even for p = 1). - std::unique_ptr Pi, Pix, Piy, Piz; - // Parameters used for preconditioner construction. - const int cycle_type, sdim, ams_it, ams_smooth_it, agg_levels; + const int cycle_type, space_dim, ams_it, ams_smooth_it, amg_agg_levels; const bool ams_singular; // Control print level for debugging. const int print; - // Helper functions to construct the AMS solver and required auxiliary space matrices. - void Initialize(); + // Discrete gradient matrix. + std::unique_ptr G; + + // Nedelec interpolation matrix and its components, or, for p = 1, the mesh vertex + // coordinates. + std::unique_ptr Pi, Pix, Piy, Piz; + std::unique_ptr x, y, z; + + // Helper function to set up the auxiliary objects required by the AMS solver. void ConstructAuxiliaryMatrices(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace = nullptr); + mfem::ParFiniteElementSpace &h1_fespace); + + // Helper function to construct and configure the AMS solver. + void InitializeSolver(); public: // Constructor requires the ND space, but will construct the H1 and (H1)ᵈ spaces // internally as needed. 
HypreAmsSolver(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace, int cycle_it, int smooth_it, - int agg_coarsen, bool vector_interp, bool op_singular, int print_lvl); + mfem::ParFiniteElementSpace &h1_fespace, int cycle_it, int smooth_it, + int agg_coarsen, bool vector_interp, bool op_singular, int print); HypreAmsSolver(const IoData &iodata, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace *h1_fespace, int print_lvl) + mfem::ParFiniteElementSpace &h1_fespace, int print) : HypreAmsSolver(nd_fespace, h1_fespace, - iodata.solver.linear.mat_gmg ? 1 : iodata.solver.linear.mg_cycle_it, + iodata.solver.linear.pc_mg ? 1 : iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, (iodata.problem.type == config::ProblemData::Type::TRANSIENT || iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC) @@ -54,15 +60,13 @@ class HypreAmsSolver : public mfem::HypreSolver : 0, iodata.solver.linear.ams_vector, (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC), - print_lvl) + print) { } ~HypreAmsSolver() override; - // Sets matrix associated with the AMS solver. - void SetOperator(const mfem::Operator &op) override; + void SetOperator(const Operator &op) override; - // The typecast to HYPRE_Solver returns the internal ams object. 
operator HYPRE_Solver() const override { return ams; } HYPRE_PtrToParSolverFcn SetupFcn() const override diff --git a/palace/linalg/arpack.cpp b/palace/linalg/arpack.cpp index 2297adb86..d6d61a0c6 100644 --- a/palace/linalg/arpack.cpp +++ b/palace/linalg/arpack.cpp @@ -19,122 +19,280 @@ #include // clang-format on #include "linalg/divfree.hpp" -#include "linalg/ksp.hpp" -#include "linalg/petsc.hpp" #include "utils/communication.hpp" +namespace +{ + +void CheckInfoAUPD(a_int info) +{ + if (info != 0) + { + std::string msg = "ARPACK pznaupd error: "; + switch (info) + { + case 1: + msg += "Maximum number of iterations taken, all possible eigenvalues " + "have been found!"; + break; + case 2: + msg += "No longer an informational error (deprecated starting with " + "release 2 of ARPACK)!"; + break; + case 3: + msg += "No shifts could be applied during a cycle of the Implicitly " + "restarted Arnoldi iteration!"; + break; + case -1: + msg += "N must be positive!"; + break; + case -2: + msg += "NEV must be positive!"; + break; + case -3: + msg += "NCV-NEV >= 2 and less than or equal to N!"; + break; + case -4: + msg += "The maximum number of Arnoldi update iterations allowed must " + "be greater than zero!"; + break; + case -5: + msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; + break; + case -6: + msg += "BMAT must be one of 'I' or 'G'!"; + break; + case -7: + msg += "Length of private work array WORKL is not sufficient!"; + break; + case -8: + msg += "Error return from LAPACK eigenvalue calculation!"; + break; + case -9: + msg += "Starting vector is zero!"; + break; + case -10: + msg += "IPARAM(7) must be 1, 2, or 3!"; + break; + case -11: + msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; + break; + case -12: + msg += "IPARAM(1) must be equal to 0 or 1!"; + break; + case -9999: + msg += "Could not build an Arnoldi factorization!"; + break; + default: + msg += "Unknown ARPACK error message!"; + break; + } + MFEM_ABORT(msg.c_str()); + } +} + 
+void CheckInfoEUPD(a_int info) +{ + if (info != 0) + { + std::string msg = "ARPACK pzneupd error: "; + switch (info) + { + case 1: + msg += "The Schur form computed by LAPACK routine csheqr could not " + "be reordered by LAPACK routine ztrsen!"; + break; + case -1: + msg += "N must be positive!"; + break; + case -2: + msg += "NEV must be positive!"; + break; + case -3: + msg += "NCV-NEV >= 2 and less than or equal to N!"; + break; + case -4: + msg += "The maximum number of Arnoldi update iterations allowed must " + "be greater than zero!"; + break; + case -5: + msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; + break; + case -6: + msg += "BMAT must be one of 'I' or 'G'!"; + break; + case -7: + msg += "Length of private work array WORKL is not sufficient!"; + break; + case -8: + msg += "Error return from LAPACK eigenvalue calculation!"; + break; + case -9: + msg += "Error return from calculation of eigenvectors!"; + break; + case -10: + msg += "IPARAM(7) must be 1, 2, or 3!"; + break; + case -11: + msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; + break; + case -12: + msg += "HOWMNY = 'S' not yet implemented!"; + break; + case -13: + msg += "HOWMNY must be one of 'A' or 'P' if RVEC = true!"; + break; + case -14: + msg += "PZNAUPD did not find any eigenvalues to sufficient accuracy!"; + break; + case -15: + msg += "ZNEUPD got a different count of the number of converged Ritz " + "values than ZNAUPD got!"; + break; + default: + msg += "Unknown ARPACK error message!"; + break; + } + MFEM_ABORT(msg.c_str()); + } +} + +} // namespace + namespace palace::arpack { // Base class methods -ArpackEigenSolver::ArpackEigenSolver(int print_lvl) +ArpackEigenvalueSolver::ArpackEigenvalueSolver(MPI_Comm comm, int print) + : comm(comm), print(print) { // Initialization. 
- print = print_lvl; info = 0; - nev = ncv = 0; + nev = ncv = n = 0; rtol = 0.0; - max_it = 0; - which_option = ::arpack::which::largest_magnitude; + arpack_it = 0; + which_type = WhichType::LARGEST_MAGNITUDE; + gamma = delta = 1.0; sinvert = false; sigma = 0.0; - gamma = delta = 1.0; - eig = nullptr; - perm = nullptr; - V = nullptr; - res = nullptr; - r = nullptr; opInv = nullptr; opProj = nullptr; opB = nullptr; // Configure debugging output. a_int logfill = 6, ndigit = -6, mgetv0 = 0; - a_int _aupd = (print_lvl > 2) ? 1 : 0, - _aup2 = (print_lvl > 2) ? 2 : ((print_lvl > 0) ? 1 : 0), _aitr = 0, _eigh = 0, - _gets = 0, _apps = 0, _eupd = 0; + a_int _aupd = (print > 2) ? 1 : 0, _aup2 = (print > 2) ? 2 : ((print > 0) ? 1 : 0), + _aitr = 0, _eigh = 0, _gets = 0, _apps = 0, _eupd = 0; debug_c(logfill, ndigit, mgetv0, _aupd, _aup2, _aitr, _eigh, _gets, _apps, _eupd, _aupd, _aup2, _aitr, _eigh, _gets, _apps, _eupd, _aupd, _aup2, _aitr, _eigh, _gets, _apps, _eupd); cstatn_c(); } -ArpackEigenSolver::~ArpackEigenSolver() +void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - delete[] eig; - delete[] perm; - delete[] res; - delete V; - delete r; + MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!"); } -void ArpackEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void ArpackEigenvalueSolver::SetOperators(const ComplexOperator &K, + const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - MFEM_ABORT("SetOperators not defined for base class ArpackEigenSolver!"); + MFEM_ABORT("SetOperators not defined for base class ArpackEigenvalueSolver!"); } -void ArpackEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_ABORT("SetOperators not defined for 
base class ArpackEigenSolver!"); -} - -void ArpackEigenSolver::SetLinearSolver(const KspSolver &ksp) +void ArpackEigenvalueSolver::SetLinearSolver(const ComplexKspSolver &ksp) { opInv = &ksp; } -void ArpackEigenSolver::SetProjector(const DivFreeSolver &divfree) +void ArpackEigenvalueSolver::SetDivFreeProjector(const DivFreeSolver &divfree) { opProj = &divfree; } -void ArpackEigenSolver::SetBMat(const petsc::PetscParMatrix &B) +void ArpackEigenvalueSolver::SetBMat(const Operator &B) { - MFEM_VERIFY(!opB || opB->GetNumRows() == B.GetNumRows(), + MFEM_VERIFY(!opB || opB->Height() == B.Height(), "Invalid modification of eigenvalue problem size!"); opB = &B; } -void ArpackEigenSolver::SetNumModes(int numeig, int numvec) +void ArpackEigenvalueSolver::SetNumModes(int num_eig, int num_vec) { - if (nev > 0 && numeig != nev) + if (nev > 0 && num_eig != nev) { - delete[] eig; - delete[] perm; - delete[] res; - eig = nullptr; - perm = nullptr; - res = nullptr; + eig.reset(); + perm.reset(); + res.reset(); } - if (ncv > 0 && numvec != ncv) + if (ncv > 0 && num_vec != ncv) { - delete V; - V = nullptr; + V.reset(); } - nev = numeig; - ncv = (numvec > 0) ? numvec : std::max(20, 2 * nev + 1); // Default from SLEPc + nev = num_eig; + ncv = (num_vec > 0) ? 
num_vec : std::max(20, 2 * nev + 1); // Default from SLEPc } -void ArpackEigenSolver::SetTol(double tol) +void ArpackEigenvalueSolver::SetTol(double tol) { rtol = tol; } -void ArpackEigenSolver::SetMaxIter(int maxits) +void ArpackEigenvalueSolver::SetMaxIter(int max_it) { - max_it = maxits; + arpack_it = max_it; } -void ArpackEigenSolver::SetWhichEigenpairs(EigenSolverBase::WhichType type) +void ArpackEigenvalueSolver::SetWhichEigenpairs(EigenvalueSolver::WhichType type) { - switch (type) + which_type = type; +} + +void ArpackEigenvalueSolver::SetShiftInvert(std::complex s, bool precond) +{ + MFEM_VERIFY(!precond, "ARPACK eigenvalue solver does not support preconditioned " + "spectral transformation option!"); + sigma = s; + sinvert = true; +} + +void ArpackEigenvalueSolver::SetInitialSpace(const ComplexVector &v) +{ + MFEM_VERIFY( + n > 0, + "Must call SetOperators before using SetInitialSpace for ARPACK eigenvalue solver!"); + if (!r) + { + r = std::make_unique[]>(n); + } + MFEM_VERIFY(v.Size() == n, "Invalid size mismatch for provided initial space vector!"); + v.Get(r.get(), n); + info = 1; +} + +int ArpackEigenvalueSolver::SolveInternal(int n, std::complex *r, + std::complex *V, + std::complex *eig, int *perm) +{ + MPI_Fint fcomm = MPI_Comm_c2f(comm); + a_int iparam[11] = {0}; + iparam[0] = 1; // Exact shifts + iparam[2] = (a_int)arpack_it; // Maximum number of Arnoldi iterations + iparam[3] = 1; // Block size + iparam[4] = 0; // Number of converged Ritz values + iparam[6] = sinvert ? 3 : 1; // Problem mode + + ::arpack::bmat bmat_option = + (opB) ? 
::arpack::bmat::generalized : ::arpack::bmat::identity; + + ::arpack::which which_option = ::arpack::which::largest_magnitude; + switch (which_type) { case WhichType::LARGEST_MAGNITUDE: case WhichType::TARGET_MAGNITUDE: @@ -161,88 +319,30 @@ void ArpackEigenSolver::SetWhichEigenpairs(EigenSolverBase::WhichType type) "TARGET_IMAGINARY for SetWhichEigenpairs!"); break; } -} - -void ArpackEigenSolver::SetShiftInvert(double tr, double ti, bool precond) -{ - MFEM_VERIFY(!precond, "ARPACK eigenvalue solver does not support preconditioned " - "spectral transformation option!"); - sigma = tr + PETSC_i * ti; - sinvert = true; -} - -void ArpackEigenSolver::SetInitialSpace(const petsc::PetscParVector &v) -{ - if (!r) - { - r = new petsc::PetscParVector(v); - } - else - { - MFEM_VERIFY(v.GetSize() == r->GetSize(), - "Invalid modification of eigenvalue problem size!"); - r->Copy(v); - } - info = 1; -} - -int ArpackEigenSolver::SolveInternal(petsc::PetscParVector &r_, petsc::PetscDenseMatrix &V_, - PetscScalar *eig_, int *perm_) -{ - MPI_Comm comm; - MPI_Fint fcomm; - a_int ido, info_ = (a_int)info; - a_int iparam[11] = {0}, ipntr[14] = {0}; - a_int n, nev_, ncv_; - ::arpack::bmat bmat_option = - (opB) ? ::arpack::bmat::generalized : ::arpack::bmat::identity; - PetscScalar *workd, *workl; - double *rwork; - a_int lworkl; - - comm = r_.GetComm(); - fcomm = MPI_Comm_c2f(comm); - iparam[0] = 1; // Exact shifts - iparam[2] = (a_int)max_it; // Maximum number of Arnoldi iterations - iparam[3] = 1; // Block size - iparam[4] = 0; // Number of converged Ritz values - iparam[6] = sinvert ? 3 : 1; // Problem mode - - // Set problem sizes. The cast to int should always be safe because this is a local size. - n = (a_int)r_.GetSize(); - nev_ = (a_int)nev; - ncv_ = (a_int)ncv; // Allocate work arrays. 
- lworkl = 3 * ncv_ * ncv_ + 5 * ncv_; - workd = new PetscScalar[3 * n]; - workl = new PetscScalar[lworkl]; - rwork = new double[ncv_]; - - PetscScalar *pr_ = r_.GetArray(); - PetscScalar *pV_ = V_.GetArray(); - petsc::PetscParVector x(comm, n, PETSC_DECIDE, nullptr); - petsc::PetscParVector y(comm, n, PETSC_DECIDE, nullptr); + a_int lworkl = 3 * ncv * ncv + 5 * ncv; + auto workd = std::make_unique[]>(3 * n); + auto workl = std::make_unique[]>(lworkl); + auto rwork = std::make_unique(ncv); // Begin RCI loop. - ido = 0; + a_int ido = 0, ainfo = (a_int)info, ipntr[14] = {0}; while (true) { // Call complex problem driver. - naupd(fcomm, ido, bmat_option, n, which_option, nev_, rtol, pr_, ncv_, pV_, n, iparam, - ipntr, workd, workl, lworkl, rwork, info_); - CheckInfoAUPD(info_); + naupd(fcomm, ido, bmat_option, (a_int)n, which_option, (a_int)nev, rtol, r, (a_int)ncv, + V, (a_int)n, iparam, ipntr, workd.get(), workl.get(), lworkl, rwork.get(), ainfo); + CheckInfoAUPD(ainfo); - // We never use pre-computed B * x in workd[ipntr[2]-1]. - x.PlaceArray(&workd[ipntr[0] - 1]); - y.PlaceArray(&workd[ipntr[1] - 1]); + // We never use pre-computed B * x in workd[ipntr[2] - 1]. if (ido == 1 || ido == -1) { - ApplyOp(x, y); + ApplyOp(&workd.get()[ipntr[0] - 1], &workd.get()[ipntr[1] - 1]); } else if (ido == 2) { - ApplyOpB(x, y); + ApplyOpB(&workd.get()[ipntr[0] - 1], &workd.get()[ipntr[1] - 1]); } else if (ido == 99) { @@ -252,297 +352,134 @@ int ArpackEigenSolver::SolveInternal(petsc::PetscParVector &r_, petsc::PetscDens { MFEM_ABORT("Internal error in ARPACK RCI interface!"); } - x.ResetArray(); - y.ResetArray(); } // Print some log information. 
- int niter = (int)iparam[2]; - int nconv = (int)iparam[4]; + int num_it = (int)iparam[2]; + int num_conv = (int)iparam[4]; if (print > 0) { Mpi::Print(comm, "\n ARPACK {} eigensolve {} ({:d} eigenpairs); iterations {:d}\n" " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - GetName(), (nconv >= nev_) ? "converged" : "finished", nconv, niter, - opInv->GetTotalNumMult(), opInv->GetTotalNumIter()); + GetName(), (num_conv >= nev) ? "converged" : "finished", num_conv, num_it, + opInv->NumTotalMult(), opInv->NumTotalMultIterations()); } - if (nconv < nev_) + if (num_conv < nev) { Mpi::Warning( comm, "ARPACK eigenvalue solver found only {:d} of requested {:d} eigenvalues!\n", - nconv, nev_); + num_conv, nev); } // Postprocess eigenvalues and eigenvectors. a_int rvec = 1; ::arpack::howmny howmny_option = ::arpack::howmny::ritz_vectors; - a_int *select; - PetscScalar *workev; // Allocate eigenvalue storage and work arrays. - select = new a_int[ncv_]; - workev = new PetscScalar[2 * ncv_]; + auto select = std::make_unique(ncv); + auto workev = std::make_unique[]>(2 * ncv); // Call complex problem driver. - PetscScalar sigma_ = sigma / gamma; - neupd(fcomm, rvec, howmny_option, select, eig_, pV_, n, sigma_, workev, bmat_option, n, - which_option, nev_, rtol, pr_, ncv_, pV_, n, iparam, ipntr, workd, workl, lworkl, - rwork, info_); - CheckInfoEUPD(info_); + neupd(fcomm, rvec, howmny_option, select.get(), eig, V, (a_int)n, sigma / gamma, + workev.get(), bmat_option, (a_int)n, which_option, (a_int)nev, rtol, r, (a_int)ncv, + V, (a_int)n, iparam, ipntr, workd.get(), workl.get(), lworkl, rwork.get(), ainfo); + CheckInfoEUPD(ainfo); // Unscale and properly sort the eigenvalues. 
- auto CompareReal = [&eig_](const int &l, const int &r) - { return PetscRealPart(eig_[l]) < PetscRealPart(eig_[r]); }; - auto CompareImag = [&eig_](const int &l, const int &r) - { return PetscImaginaryPart(eig_[l]) < PetscImaginaryPart(eig_[r]); }; - auto CompareAbs = [&eig_](const int &l, const int &r) - { return PetscAbsScalar(eig_[l]) < PetscAbsScalar(eig_[r]); }; - for (int i = 0; i < nev_; i++) + auto CompareReal = [&eig](const int &l, const int &r) + { return eig[l].real() < eig[r].real(); }; + auto CompareImag = [&eig](const int &l, const int &r) + { return eig[l].imag() < eig[r].imag(); }; + auto CompareAbs = [&eig](const int &l, const int &r) + { return std::abs(eig[l]) < std::abs(eig[r]); }; + for (int i = 0; i < nev; i++) { - eig_[i] = eig_[i] * gamma; - perm_[i] = i; + eig[i] = eig[i] * gamma; + perm[i] = i; } if (which_option == ::arpack::which::largest_real || which_option == ::arpack::which::smallest_real) { - std::sort(perm_, perm_ + nev_, CompareReal); + std::sort(perm, perm + nev, CompareReal); } else if (which_option == ::arpack::which::largest_imaginary || which_option == ::arpack::which::smallest_imaginary) { - std::sort(perm_, perm_ + nev_, CompareImag); + std::sort(perm, perm + nev, CompareImag); } else { - std::sort(perm_, perm_ + nev_, CompareAbs); + std::sort(perm, perm + nev, CompareAbs); } - // Cleanup. 
- r_.RestoreArray(pr_); - V_.RestoreArray(pV_); - delete[] select; - delete[] workev; - delete[] workd; - delete[] workl; - delete[] rwork; - - return nconv; + return num_conv; } -void ArpackEigenSolver::CheckParameters() const +void ArpackEigenvalueSolver::CheckParameters() const { + MFEM_VERIFY(n > 0, "Operators are not set for ARPACK eigenvalue solver!"); MFEM_VERIFY(nev > 0, "Number of requested modes is not positive!"); MFEM_VERIFY(rtol > 0.0, "Eigensolver tolerance is not positive!"); MFEM_VERIFY(opInv, "No linear solver provided for operator!"); } -void ArpackEigenSolver::CheckInfoAUPD(int info) const -{ - if (info != 0) - { - std::string msg = "ARPACK pznaupd error: "; - switch (info) - { - case 1: - msg += "Maximum number of iterations taken, all possible eigenvalues " - "have been found!"; - break; - case 2: - msg += "No longer an informational error (deprecated starting with " - "release 2 of ARPACK)!"; - break; - case 3: - msg += "No shifts could be applied during a cycle of the Implicitly " - "restarted Arnoldi iteration!"; - break; - case -1: - msg += "N must be positive!"; - break; - case -2: - msg += "NEV must be positive!"; - break; - case -3: - msg += "NCV-NEV >= 2 and less than or equal to N!"; - break; - case -4: - msg += "The maximum number of Arnoldi update iterations allowed must " - "be greater than zero!"; - break; - case -5: - msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; - break; - case -6: - msg += "BMAT must be one of 'I' or 'G'!"; - break; - case -7: - msg += "Length of private work array WORKL is not sufficient!"; - break; - case -8: - msg += "Error return from LAPACK eigenvalue calculation!"; - break; - case -9: - msg += "Starting vector is zero!"; - break; - case -10: - msg += "IPARAM(7) must be 1, 2, or 3!"; - break; - case -11: - msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; - break; - case -12: - msg += "IPARAM(1) must be equal to 0 or 1!"; - break; - case -9999: - msg += "Could not build an Arnoldi 
factorization!"; - break; - default: - msg += "Unknown ARPACK error message!"; - break; - } - MFEM_ABORT(msg.c_str()); - } -} - -void ArpackEigenSolver::CheckInfoEUPD(int info) const -{ - if (info != 0) - { - std::string msg = "ARPACK pzneupd error: "; - switch (info) - { - case 1: - msg += "The Schur form computed by LAPACK routine csheqr could not " - "be reordered by LAPACK routine ztrsen!"; - break; - case -1: - msg += "N must be positive!"; - break; - case -2: - msg += "NEV must be positive!"; - break; - case -3: - msg += "NCV-NEV >= 2 and less than or equal to N!"; - break; - case -4: - msg += "The maximum number of Arnoldi update iterations allowed must " - "be greater than zero!"; - break; - case -5: - msg += "WHICH must be one of 'LM', 'SM', 'LR', 'SR', 'LI', 'SI'"; - break; - case -6: - msg += "BMAT must be one of 'I' or 'G'!"; - break; - case -7: - msg += "Length of private work array WORKL is not sufficient!"; - break; - case -8: - msg += "Error return from LAPACK eigenvalue calculation!"; - break; - case -9: - msg += "Error return from calculation of eigenvectors!"; - break; - case -10: - msg += "IPARAM(7) must be 1, 2, or 3!"; - break; - case -11: - msg += "IPARAM(7) = 1 and BMAT = 'G' are incompatible!"; - break; - case -12: - msg += "HOWMNY = 'S' not yet implemented!"; - break; - case -13: - msg += "HOWMNY must be one of 'A' or 'P' if RVEC = true!"; - break; - case -14: - msg += "PZNAUPD did not find any eigenvalues to sufficient accuracy!"; - break; - case -15: - msg += "ZNEUPD got a different count of the number of converged Ritz " - "values than ZNAUPD got!"; - break; - default: - msg += "Unknown ARPACK error message!"; - break; - } - MFEM_ABORT(msg.c_str()); - } -} - -void ArpackEigenSolver::GetEigenvalue(int i, double &eigr, double &eigi) const +std::complex ArpackEigenvalueSolver::GetEigenvalue(int i) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - const int &j = 
perm[i]; - eigr = PetscRealPart(eig[j]); - eigi = PetscImaginaryPart(eig[j]); + const int &j = perm.get()[i]; + return eig.get()[j]; } -void ArpackEigenSolver::GetEigenvector(int i, petsc::PetscParVector &x) const +void ArpackEigenvalueSolver::GetEigenvector(int i, ComplexVector &x) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - const int &j = perm[i]; - const petsc::PetscParVector v = V->GetColumnRead(j); - x.Copy(v); - V->RestoreColumnRead(j, v); + MFEM_VERIFY(x.Size() == n, "Invalid size mismatch for provided eigenvector!"); + const int &j = perm.get()[i]; + x.Set(V.get() + j * n, n); } -void ArpackEigenSolver::GetError(int i, EigenSolverBase::ErrorType type, double &err) const +double ArpackEigenvalueSolver::GetError(int i, EigenvalueSolver::ErrorType type) const { MFEM_VERIFY(eig && i >= 0 && i < nev, "Out of range eigenpair requested (i = " << i << ", nev = " << nev << ")!"); - const int &j = perm[i]; - if (res[j] <= 0.0) - { - const petsc::PetscParVector v = V->GetColumnRead(j); - GetResidual(eig[j], v, *r); - res[j] = r->Norml2() / v.Norml2(); - V->RestoreColumnRead(j, v); - } + const int &j = perm.get()[i]; switch (type) { case ErrorType::ABSOLUTE: - err = res[j]; - break; + return res.get()[j]; case ErrorType::RELATIVE: - err = res[j] / PetscAbsScalar(eig[j]); - break; + return res.get()[j] / std::abs(eig.get()[j]); case ErrorType::BACKWARD: - err = res[j] / GetBackwardScaling(eig[j]); - break; + return res.get()[j] / GetBackwardScaling(eig.get()[j]); } + return 0.0; } // EPS specific methods -ArpackEPSSolver::ArpackEPSSolver(int print_lvl) : ArpackEigenSolver(print_lvl) +ArpackEPSSolver::ArpackEPSSolver(MPI_Comm comm, int print) + : ArpackEigenvalueSolver(comm, print) { opK = opM = nullptr; normK = normM = 0.0; - z = nullptr; -} - -ArpackEPSSolver::~ArpackEPSSolver() -{ - delete z; } -void ArpackEPSSolver::SetOperators(const petsc::PetscParMatrix &K, - const 
petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void ArpackEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), + MFEM_VERIFY(!opK || opK->Height() == K.Height(), "Invalid modification of eigenvalue problem size!"); bool first = (opK == nullptr); opK = &K; opM = &M; if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!"); if (normK > 0 && normM > 0.0) { @@ -552,155 +489,138 @@ void ArpackEPSSolver::SetOperators(const petsc::PetscParMatrix &K, } // Set up workspace. - if (!z) - { - z = new petsc::PetscParVector(K); - } + x.SetSize(opK->Height()); + y.SetSize(opK->Height()); + z.SetSize(opK->Height()); + n = opK->Height(); } int ArpackEPSSolver::Solve() { - // Check input parameters. - CheckParameters(); - MFEM_VERIFY(opK && opM, "Operators are not set for ArpackEPSSolver!"); - // Set some defaults (default maximum iterations from SLEPc). - PetscInt n = opK->GetNumRows(), N = opK->GetGlobalNumRows(); + CheckParameters(); + HYPRE_BigInt N = linalg::GlobalSize(comm, z); if (ncv > N) { - ncv = (int)N; + ncv = mfem::internal::to_int(N); } - if (max_it <= 0) + if (arpack_it <= 0) { - max_it = std::max(300, (int)(2 * N / ncv)); + arpack_it = std::max(300, mfem::internal::to_int(2 * N / ncv)); } // Initialize if user did not provide an initial space. if (!r) { + r = std::make_unique[]>(n); info = 0; - r = new petsc::PetscParVector(*opK); } if (!info) { - r->SetZero(); + std::fill(r.get(), r.get() + n, 0.0); } // Allocate Arnoldi basis for the problem. 
if (!V) { - V = new petsc::PetscDenseMatrix(opK->GetComm(), n, PETSC_DECIDE, PETSC_DECIDE, ncv, - nullptr); + V = std::make_unique[]>(n * ncv); } - // Cache residual norms when calculated later on. + // Allocate storage for eigenvalues and residual norms. if (!eig) { - eig = new PetscScalar[nev + 1]; - perm = new int[nev + 1]; - res = new double[nev + 1]; - } - for (int i = 0; i < nev + 1; i++) - { - res[i] = -1.0; + eig = std::make_unique[]>(nev + 1); + perm = std::make_unique(nev); + res = std::make_unique(nev); } // Solve the generalized eigenvalue problem. - int nconv = SolveInternal(*r, *V, eig, perm); + int num_conv = SolveInternal(n, r.get(), V.get(), eig.get(), perm.get()); + + // Compute the eigenpair residuals: || (K - λ M) x ||₂ for eigenvalue λ. + for (int i = 0; i < nev; i++) + { + const std::complex l = eig.get()[i]; + x.Set(V.get() + i * n, n); + opK->Mult(x, y); + opM->AddMult(x, y, -l); + res.get()[i] = linalg::Norml2(comm, y); + } // Reset for next solve. info = 0; - return nconv; + return num_conv; } -void ArpackEPSSolver::ApplyOp(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackEPSSolver::ApplyOp(const std::complex *px, + std::complex *py) const { // Case 1: No spectral transformation (opInv = M⁻¹) // y = M⁻¹ K x . // Case 2: Shift-and-invert spectral transformation (opInv = (K - σ M)⁻¹) // y = (K - σ M)⁻¹ M x . 
+ x.Set(px, n); if (!sinvert) { - opK->Mult(x, *z); - opInv->Mult(*z, y); - y.Scale(1.0 / gamma); + opK->Mult(x, z); + opInv->Mult(z, y); + y *= 1.0 / gamma; } else { - opM->Mult(x, *z); - opInv->Mult(*z, y); - y.Scale(gamma); + opM->Mult(x, z); + opInv->Mult(z, y); + y *= gamma; } - - // Debug - // Mpi::Print(" Before projection: {:e}\n", y.Norml2()); - if (opProj) { + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y)); opProj->Mult(y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(comm, y)); } - - // Debug - // Mpi::Print(" After projection: {:e}\n", y.Norml2()); + y.Get(py, n); } -void ArpackEPSSolver::ApplyOpB(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackEPSSolver::ApplyOpB(const std::complex *px, + std::complex *py) const { MFEM_VERIFY(opB, "No B operator for weighted inner product in ARPACK solve!"); - opB->Mult(x, y); - y.Scale(delta * gamma); -} - -void ArpackEPSSolver::GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const -{ - // r = (K - λ M) x for eigenvalue λ. - opM->Mult(x, r); - r.Scale(-l); - opK->MultAdd(x, r); + x.Set(px, n); + opB->Mult(x.Real(), y.Real()); + opB->Mult(x.Imag(), y.Imag()); + y *= delta * gamma; + y.Get(py, n); } -double ArpackEPSSolver::GetBackwardScaling(PetscScalar l) const +double ArpackEPSSolver::GetBackwardScaling(std::complex l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); } - return normK + PetscAbsScalar(l) * normM; + return normK + std::abs(l) * normM; } // PEP specific methods -ArpackPEPSolver::ArpackPEPSolver(int print_lvl) : ArpackEigenSolver(print_lvl) +ArpackPEPSolver::ArpackPEPSolver(MPI_Comm comm, int print) + : ArpackEigenvalueSolver(comm, print) { opK = opC = opM = nullptr; normK = normC = normM = 0.0; - x1 = x2 = y1 = y2 = z = nullptr; } -ArpackPEPSolver::~ArpackPEPSolver() +void ArpackPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - delete x1; - delete x2; - delete y1; - delete y2; - delete z; -} - -void ArpackPEPSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), + MFEM_VERIFY(!opK || opK->Height() == K.Height(), "Invalid modification of eigenvalue problem size!"); bool first = (opK == nullptr); opK = &K; @@ -708,9 +628,9 @@ void ArpackPEPSolver::SetOperators(const petsc::PetscParMatrix &K, opM = &M; if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); + normC = linalg::SpectralNorm(comm, *opC, opC->IsReal()); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0, "Invalid matrix norms for PEP scaling!"); if (normK > 0 && normC > 0.0 && normM > 0.0) @@ -721,102 +641,81 @@ void ArpackPEPSolver::SetOperators(const petsc::PetscParMatrix &K, } // Set up workspace. 
- if (!z) - { - MPI_Comm comm = K.GetComm(); - PetscInt n = K.GetNumRows(); - delete x1; - delete x2; - delete y1; - delete y2; - delete z; - x1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - x2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - z = new petsc::PetscParVector(K); - } + x1.SetSize(opK->Height()); + x2.SetSize(opK->Height()); + y1.SetSize(opK->Height()); + y2.SetSize(opK->Height()); + z.SetSize(opK->Height()); + n = opK->Height(); } int ArpackPEPSolver::Solve() { - // Check input parameters. - CheckParameters(); - MFEM_VERIFY(opK && opC && opM, "Operators are not set for ArpackPEPSolver!"); - // Set some defaults (from SLEPc ARPACK interface). The problem size is the size of the // 2x2 block linearized problem. - PetscInt n = opK->GetNumRows(), N = opK->GetGlobalNumRows(); + CheckParameters(); + HYPRE_BigInt N = linalg::GlobalSize(comm, z); if (ncv > 2 * N) { - ncv = 2 * (int)N; + ncv = mfem::internal::to_int(2 * N); } - if (max_it <= 0) + if (arpack_it <= 0) { - max_it = std::max(300, 4 * (int)(N / ncv)); + arpack_it = std::max(300, mfem::internal::to_int(4 * N / ncv)); } // Initialize if user did not provide an initial space. if (!r) { + r = std::make_unique[]>(n); info = 0; - r = new petsc::PetscParVector(*opK); } if (!info) { - r->SetZero(); + std::fill(r.get(), r.get() + n, 0.0); } - petsc::PetscParVector *s = new petsc::PetscParVector(opK->GetComm(), 2 * n, PETSC_DECIDE); - PetscScalar *ps = GetBlocks(*s, *x1, *x2); - x1->Copy(*r); - x2->SetZero(); // Second block initialized to zero even with initial guess - RestoreBlocks(ps, *s, *x1, *x2); + auto s = std::make_unique[]>(2 * n); + std::copy(r.get(), r.get() + n, s.get()); + std::fill(s.get() + n, s.get() + 2 * n, 0.0); // Allocate Arnoldi basis for original and linearized problem. 
if (!V) { - V = new petsc::PetscDenseMatrix(opK->GetComm(), n, PETSC_DECIDE, PETSC_DECIDE, ncv, - nullptr); + V = std::make_unique[]>(n * ncv); } - petsc::PetscDenseMatrix *W = new petsc::PetscDenseMatrix( - opK->GetComm(), 2 * n, PETSC_DECIDE, PETSC_DECIDE, ncv, nullptr); + auto W = std::make_unique[]>(2 * n * ncv); - // Cache residual norms when calculated later on. + // Allocate storage for eigenvalues and residual norms. if (!eig) { - eig = new PetscScalar[nev + 1]; - perm = new int[nev + 1]; - res = new double[nev + 1]; - } - for (int i = 0; i < nev + 1; i++) - { - res[i] = -1.0; + eig = std::make_unique[]>(nev + 1); + perm = std::make_unique(nev + 1); + res = std::make_unique(nev + 1); } // Solve the linearized eigenvalue problem. - int nconv = SolveInternal(*s, *W, eig, perm); + int num_conv = SolveInternal(2 * n, s.get(), W.get(), eig.get(), perm.get()); - // Eigenvector extraction from the linearized eigenproblem. + // Extract the eigenvector from the linearized problem and compute the eigenpair + // residuals: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for eigenvalue λ. for (int i = 0; i < nev; i++) { - petsc::PetscParVector w = W->GetColumn(i); - petsc::PetscParVector v = V->GetColumn(i); - ExtractEigenvector(eig[i], w, v); - W->RestoreColumn(i, w); - V->RestoreColumn(i, v); + const std::complex &l = eig.get()[i]; + ExtractEigenvector(l, W.get() + i * 2 * n, V.get() + i * n); + x1.Set(V.get() + i * n, n); + opK->Mult(x1, y1); + opC->AddMult(x1, y1, l); + opM->AddMult(x1, y1, l * l); + res.get()[i] = linalg::Norml2(comm, y1); } - // Cleanup auxiliary basis and residual vector. - delete W; - delete s; - // Reset for next solve. info = 0; - return nconv; + return num_conv; } -void ArpackPEPSolver::ApplyOp(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackPEPSolver::ApplyOp(const std::complex *px, + std::complex *py) const { // Case 1: No spectral transformation (opInv = M⁻¹) // y = L₁⁻¹ L₀ x . 
@@ -825,143 +724,107 @@ void ArpackPEPSolver::ApplyOp(const petsc::PetscParVector &x, // With: // L₀ = [ -K 0 ] L₁ = [ C M ] // [ 0 M ] , [ M 0 ] . - PetscScalar *px = GetBlocks(const_cast(x), *x1, *x2); - PetscScalar *py = GetBlocks(y, *y1, *y2); + x1.Set(px, n); + x2.Set(px + n, n); if (!sinvert) { - opC->Mult(*x2, *z); - z->Scale(gamma); - opK->MultAdd(*x1, *z); - opInv->Mult(*z, *y2); - y2->Scale(-1.0 / (gamma * gamma)); + y1 = x2; if (opProj) { - opProj->Mult(*y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); + opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); } - y1->Copy(*x2); + + opK->Mult(x1, z); + opC->AddMult(x2, z, std::complex(gamma, 0.0)); + opInv->Mult(z, y2); + y2 *= -1.0 / (gamma * gamma); if (opProj) { - opProj->Mult(*y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y2)); + opProj->Mult(y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y2)); } } else { - y1->AXPBYPCZ(sigma, *x1, gamma, *x2, 0.0); // Just temporarily - opM->Mult(*y1, *z); - opC->MultAdd(*x1, *z); - z->Scale(-gamma); - opInv->Mult(*z, *y1); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", y1->Norml2()); - + y2.AXPBYPCZ(sigma, x1, gamma, x2, 0.0); // Just temporarily + opM->Mult(y2, z); + opC->AddMult(x1, z, std::complex(1.0, 0.0)); + opInv->Mult(z, y1); + y1 *= -gamma; if (opProj) { - opProj->Mult(*y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); + opProj->Mult(y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y1)); } - // Debug - // Mpi::Print(" After projection: {:e}\n", y1->Norml2()); - - y2->AXPBYPCZ(sigma / gamma, *y1, 1.0, *x1, 0.0); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", y2->Norml2()); - + y2.AXPBYPCZ(sigma / gamma, y1, 1.0, x1, 0.0); if (opProj) { - opProj->Mult(*y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(comm, y2)); + opProj->Mult(y2); + // Mpi::Print(" Before 
projection: {:e}\n", linalg::Norml2(comm, y2)); } - - // Debug - // Mpi::Print(" After projection: {:e}\n", y2->Norml2()); } - RestoreBlocks(px, const_cast(x), *x1, *x2); - RestoreBlocks(py, y, *y1, *y2); + y1.Get(py, n); + y2.Get(py + n, n); } -void ArpackPEPSolver::ApplyOpB(const petsc::PetscParVector &x, - petsc::PetscParVector &y) const +void ArpackPEPSolver::ApplyOpB(const std::complex *px, + std::complex *py) const { MFEM_VERIFY(opB, "No B operator for weighted inner product in ARPACK solve!"); - PetscScalar *px = GetBlocks(const_cast(x), *x1, *x2); - PetscScalar *py = GetBlocks(y, *y1, *y2); - opB->Mult(*x1, *y1); - opB->Mult(*x2, *y2); - y1->Scale(delta * gamma * gamma); - y2->Scale(delta * gamma * gamma); - RestoreBlocks(px, const_cast(x), *x1, *x2); - RestoreBlocks(py, y, *y1, *y2); + x1.Set(px, n); + x2.Set(px + n, n); + opB->Mult(x1.Real(), y1.Real()); + opB->Mult(x1.Imag(), y1.Imag()); + opB->Mult(x2.Real(), y2.Real()); + opB->Mult(x2.Imag(), y2.Imag()); + y1 *= delta * gamma * gamma; + y2 *= delta * gamma * gamma; + y1.Get(py, n); + y2.Get(py + n, n); } -void ArpackPEPSolver::GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const -{ - // r = P(λ) x = (K + λ C + λ² M) x for eigenvalue λ. - opM->Mult(x, r); - r.Scale(l); - opC->MultAdd(x, r); - r.Scale(l); - opK->MultAdd(x, r); -} - -double ArpackPEPSolver::GetBackwardScaling(PetscScalar l) const +double ArpackPEPSolver::GetBackwardScaling(std::complex l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(comm, *opK, opK->IsReal()); } if (normC <= 0.0) { - normC = opC->Norm2(); + normC = linalg::SpectralNorm(comm, *opC, opC->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(comm, *opM, opM->IsReal()); } - double t = PetscAbsScalar(l); + double t = std::abs(l); return normK + t * normC + t * t * normM; } -void ArpackPEPSolver::ExtractEigenvector(PetscScalar l, petsc::PetscParVector &y, - petsc::PetscParVector &x) +void ArpackPEPSolver::ExtractEigenvector(std::complex l, + const std::complex *py, + std::complex *px) const { - // Select the most accurate x for y = [x₁; x₂] from the linearized eigenvalue problem. - PetscScalar *py = GetBlocks(y, *y1, *y2); + // Select the most accurate x for y = [x₁; x₂] from the linearized eigenvalue problem. Or, + // just take x = x₁. + x1.Set(py, n); + if (opB) { - if (opB) - { - y1->Normalize(*opB, *r); - } - else - { - y1->Normalize(); - } - x.Copy(*y1); + linalg::Normalize(comm, x1, *opB, y1); } - RestoreBlocks(py, y, *y1, *y2); -} - -PetscScalar *ArpackPEPSolver::GetBlocks(petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - PetscInt n1 = v1.GetSize(), n2 = v2.GetSize(); - MFEM_VERIFY(n1 + n2 == v.GetSize(), "Unexpected size in PEP linearization!"); - PetscScalar *pv = v.GetArray(); - v1.PlaceArray(pv); - v2.PlaceArray(pv + n1); - return pv; -} - -void ArpackPEPSolver::RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - v1.ResetArray(); - v2.ResetArray(); - v.RestoreArray(pv); + else + { + linalg::Normalize(comm, x1); + } + x1.Get(px, n); } } // namespace palace::arpack diff --git a/palace/linalg/arpack.hpp b/palace/linalg/arpack.hpp index aa80d1466..25771b735 100644 --- a/palace/linalg/arpack.hpp +++ b/palace/linalg/arpack.hpp @@ -6,20 +6,18 @@ #if defined(PALACE_WITH_ARPACK) -#include "linalg/petsc.hpp" - 
-#if !defined(PETSC_USE_COMPLEX) -#error "ARPACK interface requires PETSc built with complex scalars!" -#endif - -#include -#include "linalg/eigen.hpp" +#include +#include +#include +#include "linalg/eps.hpp" +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { class DivFreeSolver; -class KspSolver; namespace arpack { @@ -30,52 +28,55 @@ namespace arpack // used to compute interior eigenvalues. Currently only implemented for complex scalar // interface. // -class ArpackEigenSolver : public EigenSolverBase +class ArpackEigenvalueSolver : public EigenvalueSolver { protected: + // MPI communicator for PARPACK. + MPI_Comm comm; + // Control print level for debugging. int print; // Status variable for ARPACK. int info; - // Number eigenvalues to be computed, and dimension. - int nev, ncv; + // Number eigenvalues to be computed, subspace dimension, and problem size. + int nev, ncv, n; // Relative eigenvalue error convergence tolerance for the solver. double rtol; // Maximum number of Arnoldi update iterations. - int max_it; + int arpack_it; // Specifies which part of the spectrum to search for. - ::arpack::which which_option; + EigenvalueSolver::WhichType which_type; // Variables for scaling, from Higham et al., IJNME 2008. double gamma, delta; // Parameters defining the spectral transformation. - PetscScalar sigma; + std::complex sigma; bool sinvert; // Storage for computed eigenvalues. - PetscScalar *eig; - int *perm; + std::unique_ptr[]> eig; + std::unique_ptr perm; // Storage for Arnoldi basis vectors. - petsc::PetscDenseMatrix *V; + std::unique_ptr[]> V; // Storage for computed residual norms. - mutable double *res; + std::unique_ptr res; // On input used to define optional initial guess, on output stores final residual // vector. 
- mutable petsc::PetscParVector *r; + std::unique_ptr[]> r; // Reference to linear solver used for operator action for M⁻¹ (with no spectral // transformation) or (K - σ M)⁻¹ (generalized EVP with shift-and- invert) or P(σ)⁻¹ // (polynomial with shift-and-invert) (not owned). - const KspSolver *opInv; + const ComplexKspSolver *opInv; // Reference to solver for projecting an intermediate vector onto a divergence-free space // (not owned). @@ -83,180 +84,146 @@ class ArpackEigenSolver : public EigenSolverBase // Reference to matrix used for weighted inner products (not owned). May be nullptr, in // which case identity is used. - const petsc::PetscParMatrix *opB; + const Operator *opB; // Perform the ARPACK RCI loop. - int SolveInternal(petsc::PetscParVector &r_, petsc::PetscDenseMatrix &V_, - PetscScalar *eig_, int *perm_); + int SolveInternal(int n, std::complex *r, std::complex *V, + std::complex *eig, int *perm); - // Helper routines for parameter checking. + // Helper routine for parameter checking. void CheckParameters() const; - void CheckInfoAUPD(int info) const; - void CheckInfoEUPD(int info) const; // Helper routines for ARPACK RCI. - virtual void ApplyOp(const petsc::PetscParVector &x, petsc::PetscParVector &y) const = 0; - virtual void ApplyOpB(const petsc::PetscParVector &x, petsc::PetscParVector &y) const = 0; - - // Helper routine for computing the eigenpair residual. - virtual void GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const = 0; + virtual void ApplyOp(const std::complex *px, std::complex *py) const = 0; + virtual void ApplyOpB(const std::complex *px, std::complex *py) const = 0; // Helper routine for computing the backward error. - virtual double GetBackwardScaling(PetscScalar l) const = 0; + virtual double GetBackwardScaling(std::complex l) const = 0; // Return problem type name. 
virtual const char *GetName() const = 0; public: - ArpackEigenSolver(int print_lvl); - ~ArpackEigenSolver() override; + ArpackEigenvalueSolver(MPI_Comm comm, int print); // Set operators for the generalized eigenvalue problem or for the quadratic polynomial // eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; // For the linear generalized case, the linear solver should be configured to compute the // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic // case, the linear solver should be configured to compute the action of M⁻¹ (with no // spectral transformation) or P(σ)⁻¹. - void SetLinearSolver(const KspSolver &ksp) override; + void SetLinearSolver(const ComplexKspSolver &ksp) override; - // Set the projection operator for the divergence-free constraint. - void SetProjector(const DivFreeSolver &divfree) override; + // Set the projection operator for enforcing the divergence-free constraint. + void SetDivFreeProjector(const DivFreeSolver &divfree) override; // Set optional B matrix used for weighted inner products. This must be set explicitly // even for generalized problems, otherwise the identity will be used. - void SetBMat(const petsc::PetscParMatrix &B) override; + void SetBMat(const Operator &B) override; // Get scaling factors used by the solver. double GetScalingGamma() const override { return gamma; } double GetScalingDelta() const override { return delta; } // Set the number of required eigenmodes. 
- void SetNumModes(int numeig, int numvec = 0) override; + void SetNumModes(int num_eig, int num_vec = 0) override; // Set solver tolerance. void SetTol(double tol) override; // Set maximum number of Arnoldi update iterations. - void SetMaxIter(int maxits) override; + void SetMaxIter(int max_it) override; // Set target spectrum for the eigensolver. When a spectral transformation is used, this // applies to the spectrum of the shifted operator. void SetWhichEigenpairs(WhichType type) override; // Set shift-and-invert spectral transformation. - void SetShiftInvert(double tr, double ti, bool precond = false) override; + void SetShiftInvert(std::complex s, bool precond = false) override; // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; + void SetInitialSpace(const ComplexVector &v) override; // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override = 0; // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; + std::complex GetEigenvalue(int i) const override; // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &x) const override; + void GetEigenvector(int i, ComplexVector &x) const override; // Get the corresponding eigenpair error. - void GetError(int i, ErrorType type, double &err) const override; + double GetError(int i, ErrorType type) const override; }; // Generalized eigenvalue problem solver: K x = λ M x . -class ArpackEPSSolver : public ArpackEigenSolver +class ArpackEPSSolver : public ArpackEigenvalueSolver { private: // References to matrices defining the generalized eigenvalue problem (not owned). - const petsc::PetscParMatrix *opK, *opM; + const ComplexOperator *opK, *opM; // Operator norms for scaling. mutable double normK, normM; // Workspace vector for operator applications. 
- mutable petsc::PetscParVector *z; + mutable ComplexVector x, y, z; protected: - // Helper routines for ARPACK RCI interface. - void ApplyOp(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; - void ApplyOpB(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; - - // Helper routine for computing the eigenpair residual: r = (K - λ M) x . - void GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const override; + void ApplyOp(const std::complex *px, std::complex *py) const override; + void ApplyOpB(const std::complex *px, std::complex *py) const override; - // Helper routine for computing the backward error. - double GetBackwardScaling(PetscScalar l) const override; + double GetBackwardScaling(std::complex l) const override; - // Return problem type name. const char *GetName() const override { return "EPS"; } public: - ArpackEPSSolver(int print_lvl); - ~ArpackEPSSolver() override; + ArpackEPSSolver(MPI_Comm comm, int print); - // Set operators for the generalized eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; }; // Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 . -class ArpackPEPSolver : public ArpackEigenSolver +class ArpackPEPSolver : public ArpackEigenvalueSolver { private: // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; + const ComplexOperator *opK, *opC, *opM; // Operator norms for scaling. mutable double normK, normC, normM; // Workspace vectors for operator applications. 
- mutable petsc::PetscParVector *x1, *x2, *y1, *y2, *z; + mutable ComplexVector x1, x2, y1, y2, z; // Do eigenvector extraction from the linearized problem to the actual eigenvectors. - void ExtractEigenvector(PetscScalar l, petsc::PetscParVector &y, - petsc::PetscParVector &x); - - // Helper methods for splitting a block vector from the linearized problem into its into - // two parts. - PetscScalar *GetBlocks(petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - void RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; + void ExtractEigenvector(std::complex l, const std::complex *py, + std::complex *px) const; protected: - // Helper routines for ARPACK RCI interface. - void ApplyOp(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; - void ApplyOpB(const petsc::PetscParVector &x, petsc::PetscParVector &y) const override; + void ApplyOp(const std::complex *px, std::complex *py) const override; + void ApplyOpB(const std::complex *px, std::complex *py) const override; - // Helper routine for computing the eigenpair residual: r = P(λ) x . - void GetResidual(PetscScalar l, const petsc::PetscParVector &x, - petsc::PetscParVector &r) const override; + double GetBackwardScaling(std::complex l) const override; - // Helper routine for computing the backward error. - double GetBackwardScaling(PetscScalar l) const override; - - // Return problem type name. const char *GetName() const override { return "PEP"; } public: - ArpackPEPSolver(int print_lvl); - ~ArpackPEPSolver() override; + ArpackPEPSolver(MPI_Comm comm, int print); - // Set operators for the quadratic polynomial eigenvalue problem. 
- void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; }; diff --git a/palace/linalg/chebyshev.cpp b/palace/linalg/chebyshev.cpp index 0e9a4f487..446c652b5 100644 --- a/palace/linalg/chebyshev.cpp +++ b/palace/linalg/chebyshev.cpp @@ -5,8 +5,7 @@ #include #include -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/rap.hpp" namespace palace { @@ -14,159 +13,252 @@ namespace palace namespace { -using mfem::ForallWrap; - -class SymmetricScaledOperator : public mfem::Operator +void GetInverseDiagonal(const ParOperator &A, Vector &dinv) { -private: - const mfem::Operator &A; - const mfem::Vector &d; - mutable mfem::Vector z; + dinv.SetSize(A.Height()); + A.AssembleDiagonal(dinv); + dinv.Reciprocal(); +} -public: - SymmetricScaledOperator(const mfem::Operator &op, const mfem::Vector &v) - : mfem::Operator(op.Height()), A(op), d(v), z(v.Size()) - { - } +void GetInverseDiagonal(const ComplexParOperator &A, Vector &dinv) +{ + MFEM_VERIFY(A.HasReal() && !A.HasImag(), + "ComplexOperator for ChebyshevSmoother must be real-valued for now!"); + dinv.SetSize(A.Height()); + A.Real()->AssembleDiagonal(dinv); + dinv.Reciprocal(); + // MFEM_VERIFY(A.HasReal() || A.HasImag(), + // "Invalid zero ComplexOperator for ChebyshevSmoother!"); + // dinv.SetSize(A.Height()); + // dinv.SetSize(A.Height()); + // dinv = 0.0; + // if (A.HasReal()) + // { + // A.Real()->AssembleDiagonal(dinv.Real()); + // } + // if (A.HasImag()) + // { + // A.Imag()->AssembleDiagonal(dinv.Imag()); + // } + // dinv.Reciprocal(); +} - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - A.Mult(x, z); - { - const int N = height; - const auto *D = d.Read(); - const 
auto *Z = z.Read(); - auto *Y = y.Write(); - MFEM_FORALL(i, N, { Y[i] = D[i] * Z[i]; }); - } - } +double GetLambdaMax(MPI_Comm comm, const Operator &A, const Vector &dinv) +{ + DiagonalOperator Dinv(dinv); + ProductOperator DinvA(Dinv, A); + return linalg::SpectralNorm(comm, DinvA, false); +} - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - { - const int N = height; - const auto *D = d.Read(); - const auto *X = x.Read(); - auto *Z = z.Write(); - MFEM_FORALL(i, N, { Z[i] = D[i] * X[i]; }); - } - A.Mult(z, y); - } -}; +double GetLambdaMax(MPI_Comm comm, const ComplexOperator &A, const Vector &dinv) +{ + MFEM_VERIFY(A.HasReal() && !A.HasImag(), + "ComplexOperator for ChebyshevSmoother must be real-valued for now!"); + DiagonalOperator Dinv(dinv); + ProductOperator DinvA(Dinv, *A.Real()); + return linalg::SpectralNorm(comm, DinvA, false); +} } // namespace -ChebyshevSmoother::ChebyshevSmoother(MPI_Comm c, const mfem::Array &tdof_list, - int smooth_it, int poly_order) - : comm(c), A(nullptr), dbc_tdof_list(tdof_list), pc_it(smooth_it), order(poly_order) +template +ChebyshevSmoother::ChebyshevSmoother(int smooth_it, int poly_order) + : Solver(), pc_it(smooth_it), order(poly_order), A(nullptr) { } -void ChebyshevSmoother::SetOperator(const mfem::Operator &op) +template +void ChebyshevSmoother::SetOperator(const OperType &op) { + using ParOperType = + typename std::conditional::value, + ComplexParOperator, ParOperator>::type; + A = &op; - height = A->Height(); - width = A->Width(); - - // Configure symmetric diagonal scaling. 
- const int N = height; - dinv.SetSize(N); - mfem::Vector diag(N); - A->AssembleDiagonal(diag); - const auto *D = diag.Read(); - auto *DI = dinv.Write(); - MFEM_FORALL(i, N, { - MFEM_ASSERT_KERNEL(D[i] != 0.0, "Zero diagonal entry in Chebyshev smoother!"); - DI[i] = 1.0 / D[i]; - }); - const auto *I = dbc_tdof_list.Read(); - MFEM_FORALL(i, dbc_tdof_list.Size(), { - DI[I[i]] = 1.0; // Assumes operator DiagonalPolicy::ONE - }); + r.SetSize(op.Height()); + d.SetSize(op.Height()); + + const auto *PtAP = dynamic_cast(&op); + MFEM_VERIFY(PtAP, + "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!"); + GetInverseDiagonal(*PtAP, dinv); // Set up Chebyshev coefficients using the computed maximum eigenvalue estimate. See // mfem::OperatorChebyshevSmoother or Adams et al., Parallel multigrid smoothing: // polynomial versus Gauss-Seidel, JCP (2003). - petsc::PetscShellMatrix DinvA(comm, std::make_unique(*A, dinv)); - lambda_max = 1.1 * DinvA.Norm2(); + lambda_max = 1.01 * GetLambdaMax(PtAP->GetComm(), *A, dinv); +} + +namespace +{ + +template +inline void ApplyOp(const Operator &A, const Vector &x, Vector &y) +{ + A.Mult(x, y); } -void ChebyshevSmoother::ArrayMult(const mfem::Array &X, - mfem::Array &Y) const +template +inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y) { - // Initialize. 
- const int nrhs = X.Size(); - mfem::Array R(nrhs), D(nrhs); - std::vector rrefs(nrhs), drefs(nrhs); - if (nrhs * height != r.Size()) + if constexpr (!Transpose) { - r.SetSize(nrhs * height); - d.SetSize(nrhs * height); + A.Mult(x, y); } - for (int j = 0; j < nrhs; j++) + else { - rrefs[j].MakeRef(r, j * height, height); - drefs[j].MakeRef(d, j * height, height); - R[j] = &rrefs[j]; - D[j] = &drefs[j]; + A.MultHermitianTranspose(x, y); } +} + +template +inline void ApplyOp(const Operator &A, const Vector &x, Vector &y, const double a) +{ + A.AddMult(x, y, a); +} +template +inline void ApplyOp(const ComplexOperator &A, const ComplexVector &x, ComplexVector &y, + const double a) +{ + if constexpr (!Transpose) + { + A.AddMult(x, y, a); + } + else + { + A.AddMultHermitianTranspose(x, y, a); + } +} + +template +inline void ApplyOrder0(double sr, const Vector &dinv, const Vector &r, Vector &d) +{ + const int N = d.Size(); + const auto *DI = dinv.Read(); + const auto *R = r.Read(); + auto *D = d.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sr * DI[i] * R[i]; }); +} + +template +inline void ApplyOrder0(const double sr, const Vector &dinv, const ComplexVector &r, + ComplexVector &d) +{ + const int N = dinv.Size(); + // const auto *DIR = dinv.Real().Read(); + // const auto *DII = dinv.Imag().Read(); + const auto *DIR = dinv.Read(); + const auto *RR = r.Real().Read(); + const auto *RI = r.Imag().Read(); + auto *DR = d.Real().ReadWrite(); + auto *DI = d.Imag().ReadWrite(); + if constexpr (!Transpose) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + // DR[i] = sr * (DIR[i] * RR[i] - DII[i] * RI[i]); + // DI[i] = sr * (DII[i] * RR[i] + DIR[i] * RI[i]); + DR[i] = sr * DIR[i] * RR[i]; + DI[i] = sr * DIR[i] * RI[i]; + }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + // DR[i] = sr * (DIR[i] * RR[i] + DII[i] * RI[i]); + // DI[i] = sr * (-DII[i] * RR[i] + DIR[i] * RI[i]); + DR[i] = sr * DIR[i] * RR[i]; + DI[i] = sr * DIR[i] * 
RI[i]; + }); + } +} + +template +inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv, + const Vector &r, Vector &d) +{ + const int N = dinv.Size(); + const auto *DI = dinv.Read(); + const auto *R = r.Read(); + auto *D = d.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { D[i] = sd * D[i] + sr * DI[i] * R[i]; }); +} + +template +inline void ApplyOrderK(const double sd, const double sr, const Vector &dinv, + const ComplexVector &r, ComplexVector &d) +{ + const int N = dinv.Size(); + // const auto *DIR = dinv.Real().Read(); + // const auto *DII = dinv.Imag().Read(); + const auto *DIR = dinv.Read(); + const auto *RR = r.Real().Read(); + const auto *RI = r.Imag().Read(); + auto *DR = d.Real().ReadWrite(); + auto *DI = d.Imag().ReadWrite(); + if constexpr (!Transpose) + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + // DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] - DII[i] * RI[i]); + // DI[i] = sd * DI[i] + sr * (DII[i] * RR[i] + DIR[i] * RI[i]); + DR[i] = sd * DR[i] + sr * DIR[i] * RR[i]; + DI[i] = sd * DI[i] + sr * DIR[i] * RI[i]; + }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + // DR[i] = sd * DR[i] + sr * (DIR[i] * RR[i] + DII[i] * RI[i]); + // DI[i] = sd * DI[i] + sr * (-DII[i] * RR[i] + DIR[i] * RI[i]); + DR[i] = sd * DR[i] + sr * DIR[i] * RR[i]; + DI[i] = sd * DI[i] + sr * DIR[i] * RI[i]; + }); + } +} + +} // namespace + +template +void ChebyshevSmoother::Mult(const VecType &x, VecType &y) const +{ // Apply smoother: y = y + p(A) (x - A y) . 
for (int it = 0; it < pc_it; it++) { - if (iterative_mode || it > 0) + if (this->initial_guess || it > 0) { - A->ArrayMult(Y, R); - for (int j = 0; j < nrhs; j++) - { - subtract(*X[j], *R[j], *R[j]); - } + ApplyOp(*A, y, r); + linalg::AXPBY(1.0, x, -1.0, r); } else { - for (int j = 0; j < nrhs; j++) - { - *R[j] = *X[j]; - *Y[j] = 0.0; - } + r = x; + y = 0.0; } - // 4th-kind Chebyshev smoother - { - const auto *DI = dinv.Read(); - for (int j = 0; j < nrhs; j++) - { - const auto *RR = R[j]->Read(); - auto *DD = D[j]->ReadWrite(); - MFEM_FORALL(i, height, { DD[i] = 4.0 / (3.0 * lambda_max) * DI[i] * RR[i]; }); - } - } + // 4th-kind Chebyshev smoother, from Phillips and Fischer or Lottes (with k -> k + 1 + // shift due to 1-based indexing). + ApplyOrder0(4.0 / (3.0 * lambda_max), dinv, r, d); for (int k = 1; k < order; k++) { - A->ArrayAddMult(D, R, -1.0); - { - // From Phillips and Fischer or Lottes (with k -> k + 1 shift due to 1-based - // indexing) - const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); - const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); - const auto *DI = dinv.Read(); - for (int j = 0; j < nrhs; j++) - { - const auto *RR = R[j]->Read(); - auto *YY = Y[j]->ReadWrite(); - auto *DD = D[j]->ReadWrite(); - MFEM_FORALL(i, height, { - YY[i] += DD[i]; - DD[i] = sd * DD[i] + sr * DI[i] * RR[i]; - }); - } - } - } - for (int j = 0; j < nrhs; j++) - { - *Y[j] += *D[j]; + y += d; + ApplyOp(*A, d, r, -1.0); + const double sd = (2.0 * k - 1.0) / (2.0 * k + 3.0); + const double sr = (8.0 * k + 4.0) / ((2.0 * k + 3.0) * lambda_max); + ApplyOrderK(sd, sr, dinv, r, d); } + y += d; } } +template class ChebyshevSmoother; +template class ChebyshevSmoother; + } // namespace palace diff --git a/palace/linalg/chebyshev.hpp b/palace/linalg/chebyshev.hpp index 202e607b0..47a8de3e3 100644 --- a/palace/linalg/chebyshev.hpp +++ b/palace/linalg/chebyshev.hpp @@ -4,7 +4,9 @@ #ifndef PALACE_LINALG_CHEBYSHEV_SMOOTHER_HPP #define 
PALACE_LINALG_CHEBYSHEV_SMOOTHER_HPP -#include +#include "linalg/operator.hpp" +#include "linalg/solver.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -16,54 +18,38 @@ namespace palace // Chebyshev smoothers and one-sided V-cycles, arXiv:2210.03179v1 (2022) for reference on // the 4th-kind Chebyshev polynomial smoother. // -class ChebyshevSmoother : public mfem::Solver +template +class ChebyshevSmoother : public Solver { -private: - // System matrix (not owned), its communicator, and list of eliminated degrees of freedom. - MPI_Comm comm; - const mfem::Operator *A; - const mfem::Array dbc_tdof_list; + using VecType = typename Solver::VecType; +private: // Number of smoother iterations and polynomial order. const int pc_it, order; - // Diagonal scaling of the operator. - mfem::Vector dinv; + // System matrix (not owned). + const OperType *A; + + // Inverse diagonal scaling of the operator (real-valued for now). + Vector dinv; // Maximum operator eigenvalue for Chebyshev polynomial smoothing. double lambda_max; // Temporary vectors for smoother application. 
- mutable mfem::Vector r, d; + mutable VecType r, d; public: - ChebyshevSmoother(MPI_Comm c, const mfem::Array &tdof_list, int smooth_it, - int poly_order); + ChebyshevSmoother(int smooth_it, int poly_order); - void SetOperator(const mfem::Operator &op) override; + void SetOperator(const OperType &op) override; - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMult(X, Y); - } + void Mult(const VecType &x, VecType &y) const override; - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override; - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override + void MultTranspose(const VecType &x, VecType &y) const override { Mult(x, y); // Assumes operator symmetry } - - void ArrayMultTranspose(const mfem::Array &X, - mfem::Array &Y) const override - { - ArrayMult(X, Y); // Assumes operator symmetry - } }; } // namespace palace diff --git a/palace/linalg/curlcurl.cpp b/palace/linalg/curlcurl.cpp index 15067d8e8..1e73ae34f 100644 --- a/palace/linalg/curlcurl.cpp +++ b/palace/linalg/curlcurl.cpp @@ -3,77 +3,82 @@ #include "curlcurl.hpp" +#include #include "fem/coefficient.hpp" #include "linalg/ams.hpp" #include "linalg/gmg.hpp" +#include "linalg/iterative.hpp" +#include "linalg/rap.hpp" #include "models/materialoperator.hpp" namespace palace { -CurlCurlSolver::CurlCurlSolver(const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, - double tol, int max_it, int print) - : mfem::Solver(nd_fespaces.GetFinestFESpace().GetTrueVSize()) +CurlCurlMassSolver::CurlCurlMassSolver( + const MaterialOperator &mat_op, mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &nd_dbc_tdof_lists, + const std::vector> &h1_dbc_tdof_lists, double tol, int max_it, + int print) { - 
MaterialPropertyCoefficient muinv_func(mat_op); - MaterialPropertyCoefficient epsilon_func(mat_op); - MFEM_VERIFY(dbc_marker.Size() == - nd_fespaces.GetFinestFESpace().GetParMesh()->bdr_attributes.Max(), - "Invalid boundary marker for curl-curl solver!"); - for (int s = 0; s < 2; s++) + constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + constexpr auto MatTypeEps = MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient muinv_func(mat_op); + MaterialPropertyCoefficient epsilon_func(mat_op); { - auto &A_ = (s == 0) ? A : AuxA; - A_.reserve(nd_fespaces.GetNumLevels()); - for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) + auto A_mg = std::make_unique(nd_fespaces.GetNumLevels()); + for (int s = 0; s < 2; s++) { - auto &fespace_l = - (s == 0) ? nd_fespaces.GetFESpaceAtLevel(l) : h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - mfem::ParBilinearForm a(&fespace_l); - if (s == 1) - { - a.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); - } - else + auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; + auto &dbc_tdof_lists = (s == 0) ? nd_dbc_tdof_lists : h1_dbc_tdof_lists; + for (int l = 0; l < fespaces.GetNumLevels(); l++) { - a.AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); - a.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + auto &fespace_l = fespaces.GetFESpaceAtLevel(l); + auto a = std::make_unique(&fespace_l); + if (s == 0) + { + a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); + a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + } + else + { + a->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + } + // XX TODO: Partial assembly option? 
+ a->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + a->Assemble(0); + a->Finalize(0); + auto A_l = std::make_unique(std::move(a), fespace_l); + A_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + if (s == 0) + { + A_mg->AddOperator(std::move(A_l)); + } + else + { + A_mg->AddAuxiliaryOperator(std::move(A_l)); + } } - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(); - a.Finalize(); - mfem::HypreParMatrix *hA = a.ParallelAssemble(); - hA->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - A_.emplace_back(hA); } + A = std::move(A_mg); } - // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, - // we don't use an exact solve on the coarsest level. - auto ams = std::make_unique(nd_fespaces.GetFESpaceAtLevel(0), - &h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, - false, false, 0); - auto gmg = std::make_unique(std::move(ams), dbc_marker, - nd_fespaces, &h1_fespaces, 1, 1, 2); - gmg->SetOperator(A, &AuxA); - pc = std::move(gmg); + // The system matrix K + M is real and SPD. We use Hypre's AMS solver as the coarse-level + // multigrid solve. 
+ auto ams = std::make_unique>(std::make_unique( + nd_fespaces.GetFESpaceAtLevel(0), h1_fespaces.GetFESpaceAtLevel(0), 1, 1, 1, false, + false, 0)); + auto gmg = std::make_unique>( + std::move(ams), nd_fespaces, &h1_fespaces, 1, 1, 2); - ksp = std::make_unique(nd_fespaces.GetFinestFESpace().GetComm()); - ksp->SetRelTol(tol); - ksp->SetMaxIter(max_it); - ksp->SetPrintLevel(print); - ksp->SetOperator(*A.back()); - ksp->SetPreconditioner(*pc); + auto pcg = + std::make_unique>(nd_fespaces.GetFinestFESpace().GetComm(), print); + pcg->SetInitialGuess(false); + pcg->SetRelTol(tol); + pcg->SetMaxIter(max_it); - xr.SetSize(height); - xi.SetSize(height); - yr.SetSize(height); - yi.SetSize(height); + ksp = std::make_unique(std::move(pcg), std::move(gmg)); + ksp->SetOperators(*A, *A); } } // namespace palace diff --git a/palace/linalg/curlcurl.hpp b/palace/linalg/curlcurl.hpp index a71e25257..f36280d5c 100644 --- a/palace/linalg/curlcurl.hpp +++ b/palace/linalg/curlcurl.hpp @@ -5,8 +5,19 @@ #define PALACE_LINALG_CURL_CURL_HPP #include -#include -#include "linalg/petsc.hpp" +#include +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem namespace palace { @@ -14,40 +25,34 @@ namespace palace class MaterialOperator; // -// This solver implements a solver for the operator K + M in the Nedelec space. +// This solver implements a solver for the operator K + M in a Nedelec space. // -class CurlCurlSolver : public mfem::Solver +class CurlCurlMassSolver { private: - // H(curl) norm operator A = K + M. - std::vector> A, AuxA; - - // Linear solver and preconditioner for the linear system A y = x; - std::unique_ptr ksp; - std::unique_ptr pc; + // H(curl) norm operator A = K + M and its projection Gᵀ A G. + std::unique_ptr A; - // Workspace objects for solver application. 
- mutable mfem::Vector xr, xi, yr, yi; + // Linear solver for the linear system A y = x; + std::unique_ptr ksp; public: - CurlCurlSolver(const MaterialOperator &mat_op, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, int max_it, - int print); + CurlCurlMassSolver(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpaceHierarchy &nd_fespaces, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &nd_dbc_tdof_lists, + const std::vector> &h1_dbc_tdof_lists, double tol, + int max_it, int print); + + const Operator &GetOperator() { return *A; } - // Operator is set in constructor. - void SetOperator(const mfem::Operator &op) override {} + void Mult(const Vector &x, Vector &y) const { ksp->Mult(x, y); } - // Application of the solver. - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { ksp->Mult(x, y); } - void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const + void Mult(const ComplexVector &x, ComplexVector &y) { - x.GetToVectors(xr, xi); - Mult(xr, yr); - Mult(xi, yi); - y.SetFromVectors(yr, yi); + Mult(x.Real(), y.Real()); + Mult(x.Imag(), y.Imag()); } - using mfem::Operator::Mult; }; } // namespace palace diff --git a/palace/linalg/distrelaxation.cpp b/palace/linalg/distrelaxation.cpp index 06f09c222..4e4cd0e5d 100644 --- a/palace/linalg/distrelaxation.cpp +++ b/palace/linalg/distrelaxation.cpp @@ -3,53 +3,149 @@ #include "distrelaxation.hpp" +#include +#include #include "linalg/chebyshev.hpp" +#include "linalg/rap.hpp" namespace palace { -DistRelaxationSmoother::DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - const mfem::Array &dbc_marker, - int smooth_it, int cheby_smooth_it, - int cheby_order) - : mfem::Solver(), A(nullptr), A_G(nullptr), pc_it(smooth_it) +template +DistRelaxationSmoother::DistRelaxationSmoother( + mfem::ParFiniteElementSpace 
&nd_fespace, mfem::ParFiniteElementSpace &h1_fespace, + int smooth_it, int cheby_smooth_it, int cheby_order) + : Solver(), pc_it(smooth_it), A(nullptr), A_G(nullptr), dbc_tdof_list_G(nullptr) { // Construct discrete gradient matrix for the auxiliary space. { - mfem::ParDiscreteLinearOperator grad(&h1_fespace, &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - G.reset(grad.ParallelAssemble()); + // XX TODO: Partial assembly option? + auto grad = std::make_unique(&h1_fespace, &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + grad->Assemble(); + grad->Finalize(); + G = std::make_unique(std::move(grad), h1_fespace, nd_fespace, true); + // ParOperator RAP_G(std::move(grad), h1_fespace, nd_fespace, true); + // G = RAP_G.StealParallelAssemble(); } // Initialize smoothers. - mfem::Array nd_dbc_tdof_list; - nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); - h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); - B = std::make_unique(nd_fespace.GetComm(), nd_dbc_tdof_list, - cheby_smooth_it, cheby_order); - B_G = std::make_unique(h1_fespace.GetComm(), h1_dbc_tdof_list, - cheby_smooth_it, cheby_order); - B_G->iterative_mode = false; + B = std::make_unique>(cheby_smooth_it, cheby_order); + B_G = std::make_unique>(cheby_smooth_it, cheby_order); + B_G->SetInitialGuess(false); } -void DistRelaxationSmoother::SetOperator(const mfem::Operator &op, - const mfem::Operator &op_G) +template +void DistRelaxationSmoother::SetOperators(const OperType &op, + const OperType &op_G) { + using ParOperType = + typename std::conditional::value, + ComplexParOperator, ParOperator>::type; + + MFEM_VERIFY(op.Height() == G->Height() && op.Width() == G->Height() && + op_G.Height() == G->Width() && op_G.Width() == G->Width(), + "Invalid operator sizes for 
DistRelaxationSmoother!"); A = &op; A_G = &op_G; - MFEM_VERIFY(A->Height() == G->Height() && A->Width() == G->Height() && - A_G->Height() == G->Width() && A_G->Width() == G->Width(), - "Invalid operator sizes for DistRelaxationSmoother!"); - height = A->Height(); - width = A->Width(); + + const auto *PtAP_G = dynamic_cast(&op_G); + MFEM_VERIFY(PtAP_G, + "ChebyshevSmoother requires a ParOperator or ComplexParOperator operator!"); + dbc_tdof_list_G = PtAP_G->GetEssentialTrueDofs(); + + r.SetSize(op.Height()); + x_G.SetSize(op_G.Height()); + y_G.SetSize(op_G.Height()); // Set up smoothers for A and A_G. - B->SetOperator(*A); - B_G->SetOperator(*A_G); + B->SetOperator(op); + B_G->SetOperator(op_G); } +namespace +{ + +inline void RealAddMult(const Operator &op, const Vector &x, Vector &y) +{ + op.AddMult(x, y, 1.0); +} + +inline void RealAddMult(const Operator &op, const ComplexVector &x, ComplexVector &y) +{ + op.AddMult(x.Real(), y.Real(), 1.0); + op.AddMult(x.Imag(), y.Imag(), 1.0); +} + +inline void RealMultTranspose(const Operator &op, const Vector &x, Vector &y) +{ + op.MultTranspose(x, y); +} + +inline void RealMultTranspose(const Operator &op, const ComplexVector &x, ComplexVector &y) +{ + op.MultTranspose(x.Real(), y.Real()); + op.MultTranspose(x.Imag(), y.Imag()); +} + +} // namespace + +template +void DistRelaxationSmoother::Mult(const VecType &x, VecType &y) const +{ + // Apply smoother. + for (int it = 0; it < pc_it; it++) + { + // y = y + B (x - A y) + B->SetInitialGuess(this->initial_guess || it > 0); + B->Mult(x, y); + + // y = y + G B_G Gᵀ (x - A y) + A->Mult(y, r); + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*G, r, x_G); + if (dbc_tdof_list_G) + { + linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); + } + B_G->Mult(x_G, y_G); + RealAddMult(*G, y_G, y); + } +} + +template +void DistRelaxationSmoother::MultTranspose(const VecType &x, VecType &y) const +{ + // Apply transpose. 
+ B->SetInitialGuess(true); + for (int it = 0; it < pc_it; it++) + { + // y = y + G B_Gᵀ Gᵀ (x - A y) + if (this->initial_guess || it > 0) + { + A->Mult(y, r); + linalg::AXPBY(1.0, x, -1.0, r); + RealMultTranspose(*G, r, x_G); + } + else + { + y = 0.0; + RealMultTranspose(*G, x, x_G); + } + if (dbc_tdof_list_G) + { + linalg::SetSubVector(x_G, *dbc_tdof_list_G, 0.0); + } + B_G->MultTranspose(x_G, y_G); + RealAddMult(*G, y_G, y); + + // y = y + Bᵀ (x - A y) + B->MultTranspose(x, y); + } +} + +template class DistRelaxationSmoother; +template class DistRelaxationSmoother; + } // namespace palace diff --git a/palace/linalg/distrelaxation.hpp b/palace/linalg/distrelaxation.hpp index 2616bd17e..0e56cb786 100644 --- a/palace/linalg/distrelaxation.hpp +++ b/palace/linalg/distrelaxation.hpp @@ -5,8 +5,18 @@ #define PALACE_LINALG_DIST_RELAXATION_SMOOTHER_HPP #include -#include -#include +#include "linalg/operator.hpp" +#include "linalg/solver.hpp" +#include "linalg/vector.hpp" + +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpace; + +} // namespace mfem namespace palace { @@ -17,164 +27,44 @@ namespace palace // Reference: Hiptmair, Multigrid method for Maxwell's equations, SIAM J. Numer. Anal. // (1998). // -class DistRelaxationSmoother : public mfem::Solver +template +class DistRelaxationSmoother : public Solver { + using VecType = typename Solver::VecType; + private: - // System matrix and its projection G^T A G (not owned). - const mfem::Operator *A, *A_G; + // Number of smoother iterations. + const int pc_it; + + // System matrix and its projection GᵀAG (not owned). + const OperType *A, *A_G; + const mfem::Array *dbc_tdof_list_G; // Discrete gradient matrix. - std::unique_ptr G; + std::unique_ptr G; // Point smoother objects for each matrix. - mutable std::unique_ptr B; - std::unique_ptr B_G; + mutable std::unique_ptr> B; + std::unique_ptr> B_G; // Temporary vectors for smoother application. 
- mutable mfem::Vector r, x_G, y_G; - - // Dirichlet boundary conditions in the auxiliary space. - mfem::Array h1_dbc_tdof_list; - - // Number of smoother iterations. - const int pc_it; + mutable VecType r, x_G, y_G; public: DistRelaxationSmoother(mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - const mfem::Array &dbc_marker, int smooth_it, + mfem::ParFiniteElementSpace &h1_fespace, int smooth_it, int cheby_smooth_it, int cheby_order); - void SetOperator(const mfem::Operator &op) override + void SetOperator(const OperType &op) override { MFEM_ABORT("SetOperator with a single operator is not implemented for " "DistRelaxationSmoother, use the two argument signature instead!"); } + void SetOperators(const OperType &op, const OperType &op_G); - void SetOperator(const mfem::Operator &op, const mfem::Operator &op_G); + void Mult(const VecType &x, VecType &y) const override; - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMult(X, Y); - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMultTranspose(X, Y); - } - - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override - { - // Initialize. - const int nrhs = X.Size(); - mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); - std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); - if (nrhs * height != r.Size()) - { - r.SetSize(nrhs * height); - x_G.SetSize(nrhs * A_G->Height()); - y_G.SetSize(nrhs * A_G->Height()); - } - for (int j = 0; j < nrhs; j++) - { - rrefs[j].MakeRef(r, j * height, height); - xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); - ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); - R[j] = &rrefs[j]; - X_G[j] = &xgrefs[j]; - Y_G[j] = &ygrefs[j]; - } - - // Apply smoother. 
- for (int it = 0; it < pc_it; it++) - { - // y = y + B (x - A y) - B->iterative_mode = (iterative_mode || it > 0); - B->ArrayMult(X, Y); - - // y = y + G B_G Gᵀ (x - A y) - A->ArrayMult(Y, R); - for (int j = 0; j < nrhs; j++) - { - subtract(*X[j], *R[j], *R[j]); - } - G->ArrayMultTranspose(R, X_G); - for (int j = 0; j < nrhs; j++) - { - X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - } - B_G->ArrayMult(X_G, Y_G); - G->ArrayAddMult(Y_G, Y, 1.0); - } - } - - void ArrayMultTranspose(const mfem::Array &X, - mfem::Array &Y) const override - { - // Initialize. - const int nrhs = X.Size(); - mfem::Array R(nrhs), X_G(nrhs), Y_G(nrhs); - std::vector rrefs(nrhs), xgrefs(nrhs), ygrefs(nrhs); - if (nrhs * height != r.Size()) - { - r.SetSize(nrhs * height); - x_G.SetSize(nrhs * A_G->Height()); - y_G.SetSize(nrhs * A_G->Height()); - } - for (int j = 0; j < nrhs; j++) - { - rrefs[j].MakeRef(r, j * height, height); - xgrefs[j].MakeRef(x_G, j * A_G->Height(), A_G->Height()); - ygrefs[j].MakeRef(y_G, j * A_G->Height(), A_G->Height()); - R[j] = &rrefs[j]; - X_G[j] = &xgrefs[j]; - Y_G[j] = &ygrefs[j]; - } - - // Apply transpose. 
- B->iterative_mode = true; - for (int it = 0; it < pc_it; it++) - { - // y = y + G B_Gᵀ Gᵀ (x - A y) - if (iterative_mode || it > 0) - { - A->ArrayMult(Y, R); - for (int j = 0; j < nrhs; j++) - { - subtract(*X[j], *R[j], *R[j]); - } - G->ArrayMultTranspose(R, X_G); - for (int j = 0; j < nrhs; j++) - { - X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - } - B_G->ArrayMultTranspose(X_G, Y_G); - G->ArrayAddMult(Y_G, Y, 1.0); - } - else - { - G->ArrayMultTranspose(X, X_G); - for (int j = 0; j < nrhs; j++) - { - X_G[j]->SetSubVector(h1_dbc_tdof_list, 0.0); - } - B_G->ArrayMultTranspose(X_G, Y_G); - G->ArrayMult(Y_G, Y); - } - - // y = y + Bᵀ (x - A y) - B->ArrayMultTranspose(X, Y); - } - } + void MultTranspose(const VecType &x, VecType &y) const override; }; } // namespace palace diff --git a/palace/linalg/divfree.cpp b/palace/linalg/divfree.cpp index 8c63d84a0..472e1674a 100644 --- a/palace/linalg/divfree.cpp +++ b/palace/linalg/divfree.cpp @@ -4,81 +4,86 @@ #include "divfree.hpp" #include +#include #include "fem/coefficient.hpp" #include "linalg/amg.hpp" #include "linalg/gmg.hpp" +#include "linalg/iterative.hpp" +#include "linalg/rap.hpp" #include "models/materialoperator.hpp" namespace palace { DivFreeSolver::DivFreeSolver(const MaterialOperator &mat_op, - const mfem::Array &bdr_marker, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, - int max_it, int print) - : mfem::Solver(nd_fespace.GetTrueVSize()) + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &h1_bdr_tdof_lists, + double tol, int max_it, int print) { - MaterialPropertyCoefficient epsilon_func(mat_op); - MFEM_VERIFY(bdr_marker.Size() == - h1_fespaces.GetFinestFESpace().GetParMesh()->bdr_attributes.Max(), - "Invalid boundary marker for divergence-free solver!"); - M.reserve(h1_fespaces.GetNumLevels()); - for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) + constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_REAL; + 
MaterialPropertyCoefficient epsilon_func(mat_op); { - auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - h1_fespace_l.GetEssentialTrueDofs(bdr_marker, dbc_tdof_list_l); - - mfem::ParBilinearForm m(&h1_fespace_l); - m.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); - // m.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m.Assemble(); - m.Finalize(); - mfem::HypreParMatrix *hM = m.ParallelAssemble(); - hM->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - M.emplace_back(hM); + auto M_mg = std::make_unique(h1_fespaces.GetNumLevels()); + for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) + { + auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); + auto m = std::make_unique(&h1_fespace_l); + m->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + // XX TODO: Partial assembly option? + m->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + m->Assemble(0); + m->Finalize(0); + auto M_l = std::make_unique(std::move(m), h1_fespace_l); + M_l->SetEssentialTrueDofs(h1_bdr_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + M_mg->AddOperator(std::move(M_l)); + } + M = std::move(M_mg); } { - mfem::ParMixedBilinearForm weakDiv(&nd_fespace, &h1_fespaces.GetFinestFESpace()); - weakDiv.AddDomainIntegrator( + // XX TODO: Partial assembly option? 
+ auto weakDiv = std::make_unique( + &nd_fespace, &h1_fespaces.GetFinestFESpace()); + weakDiv->AddDomainIntegrator( new mfem::MixedVectorWeakDivergenceIntegrator(epsilon_func)); - // weakDiv.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - weakDiv.Assemble(); - weakDiv.Finalize(); - WeakDiv.reset(weakDiv.ParallelAssemble()); + weakDiv->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + weakDiv->Assemble(); + weakDiv->Finalize(); + WeakDiv = std::make_unique(std::move(weakDiv), nd_fespace, + h1_fespaces.GetFinestFESpace(), false); } { - mfem::ParDiscreteLinearOperator grad(&h1_fespaces.GetFinestFESpace(), &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - Grad.reset(grad.ParallelAssemble()); + // XX TODO: Partial assembly option? + auto grad = std::make_unique( + &h1_fespaces.GetFinestFESpace(), &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + grad->Assemble(); + grad->Finalize(); + Grad = std::make_unique(std::move(grad), h1_fespaces.GetFinestFESpace(), + nd_fespace, true); } - h1_fespaces.GetFinestFESpace().GetEssentialTrueDofs(bdr_marker, h1_bdr_tdof_list); + bdr_tdof_list_M = &h1_bdr_tdof_lists.back(); // The system matrix for the projection is real and SPD. For the coarse-level AMG solve, // we don't use an exact solve on the coarsest level. 
- auto amg = std::make_unique(); - amg->SetCoarseRelaxType(8); - auto gmg = std::make_unique(std::move(amg), bdr_marker, - h1_fespaces, nullptr, 1, 1, 2); - gmg->SetOperator(M); - pc = std::move(gmg); + auto amg = + std::make_unique>(std::make_unique(1, 1, 0)); + auto gmg = std::make_unique>( + std::move(amg), h1_fespaces, nullptr, 1, 1, 2); + + auto pcg = + std::make_unique>(h1_fespaces.GetFinestFESpace().GetComm(), print); + pcg->SetInitialGuess(false); + pcg->SetRelTol(tol); + pcg->SetAbsTol(std::numeric_limits::epsilon()); + pcg->SetMaxIter(max_it); - ksp = std::make_unique(h1_fespaces.GetFinestFESpace().GetComm()); - ksp->SetRelTol(tol); - ksp->SetAbsTol(std::numeric_limits::epsilon()); - ksp->SetMaxIter(max_it); - ksp->SetPrintLevel(print); - ksp->SetOperator(*M.back()); - ksp->SetPreconditioner(*pc); + ksp = std::make_unique(std::move(pcg), std::move(gmg)); + ksp->SetOperators(*M, *M); psi.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); rhs.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); - xr.SetSize(height); - xi.SetSize(height); } } // namespace palace diff --git a/palace/linalg/divfree.hpp b/palace/linalg/divfree.hpp index 69e2b3693..43a81a39f 100644 --- a/palace/linalg/divfree.hpp +++ b/palace/linalg/divfree.hpp @@ -5,8 +5,19 @@ #define PALACE_LINALG_DIV_FREE_HPP #include -#include -#include "linalg/petsc.hpp" +#include +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem namespace palace { @@ -18,66 +29,61 @@ class MaterialOperator; // where G represents the discrete gradient matrix with columns spanning the nullspace of // the curl-curl operator. // -class DivFreeSolver : public mfem::Solver +class DivFreeSolver { private: // Operators for the divergence-free projection. 
- std::unique_ptr WeakDiv, Grad; - std::vector> M; + std::unique_ptr WeakDiv, Grad, M; + const mfem::Array *bdr_tdof_list_M; - // Linear solver and preconditioner for the projected linear system (Gᵀ M G) y = x. - std::unique_ptr ksp; - std::unique_ptr pc; + // Linear solver for the projected linear system (Gᵀ M G) y = x. + std::unique_ptr ksp; // Workspace objects for solver application. - mutable mfem::Vector psi, rhs, xr, xi; - - // Boundary condition dofs for essential BCs. - mfem::Array h1_bdr_tdof_list; + mutable Vector psi, rhs; public: - DivFreeSolver(const MaterialOperator &mat_op, const mfem::Array &bdr_marker, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, double tol, int max_it, - int print); - - // Operator is set in constructor. - void SetOperator(const mfem::Operator &op) override {} + DivFreeSolver(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpaceHierarchy &h1_fespaces, + const std::vector> &h1_bdr_tdof_lists, double tol, + int max_it, int print); // Given a vector of Nedelec dofs for an arbitrary vector field, compute the Nedelec dofs // of the irrotational portion of this vector field. The resulting vector will satisfy - // ∇ x x = 0. - void Mult(mfem::Vector &x) const + // ∇ x y = 0. + void Mult(Vector &y) const { - // Compute the divergence of x. - WeakDiv->Mult(x, rhs); + // Compute the divergence of y. + WeakDiv->Mult(y, rhs); // Apply essential BC and solve the linear system. - psi = 0.0; - rhs.SetSubVector(h1_bdr_tdof_list, 0.0); + if (bdr_tdof_list_M) + { + linalg::SetSubVector(rhs, *bdr_tdof_list_M, 0.0); + } ksp->Mult(rhs, psi); - // Compute the irrotational portion of x and subtract. - Grad->AddMult(psi, x, 1.0); + // Compute the irrotational portion of y and subtract. 
+ Grad->AddMult(psi, y, 1.0); } - void Mult(const mfem::Vector &x, mfem::Vector &y) const override + + void Mult(const Vector &x, Vector &y) const { y = x; Mult(y); } - void Mult(petsc::PetscParVector &x) const + + void Mult(ComplexVector &y) const { - x.GetToVectors(xr, xi); - Mult(xr); - Mult(xi); - x.SetFromVectors(xr, xi); + Mult(y.Real()); + Mult(y.Imag()); } - void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const + + void Mult(const ComplexVector &x, ComplexVector &y) const { - y.Copy(x); + y = x; Mult(y); } - using mfem::Operator::Mult; }; } // namespace palace diff --git a/palace/linalg/eigen.hpp b/palace/linalg/eps.hpp similarity index 63% rename from palace/linalg/eigen.hpp rename to palace/linalg/eps.hpp index 3cddb3343..1754b19d8 100644 --- a/palace/linalg/eigen.hpp +++ b/palace/linalg/eps.hpp @@ -1,28 +1,24 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. // SPDX-License-Identifier: Apache-2.0 -#ifndef PALACE_LINALG_EIGEN_HPP -#define PALACE_LINALG_EIGEN_HPP +#ifndef PALACE_LINALG_EPS_HPP +#define PALACE_LINALG_EPS_HPP + +#include +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { class DivFreeSolver; -class KspSolver; - -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc // // Pure abstract base class for solving generalized linear eigenvalue problems problems or // quadratic polynomial eigenvalue problems. // -class EigenSolverBase +class EigenvalueSolver { public: enum class ScaleType @@ -51,65 +47,66 @@ class EigenSolverBase BACKWARD }; - EigenSolverBase() = default; - virtual ~EigenSolverBase() = default; +public: + EigenvalueSolver() = default; + virtual ~EigenvalueSolver() = default; // Set operators for the generalized eigenvalue problem or for the quadratic polynomial // eigenvalue problem. 
- virtual void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) = 0; - virtual void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) = 0; + virtual void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) = 0; // For the linear generalized case, the linear solver should be configured to compute the // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic // case, the linear solver should be configured to compute the action of M⁻¹ (with no // spectral transformation) or P(σ)⁻¹. - virtual void SetLinearSolver(const KspSolver &ksp) = 0; + virtual void SetLinearSolver(const ComplexKspSolver &ksp) = 0; - // Set the projection operator for the divergence-free constraint. - virtual void SetProjector(const DivFreeSolver &divfree) = 0; + // Set the projection operator for enforcing the divergence-free constraint. + virtual void SetDivFreeProjector(const DivFreeSolver &divfree) = 0; + + // Set optional B matrix used for weighted inner products. This must be set explicitly + // even for generalized problems, otherwise the identity will be used. + virtual void SetBMat(const Operator &B) = 0; // Get scaling factors used by the solver. virtual double GetScalingGamma() const = 0; virtual double GetScalingDelta() const = 0; // Set the number of required eigenmodes. - virtual void SetNumModes(int numeig, int numvec = 0) = 0; + virtual void SetNumModes(int num_eig, int num_vec = 0) = 0; // Set solver tolerance. virtual void SetTol(double tol) = 0; // Set maximum number of Arnoldi update iterations. - virtual void SetMaxIter(int maxits) = 0; + virtual void SetMaxIter(int max_it) = 0; // Set target spectrum for the eigensolver. 
When a spectral transformation is used, this // applies to the spectrum of the shifted operator. virtual void SetWhichEigenpairs(WhichType type) = 0; // Set shift-and-invert spectral transformation. - virtual void SetShiftInvert(double tr, double ti, bool precond = false) = 0; - - // Set optional B matrix used for weighted inner products. This must be set explicitly - // even for generalized problems, otherwise the identity will be used. - virtual void SetBMat(const petsc::PetscParMatrix &B) = 0; + virtual void SetShiftInvert(std::complex s, bool precond = false) = 0; // Set an initial vector for the solution subspace. - virtual void SetInitialSpace(const petsc::PetscParVector &v) = 0; + virtual void SetInitialSpace(const ComplexVector &v) = 0; // Solve the eigenvalue problem. Returns the number of converged eigenvalues. virtual int Solve() = 0; // Get the corresponding eigenvalue. - virtual void GetEigenvalue(int i, double &eigr, double &eigi) const = 0; + virtual std::complex GetEigenvalue(int i) const = 0; // Get the corresponding eigenvector. - virtual void GetEigenvector(int i, petsc::PetscParVector &x) const = 0; + virtual void GetEigenvector(int i, ComplexVector &x) const = 0; // Get the corresponding eigenpair error. - virtual void GetError(int i, ErrorType type, double &err) const = 0; + virtual double GetError(int i, ErrorType type) const = 0; }; } // namespace palace -#endif // PALACE_LINALG_EIGEN_HPP +#endif // PALACE_LINALG_EPS_HPP diff --git a/palace/linalg/feast.cpp b/palace/linalg/feast.cpp deleted file mode 100644 index 24d1d7a6c..000000000 --- a/palace/linalg/feast.cpp +++ /dev/null @@ -1,1294 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#include "feast.hpp" - -#if defined(PALACE_WITH_SLEPC) - -#include -#include -#include -#include -#include -#include -#include "linalg/divfree.hpp" -#include "linalg/ksp.hpp" -#include "linalg/pc.hpp" -#include "models/spaceoperator.hpp" -#include "utils/communication.hpp" -#include "utils/iodata.hpp" - -static PetscErrorCode __mat_apply_FEAST_EPS(Mat, Vec, Vec); -static PetscErrorCode __mat_apply_FEAST_PEP(Mat, Vec, Vec); - -namespace palace::feast -{ - -namespace internal -{ - -// Linear solver helper class - -class FeastLinearSolver -{ -public: - PetscScalar zk, wk; - KspSolver ksp; - KspPreconditioner pc; - const petsc::PetscParMatrix *opK, *opC, *opM; // Reference to EVP operators (not owned) - -private: - SpaceOperator &spaceop; // Reference to spatial discretization (not owned) - std::unique_ptr A; - std::vector> P, AuxP; - -public: - FeastLinearSolver(int k, MPI_Comm comm, const IoData &iodata, SpaceOperator &sp) - : zk(0.0), wk(0.0), ksp(comm, iodata, "ksp" + std::to_string(k + 1) + "_"), - pc(iodata, sp.GetDbcMarker(), sp.GetNDSpaces(), &sp.GetH1Spaces()), spaceop(sp) - { - ksp.SetTabLevel(1); - ksp.SetPrintOptions(false); - ksp.SetPreconditioner(pc); - opK = opC = opM = nullptr; - } - - void SetOperators(PetscScalar z, PetscScalar w, const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M) - { - zk = z; - wk = w; - opK = &K; - opM = &M; - { - Mat A_; - MPI_Comm comm = K.GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); - PalacePetscCall( - MatShellSetOperation(A_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_FEAST_EPS))); - A = std::make_unique(A_, false); // Inherits the PETSc Mat - ksp.SetOperator(*A); - } - const double sigma = PetscSqrtReal(PetscAbsScalar(zk)); - constexpr bool print = false; - spaceop.GetPreconditionerMatrix(sigma, P, AuxP, print); - pc.SetOperator(P, &AuxP); - } - - void 
SetOperators(PetscScalar z, PetscScalar w, const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, const petsc::PetscParMatrix &M, - KspPreconditioner *op = nullptr) - { - zk = z; - wk = w; - opK = &K; - opC = &C; - opM = &M; - { - Mat A_; - MPI_Comm comm = K.GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); - PalacePetscCall( - MatShellSetOperation(A_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_FEAST_PEP))); - A = std::make_unique(A_, false); // Inherits the PETSc Mat - ksp.SetOperator(*A); - } - const double sigma = PetscAbsScalar(zk); - constexpr bool print = false; - spaceop.GetPreconditionerMatrix(sigma, P, AuxP, print); - pc.SetOperator(P, &AuxP); - } - - void Mult(const PetscScalar *eig, const petsc::PetscDenseMatrix &X, - const petsc::PetscDenseMatrix &R, petsc::PetscDenseMatrix &Q, - petsc::PetscParVector &v, bool *converged, PetscReal gamma) const - { - // Solve P(zₖ) Qₖ = R, Q += wₖ (X - Qₖ) (zₖ I - Λ)⁻¹ (residual-inverse iteration). Note: - // Q may have g.t. m0 columns, but we just use the first m0 for the result (X should - // have exactly m0 columns). - PetscInt m0 = X.GetGlobalNumCols(); - PetscInt M = Q.GetGlobalNumCols() / (2 * m0); - MFEM_VERIFY(M == 1 || M == 2, - "FEAST eigensolver only supports up to 2 subspace moments!"); - for (PetscInt j = 0; j < m0; j++) - { - const petsc::PetscParVector x = X.GetColumnRead(j); - if (converged && converged[j]) - { - // When R[j] is converged, Q[j] += wₖ/(zₖ - λₖ) X[j] (with Qₖ[j] = 0) . 
- v.AXPBY(wk / (zk / gamma - eig[j]), x, 0.0); - } - else - { - const petsc::PetscParVector r = R.GetColumnRead(j); - ksp.Mult(r, v); - v.AXPBY(wk / (zk / gamma - eig[j]), x, -wk / (zk / gamma - eig[j])); - R.RestoreColumnRead(j, r); - } - X.RestoreColumnRead(j, x); - - petsc::PetscParVector q = Q.GetColumn(j); - q.AXPY(1.0, v); - Q.RestoreColumn(j, q); - if (M > 1) - { - petsc::PetscParVector q = Q.GetColumn(j + m0); - q.AXPY(zk / gamma, v); - Q.RestoreColumn(j + m0, q); - } - } - } - - PetscScalar Mult(const petsc::PetscDenseMatrix &X, petsc::PetscParVector &r, - petsc::PetscParVector &v) const - { - // Solve P(zₖ) Qₖ = P'(zₖ) X, sum += wₖ tr(Xᵀ Qₖ) for estimating the eigenvalue count - // inside of the contour. - PetscInt m0 = X.GetGlobalNumCols(); - PetscScalar sum = 0.0; - for (PetscInt j = 0; j < m0; j++) - { - const petsc::PetscParVector x = X.GetColumnRead(j); - opM->Mult(x, r); - if (opC) - { - r.Scale(zk); - opC->MultAdd(x, r); - } - ksp.Mult(r, v); - sum += x.TransposeDot(v); - X.RestoreColumnRead(j, x); - } - return wk * sum; - } -}; - -} // namespace internal - -// Base class methods - -FeastEigenSolver::FeastEigenSolver(MPI_Comm comm, const IoData &iodata, - SpaceOperator &spaceop, int np, int print_lvl) -{ - // Initialization. - print = print_lvl; - info = 0; - nev = m0 = mQ = 0; - M = iodata.solver.eigenmode.feast_moments; - MFEM_VERIFY(M == 1 || M == 2, - "FEAST eigensolver only supports up to 2 subspace moments!"); - rtol = 0.0; - max_it = 0; - gamma = delta = 1.0; - bl = tr = 0.0; - real_threshold = imag_threshold = false; - - eig = nullptr; - perm = nullptr; - X = nullptr; - res = nullptr; - r0 = nullptr; - opProj = nullptr; - opB = nullptr; - - // Construct the linear solvers for each quadrature point. 
- opInv.reserve(np); - for (int k = 0; k < np; k++) - { - opInv.emplace_back(k, comm, iodata, spaceop); - } -} - -FeastEigenSolver::~FeastEigenSolver() -{ - delete[] eig; - delete[] perm; - delete[] res; - delete X; - delete r0; -} - -void FeastEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_ABORT("SetOperators not defined for base class FeastEigenSolver!"); -} - -void FeastEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_ABORT("SetOperators not defined for base class FeastEigenSolver!"); -} - -void FeastEigenSolver::SetProjector(const DivFreeSolver &divfree) -{ - opProj = &divfree; -} - -void FeastEigenSolver::SetNumModes(int numeig, int numvec) -{ - if (nev > 0 && numeig != nev) - { - delete[] eig; - delete[] perm; - delete[] res; - eig = nullptr; - perm = nullptr; - res = nullptr; - } - if (m0 > 0 && numvec != m0) - { - delete X; - X = nullptr; - } - nev = numeig; - if (numvec > 0) - { - m0 = numvec; - } - else - { - if (nev <= 3) - { - m0 = std::max(nev + 2, 2 * nev); // Just a guess for subspace dimension - } - else - { - m0 = std::max(nev + 3, nev + (nev + 1) / 2); - } - } - mQ = 2 * M * m0; // Real-valued basis splitting leads to factor of 2 -} - -void FeastEigenSolver::SetTol(double tol) -{ - rtol = tol; -} - -void FeastEigenSolver::SetMaxIter(int maxits) -{ - max_it = maxits; -} - -void FeastEigenSolver::SetContour(double blr, double bli, double trr, double tri, - bool filter_small_real, bool filter_small_imag) -{ - MFEM_VERIFY(blr <= trr && bli <= tri, - "Integration contour must be defined by bottom-left and top-right " - "points in the complex plane!"); - bl = blr + PETSC_i * bli; - tr = trr + PETSC_i * tri; - real_threshold = filter_small_real; - imag_threshold = filter_small_imag; -} - -void FeastEigenSolver::SetBMat(const petsc::PetscParMatrix 
&B) -{ - opB = &B; -} - -void FeastEigenSolver::SetInitialSpace(const petsc::PetscParVector &v) -{ - if (!r0) - { - r0 = new petsc::PetscParVector(v); - } - else - { - MFEM_VERIFY(v.GetSize() == r0->GetSize(), - "Invalid modification of eigenvalue problem size!"); - r0->Copy(v); - } - info = 1; -} - -int FeastEigenSolver::SolveInternal(RG rg) -{ - // Allocate space for subspace and residuals. R is constructed with mQ columns for - // computing products of form R = A Q during projection. - MFEM_VERIFY(X && X->GetGlobalNumCols() == m0, - "Unexpected number of eigenvector columns in FEAST solver!"); - MPI_Comm comm = X->GetComm(); - PetscInt n = X->GetNumRows(); - petsc::PetscDenseMatrix R(comm, n, PETSC_DECIDE, PETSC_DECIDE, mQ, nullptr); - petsc::PetscDenseMatrix Q(comm, n, PETSC_DECIDE, PETSC_DECIDE, mQ, nullptr); - - // Allocate other workspace variables. - PetscInt *inside = new PetscInt[m0]; - bool *converged = new bool[m0]; - if (!eig) - { - eig = new PetscScalar[m0]; - perm = new PetscInt[m0]; - res = new PetscReal[m0]; - } - for (PetscInt j = 0; j < m0; j++) - { - res[j] = -1.0; - } - mfem::Vector qr(n), qi(n); - -#if 0 - // XX TODO: Stochastic estimates - bool est_stochastic = true; - if (est_stochastic) - { - X->SetRandomReal(0, m0); - if (info) - { - for (PetscInt j = 0; j < m0; j++) - { - // Ensure homogeneous Dirichlet BC are satisfied by the subspace. - petsc::PetscParVector x = X->GetColumn(j); - x.PointwiseMult(*r0, false); - X->RestoreColumn(j, x); - } - } - X->SetRandomSign(0, m0, true); - - PetscScalar sum = 0; - petsc::PetscParVector r = R.GetColumn(0); // Just for workspace - for (const auto &op : opInv) - { - sum += op.Mult(*X, r, *r0); - } - R.RestoreColumn(0, r); - PetscInt m = (PetscInt)PetscCeilReal(PetscAbsScalar(sum)/(PetscReal)m0); - - // Debug - Mpi::Print("Eigenvalue estimate: {:d}\n", m); - } -#endif - - // Initialize the subspace. 
- Q.SetRandom(0, mQ / 2); - if (info) - { - petsc::PetscParVector q = Q.GetColumn(0); - q.Copy(*r0); - Q.RestoreColumn(0, q); - for (PetscInt j = 1; j < mQ / 2; j++) - { - // Ensure homogeneous Dirichlet BC are satisfied by the starting subspace. - petsc::PetscParVector q = Q.GetColumn(j); - q.PointwiseMult(*r0, false); - Q.RestoreColumn(j, q); - } - } - - // Begin main FEAST loop. - int it = 0, nconv, ninside; - while (true) - { - // Orthonormalize the (real-valued) basis Q. - { - bool mgs = false, cgs2 = true; - for (PetscInt j = 0; j < mQ / 2; j++) - { - petsc::PetscParVector q1 = Q.GetColumn(j); - q1.GetToVectors(qr, qi); - if (opProj) - { - opProj->Mult(qr); - opProj->Mult(qi); - } - q1.SetFromVector(qr); - Q.RestoreColumn(j, q1); - - petsc::PetscParVector q2 = Q.GetColumn(j + mQ / 2); - q2.SetFromVector(qi); - Q.RestoreColumn(j + mQ / 2, q2); - } - for (PetscInt j = 0; j < mQ; j++) - { - if (opB) - { - Q.OrthonormalizeColumn(j, mgs, cgs2, *opB, *r0); - } - else - { - Q.OrthonormalizeColumn(j, mgs, cgs2); - } - } - } - - // Form and solve the projected EVP. Select the m0 best eigenpair candidates and - // reconstruct the full-dimensional eigenvectors. - SolveProjectedProblem(Q, R, *X, eig); - - // Update the eigenpair residuals and check convergence. Residual calculation and - // convergence tests occur in the unscaled space. 
- nconv = ninside = 0; - bool check = true; - PetscReal rmin = mfem::infinity(), rmax = 0.0; - PetscInt jmin = -1, jmax = -1; - if (rg) - { - PalacePetscCall(RGCheckInside(rg, m0, eig, nullptr, inside)); - } - else - { - for (PetscInt j = 0; j < m0; j++) - { - inside[j] = true; - } - } - for (PetscInt j = 0; j < m0; j++) - { - PetscScalar sigma = eig[j] * gamma; - petsc::PetscParVector x = X->GetColumn(j); - petsc::PetscParVector r = R.GetColumn(j); - if (opB) - { - x.Normalize(*opB, *r0); - } - else - { - x.Normalize(); - } - GetResidual(sigma, x, r); - PetscReal res = r.Norml2() / (x.Norml2() * PetscAbsScalar(sigma)); - // PetscReal res = r.Norml2()/x.Norml2(); - X->RestoreColumn(j, x); - R.RestoreColumn(j, r); - if (res < rtol) - { - // Mark converged even for eigenvalues outside the contour. - converged[j] = true; - nconv++; - if (res > rmax) - { - rmax = res; - jmax = j; - } - } - else - { - converged[j] = false; - if (res < rmin) - { - rmin = res; - jmin = j; - } - } - if (inside[j] >= 0) - { - ninside++; - if (!converged[j]) - { - check = false; // Only finish when inside eigenvalues are converged - } - } - - // Debug - // Mpi::Print(comm, " res[{:d}] = {:e} (eig = {:+e}{:+e}i, inside = {:d})\n", - // j, res, PetscRealPart(sigma), - // PetscImaginaryPart(sigma), inside[j]); - } - if (print > 0) - { - if (ninside > 0 || nconv > 0) - { - if (jmin >= 0) - { - Mpi::Print(comm, - " {:d} FEAST inside={:d} converged={:d} first " - "unconverged value (error) {:+.3e}{:+.3e}i ({:.6e})\n", - it, ninside, nconv, PetscRealPart(eig[jmin] * gamma), - PetscImaginaryPart(eig[jmin] * gamma), rmin); - } - else - { - Mpi::Print(comm, - " {:d} FEAST inside={:d} converged={:d} last " - "converged value (error) {:+.3e}{:+.3e}i ({:.6e})\n", - it, ninside, nconv, PetscRealPart(eig[jmax] * gamma), - PetscImaginaryPart(eig[jmax] * gamma), rmax); - } - } - else - { - Mpi::Print(comm, " {:d} FEAST inside=0\n", it); - } - } - // Check convergence: All inside must be converged + any 
outside if user specified nev - // too large. - if ((check && nconv >= nev) || it == max_it) - { - break; - } - - // Update subspace with contour integral (accumulates to first M*m0 columns of Q). - Q.Scale(0.0); - for (const auto &op : opInv) - { - op.Mult(eig, *X, R, Q, *r0, converged, gamma); - } - it++; - } - - // Print some log information. - if (print > 0) - { - Mpi::Print(comm, - "\n FEAST {} eigensolve {} ({:d} eigenpairs); iterations {:d}\n" - " Total number of linear systems solved: {:d}\n" - " Total number of linear solver iterations: {:d}\n", - GetName(), (it == max_it) ? "finished" : "converged", nconv, it, - GetTotalKspMult(), GetTotalKspIter()); - } - if (it == max_it) - { - Mpi::Warning(comm, - "FEAST eigenvalue solver reached maximum {:d} " - "iterations!\nFound {:d} converged eigenvales of requested {:d}!\n", - it, nconv, nev); - } - - // Unscale and sort the eigenvalues in ascending order. - auto CompareAbs = [converged, this](const PetscInt &l, const PetscInt &r) - { - if (!converged[l] && converged[r]) - { - return false; - } - else if (converged[l] && !converged[r]) - { - return true; - } - return (PetscAbsScalar(eig[l]) < PetscAbsScalar(eig[r])); - }; - for (PetscInt j = 0; j < m0; j++) - { - eig[j] = eig[j] * gamma; - perm[j] = j; - } - std::sort(perm, perm + m0, CompareAbs); - - // Cleanup. - delete[] inside; - delete[] converged; - - // Reset for next solve. 
- info = 0; - return nconv; -} - -void FeastEigenSolver::CheckParameters() -{ - MFEM_VERIFY(nev > 0, "Number of requested modes is not positive!"); - MFEM_VERIFY(rtol > 0.0, "Eigensolver tolerance is not positive!"); - MFEM_VERIFY(!(bl == 0.0 && tr == 0.0), "Integration contour has not been defined!"); - if (max_it <= 0) - { - max_it = 15; - } -} - -RG FeastEigenSolver::ConfigureRG(PetscScalar *&z, PetscScalar *&w) -{ - int np = static_cast(opInv.size()); - if (np == 1) - { - z = new PetscScalar[np]; - w = new PetscScalar[np]; - z[0] = 0.5 * (bl + tr) / gamma; // User should pass in bl = tr = target - w[0] = 1.0; - return nullptr; - } - else - { - RG rg; - PalacePetscCall(RGCreate(PETSC_COMM_SELF, &rg)); - MFEM_VERIFY(PetscRealPart(tr - bl) > 0.0 && PetscImaginaryPart(tr - bl) > 0.0, - "Contour must have nonzero and finite aspect ratio!"); - PetscScalar c = 0.5 * (bl + tr) / gamma; - PetscReal r = 0.5 * PetscRealPart(tr - bl) / gamma; - PetscReal vscale = 0.5 * PetscImaginaryPart(tr - bl) / (r * gamma); - PalacePetscCall(RGSetType(rg, RGELLIPSE)); - PalacePetscCall(RGEllipseSetParameters(rg, c, r, vscale)); - // MFEM_VERIFY(opInv.size() % 4 == 0, - // "Number of contour quadrature points for rectangular region - // must be evenly divisible by 4!"); - // PalacePetscCall(RGSetType(rg, RGINTERVAL)); - // PalacePetscCall(RGIntervalSetEndpoints(rg, PetscRealPart(bl)/gamma, - // PetscRealPart(tr)/gamma, // PetscImaginaryPart(bl)/gamma, - // PetscImaginaryPart(tr)/gamma)); - - z = new PetscScalar[np]; - w = new PetscScalar[np]; - if (PetscImaginaryPart(c) == 0.0 || PetscRealPart(c) == 0.0) - { - // Contour is symmetric about an axis and we place the first quadrature point at θ - // = -π/2 (imaginary-axis symmetry) or θ = π (real-axis symmetry). - PetscReal shift = (PetscRealPart(c) == 0.0) ? 
-0.5 * PETSC_PI : PETSC_PI; - for (int k = 0; k < np; k++) - { - PetscReal theta = 2.0 * PETSC_PI * k / (PetscReal)np + shift; - z[k] = c + r * (PetscCosReal(theta) + PETSC_i * vscale * PetscSinReal(theta)); - w[k] = r * (vscale * PetscCosReal(theta) + PETSC_i * PetscSinReal(theta)) / - (PetscReal)np; - } - } - else - { - PetscScalar *zn = new PetscScalar[np]; - PalacePetscCall(RGComputeQuadrature(rg, RG_QUADRULE_TRAPEZOIDAL, np, z, zn, w)); - delete[] zn; - } - return rg; - } -} - -PetscInt *FeastEigenSolver::SortEigenvalues(const PetscScalar *eig_, PetscInt m) const -{ - PetscReal rthresh = (real_threshold) ? 0.01 * PetscRealPart(bl) / gamma : 0.0; - PetscReal ithresh = (imag_threshold) ? 0.01 * PetscImaginaryPart(bl) / gamma : 0.0; - PetscScalar target = 0.5 * (bl + tr) / gamma; - PetscReal vscale = - (bl == tr) ? 1.0 : PetscImaginaryPart(tr - bl) / PetscRealPart(tr - bl); - auto CompareTargetAbs = - [eig_, rthresh, ithresh, target, vscale](const PetscInt &l, const PetscInt &r) - { - PetscReal lr = PetscAbsReal(PetscRealPart(eig_[l])); - PetscReal li = PetscAbsReal(PetscImaginaryPart(eig_[l])); - PetscReal rr = PetscAbsReal(PetscRealPart(eig_[r])); - PetscReal ri = PetscAbsReal(PetscImaginaryPart(eig_[r])); - if ((li < ithresh && ri >= ithresh) || (lr < rthresh && rr >= rthresh)) - { - return false; - } - else if ((li >= ithresh && ri < ithresh) || (lr >= rthresh && rr < rthresh)) - { - return true; - } - PetscScalar dl = eig_[l] - target; - PetscScalar dr = eig_[r] - target; - PetscReal vl = PetscRealPart(dl) * PetscRealPart(dl) + - PetscImaginaryPart(dl) * PetscImaginaryPart(dl) / (vscale * vscale); - PetscReal vr = PetscRealPart(dr) * PetscRealPart(dr) + - PetscImaginaryPart(dr) * PetscImaginaryPart(dr) / (vscale * vscale); - return (vl < vr); - }; - PetscInt *perm_ = new PetscInt[m]; - for (PetscInt i = 0; i < m; i++) - { - perm_[i] = i; - } - std::sort(perm_, perm_ + m, CompareTargetAbs); - return perm_; -} - -void 
FeastEigenSolver::BVMatProjectInternal(const petsc::PetscDenseMatrix &Q, - const petsc::PetscParMatrix &A, - petsc::PetscDenseMatrix &Ar, - petsc::PetscDenseMatrix &R, - PetscReal scale) const -{ - // Compute Ar = Qᴴ A Q. We assume Q is real and thus the result is complex symmetric if A - // is symmetric. Ar is replicated across all processes(sequential mQ x mQ matrix). - MFEM_VERIFY(A.GetSymmetric() && Ar.GetSymmetric(), - "BVMatProjectInternal is specialized for symmetric matrices!"); - MFEM_VERIFY(Q.GetGlobalNumCols() == mQ && R.GetGlobalNumCols() == mQ && - Ar.GetNumRows() == mQ && Ar.GetNumCols() == mQ, - "Unexpected number of basis columns in FEAST solver!"); - mfem::Vector qr(Q.GetNumRows()); - for (PetscInt j = 0; j < mQ; j++) - { - const petsc::PetscParVector q = Q.GetColumnRead(j); - petsc::PetscParVector r = R.GetColumn(j); - q.GetToVector(qr); - A.Mult(qr, r); - Q.RestoreColumnRead(j, q); - R.RestoreColumn(j, r); - } - PetscInt n = A.GetNumRows(); - const PetscScalar *pQ = Q.GetArrayRead(), *pR = R.GetArrayRead(); - petsc::PetscDenseMatrix locQ(n, mQ, const_cast(pQ)); - petsc::PetscDenseMatrix locR(n, mQ, const_cast(pR)); - locQ.MatTransposeMult(locR, Ar); // Qᴴ = Qᵀ - Q.RestoreArrayRead(pQ); - R.RestoreArrayRead(pR); - - // Global reduction over all processes. 
- PetscScalar *pAr = Ar.GetArray(); - Mpi::GlobalSum(mQ * mQ, pAr, Q.GetComm()); - Ar.RestoreArray(pAr); - Ar.Scale(scale); -} - -int FeastEigenSolver::GetTotalKspMult() const -{ - int ksp_mult = 0; - for (const auto &op : opInv) - { - ksp_mult += op.ksp.GetTotalNumMult(); - } - return ksp_mult; -} - -int FeastEigenSolver::GetTotalKspIter() const -{ - int ksp_it = 0; - for (const auto &op : opInv) - { - ksp_it += op.ksp.GetTotalNumIter(); - } - return ksp_it; -} - -void FeastEigenSolver::GetEigenvalue(int i, double &eigr, double &eigi) const -{ - MFEM_VERIFY(eig && i >= 0 && i < m0, - "Out of range eigenpair requested (i = " << i << ", m0 = " << m0 << ")!"); - const int &j = perm[i]; - eigr = PetscRealPart(eig[j]); - eigi = PetscImaginaryPart(eig[j]); -} - -void FeastEigenSolver::GetEigenvector(int i, petsc::PetscParVector &v) const -{ - MFEM_VERIFY(eig && i >= 0 && i < m0, - "Out of range eigenpair requested (i = " << i << ", m0 = " << m0 << ")!"); - const int &j = perm[i]; - const petsc::PetscParVector x = X->GetColumnRead(j); - v.Copy(x); - X->RestoreColumnRead(j, x); -} - -void FeastEigenSolver::GetError(int i, EigenSolverBase::ErrorType type, double &err) const -{ - MFEM_VERIFY(eig && i >= 0 && i < m0, - "Out of range eigenpair requested (i = " << i << ", m0 = " << m0 << ")!"); - const int &j = perm[i]; - if (res[j] <= 0.0) - { - const petsc::PetscParVector x = X->GetColumnRead(j); - GetResidual(eig[j], x, *r0); - res[j] = r0->Norml2() / x.Norml2(); - X->RestoreColumnRead(j, x); - } - switch (type) - { - case ErrorType::ABSOLUTE: - err = res[j]; - break; - case ErrorType::RELATIVE: - err = res[j] / PetscAbsScalar(eig[j]); - break; - case ErrorType::BACKWARD: - err = res[j] / GetBackwardScaling(eig[j]); - break; - } -} - -// EPS specific methods - -FeastEPSSolver::FeastEPSSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, - int np, int print_lvl) - : FeastEigenSolver(comm, iodata, spaceop, np, print_lvl) -{ - opK = opM = nullptr; - normK = 
normM = 0.0; - AQ = BQ = XQ = XQ0 = nullptr; -} - -void FeastEPSSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), - "Invalid modification of eigenvalue problem size!"); - bool first = (opK == nullptr); - opK = &K; - opM = &M; - if (first && type != ScaleType::NONE) - { - normK = opK->Norm2(); - normM = opM->Norm2(); - MFEM_VERIFY(normK > 0.0 && normM > 0.0, "Invalid matrix norms for EPS scaling!"); - gamma = normK / normM; // Store γ² for linear problem - delta = 2.0 / normK; - } -} - -int FeastEPSSolver::Solve() -{ - // Check inputs. - CheckParameters(); - MFEM_VERIFY(opK && opM, "Operators are not set for FeastEPSSolver!"); - - // Allocate storage for eigenvectors. - MPI_Comm comm = opK->GetComm(); - if (!X) - { - X = new petsc::PetscDenseMatrix(comm, opK->GetNumRows(), PETSC_DECIDE, PETSC_DECIDE, m0, - nullptr); - } - if (!r0) - { - r0 = new petsc::PetscParVector(*opK); - } - - // Allocate sequential matrices for the projected generalized eigenvalue problems at each - // iteration, and associated eigenvectors. - AQ = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - AQ->CopySymmetry(*opK); - BQ = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - BQ->CopySymmetry(*opM); - XQ = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - XQ0 = new petsc::PetscDenseMatrix(mQ, m0, nullptr); - - // Create region object for integration contour and configure the linear solvers at each - // integration point. The linear solves use the unscaled space. 
- PetscScalar *z, *w; - RG rg = ConfigureRG(z, w); - Mpi::Print(comm, "Quadrature points for FEAST contour\n"); - for (int k = 0; k < static_cast(opInv.size()); k++) - { - Mpi::Print(comm, " {:d}: zₖ = {:+.3e}{:+3e}i\n", k + 1, PetscRealPart(z[k]) * gamma, - PetscImaginaryPart(z[k]) * gamma); - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opM); -#if 0 - int l = 0; - for (; l < k; l++) - { - constexpr double atol = 1.0e-9; - if (PetscAbsReal(PetscAbsScalar(z[k]) - PetscAbsScalar(z[l])) < atol) - { - // Reuse preconditioner assembled for contour point with same real magnitude. - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opM, opInv[l].pc); - break; - } - } - if (l == k) - { - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opM); - } -#endif - } - Mpi::Print(comm, "\n"); - delete[] z; - delete[] w; - - // Solve the quadratic eigenvalue problem. - int nconv = SolveInternal(rg); - - // Cleanup. - PalacePetscCall(RGDestroy(&rg)); - delete AQ; - delete BQ; - delete XQ; - delete XQ0; - - return nconv; -} - -void FeastEPSSolver::SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, - petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) -{ - // Form mQ x mQ projected matrices. - // AQ->Scale(0.0); - // BQ->Scale(0.0); - BVMatProjectInternal(Q_, *opK, *AQ, R_, delta); - BVMatProjectInternal(Q_, *opM, *BQ, R_, delta * gamma); - - // Solve projected EVP using LAPACK wrapper. 
- PetscBLASInt info, n, lwork, lrwork; - PetscScalar *work, *alpha, *beta; - PetscReal *rwork; - PetscBLASIntCast(mQ, &n); - lwork = 2 * n; - lrwork = 8 * n; - work = new PetscScalar[lwork]; - rwork = new PetscReal[lrwork]; - alpha = new PetscScalar[n]; - beta = new PetscScalar[n]; - - PetscScalar *pAQ = AQ->GetArray(); - PetscScalar *pBQ = BQ->GetArray(); - PetscScalar *pXQ = XQ->GetArray(); - LAPACKggev_("N", "V", &n, pAQ, &n, pBQ, &n, alpha, beta, nullptr, &n, pXQ, &n, work, - &lwork, rwork, &info); - AQ->RestoreArray(pAQ); - BQ->RestoreArray(pBQ); - XQ->RestoreArray(pXQ); - - // Sort eigenpairs by distance to center. - for (PetscBLASInt i = 0; i < n; i++) - { - alpha[i] /= beta[i]; - } - - // Debug - // Mpi::Print(Q_.GetComm(), "Before sort, eigenvalues:\n"); - // for (PetscBLASInt i = 0; i < n; i++) - // { - // Mpi::Print(Q_.GetComm(), " {:+e}{:+e}i\n", - // PetscRealPart(alpha[i]*gamma), - // PetscImaginaryPart(alpha[i]*gamma)); - // } - - PetscInt *sort = SortEigenvalues(alpha, n); - for (PetscInt i = 0; i < m0; i++) - { - eig_[i] = alpha[sort[i]]; - const petsc::PetscParVector xq = XQ->GetColumnRead(sort[i]); - petsc::PetscParVector xq0 = XQ0->GetColumn(i); - xq0.Copy(xq); - XQ->RestoreColumnRead(sort[i], xq); - XQ0->RestoreColumn(i, xq0); - } - - // Cleanup. - delete[] sort; - delete[] work; - delete[] rwork; - delete[] alpha; - delete[] beta; - - // Reconstruct the first m0 high-dimensional eigenvectors. - const PetscScalar *pQ = Q_.GetArrayRead(); - PetscScalar *pX = X_.GetArray(); - petsc::PetscDenseMatrix locQ(X_.GetNumRows(), mQ, const_cast(pQ)); - petsc::PetscDenseMatrix locX(X_.GetNumRows(), m0, pX); - locQ.MatMult(*XQ0, locX); - Q_.RestoreArrayRead(pQ); - X_.RestoreArray(pX); -} - -void FeastEPSSolver::GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const -{ - // r = (K - λ M) x for eigenvalue λ. 
- opM->Mult(x_, r_); - r_.Scale(-eig_); - opK->MultAdd(x_, r_); -} - -PetscReal FeastEPSSolver::GetBackwardScaling(PetscScalar eig_) const -{ - // Make sure not to use norms from scaling as this can be confusing if they are different. - // Note that SLEPc uses ||.||∞, not Frobenius. - if (normK <= 0.0) - { - normK = opK->NormInf(); - } - if (normM <= 0.0) - { - normM = opM->NormInf(); - } - return normK + PetscAbsScalar(eig_) * normM; -} - -// PEP specific methods - -FeastPEPSolver::FeastPEPSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, - int np, int print_lvl) - : FeastEigenSolver(comm, iodata, spaceop, np, print_lvl) -{ - opK = opC = opM = nullptr; - normK = normC = normM = 0.0; - AQ = BQ = AQ0 = XQ = XQ0 = nullptr; -} - -void FeastPEPSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) -{ - MFEM_VERIFY(!opK || opK->GetNumRows() == K.GetNumRows(), - "Invalid modification of eigenvalue problem size!"); - bool first = (opK == nullptr); - opK = &K; - opC = &C; - opM = &M; - if (first && type != ScaleType::NONE) - { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); - MFEM_VERIFY(normK > 0.0 && normC > 0.0 && normM > 0.0, - "Invalid matrix norms for PEP scaling!"); - gamma = std::sqrt(normK / normM); - delta = 2.0 / (normK + gamma * normC); - } -} - -int FeastPEPSolver::Solve() -{ - // Check inputs. - CheckParameters(); - MFEM_VERIFY(opK && opC && opM, "Operators are not set for FeastPEPSolver!"); - - // Allocate storage for eigenvectors. - MPI_Comm comm = opK->GetComm(); - if (!X) - { - X = new petsc::PetscDenseMatrix(comm, opK->GetNumRows(), PETSC_DECIDE, PETSC_DECIDE, m0, - nullptr); - } - if (!r0) - { - r0 = new petsc::PetscParVector(*opK); - } - - // Allocate sequential matrices for the projected linearized generalized eigenvalue - // problems at each iteration, and associated eigenvectors. 
- AQ = new petsc::PetscDenseMatrix(2 * mQ, 2 * mQ, nullptr); - BQ = new petsc::PetscDenseMatrix(2 * mQ, 2 * mQ, nullptr); - AQ0 = new petsc::PetscDenseMatrix(mQ, mQ, nullptr); - AQ0->SetSymmetric(opK->GetSymmetric() && opC->GetSymmetric() && opM->GetSymmetric()); - XQ = new petsc::PetscDenseMatrix(2 * mQ, 2 * mQ, nullptr); - XQ0 = new petsc::PetscDenseMatrix(mQ, m0, nullptr); - - // Create region object for integration contour and configure the linear solvers at each - // integration point. The linear solves use the unscaled space. - PetscScalar *z, *w; - RG rg = ConfigureRG(z, w); - Mpi::Print(comm, "Quadrature points for FEAST contour\n"); - for (int k = 0; k < static_cast(opInv.size()); k++) - { - Mpi::Print(comm, " {:d}: zₖ = {:+.3e}{:+.3e}i\n", k + 1, PetscRealPart(z[k]) * gamma, - PetscImaginaryPart(z[k]) * gamma); - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opC, *opM); -#if 0 - int l = 0; - for (; l < k; l++) - { - constexpr double atol = 1.0e-9; - if (PetscAbsReal(PetscAbsScalar(z[k]) - PetscAbsScalar(z[l])) < atol) - { - // Reuse preconditioner assembled for contour point with same real magnitude. - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opC, *opM, opInv[l].pc); - break; - } - } - if (l == k) - { - opInv[k].SetOperators(z[k] * gamma, w[k], *opK, *opC, *opM); - } -#endif - } - Mpi::Print(comm, "\n"); - delete[] z; - delete[] w; - - // Solve the quadratic eigenvalue problem. - int nconv = SolveInternal(rg); - - // Cleanup. - PalacePetscCall(RGDestroy(&rg)); - delete AQ; - delete BQ; - delete AQ0; - delete XQ; - delete XQ0; - - return nconv; -} - -void FeastPEPSolver::SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, - petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) -{ - // Form mQ x mQ projected matrices and construct the canonincal linearization: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . 
- AQ->Scale(0.0); - BQ->Scale(0.0); - PetscScalar *pAQ = AQ->GetArray(); - PetscScalar *pBQ = BQ->GetArray(); - for (PetscInt i = 0; i < mQ; i++) - { - pAQ[i + 2 * mQ * (i + mQ)] = 1.0; - } - { - // AQ0->Scale(0.0); - BVMatProjectInternal(Q_, *opK, *AQ0, R_, delta); - - const PetscScalar *pAQ0 = AQ0->GetArrayRead(); - for (PetscInt j = 0; j < mQ; j++) - { - for (PetscInt i = 0; i < mQ; i++) - { - pAQ[i + mQ + 2 * mQ * j] = -pAQ0[i + mQ * j]; - } - } - AQ0->RestoreArrayRead(pAQ0); - } - { - // AQ0->Scale(0.0); - BVMatProjectInternal(Q_, *opC, *AQ0, R_, delta * gamma); - - const PetscScalar *pAQ0 = AQ0->GetArrayRead(); - for (PetscInt j = 0; j < mQ; j++) - { - for (PetscInt i = 0; i < mQ; i++) - { - pAQ[i + mQ + 2 * mQ * (j + mQ)] = -pAQ0[i + mQ * j]; - } - } - AQ0->RestoreArrayRead(pAQ0); - } - for (PetscInt i = 0; i < mQ; i++) - { - pBQ[i + 2 * mQ * i] = 1.0; - } - { - // AQ0->Scale(0.0); - BVMatProjectInternal(Q_, *opM, *AQ0, R_, delta * gamma * gamma); - - const PetscScalar *pAQ0 = AQ0->GetArrayRead(); - for (PetscInt j = 0; j < mQ; j++) - { - PalacePetscCall(PetscArraycpy(pBQ + mQ + 2 * mQ * (j + mQ), pAQ0 + mQ * j, mQ)); - } - AQ0->RestoreArrayRead(pAQ0); - } - - // Solve projected EVP using LAPACK wrapper. - PetscBLASInt info, n, lwork, lrwork; - PetscScalar *work, *alpha, *beta; - PetscReal *rwork; - PetscBLASIntCast(2 * mQ, &n); - lwork = 2 * n; - lrwork = 8 * n; - work = new PetscScalar[lwork]; - rwork = new PetscReal[lrwork]; - alpha = new PetscScalar[n]; - beta = new PetscScalar[n]; - - PetscScalar *pXQ = XQ->GetArray(); - LAPACKggev_("N", "V", &n, pAQ, &n, pBQ, &n, alpha, beta, nullptr, &n, pXQ, &n, work, - &lwork, rwork, &info); - AQ->RestoreArray(pAQ); - BQ->RestoreArray(pBQ); - XQ->RestoreArray(pXQ); - - // Sort eigenpairs by distance to center. From the linearization, we extract the - // eigenvectors from the top block and normalize later on. 
- for (PetscBLASInt i = 0; i < n; i++) - { - alpha[i] /= beta[i]; - } - - // Debug - // Mpi::Print(Q_.GetComm(), "Before sort, eigenvalues:\n"); - // for (PetscBLASInt i = 0; i < n; i++) - // { - // Mpi::Print(Q_.GetComm(), " {:+e}{:+e}i\n", - // PetscRealPart(alpha[i]*gamma), - // PetscImaginaryPart(alpha[i]*gamma)); - // } - - PetscInt *sort = SortEigenvalues(alpha, n); - for (PetscInt i = 0; i < m0; i++) - { - eig_[i] = alpha[sort[i]]; - const PetscScalar *pXQ = XQ->GetArrayRead(); - PetscScalar *pXQ0 = XQ0->GetArray(); - PalacePetscCall(PetscArraycpy(pXQ0 + mQ * i, pXQ + 2 * mQ * sort[i], mQ)); - XQ->RestoreArrayRead(pXQ); - XQ0->RestoreArray(pXQ0); - } - - // Cleanup. - delete[] sort; - delete[] work; - delete[] rwork; - delete[] alpha; - delete[] beta; - - // Reconstruct the first m0 high-dimensional eigenvectors. - const PetscScalar *pQ = Q_.GetArrayRead(); - PetscScalar *pX = X_.GetArray(); - petsc::PetscDenseMatrix locQ(X_.GetNumRows(), mQ, const_cast(pQ)); - petsc::PetscDenseMatrix locX(X_.GetNumRows(), m0, pX); - locQ.MatMult(*XQ0, locX); - Q_.RestoreArrayRead(pQ); - X_.RestoreArray(pX); -} - -void FeastPEPSolver::GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const -{ - // r = P(λ) x = (K + λ C + λ² M) x for eigenvalue λ. - opM->Mult(x_, r_); - r_.Scale(eig_); - opC->MultAdd(x_, r_); - r_.Scale(eig_); - opK->MultAdd(x_, r_); -} - -PetscReal FeastPEPSolver::GetBackwardScaling(PetscScalar eig_) const -{ - // Make sure not to use norms from scaling as this can be confusing if they are different. - // Note that SLEPc uses ||.||∞, not the 2-norm. - if (normK <= 0.0) - { - normK = opK->Norm2(); - } - if (normC <= 0.0) - { - normC = opC->Norm2(); - } - if (normM <= 0.0) - { - normM = opM->Norm2(); - } - PetscReal t = PetscAbsScalar(eig_); - return normK + t * normC + t * t * normM; -} - -} // namespace palace::feast - -PetscErrorCode __mat_apply_FEAST_EPS(Mat A, Vec x, Vec y) -{ - // Apply the operator: K - zₖ M . 
- palace::feast::internal::FeastLinearSolver *feast; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&feast)); - MFEM_VERIFY(feast, "Invalid PETSc shell matrix context for FEAST!"); - { - feast->opM->Mult(xx, yy); - yy.Scale(-feast->zk); - feast->opK->MultAdd(xx, yy); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_apply_FEAST_PEP(Mat A, Vec x, Vec y) -{ - // Apply the operator: K + zₖ C + zₖ² M . - palace::feast::internal::FeastLinearSolver *feast; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&feast)); - MFEM_VERIFY(feast, "Invalid PETSc shell matrix context for FEAST!"); - { - feast->opM->Mult(xx, yy); - yy.Scale(feast->zk); - feast->opC->MultAdd(xx, yy); - yy.Scale(feast->zk); - feast->opK->MultAdd(xx, yy); - } - PetscFunctionReturn(0); -} - -#endif diff --git a/palace/linalg/feast.hpp b/palace/linalg/feast.hpp deleted file mode 100644 index 63547c422..000000000 --- a/palace/linalg/feast.hpp +++ /dev/null @@ -1,287 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_FEAST_HPP -#define PALACE_LINALG_FEAST_HPP - -#if defined(PALACE_WITH_SLEPC) - -#include "linalg/petsc.hpp" - -#if !defined(PETSC_USE_COMPLEX) -#error "FEAST eigenvalue solver requires PETSc built with complex scalars!" -#endif - -#include -#include "linalg/eigen.hpp" - -// Forward declarations of SLEPc objects. -typedef struct _p_RG *RG; - -namespace palace -{ - -class DivFreeSolver; -class IoData; -class KspSolver; -class SpaceOperator; - -namespace feast -{ - -namespace internal -{ - -class FeastLinearSolver; - -} // namespace internal - -// -// A native implementation of the FEAST eigenvalue solver, with residual- inverse iteration -// for linear and quadratic eigenvalue problems with complex- symmetric matrices. 
Depends -// on SLEPc for some functionality like quadrature and solving projected the eigenvalue -// problem. -// -class FeastEigenSolver : public EigenSolverBase -{ -protected: - // Control print level for debugging. - int print; - - // Status variable. - int info; - - // Number eigenvalues to be computed. Also the subspace and projected system dimensions. - PetscInt nev, m0, mQ; - - // Number of moments to consider for subspace construction. - PetscInt M; - - // Relative eigenvalue error convergence tolerance for the solver. - PetscReal rtol; - - // Maximum number of FEAST iterations. - PetscInt max_it; - - // Variables for scaling, from Higham et al., IJNME 2008. - PetscReal gamma, delta; - - // Parameters defining the integration contour. - PetscScalar bl, tr; - bool real_threshold, imag_threshold; - - // Storage for computed eigenvalues. - PetscScalar *eig; - PetscInt *perm; - - // Storage for computed eigenvectors. - petsc::PetscDenseMatrix *X; - - // Storage for computed residual norms. - mutable PetscReal *res; - - // Workspace vector for initial space and residual calculations. - mutable petsc::PetscParVector *r0; - - // Solvers for Linear systems for the different quadrature points along the contour. - std::vector opInv; - - // Reference to solver for projecting an intermediate vector onto a divergence-free space - // (not owned). - const DivFreeSolver *opProj; - - // Reference to matrix used for weighted inner products (not owned). May be nullptr, in - // which case identity is used. - const petsc::PetscParMatrix *opB; - - // Perform the FEAST solve. - int SolveInternal(RG rg); - - // Helper routine for parameter checking. - void CheckParameters(); - - // Helper routine to construct the integration contour. - RG ConfigureRG(PetscScalar *&z, PetscScalar *&w); - - // Helper routine for sorting eigenvalues of the projected problem. 
- PetscInt *SortEigenvalues(const PetscScalar *eig_, PetscInt m) const; - - // Helper routine for computing the Qᴴ A Q matrix product. - void BVMatProjectInternal(const petsc::PetscDenseMatrix &Q, - const petsc::PetscParMatrix &A, petsc::PetscDenseMatrix &Ar, - petsc::PetscDenseMatrix &R, PetscReal scale) const; - - // Helper routine for solving the projected eigenvalue problem. - virtual void SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, - petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) = 0; - - // Helper routine for computing the eigenpair residual. - virtual void GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const = 0; - - // Helper routine for computing the backward error. - virtual PetscReal GetBackwardScaling(PetscScalar eig_) const = 0; - - // Return problem type name. - virtual const char *GetName() const = 0; - -public: - FeastEigenSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, int np, - int print_lvl); - ~FeastEigenSolver() override; - - // Set operators for the generalized eigenvalue problem or for the quadratic polynomial - // eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, - ScaleType type) override; - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; - - // Set the projection operator for the divergence-free constraint. - void SetProjector(const DivFreeSolver &divfree) override; - - // Get scaling factors used by the solver. - double GetScalingGamma() const override { return (double)gamma; } - double GetScalingDelta() const override { return (double)delta; } - - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override; - - // Set solver tolerance. - void SetTol(double tol) override; - - // Set maximum number of FEAST iterations. 
- void SetMaxIter(int maxits) override; - - // Set up region for contour integration. Region is defined by the bottom- left and - // top-right points in the complex plane. - void SetContour(double blr, double bli, double trr, double tri, - bool filter_small_real = false, bool filter_small_imag = false); - - // Set optional B matrix used for weighted inner products. This must be set explicitly - // even for generalized problems, otherwise the identity will be used. - void SetBMat(const petsc::PetscParMatrix &B) override; - - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override = 0; - - // Return number of linear solves and linear solver iterations performed during the FEAST - // solve. - int GetTotalKspMult() const; - int GetTotalKspIter() const; - - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; - - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; - - // Get the corresponding eigenpair error. - void GetError(int i, ErrorType type, double &err) const override; - - // Methods not relevant to the FEAST eigenvalue solver. - void SetLinearSolver(const KspSolver &ksp) override - { - MFEM_ABORT("SetLinearSolver not defined for FeastEigenSolver!"); - } - void SetWhichEigenpairs(WhichType type) override - { - MFEM_ABORT("SetWhichEigenpairs not defined for FeastEigenSolver!"); - } - void SetShiftInvert(double tr, double ti, bool precond = false) override - { - MFEM_ABORT("SetShiftInvert not defined for FeastEigenSolver!"); - } -}; - -// Generalized eigenvalue problem solver: K x = λ M x . -class FeastEPSSolver : public FeastEigenSolver -{ -private: - // References to matrices defining the generalized eigenvalue problem (not owned). 
- const petsc::PetscParMatrix *opK, *opM; - - // Operator norms for scaling. - mutable PetscReal normK, normM; - - // Sequential workspace matrices for projected problem. - petsc::PetscDenseMatrix *AQ, *BQ, *XQ, *XQ0; - -protected: - // Helper routine for solving the projected eigenvalue problem. - void SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) override; - - // Helper routine for computing the eigenpair residuals: R = K X - M X Λ . - void GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const override; - - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig_) const override; - - // Return problem type name. - const char *GetName() const override { return "EPS"; } - -public: - FeastEPSSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, int np, - int print_lvl); - - // Set operators for the generalized eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, - ScaleType type) override; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override; -}; - -// Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 . -class FeastPEPSolver : public FeastEigenSolver -{ -private: - // References to matrices defining the quadratic eigenvalue problem (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; - - // Operator norms for scaling. - mutable PetscReal normK, normC, normM; - - // Sequential workspace matrices for projected problem. - petsc::PetscDenseMatrix *AQ, *BQ, *AQ0, *XQ, *XQ0; - -protected: - // Helper routine for solving the projected eigenvalue problem. 
- void SolveProjectedProblem(const petsc::PetscDenseMatrix &Q_, petsc::PetscDenseMatrix &R_, - petsc::PetscDenseMatrix &X_, PetscScalar *eig_) override; - - // Helper routine for computing the eigenpair residuals: R = P(Λ, X) . - void GetResidual(PetscScalar eig_, const petsc::PetscParVector &x_, - petsc::PetscParVector &r_) const override; - - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig_) const override; - - // Return problem type name. - const char *GetName() const override { return "PEP"; } - -public: - FeastPEPSolver(MPI_Comm comm, const IoData &iodata, SpaceOperator &spaceop, int np, - int print_lvl); - - // Set operators for the quadratic polynomial eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override; -}; - -} // namespace feast - -} // namespace palace - -#endif - -#endif // PALACE_LINALG_FEAST_HPP diff --git a/palace/linalg/gmg.cpp b/palace/linalg/gmg.cpp index 3083e5dea..f1bf1a7e9 100644 --- a/palace/linalg/gmg.cpp +++ b/palace/linalg/gmg.cpp @@ -3,128 +3,192 @@ #include "gmg.hpp" +#include #include "linalg/chebyshev.hpp" #include "linalg/distrelaxation.hpp" +#include "linalg/rap.hpp" namespace palace { -GeometricMultigridSolver::GeometricMultigridSolver( - std::unique_ptr &&coarse_solver, const mfem::Array &dbc_marker, +template +GeometricMultigridSolver::GeometricMultigridSolver( + std::unique_ptr> &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces, int cycle_it, int smooth_it, int cheby_order) - : mfem::Solver(), fespaces_(fespaces), pc_it(cycle_it) + : Solver(), pc_it(cycle_it), A(fespaces.GetNumLevels()), + P(fespaces.GetNumLevels() - 1), dbc_tdof_lists(fespaces.GetNumLevels() - 1), + B(fespaces.GetNumLevels()), 
X(fespaces.GetNumLevels()), Y(fespaces.GetNumLevels()), + R(fespaces.GetNumLevels()) { - // Read configuration file parameters used to set up the preconditioner. The default MG - // parameters are for a V-cycle with a single pre/post smoothing iteration. - MFEM_VERIFY(GetNumLevels() > 0, - "Empty finite element space hierarchy during multigrid solver setup!"); - // Configure levels of geometric coarsening. Multigrid vectors will be configured at first // call to Mult. The multigrid operator size is set based on the finest space dimension. - const int m = GetNumLevels(); - A_.resize(m, nullptr); - x_.resize(m, mfem::Vector()); - y_.resize(m, mfem::Vector()); - r_.resize(m, mfem::Vector()); - X_.resize(m, mfem::Array()); - Y_.resize(m, mfem::Array()); - R_.resize(m, mfem::Array()); - xrefs_.resize(m, std::vector()); - yrefs_.resize(m, std::vector()); - rrefs_.resize(m, std::vector()); + const int n_levels = fespaces.GetNumLevels(); + MFEM_VERIFY(n_levels > 0, + "Empty finite element space hierarchy during multigrid solver setup!"); + + // Configure prolongation operators. + for (int l = 0; l < n_levels - 1; l++) + { + P[l] = fespaces.GetProlongationAtLevel(l); + } // Use the supplied level 0 (coarse) solver. - B_.reserve(m); - B_.push_back(std::move(coarse_solver)); + B[0] = std::move(coarse_solver); // Configure level smoothers. Use distributive relaxation smoothing if an auxiliary // finite element space was provided. 
- if (aux_fespaces) + for (int l = 1; l < n_levels; l++) { - int cheby_smooth_it = 1; - for (int l = 1; l < m; l++) + if (aux_fespaces) { - B_.push_back(std::make_unique( - fespaces.GetFESpaceAtLevel(l), aux_fespaces->GetFESpaceAtLevel(l), dbc_marker, - smooth_it, cheby_smooth_it, cheby_order)); + B[l] = std::make_unique>( + fespaces.GetFESpaceAtLevel(l), aux_fespaces->GetFESpaceAtLevel(l), smooth_it, 1, + cheby_order); } - } - else - { - for (int l = 1; l < m; l++) + else { - mfem::Array dbc_tdof_list_l; - fespaces.GetFESpaceAtLevel(l).GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - B_.push_back( - std::make_unique(fespaces.GetFESpaceAtLevel(l).GetComm(), - dbc_tdof_list_l, smooth_it, cheby_order)); + B[l] = std::make_unique>(smooth_it, cheby_order); } } } -void GeometricMultigridSolver::SetOperator( - const std::vector> &ops, - const std::vector> *aux_ops) +template +void GeometricMultigridSolver::SetOperator(const OperType &op) { - const int m = GetNumLevels(); - MFEM_VERIFY(ops.size() == static_cast(m) && - (!aux_ops || aux_ops->size() == static_cast(m)), - "Invalid number of levels for operators in multigrid solver setup!"); - for (int l = 0; l < m; l++) + using ParOperType = + typename std::conditional::value, + ComplexParOperator, ParOperator>::type; + + const auto *mg_op = dynamic_cast *>(&op); + MFEM_VERIFY(mg_op, "GeometricMultigridSolver requires a MultigridOperator or " + "ComplexMultigridOperator argument provided to SetOperator!"); + + const int n_levels = static_cast(A.size()); + MFEM_VERIFY( + mg_op->GetNumLevels() == n_levels && + (!mg_op->HasAuxiliaryOperators() || mg_op->GetNumAuxiliaryLevels() == n_levels), + "Invalid number of levels for operators in multigrid solver setup!"); + for (int l = 0; l < n_levels; l++) { - A_[l] = ops[l].get(); - auto *dist_smoother = dynamic_cast(B_[l].get()); + A[l] = &mg_op->GetOperatorAtLevel(l); + MFEM_VERIFY( + A[l]->Width() == A[l]->Height() && + (n_levels == 1 || + (A[l]->Height() == ((l < n_levels - 1) 
? P[l]->Width() : P[l - 1]->Height()))), + "Invalid operator sizes for GeometricMultigridSolver!"); + + const auto *PtAP_l = dynamic_cast(&mg_op->GetOperatorAtLevel(l)); + MFEM_VERIFY( + PtAP_l, + "GeometricMultigridSolver requires ParOperator or ComplexParOperator operators!"); + if (l < n_levels - 1) + { + dbc_tdof_lists[l] = PtAP_l->GetEssentialTrueDofs(); + } + + auto *dist_smoother = dynamic_cast *>(B[l].get()); if (dist_smoother) { - MFEM_VERIFY(aux_ops, "Distributive relaxation smoother relies on both primary space " - "and auxiliary space operators for geometric multigrid!") - dist_smoother->SetOperator(*ops[l], *(*aux_ops)[l]); + MFEM_VERIFY(mg_op->HasAuxiliaryOperators(), + "Distributive relaxation smoother relies on both primary space and " + "auxiliary space operators for multigrid smoothing!"); + dist_smoother->SetOperators(mg_op->GetOperatorAtLevel(l), + mg_op->GetAuxiliaryOperatorAtLevel(l)); } else { - B_[l]->SetOperator(*ops[l]); + B[l]->SetOperator(mg_op->GetOperatorAtLevel(l)); } + + X[l].SetSize(A[l]->Height()); + Y[l].SetSize(A[l]->Height()); + R[l].SetSize(A[l]->Height()); + } +} + +template +void GeometricMultigridSolver::Mult(const VecType &x, VecType &y) const +{ + // Initialize. + const int n_levels = static_cast(A.size()); + MFEM_ASSERT(!this->initial_guess, + "Geometric multigrid solver does not use initial guess!"); + MFEM_ASSERT(n_levels > 1 || pc_it == 1, + "Single-level geometric multigrid will not work with multiple iterations!"); + + // Apply V-cycle. The initial guess for y is zero'd at the first pre-smooth iteration. 
+ X.back() = x; + for (int it = 0; it < pc_it; it++) + { + VCycle(n_levels - 1, (it > 0)); } - height = A_.back()->Height(); - width = A_.back()->Width(); + y = Y.back(); } -void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const +namespace +{ + +inline void RealMult(const Operator &op, const Vector &x, Vector &y) +{ + op.Mult(x, y); +} + +inline void RealMult(const Operator &op, const ComplexVector &x, ComplexVector &y) +{ + op.Mult(x.Real(), y.Real()); + op.Mult(x.Imag(), y.Imag()); +} + +inline void RealMultTranspose(const Operator &op, const Vector &x, Vector &y) +{ + op.MultTranspose(x, y); +} + +inline void RealMultTranspose(const Operator &op, const ComplexVector &x, ComplexVector &y) +{ + op.MultTranspose(x.Real(), y.Real()); + op.MultTranspose(x.Imag(), y.Imag()); +} + +} // namespace + +template +void GeometricMultigridSolver::VCycle(int l, bool initial_guess) const { // Pre-smooth, with zero initial guess (Y = 0 set inside). This is the coarse solve at - // level 0. Important to note that the smoothers must respect the iterative_mode flag + // level 0. Important to note that the smoothers must respect the initial guess flag // correctly (given X, Y, compute Y <- Y + B (X - A Y)) . - const int nrhs = X_[l].Size(); - B_[l]->iterative_mode = initial_guess; - B_[l]->ArrayMult(X_[l], Y_[l]); + B[l]->SetInitialGuess(initial_guess); + B[l]->Mult(X[l], Y[l]); if (l == 0) { return; } - // Compute residual and restrict. - A_[l]->ArrayMult(Y_[l], R_[l]); - for (int j = 0; j < nrhs; j++) - { - subtract(*X_[l][j], *R_[l][j], *R_[l][j]); - } - GetProlongationAtLevel(l - 1).ArrayMultTranspose(R_[l], X_[l - 1]); + // Compute residual. + A[l]->Mult(Y[l], R[l]); + linalg::AXPBY(1.0, X[l], -1.0, R[l]); // Coarse grid correction. + RealMultTranspose(*P[l - 1], R[l], X[l - 1]); + if (dbc_tdof_lists[l - 1]) + { + linalg::SetSubVector(X[l - 1], *dbc_tdof_lists[l - 1], 0.0); + } VCycle(l - 1, false); // Prolongate and add. 
- GetProlongationAtLevel(l - 1).ArrayMult(Y_[l - 1], R_[l]); - for (int j = 0; j < nrhs; j++) - { - *Y_[l][j] += *R_[l][j]; - } + RealMult(*P[l - 1], Y[l - 1], R[l]); + Y[l] += R[l]; // Post-smooth, with nonzero initial guess. - B_[l]->iterative_mode = true; - B_[l]->ArrayMultTranspose(X_[l], Y_[l]); + B[l]->SetInitialGuess(true); + B[l]->MultTranspose(X[l], Y[l]); } +template class GeometricMultigridSolver; +template class GeometricMultigridSolver; + } // namespace palace diff --git a/palace/linalg/gmg.hpp b/palace/linalg/gmg.hpp index eede99036..c7b770109 100644 --- a/palace/linalg/gmg.hpp +++ b/palace/linalg/gmg.hpp @@ -6,9 +6,20 @@ #include #include -#include +#include "linalg/operator.hpp" +#include "linalg/solver.hpp" +#include "linalg/vector.hpp" #include "utils/iodata.hpp" +namespace mfem +{ + +template +class Array; +class ParFiniteElementSpaceHierarchy; + +} // namespace mfem + namespace palace { @@ -17,128 +28,49 @@ namespace palace // hierarchy of finite element spaces. Optionally can be configured to use auxiliary space // smoothing at each level. // -class GeometricMultigridSolver : public mfem::Solver +template +class GeometricMultigridSolver : public Solver { + using VecType = typename Solver::VecType; + private: - // Reference to the underlying finite element space hierarchy used to construct the - // multilevel preconditioner. - const mfem::ParFiniteElementSpaceHierarchy &fespaces_; + // Number of V-cycles per preconditioner application. + const int pc_it; - // System matrices at each multigrid level (not owned). - std::vector A_; + // System matrices at each multigrid level and prolongation operators (not owned). + std::vector A; + std::vector P; + std::vector *> dbc_tdof_lists; - // Smoothers for each level. Coarse level solver is B_[0]. - std::vector> B_; + // Smoothers for each level. Coarse level solver is B[0]. + mutable std::vector>> B; // Temporary vectors for preconditioner application. 
The type of these is dictated by the // MFEM Operator interface for multiple RHS. - mutable std::vector x_, y_, r_; - mutable std::vector> X_, Y_, R_; - mutable std::vector> xrefs_, yrefs_, rrefs_; - - // Number of V-cycles per preconditioner application. - const int pc_it; - - // Returns prolongation operator at given level. - const mfem::Operator &GetProlongationAtLevel(int l) const - { - return *fespaces_.GetProlongationAtLevel(l); - } - - // Returns the number of levels. - int GetNumLevels() const { return fespaces_.GetNumLevels(); } + mutable std::vector X, Y, R; // Internal function to perform a single V-cycle iteration. void VCycle(int l, bool initial_guess) const; public: - GeometricMultigridSolver(std::unique_ptr &&coarse_solver, - const mfem::Array &dbc_marker, + GeometricMultigridSolver(std::unique_ptr> &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces, int cycle_it, int smooth_it, int cheby_order); GeometricMultigridSolver(const IoData &iodata, - std::unique_ptr &&coarse_solver, - const mfem::Array &dbc_marker, + std::unique_ptr> &&coarse_solver, mfem::ParFiniteElementSpaceHierarchy &fespaces, mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) - : GeometricMultigridSolver(std::move(coarse_solver), dbc_marker, fespaces, aux_fespaces, + : GeometricMultigridSolver(std::move(coarse_solver), fespaces, aux_fespaces, iodata.solver.linear.mg_cycle_it, iodata.solver.linear.mg_smooth_it, iodata.solver.linear.mg_smooth_order) { } - // Sets the matrices from which to contruct a multilevel preconditioner. - void SetOperator(const Operator &op) override - { - MFEM_ABORT("SetOperator with a single operator is not implemented for " - "GeometricMultigridSolver, use the other signature instead!"); - } - void SetOperator(const std::vector> &ops, - const std::vector> *aux_ops = nullptr); - - // Application of the solver. 
- void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - mfem::Array X(1); - mfem::Array Y(1); - X[0] = &x; - Y[0] = &y; - ArrayMult(X, Y); - } + void SetOperator(const OperType &op) override; - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override - { - // Initialize. - const int m = GetNumLevels(), nrhs = X.Size(); - MFEM_VERIFY(!iterative_mode, "Geometric multigrid solver does not use iterative_mode!"); - MFEM_VERIFY(m > 1 || pc_it == 1, - "Single-level geometric multigrid will not work with multiple iterations!"); - if (nrhs * height != x_[m - 1].Size()) - { - for (int l = 0; l < m; l++) - { - MFEM_VERIFY(A_[l], "Missing operator for geometric multigrid level " << l << "!"); - x_[l].SetSize(nrhs * A_[l]->Height()); - y_[l].SetSize(nrhs * A_[l]->Height()); - r_[l].SetSize(nrhs * A_[l]->Height()); - } - } - for (int l = 0; l < m; l++) - { - xrefs_[l].resize(nrhs); - yrefs_[l].resize(nrhs); - rrefs_[l].resize(nrhs); - X_[l].SetSize(nrhs); - Y_[l].SetSize(nrhs); - R_[l].SetSize(nrhs); - for (int j = 0; j < nrhs; j++) - { - xrefs_[l][j].MakeRef(x_[l], j * A_[l]->Height(), A_[l]->Height()); - yrefs_[l][j].MakeRef(y_[l], j * A_[l]->Height(), A_[l]->Height()); - rrefs_[l][j].MakeRef(r_[l], j * A_[l]->Height(), A_[l]->Height()); - X_[l][j] = &xrefs_[l][j]; - Y_[l][j] = &yrefs_[l][j]; - R_[l][j] = &rrefs_[l][j]; - } - } - - // Apply V-cycle. - for (int j = 0; j < nrhs; j++) - { - *X_[m - 1][j] = *X[j]; - } - for (int it = 0; it < pc_it; it++) - { - VCycle(m - 1, (it > 0)); - } - for (int j = 0; j < nrhs; j++) - { - *Y[j] = *Y_[m - 1][j]; - } - } + void Mult(const VecType &x, VecType &y) const override; }; } // namespace palace diff --git a/palace/linalg/hypre.cpp b/palace/linalg/hypre.cpp deleted file mode 100644 index 4a2167051..000000000 --- a/palace/linalg/hypre.cpp +++ /dev/null @@ -1,877 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#include "hypre.hpp" - -#include - -namespace mfem -{ - -mfem::HypreParMatrix * -HypreParMatrixFromBlocks(mfem::Array2D &blocks, - mfem::Array2D *blockCoeff) -{ - mfem::Array2D blocks_without_const(blocks.NumRows(), - blocks.NumCols()); - for (int i = 0; i < blocks.NumRows(); i++) - { - for (int j = 0; j < blocks.NumCols(); j++) - { - blocks_without_const(i, j) = const_cast(blocks(i, j)); - } - } - return HypreParMatrixFromBlocks(blocks_without_const, blockCoeff); -} - -} // namespace mfem - -namespace palace::hypre -{ - -void hypreParCSREliminateRowsCols(hypre_ParCSRMatrix *A, const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, HYPRE_Complex diag, - bool ignore_rows) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); - HYPRE_Int ncols_A_diag = hypre_CSRMatrixNumCols(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); - HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); - HYPRE_Int ncols_A_offd = hypre_CSRMatrixNumCols(A_offd); - - HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A); - HYPRE_Int *marker_offd = nullptr; - - HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); - HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(A_diag); - - HYPRE_Int i, j, k, nnz_diag, nnz_offd, A_diag_i_i, A_offd_i_i; - - // Get markers for columns of the diagonal and off-diagonal matrix to eliminate - // (from mfem::internal::hypre_ParCSRMatrixEliminateAAe). 
- HYPRE_Int *eliminate_diag_col, *eliminate_offd_col; - { - hypre_ParCSRCommHandle *comm_handle; - hypre_ParCSRCommPkg *comm_pkg; - HYPRE_Int num_sends, *int_buf_data; - HYPRE_Int index, start; - - eliminate_diag_col = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_A_diag); - eliminate_offd_col = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_A_offd); - - // Make sure A has a communication package. - comm_pkg = hypre_ParCSRMatrixCommPkg(A); - if (!comm_pkg) - { - hypre_MatvecCommPkgCreate(A); - comm_pkg = hypre_ParCSRMatrixCommPkg(A); - } - - // Which of the local rows are to be eliminated. - for (i = 0; i < ncols_A_diag; i++) - { - eliminate_diag_col[i] = 0; - } - for (i = 0; i < rows_cols.Size(); i++) - { - eliminate_diag_col[rows_cols[i]] = 1; - } - - // Use a Matvec communication pattern to find (in eliminate_col) which of the local offd - // columns are to be eliminated. - num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - int_buf_data = mfem_hypre_CTAlloc_host( - HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); - index = 0; - for (i = 0; i < num_sends; i++) - { - start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1); j++) - { - k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); - int_buf_data[index++] = eliminate_diag_col[k]; - } - } - comm_handle = - hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_offd_col); - - // Finish the communication. - hypre_ParCSRCommHandleDestroy(comm_handle); - - mfem_hypre_TFree_host(int_buf_data); - } - - marker_offd = hypre_CTAlloc(HYPRE_Int, ncols_A_offd, HYPRE_MEMORY_HOST); - - nnz_diag = nnz_offd = A_diag_i_i = A_offd_i_i = 0; - for (i = 0; i < nrows_local; i++) - { - // Drop eliminated entries in the diagonal block. 
- for (j = A_diag_i_i; j < A_diag_i[i + 1]; j++) - { - HYPRE_Int col = A_diag_j[j]; - HYPRE_Complex val = A_diag_a[j]; - if ((!ignore_rows && eliminate_diag_col[i]) || eliminate_diag_col[col]) - { - // Always keep the diagonal entry (even if it is 0). - if (!ignore_rows && i == col) - { - if (diag_policy == DiagonalPolicy::USER) - { - val = diag; - } - else if (diag_policy == DiagonalPolicy::ONE) - { - val = 1.0; - } - else if (diag_policy == DiagonalPolicy::ZERO) - { - val = 0.0; - } - // else (diag_policy == DiagonalPolicy::KEEP) - } - else - { - continue; - } - } - A_diag_j[nnz_diag] = col; - A_diag_a[nnz_diag] = val; - nnz_diag++; - } - - // Drop eliminated entries in the off-diagonal block. - for (j = A_offd_i_i; j < A_offd_i[i + 1]; j++) - { - HYPRE_Int col = A_offd_j[j]; - HYPRE_Complex val = A_offd_a[j]; - if ((!ignore_rows && eliminate_diag_col[i]) || eliminate_offd_col[col]) - { - // In normal cases: diagonal entry should not appear in A_offd (but this can still - // be possible). 
- if (!ignore_rows && i + first_row == col_map_offd_A[col]) - { - if (diag_policy == DiagonalPolicy::USER) - { - val = diag; - } - else if (diag_policy == DiagonalPolicy::ONE) - { - val = 1.0; - } - else if (diag_policy == DiagonalPolicy::ZERO) - { - val = 0.0; - } - // else (diag_policy == DiagonalPolicy::KEEP) - } - else - { - continue; - } - } - if (marker_offd[col] == 0) - { - marker_offd[col] = 1; - } - A_offd_j[nnz_offd] = col; - A_offd_a[nnz_offd] = val; - nnz_offd++; - } - A_diag_i_i = A_diag_i[i + 1]; - A_offd_i_i = A_offd_i[i + 1]; - A_diag_i[i + 1] = nnz_diag; - A_offd_i[i + 1] = nnz_offd; - } - - mfem_hypre_TFree_host(eliminate_offd_col); - mfem_hypre_TFree_host(eliminate_diag_col); - - hypre_CSRMatrixNumNonzeros(A_diag) = nnz_diag; - hypre_CSRMatrixNumNonzeros(A_offd) = nnz_offd; - hypre_ParCSRMatrixSetNumNonzeros(A); - hypre_ParCSRMatrixDNumNonzeros(A) = (HYPRE_Real)hypre_ParCSRMatrixNumNonzeros(A); - - for (i = 0, k = 0; i < ncols_A_offd; i++) - { - if (marker_offd[i]) - { - col_map_offd_A[k] = col_map_offd_A[i]; - marker_offd[i] = k++; - } - } - hypre_CSRMatrixNumCols(A_offd) = k; // ncols_A_offd = k - for (i = 0; i < nnz_offd; i++) - { - A_offd_j[i] = marker_offd[A_offd_j[i]]; - } - - hypre_TFree(marker_offd, HYPRE_MEMORY_HOST); - - if (hypre_ParCSRMatrixCommPkg(A)) - { - hypre_MatvecCommPkgDestroy(hypre_ParCSRMatrixCommPkg(A)); - } - hypre_MatvecCommPkgCreate(A); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -void hypreParCSREliminateRowsColsv2(hypre_ParCSRMatrix *A, - const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, HYPRE_Complex diag, - bool ignore_rows) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Int ncols_A_diag = hypre_CSRMatrixNumRows(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Int ncols_A_offd = hypre_CSRMatrixNumCols(A_offd); - - const auto n_ess_dofs = rows_cols.Size(); - const 
auto ess_dofs_d = - rows_cols.GetMemory().Read(mfem::GetHypreMemoryClass(), n_ess_dofs); - - // Start communication to figure out which columns need to be eliminated in the - // off-diagonal block. - hypre_ParCSRCommHandle *comm_handle; - HYPRE_Int *int_buf_data, *eliminate_row, *eliminate_col; - { - eliminate_row = mfem_hypre_CTAlloc(HYPRE_Int, ncols_A_diag); - eliminate_col = mfem_hypre_CTAlloc(HYPRE_Int, ncols_A_offd); - - // Make sure A has a communication package. - hypre_ParCSRCommPkg *comm_pkg = hypre_ParCSRMatrixCommPkg(A); - if (!comm_pkg) - { - hypre_MatvecCommPkgCreate(A); - comm_pkg = hypre_ParCSRMatrixCommPkg(A); - } - - // Which of the local rows are to be eliminated? - MFEM_HYPRE_FORALL(i, ncols_A_diag, { eliminate_row[i] = 0; }); - MFEM_HYPRE_FORALL(i, n_ess_dofs, { eliminate_row[ess_dofs_d[i]] = 1; }); - - // Use a matvec communication pattern to find (in eliminate_col) which of the local offd - // columns are to be eliminated. - HYPRE_Int num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - HYPRE_Int int_buf_sz = hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends); - int_buf_data = mfem_hypre_CTAlloc(HYPRE_Int, int_buf_sz); - - HYPRE_Int *send_map_elmts; -#if defined(HYPRE_USING_GPU) - hypre_ParCSRCommPkgCopySendMapElmtsToDevice(comm_pkg); - send_map_elmts = hypre_ParCSRCommPkgDeviceSendMapElmts(comm_pkg); -#else - send_map_elmts = hypre_ParCSRCommPkgSendMapElmts(comm_pkg); -#endif - MFEM_HYPRE_FORALL(i, int_buf_sz, { - auto k = send_map_elmts[i]; - int_buf_data[i] = eliminate_row[k]; - }); - -#if defined(HYPRE_USING_GPU) - // Try to use device-aware MPI for the communication if available. - comm_handle = - hypre_ParCSRCommHandleCreate_v2(11, comm_pkg, HYPRE_MEMORY_DEVICE, int_buf_data, - HYPRE_MEMORY_DEVICE, eliminate_col); -#else - comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, eliminate_col); -#endif - } - - // Eliminate rows and columns in the diagonal block. 
- if (ignore_rows) - { - const auto nrows_A_diag = hypre_CSRMatrixNumRows(A_diag); - const auto I = A_diag->i; - const auto J = A_diag->j; - auto data = A_diag->data; - MFEM_HYPRE_FORALL(i, nrows_A_diag, { - for (auto j = I[i]; j < I[i + 1]; j++) - { - data[j] *= 1 - eliminate_row[J[j]]; - } - }); - } - else - { - const auto I = A_diag->i; - const auto J = A_diag->j; - auto data = A_diag->data; - MFEM_HYPRE_FORALL(i, n_ess_dofs, { - const auto idof = ess_dofs_d[i]; - for (auto j = I[idof]; j < I[idof + 1]; j++) - { - const auto jdof = J[j]; - if (jdof == idof) - { - if (diag_policy == DiagonalPolicy::USER) - { - data[j] = diag; - } - else if (diag_policy == DiagonalPolicy::ONE) - { - data[j] = 1.0; - } - else if (diag_policy == DiagonalPolicy::ZERO) - { - data[j] = 0.0; - } - // else (diag_policy == DiagonalPolicy::KEEP) - } - else - { - data[j] = 0.0; - for (auto k = I[jdof]; k < I[jdof + 1]; k++) - { - if (J[k] == idof) - { - data[k] = 0.0; - break; - } - } - } - } - }); - } - - // Eliminate rows in the off-diagonal block. - if (!ignore_rows) - { - const auto I = A_offd->i; - auto data = A_offd->data; - MFEM_HYPRE_FORALL(i, n_ess_dofs, { - const auto idof = ess_dofs_d[i]; - for (auto j = I[idof]; j < I[idof + 1]; j++) - { - data[j] = 0.0; - } - }); - } - - // Wait for MPI communication to finish. - hypre_ParCSRCommHandleDestroy(comm_handle); - mfem_hypre_TFree(int_buf_data); - mfem_hypre_TFree(eliminate_row); - - // Eliminate columns in the off-diagonal block. 
- { - const auto nrows_A_offd = hypre_CSRMatrixNumRows(A_offd); - const auto I = A_offd->i; - const auto J = A_offd->j; - auto data = A_offd->data; - MFEM_HYPRE_FORALL(i, nrows_A_offd, { - for (auto j = I[i]; j < I[i + 1]; j++) - { - data[j] *= 1 - eliminate_col[J[j]]; - } - }); - } - - mfem_hypre_TFree(eliminate_col); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -hypre_ParCSRMatrix *hypreParCSREliminateRowsWithCols(hypre_ParCSRMatrix *A, - const mfem::Array &rows) -{ - hypre_error_flag = 0; - - HYPRE_Int nrows_local = hypre_ParCSRMatrixNumRows(A); - HYPRE_Int ncols_local = hypre_ParCSRMatrixNumCols(A); - - HYPRE_Int *diag_rows_bc, *offd_rows_bc; - - hypre_ParCSRMatrix *At, *B; - - HYPRE_Int i, j, k; - - diag_rows_bc = mfem_hypre_CTAlloc_host(HYPRE_Int, nrows_local); - - // Which of the local rows are to be eliminated. - for (i = 0; i < rows.Size(); i++) - { - diag_rows_bc[rows[i]] = 1; - } - - hypre_ParCSRMatrixTranspose(A, &At, 1); - hypre_MatvecCommPkgCreate(At); - - // Use a Matvec communication pattern to find which of the rows connected to local columns - // are to be eliminated. 
- { - hypre_ParCSRCommHandle *comm_handle; - hypre_ParCSRCommPkg *comm_pkg; - HYPRE_Int num_sends, *int_buf_data; - HYPRE_Int index, start; - - offd_rows_bc = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(At))); - - comm_pkg = hypre_ParCSRMatrixCommPkg(At); - num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - int_buf_data = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); - index = 0; - for (i = 0; i < num_sends; i++) - { - start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1); j++) - { - k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); - int_buf_data[index++] = diag_rows_bc[k]; - } - } - comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_rows_bc); - - // Finish the communication. - hypre_ParCSRCommHandleDestroy(comm_handle); - - hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST); - } - - // Eliminate the columns of the original matrix (rows in the transposed matrix). - { - hypre_CSRMatrix *Atd = hypre_ParCSRMatrixDiag(At); - HYPRE_Real *AtdA = hypre_CSRMatrixData(Atd); - HYPRE_Int *AtdI = hypre_CSRMatrixI(Atd); - HYPRE_Int *AtdJ = hypre_CSRMatrixJ(Atd); - - hypre_CSRMatrix *Ato = hypre_ParCSRMatrixOffd(At); - HYPRE_Real *AtoA = hypre_CSRMatrixData(Ato); - HYPRE_Int *AtoI = hypre_CSRMatrixI(Ato); - HYPRE_Int *AtoJ = hypre_CSRMatrixJ(Ato); - - HYPRE_Int elim; - - for (i = 0; i < ncols_local; i++) - { - // A column is eliminated if it has a nonzero in an eliminated row. 
- elim = 0; - for (j = AtdI[i]; j < AtdI[i + 1]; j++) - { - if (diag_rows_bc[AtdJ[j]]) - { - elim = 1; - break; - } - } - if (!elim && AtoI) - { - for (j = AtoI[i]; j < AtoI[i + 1]; j++) - { - if (offd_rows_bc[AtoJ[j]]) - { - elim = 1; - break; - } - } - } - if (elim) - { - for (j = AtdI[i]; j < AtdI[i + 1]; j++) - { - // if (!diag_rows_bc[AtdJ[j]]) - AtdA[j] = 0.0; - } - if (AtoI) - { - for (j = AtoI[i]; j < AtoI[i + 1]; j++) - { - // if (!offd_rows_bc[AtoJ[j]]) - AtoA[j] = 0.0; - } - } - } - } - } - - hypre_TFree(diag_rows_bc, HYPRE_MEMORY_HOST); - hypre_TFree(offd_rows_bc, HYPRE_MEMORY_HOST); - - // Create as a new matrix. - hypre_ParCSRMatrixTranspose(At, &B, 1); - hypre_MatvecCommPkgCreate(B); - hypre_ParCSRMatrixDestroy(At); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); - return B; -} - -hypre_ParCSRMatrix *hypreParCSREliminateColsWithRows(hypre_ParCSRMatrix *A, - const mfem::Array &cols) -{ - hypre_error_flag = 0; - - HYPRE_Int nrows_local = hypre_ParCSRMatrixNumRows(A); - - HYPRE_Int *diag_cols_bc, *offd_cols_bc; - - hypre_ParCSRMatrix *B; - - HYPRE_Int i, j, k; - - diag_cols_bc = mfem_hypre_CTAlloc_host(HYPRE_Int, nrows_local); - - // Which of the local columns are to be eliminated. - for (i = 0; i < cols.Size(); i++) - { - diag_cols_bc[cols[i]] = 1; - } - - // Clone the original matrix. - B = hypre_ParCSRMatrixClone(A, 1); - hypre_MatvecCommPkgCreate(B); - - // Use a Matvec communication pattern to find which of the off-diagonal columns are to be - // eliminated. 
- { - hypre_ParCSRCommHandle *comm_handle; - hypre_ParCSRCommPkg *comm_pkg; - HYPRE_Int num_sends, *int_buf_data; - HYPRE_Int index, start; - - offd_cols_bc = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_CSRMatrixNumCols(hypre_ParCSRMatrixOffd(B))); - - comm_pkg = hypre_ParCSRMatrixCommPkg(B); - num_sends = hypre_ParCSRCommPkgNumSends(comm_pkg); - int_buf_data = mfem_hypre_TAlloc_host( - HYPRE_Int, hypre_ParCSRCommPkgSendMapStart(comm_pkg, num_sends)); - index = 0; - for (i = 0; i < num_sends; i++) - { - start = hypre_ParCSRCommPkgSendMapStart(comm_pkg, i); - for (j = start; j < hypre_ParCSRCommPkgSendMapStart(comm_pkg, i + 1); j++) - { - k = hypre_ParCSRCommPkgSendMapElmt(comm_pkg, j); - int_buf_data[index++] = diag_cols_bc[k]; - } - } - comm_handle = hypre_ParCSRCommHandleCreate(11, comm_pkg, int_buf_data, offd_cols_bc); - - // Finish the communication. - hypre_ParCSRCommHandleDestroy(comm_handle); - - hypre_TFree(int_buf_data, HYPRE_MEMORY_HOST); - } - - // Eliminate the rows of the original matrix. - { - hypre_CSRMatrix *Bd = hypre_ParCSRMatrixDiag(B); - HYPRE_Real *BdA = hypre_CSRMatrixData(Bd); - HYPRE_Int *BdI = hypre_CSRMatrixI(Bd); - HYPRE_Int *BdJ = hypre_CSRMatrixJ(Bd); - - hypre_CSRMatrix *Bo = hypre_ParCSRMatrixOffd(B); - HYPRE_Real *BoA = hypre_CSRMatrixData(Bo); - HYPRE_Int *BoI = hypre_CSRMatrixI(Bo); - HYPRE_Int *BoJ = hypre_CSRMatrixJ(Bo); - - HYPRE_Int elim; - - for (i = 0; i < nrows_local; i++) - { - // A column is eliminated if it has a nonzero in an eliminated row. 
- elim = 0; - for (j = BdI[i]; j < BdI[i + 1]; j++) - { - if (diag_cols_bc[BdJ[j]]) - { - elim = 1; - break; - } - } - if (!elim && BoI) - { - for (j = BoI[i]; j < BoI[i + 1]; j++) - { - if (offd_cols_bc[BoJ[j]]) - { - elim = 1; - break; - } - } - } - if (elim) - { - for (j = BdI[i]; j < BdI[i + 1]; j++) - { - // if (!diag_cols_bc[BdJ[j]]) - BdA[j] = 0.0; - } - if (BoI) - { - for (j = BoI[i]; j < BoI[i + 1]; j++) - { - // if (!offd_cols_bc[BoJ[j]]) - BoA[j] = 0.0; - } - } - } - } - } - - hypre_TFree(diag_cols_bc, HYPRE_MEMORY_HOST); - hypre_TFree(offd_cols_bc, HYPRE_MEMORY_HOST); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); - return B; -} - -void hypreParCSRCopy(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - HYPRE_Int *A_diag_j = hypre_CSRMatrixJ(A_diag); - HYPRE_Int ncols_A_diag = hypre_CSRMatrixNumCols(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = hypre_CSRMatrixI(A_offd); - HYPRE_Int *A_offd_j = hypre_CSRMatrixJ(A_offd); - - HYPRE_BigInt *col_map_offd_A = hypre_ParCSRMatrixColMapOffd(A); - - hypre_CSRMatrix *B_diag = hypre_ParCSRMatrixDiag(B); - HYPRE_Real *B_diag_a = hypre_CSRMatrixData(B_diag); - HYPRE_Int *B_diag_i = hypre_CSRMatrixI(B_diag); - HYPRE_Int *B_diag_j = hypre_CSRMatrixJ(B_diag); - HYPRE_Int ncols_B_diag = hypre_CSRMatrixNumCols(B_diag); - - hypre_CSRMatrix *B_offd = hypre_ParCSRMatrixOffd(B); - HYPRE_Real *B_offd_a = hypre_CSRMatrixData(B_offd); - HYPRE_Int *B_offd_i = hypre_CSRMatrixI(B_offd); - HYPRE_Int *B_offd_j = hypre_CSRMatrixJ(B_offd); - - HYPRE_BigInt *col_map_offd_B = hypre_ParCSRMatrixColMapOffd(B); - - HYPRE_Int i, j, pos; - - HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(A); - 
HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(A_diag); - MFEM_VERIFY(first_row == hypre_ParCSRMatrixFirstRowIndex(B) && - nrows_local == hypre_CSRMatrixNumRows(B_diag) && - ncols_A_diag == ncols_B_diag, - "Invalid mismatch in matrix sizes/distribution!"); - - // Copy the diagonal block A => B. - { - HYPRE_Int *marker = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_A_diag); - for (j = 0; j < ncols_A_diag; j++) - { - marker[j] = -1; - } - - for (i = 0; i < nrows_local; i++) - { - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - marker[A_diag_j[j]] = j; - } - - for (j = B_diag_i[i]; j < B_diag_i[i + 1]; j++) - { - // Skip entries not in sparsity pattern of B to copy. All columns of B are marked in - // the array because sparsity(B) ⊆ sparsity(A). - pos = marker[B_diag_j[j]]; - MFEM_VERIFY(pos >= A_diag_i[i], - "Found nonzero entry of B in copy which is not in A!"); - B_diag_a[j] = A_diag_a[pos]; - } - } - mfem_hypre_TFree_host(marker); - } - - // Copy the off-diagonal block A => B. - { - for (i = 0; i < nrows_local; i++) - { - std::map marker; - // std::unordered_map marker; - for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) - { - marker.insert(std::make_pair(col_map_offd_A[A_offd_j[j]], j)); - } - - for (j = B_offd_i[i]; j < B_offd_i[i + 1]; j++) - { - auto it = marker.find(col_map_offd_B[B_offd_j[j]]); - MFEM_VERIFY(it != marker.end(), - "Found nonzero entry of B in copy which is not in A!"); - pos = it->second; - B_offd_a[j] = A_offd_a[pos]; - } - } - } - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -void hypreParCSRRowSums(hypre_ParCSRMatrix *A, mfem::Vector &rowsums) -{ - hypre_error_flag = 0; - - hypre_CSRMatrix *A_diag = hypre_ParCSRMatrixDiag(A); - HYPRE_Real *A_diag_a = hypre_CSRMatrixData(A_diag); - HYPRE_Int *A_diag_i = hypre_CSRMatrixI(A_diag); - - hypre_CSRMatrix *A_offd = hypre_ParCSRMatrixOffd(A); - HYPRE_Real *A_offd_a = hypre_CSRMatrixData(A_offd); - HYPRE_Int *A_offd_i = 
hypre_CSRMatrixI(A_offd); - - HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(A_diag); - - HYPRE_Int i, j; - HYPRE_Real rowsum; - - for (i = 0; i < nrows_local; i++) - { - rowsum = 0.0; - for (j = A_diag_i[i]; j < A_diag_i[i + 1]; j++) - { - rowsum += std::abs(A_diag_a[j]); - } - for (j = A_offd_i[i]; j < A_offd_i[i + 1]; j++) - { - rowsum += std::abs(A_offd_a[j]); - } - rowsums(i) = rowsum; - } - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -void hypreParCSRInfNorm(hypre_ParCSRMatrix *Ar, hypre_ParCSRMatrix *Ai, HYPRE_Real *norm) -{ - hypre_error_flag = 0; - - MPI_Comm comm = hypre_ParCSRMatrixComm(Ar); - - hypre_CSRMatrix *Ar_diag = hypre_ParCSRMatrixDiag(Ar); - HYPRE_Real *Ar_diag_a = hypre_CSRMatrixData(Ar_diag); - HYPRE_Int *Ar_diag_i = hypre_CSRMatrixI(Ar_diag); - HYPRE_Int *Ar_diag_j = hypre_CSRMatrixJ(Ar_diag); - HYPRE_Int ncols_Ar_diag = hypre_CSRMatrixNumCols(Ar_diag); - - hypre_CSRMatrix *Ar_offd = hypre_ParCSRMatrixOffd(Ar); - HYPRE_Real *Ar_offd_a = hypre_CSRMatrixData(Ar_offd); - HYPRE_Int *Ar_offd_i = hypre_CSRMatrixI(Ar_offd); - HYPRE_Int *Ar_offd_j = hypre_CSRMatrixJ(Ar_offd); - - HYPRE_BigInt *col_map_offd_Ar = hypre_ParCSRMatrixColMapOffd(Ar); - - hypre_CSRMatrix *Ai_diag = hypre_ParCSRMatrixDiag(Ai); - HYPRE_Real *Ai_diag_a = hypre_CSRMatrixData(Ai_diag); - HYPRE_Int *Ai_diag_i = hypre_CSRMatrixI(Ai_diag); - HYPRE_Int *Ai_diag_j = hypre_CSRMatrixJ(Ai_diag); - HYPRE_Int ncols_Ai_diag = hypre_CSRMatrixNumCols(Ai_diag); - - hypre_CSRMatrix *Ai_offd = hypre_ParCSRMatrixOffd(Ai); - HYPRE_Real *Ai_offd_a = hypre_CSRMatrixData(Ai_offd); - HYPRE_Int *Ai_offd_i = hypre_CSRMatrixI(Ai_offd); - HYPRE_Int *Ai_offd_j = hypre_CSRMatrixJ(Ai_offd); - - HYPRE_BigInt *col_map_offd_Ai = hypre_ParCSRMatrixColMapOffd(Ai); - - HYPRE_Int *marker_diag; - - HYPRE_BigInt first_row = hypre_ParCSRMatrixFirstRowIndex(Ar); - HYPRE_Int nrows_local = hypre_CSRMatrixNumRows(Ar_diag); - MFEM_VERIFY(first_row == 
hypre_ParCSRMatrixFirstRowIndex(Ai) && - nrows_local == hypre_CSRMatrixNumRows(Ai_diag) && - ncols_Ar_diag == ncols_Ai_diag, - "Invalid mismatch in matrix sizes/distribution!"); - - HYPRE_Int i, j, pos; - HYPRE_Real rowsum, maxsum = 0.0; - - // We assume the sparsity of the imaginary part is a subset of the real part. Entries - // outside the sparsity of the real part will be ignored for the calculation of matrix - // norm. - marker_diag = mfem_hypre_CTAlloc_host(HYPRE_Int, ncols_Ai_diag); - for (j = 0; j < ncols_Ai_diag; j++) - { - marker_diag[j] = -1; - } - - for (i = 0; i < nrows_local; i++) - { - rowsum = 0.0; - - // Diagonal part - for (j = Ai_diag_i[i]; j < Ai_diag_i[i + 1]; j++) - { - marker_diag[Ai_diag_j[j]] = j; - } - - for (j = Ar_diag_i[i]; j < Ar_diag_i[i + 1]; j++) - { - pos = marker_diag[Ar_diag_j[j]]; - if (pos >= Ai_diag_i[i]) - { - // Column entry is nonzero in both Ar and Ai. - rowsum += std::hypot(Ar_diag_a[j], Ai_diag_a[pos]); - } - else - { - rowsum += std::abs(Ar_diag_a[j]); - } - } - - // Off-diagonal part - std::map marker_offd; - // std::unordered_map marker_offd; - for (j = Ai_offd_i[i]; j < Ai_offd_i[i + 1]; j++) - { - marker_offd.insert(std::make_pair(col_map_offd_Ai[Ai_offd_j[j]], j)); - } - - for (j = Ar_offd_i[i]; j < Ar_offd_i[i + 1]; j++) - { - auto it = marker_offd.find(col_map_offd_Ar[Ar_offd_j[j]]); - if (it != marker_offd.end()) - { - // Column entry is nonzero in both Ar and Ai. 
- pos = it->second; - rowsum += std::hypot(Ar_offd_a[j], Ai_offd_a[pos]); - } - else - { - rowsum += std::abs(Ar_offd_a[j]); - } - } - - maxsum = std::max(maxsum, rowsum); - } - - mfem_hypre_TFree_host(marker_diag); - - MPI_Allreduce(&maxsum, norm, 1, HYPRE_MPI_REAL, MPI_MAX, comm); - - MFEM_VERIFY(!hypre_error_flag, - "HYPRE error encountered: Error code = " << hypre_error_flag << "!"); -} - -} // namespace palace::hypre diff --git a/palace/linalg/hypre.hpp b/palace/linalg/hypre.hpp deleted file mode 100644 index 3dbc954f9..000000000 --- a/palace/linalg/hypre.hpp +++ /dev/null @@ -1,70 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_HYPRE_HPP -#define PALACE_LINALG_HYPRE_HPP - -#include - -namespace mfem -{ - -// Convenience wrapper for casting away the const on the pointers and dispatching onto the -// original function that has the argument type: mfem::Array2D &. -mfem::HypreParMatrix * -HypreParMatrixFromBlocks(mfem::Array2D &blocks, - mfem::Array2D *blockCoeff = nullptr); - -} // namespace mfem - -namespace palace::hypre -{ - -// -// Extensions to Hypre linear algebra routines. -// - -// Version 1: Eliminates (including from the sparsity pattern) the given list of -// rows/columns from the square matrix and sets the diagonal value according to -// diag_policy. A combination of mfem::HypreParMatrix:: EliminateRowsCols and -// hypre_ParCSRMatrixDropSmallEntriesHost. Specialized for host operation currently. -// Version 2: A mfem::HypreParMatrix::EliminateBC with option to specify a general scalar -// for eliminated rows. -// The specified rows/columns should be in local numbering. 
-enum class DiagonalPolicy -{ - USER, - ZERO, - ONE, - KEEP -}; -void hypreParCSREliminateRowsCols(hypre_ParCSRMatrix *A, const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, - HYPRE_Complex diag = 0.0, bool ignore_rows = false); -void hypreParCSREliminateRowsColsv2(hypre_ParCSRMatrix *A, - const mfem::Array &rows_cols, - hypre::DiagonalPolicy diag_policy, - HYPRE_Complex diag = 0.0, bool ignore_rows = false); - -// Eliminates (zeros) the given list of rows (columns), and also eliminates all columns -// (rows) which contain a nonzero in the specified rows (columns) to be eliminated. From -// Hypre's hypre_AMESetup. Returns as a new matrix (leaves the old matrix intact). The -// specified rows (columns) should be in local numbering. -hypre_ParCSRMatrix *hypreParCSREliminateRowsWithCols(hypre_ParCSRMatrix *A, - const mfem::Array &rows); -hypre_ParCSRMatrix *hypreParCSREliminateColsWithRows(hypre_ParCSRMatrix *A, - const mfem::Array &cols); - -// Copy the entries from A into B, for sparsity(B) ⊆ sparsity(A). -void hypreParCSRCopy(hypre_ParCSRMatrix *A, hypre_ParCSRMatrix *B); - -// Get the row sums (with absolute value) of the local rows of the matrix. -void hypreParCSRRowSums(hypre_ParCSRMatrix *A, mfem::Vector &rowsums); - -// Compute the matrix infinity norm for a complex matrix stored with separate real and -// imaginary parts, for sparsity(Ai) ⊆ sparsity(Ar). -void hypreParCSRInfNorm(hypre_ParCSRMatrix *Ar, hypre_ParCSRMatrix *Ai, HYPRE_Real *norm); - -} // namespace palace::hypre - -#endif // PALACE_LINALG_HYPRE_HPP diff --git a/palace/linalg/iterative.cpp b/palace/linalg/iterative.cpp new file mode 100644 index 000000000..ccb6ff529 --- /dev/null +++ b/palace/linalg/iterative.cpp @@ -0,0 +1,844 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "iterative.hpp" + +#include +#include +#include +#include +#include "linalg/orthog.hpp" +#include "utils/communication.hpp" + +namespace palace +{ + +namespace +{ + +template +inline void CheckDot(T dot, const char *msg) +{ + MFEM_ASSERT(std::isfinite(dot) && dot >= 0.0, msg << dot << "!"); +} + +template +inline void CheckDot(std::complex dot, const char *msg) +{ + MFEM_ASSERT(std::isfinite(dot.real()) && std::isfinite(dot.imag()) && dot.real() >= 0.0, + msg << dot << "!"); +} + +template +inline constexpr T SafeMin() +{ + // Originally part of LAPACK. + // LAPACK is free software: you can redistribute it and/or modify it under + // the terms of the BSD 3-Clause license. + // + // Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved. + // Copyright (c) 2017-2021, University of Tennessee. All rights reserved. + // + // Original author: Weslley S Pereira, University of Colorado Denver, USA + constexpr int fradix = std::numeric_limits::radix; + constexpr int expm = std::numeric_limits::min_exponent; + constexpr int expM = std::numeric_limits::max_exponent; + // Note: pow is not constexpr in C++17 so this actually might not return a constexpr for + // all compilers. + return std::max(std::pow(fradix, T(expm - 1)), std::pow(fradix, T(1 - expM))); +} + +template +inline constexpr T SafeMax() +{ + // Originally part of LAPACK. + // LAPACK is free software: you can redistribute it and/or modify it under + // the terms of the BSD 3-Clause license. + // + // Copyright (c) 2021-2023, University of Colorado Denver. All rights reserved. + // Copyright (c) 2017-2021, University of Tennessee. All rights reserved. 
+ // + // Original author: Weslley S Pereira, University of Colorado Denver, USA + constexpr int fradix = std::numeric_limits::radix; + constexpr int expm = std::numeric_limits::min_exponent; + constexpr int expM = std::numeric_limits::max_exponent; + // Note: pow is not constexpr in C++17 so this actually might not return a constexpr for + // all compilers. + return std::min(std::pow(fradix, T(1 - expm)), std::pow(fradix, T(expM - 1))); +} + +template +inline void GeneratePlaneRotation(const T dx, const T dy, T &cs, T &sn) +{ + // See LAPACK's s/dlartg. + const T safmin = SafeMin(); + const T safmax = SafeMax(); + const T root_min = std::sqrt(safmin); + const T root_max = std::sqrt(safmax / 2); + if (dy == 0.0) + { + cs = 1.0; + sn = 0.0; + return; + } + if (dx == 0.0) + { + cs = 0.0; + sn = std::copysign(1.0, dy); + return; + } + T dx1 = std::abs(dx); + T dy1 = std::abs(dy); + if (dx1 > root_min && dx1 < root_max && dy1 > root_min && dy1 < root_max) + { + T d = std::sqrt(dx * dx + dy * dy); + cs = dx1 / d; + sn = dy / std::copysign(d, dx); + } + else + { + T u = std::min(safmax, std::max(safmin, std::max(dx1, dy1))); + T dxs = dx / u; + T dys = dy / u; + T d = std::sqrt(dxs * dxs + dys * dys); + cs = std::abs(dxs) / d; + sn = dys / std::copysign(d, dx); + } +} + +template +inline void GeneratePlaneRotation(const std::complex dx, const std::complex dy, T &cs, + std::complex &sn) +{ + // Generates a plane rotation so that: + // [ cs sn ] [ dx ] = [ r ] + // [ -conj(sn) cs ] [ dy ] [ 0 ] + // where cs is real and cs² + |sn|² = 1. See LAPACK's c/zlartg. 
+ const T safmin = SafeMin(); + const T safmax = SafeMax(); + if (dy == 0.0) + { + cs = 1.0; + sn = 0.0; + return; + } + if (dx == 0.0) + { + cs = 0.0; + if (dy.real() == 0.0) + { + sn = std::conj(dy) / std::abs(dy.imag()); + } + else if (dy.imag() == 0.0) + { + sn = std::conj(dy) / std::abs(dy.real()); + } + else + { + const T root_min = std::sqrt(safmin); + const T root_max = std::sqrt(safmax / 2); + T dy1 = std::max(std::abs(dy.real()), std::abs(dy.imag())); + if (dy1 > root_min && dy1 < root_max) + { + sn = std::conj(dy) / std::sqrt(dy.real() * dy.real() + dy.imag() * dy.imag()); + } + else + { + T u = std::min(safmax, std::max(safmin, dy1)); + std::complex dys = dy / u; + sn = std::conj(dys) / std::sqrt(dys.real() * dys.real() + dys.imag() * dys.imag()); + } + } + return; + } + const T root_min = std::sqrt(safmin); + const T root_max = std::sqrt(safmax / 4); + T dx1 = std::max(std::abs(dx.real()), std::abs(dx.imag())); + T dy1 = std::max(std::abs(dy.real()), std::abs(dy.imag())); + if (dx1 > root_min && dx1 < root_max && dy1 > root_min && dy1 < root_max) + { + T dx2 = dx.real() * dx.real() + dx.imag() * dx.imag(); + T dy2 = dy.real() * dy.real() + dy.imag() * dy.imag(); + T dz2 = dx2 + dy2; + if (dx2 >= dz2 * safmin) + { + cs = std::sqrt(dx2 / dz2); + if (dx2 > root_min && dz2 < root_max * 2) + { + sn = std::conj(dy) * (dx / std::sqrt(dx2 * dz2)); + } + else + { + sn = std::conj(dy) * ((dx / cs) / dz2); + } + } + else + { + T d = std::sqrt(dx2 * dz2); + cs = dx2 / d; + sn = std::conj(dy) * (dx / d); + } + } + else + { + T u = std::min(safmax, std::max(safmin, std::max(dx1, dy1))), w; + std::complex dys = dy / u, dxs; + T dy2 = dys.real() * dys.real() + dys.imag() * dys.imag(), dx2, dz2; + if (dx1 / u < root_min) + { + T v = std::min(safmax, std::max(safmin, dx1)); + w = v / u; + dxs = dx / v; + dx2 = dxs.real() * dxs.real() + dxs.imag() * dxs.imag(); + dz2 = dx2 * w * w + dy2; + } + else + { + w = 1.0; + dxs = dx / u; + dx2 = dxs.real() * dxs.real() + 
dxs.imag() * dxs.imag(); + dz2 = dx2 + dy2; + } + if (dx2 >= dz2 * safmin) + { + cs = std::sqrt(dx2 / dz2); + if (dx2 > root_min && dz2 < root_max * 2) + { + sn = std::conj(dys) * (dxs / std::sqrt(dx2 * dz2)); + } + else + { + sn = std::conj(dys) * ((dxs / cs) / dz2); + } + } + else + { + T d = std::sqrt(dx2 * dz2); + cs = dx2 / d; + sn = std::conj(dys) * (dxs / d); + } + cs *= w; + } +} + +template +inline void ApplyPlaneRotation(T &dx, T &dy, const T cs, const T sn) +{ + T t = cs * dx + sn * dy; + dy = -sn * dx + cs * dy; + dx = t; +} + +template +inline void ApplyPlaneRotation(std::complex &dx, std::complex &dy, const T cs, + const std::complex sn) +{ + std::complex t = cs * dx + sn * dy; + dy = -std::conj(sn) * dx + cs * dy; + dx = t; +} + +} // namespace + +template +IterativeSolver::IterativeSolver(MPI_Comm comm, int print) + : Solver(), comm(comm), A(nullptr), B(nullptr) +{ + print_opts.Warnings(); + if (print > 0) + { + print_opts.Summary(); + if (print > 1) + { + print_opts.Iterations(); + if (print > 2) + { + print_opts.All(); + } + } + } + int_width = 3; + tab_width = 0; + + rel_tol = abs_tol = 0.0; + max_it = 100; + + converged = false; + initial_res = 1.0; + final_res = 0.0; + final_it = 0; +} + +template +void CgSolver::Mult(const VecType &b, VecType &x) const +{ + // Set up workspace. + ScalarType beta, beta_prev = 0.0, alpha, denom; + RealType res, eps; + MFEM_VERIFY(A, "Operator must be set for CgSolver::Mult!"); + MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), + "Size mismatch for CgSolver::Mult!"); + r.SetSize(A->Height()); + z.SetSize(A->Height()); + p.SetSize(A->Height()); + + // Initialize. 
+ if (this->initial_guess) + { + A->Mult(x, r); + linalg::AXPBY(1.0, b, -1.0, r); + } + else + { + r = b; + x = 0.0; + } + if (B) + { + B->Mult(r, z); + } + else + { + z = r; + } + beta = linalg::Dot(comm, z, r); + CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = "); + res = std::sqrt(std::abs(beta)); + if (this->initial_guess && B) + { + B->Mult(b, p); + auto beta_rhs = linalg::Dot(comm, p, b); + CheckDot(beta_rhs, "PCG preconditioner is not positive definite: (Bb, b) = "); + initial_res = std::sqrt(std::abs(beta_rhs)); + } + else + { + initial_res = res; + } + eps = std::max(rel_tol * initial_res, abs_tol); + converged = (res < eps); + + // Begin iterations. + int it = 0; + if (print_opts.iterations) + { + Mpi::Print(comm, "{}Residual norms for PCG solve\n", + std::string(tab_width + int_width - 1, ' ')); + } + for (; it < max_it && !converged; it++) + { + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} KSP residual norm ||r||_B = {:.6e}\n", + std::string(tab_width, ' '), it, int_width, res); + } + if (!it) + { + p = z; + } + else + { + linalg::AXPBY(ScalarType(1.0), z, beta / beta_prev, p); + } + + A->Mult(p, z); + denom = linalg::Dot(comm, z, p); + CheckDot(denom, "PCG operator is not positive definite: (Ap, p) = "); + alpha = beta / denom; + + x.Add(alpha, p); + r.Add(-alpha, z); + + beta_prev = beta; + if (B) + { + B->Mult(r, z); + } + else + { + z = r; + } + beta = linalg::Dot(comm, z, r); + CheckDot(beta, "PCG preconditioner is not positive definite: (Br, r) = "); + res = std::sqrt(std::abs(beta)); + converged = (res < eps); + } + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} KSP residual norm ||r||_B = {:.6e}\n", + std::string(tab_width, ' '), it, int_width, res); + } + if (print_opts.summary || (print_opts.warnings && !converged)) + { + Mpi::Print(comm, "{}PCG solver {} in {:d} iteration{}", std::string(tab_width, ' '), + converged ? "converged" : "did NOT converge", it, (it == 1) ? 
"" : "s"); + if (it > 0) + { + Mpi::Print(comm, " (avg. reduction factor: {:.3e})\n", + std::pow(res / initial_res, 1.0 / it)); + } + else + { + Mpi::Print(comm, "\n"); + } + } + final_res = res; + final_it = it; +} + +namespace +{ + +template +inline void InitialResidual(PrecSide side, const OperType *A, const Solver *B, + const VecType &b, VecType &x, VecType &r, VecType &z, + bool initial_guess) +{ + if (B && side == GmresSolver::PrecSide::LEFT) + { + if (initial_guess) + { + A->Mult(x, z); + linalg::AXPBY(1.0, b, -1.0, z); + B->Mult(z, r); + } + else + { + B->Mult(b, r); + x = 0.0; + } + } + else // !B || side == PrecSide::RIGHT + { + if (initial_guess) + { + A->Mult(x, r); + linalg::AXPBY(1.0, b, -1.0, r); + } + else + { + r = b; + x = 0.0; + } + } +} + +template +inline void ApplyBA(PrecSide side, const OperType *A, const Solver *B, + const VecType &x, VecType &y, VecType &z) +{ + if (B && side == GmresSolver::PrecSide::LEFT) + { + A->Mult(x, z); + B->Mult(z, y); + } + else if (B && side == GmresSolver::PrecSide::RIGHT) + { + B->Mult(x, z); + A->Mult(z, y); + } + else + { + A->Mult(x, y); + } +} + +template +inline void OrthogonalizeIteration(OrthogType type, MPI_Comm comm, + const std::vector &V, VecType &w, + ScalarType *Hj, int j) +{ + using OperType = typename std::conditional::value, + ComplexOperator, Operator>::type; + + // Orthogonalize w against the leading j + 1 columns of V. 
+ switch (type) + { + case GmresSolver::OrthogType::MGS: + linalg::OrthogonalizeColumnMGS(comm, V, w, Hj, j + 1); + break; + case GmresSolver::OrthogType::CGS: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1); + break; + case GmresSolver::OrthogType::CGS2: + linalg::OrthogonalizeColumnCGS(comm, V, w, Hj, j + 1, true); + break; + } +} + +} // namespace + +template +void GmresSolver::Initialize() const +{ + if (!V.empty()) + { + MFEM_ASSERT(V.size() == static_cast(max_dim + 1) && + V[0].Size() == A->Height(), + "Repeated solves with GmresSolver should not modify the operator size or " + "restart dimension!"); + return; + } + if (max_dim < 0) + { + max_dim = max_it; + } + constexpr int init_size = 5; + V.resize(max_dim + 1); + for (int j = 0; j < std::min(init_size, max_dim + 1); j++) + { + V[j].SetSize(A->Height()); + } + H.resize((max_dim + 1) * max_dim); + s.resize(max_dim + 1); + cs.resize(max_dim + 1); + sn.resize(max_dim + 1); +} + +template +void GmresSolver::Update(int j) const +{ + // Add storage for basis vectors in increments. + constexpr int add_size = 10; + for (int k = j + 1; k < std::min(j + 1 + add_size, max_dim + 1); k++) + { + V[k].SetSize(A->Height()); + } +} + +template +void GmresSolver::Mult(const VecType &b, VecType &x) const +{ + // Set up workspace. + RealType beta = 0.0, true_beta, eps = 0.0; + MFEM_VERIFY(A, "Operator must be set for GmresSolver::Mult!"); + MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), + "Size mismatch for GmresSolver::Mult!"); + r.SetSize(A->Height()); + Initialize(); + + // Begin iterations. + converged = false; + int it = 0, restart = 0; + if (print_opts.iterations) + { + Mpi::Print(comm, "{}Residual norms for GMRES solve\n", + std::string(tab_width + int_width - 1, ' ')); + } + for (; it < max_it; restart++) + { + // Initialize. 
+ InitialResidual(pc_side, A, B, b, x, r, V[0], (this->initial_guess || restart > 0)); + true_beta = linalg::Norml2(comm, r); + CheckDot(true_beta, "GMRES residual norm is not valid: beta = "); + if (it == 0) + { + if (this->initial_guess) + { + RealType beta_rhs; + if (B && pc_side == PrecSide::LEFT) + { + B->Mult(b, V[0]); + beta_rhs = linalg::Norml2(comm, V[0]); + } + else // !B || pc_side == PrecSide::RIGHT + { + beta_rhs = linalg::Norml2(comm, b); + } + CheckDot(beta_rhs, "GMRES residual norm is not valid: beta_rhs = "); + initial_res = beta_rhs; + } + else + { + initial_res = true_beta; + } + eps = std::max(rel_tol * initial_res, abs_tol); + } + else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * true_beta && + print_opts.warnings) + { + Mpi::Print( + comm, + "{}GMRES residual at restart ({:.6e}) is far from the residual norm estimate " + "from the recursion formula ({:.6e}) (initial residual = {:.6e})\n", + std::string(tab_width, ' '), true_beta, beta, initial_res); + } + beta = true_beta; + if (beta < eps) + { + converged = true; + break; + } + + V[0] = 0.0; + V[0].Add(1.0 / beta, r); + std::fill(s.begin(), s.end(), 0.0); + s[0] = beta; + + int j = 0; + for (;; j++, it++) + { + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); + } + VecType &w = V[j + 1]; + if (w.Size() == 0) + { + Update(j); + } + ApplyBA(pc_side, A, B, V[j], w, r); + + ScalarType *Hj = H.data() + j * (max_dim + 1); + OrthogonalizeIteration(orthog_type, comm, V, w, Hj, j); + Hj[j + 1] = linalg::Norml2(comm, w); + w *= 1.0 / Hj[j + 1]; + + for (int k = 0; k < j; k++) + { + ApplyPlaneRotation(Hj[k], Hj[k + 1], cs[k], sn[k]); + } + GeneratePlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(s[j], s[j + 1], cs[j], sn[j]); + + beta = std::abs(s[j + 1]); + CheckDot(beta, "GMRES residual norm is not valid: 
beta = "); + converged = (beta < eps); + if (converged || j + 1 == max_dim || it + 1 == max_it) + { + it++; + break; + } + } + + // Reconstruct the solution (for restart or due to convergence or maximum iterations). + for (int i = j; i >= 0; i--) + { + ScalarType *Hi = H.data() + i * (max_dim + 1); + s[i] /= Hi[i]; + for (int k = i - 1; k >= 0; k--) + { + s[k] -= Hi[k] * s[i]; + } + } + if (!B || pc_side == PrecSide::LEFT) + { + for (int k = 0; k <= j; k++) + { + x.Add(s[k], V[k]); + } + } + else // B && pc_side == PrecSide::RIGHT + { + r = 0.0; + for (int k = 0; k <= j; k++) + { + r.Add(s[k], V[k]); + } + B->Mult(r, V[0]); + x += V[0]; + } + if (converged) + { + break; + } + } + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); + } + if (print_opts.summary || (print_opts.warnings && !converged)) + { + Mpi::Print(comm, "{}GMRES solver {} in {:d} iteration{}", std::string(tab_width, ' '), + converged ? "converged" : "did NOT converge", it, (it == 1) ? "" : "s"); + if (it > 0) + { + Mpi::Print(comm, " (avg. reduction factor: {:.3e})\n", + std::pow(beta / initial_res, 1.0 / it)); + } + else + { + Mpi::Print(comm, "\n"); + } + } + final_res = beta; + final_it = it; +} + +template +void FgmresSolver::Initialize() const +{ + GmresSolver::Initialize(); + constexpr int init_size = 5; + Z.resize(max_dim + 1); + for (int j = 0; j < std::min(init_size, max_dim + 1); j++) + { + Z[j].SetSize(A->Height()); + } +} + +template +void FgmresSolver::Update(int j) const +{ + // Add storage for basis vectors in increments. + GmresSolver::Update(j); + constexpr int add_size = 10; + for (int k = j + 1; k < std::min(j + 1 + add_size, max_dim + 1); k++) + { + Z[k].SetSize(A->Height()); + } +} + +template +void FgmresSolver::Mult(const VecType &b, VecType &x) const +{ + // Set up workspace. 
+ RealType beta = 0.0, true_beta, eps = 0.0; + MFEM_VERIFY(A && B, "Operator and preconditioner must be set for FgmresSolver::Mult!"); + MFEM_ASSERT(A->Width() == x.Size() && A->Height() == b.Size(), + "Size mismatch for FgmresSolver::Mult!"); + Initialize(); + + // Begin iterations. + converged = false; + int it = 0, restart = 0; + if (print_opts.iterations) + { + Mpi::Print(comm, "{}Residual norms for FGMRES solve\n", + std::string(tab_width + int_width - 1, ' ')); + } + for (; it < max_it; restart++) + { + // Initialize. + InitialResidual(PrecSide::RIGHT, A, B, b, x, Z[0], V[0], + (this->initial_guess || restart > 0)); + true_beta = linalg::Norml2(comm, Z[0]); + CheckDot(true_beta, "FGMRES residual norm is not valid: beta = "); + if (it == 0) + { + if (this->initial_guess) + { + auto beta_rhs = linalg::Norml2(comm, b); + CheckDot(beta_rhs, "GMRES residual norm is not valid: beta_rhs = "); + initial_res = beta_rhs; + } + else + { + initial_res = true_beta; + } + eps = std::max(rel_tol * initial_res, abs_tol); + } + else if (beta > 0.0 && std::abs(beta - true_beta) > 0.1 * true_beta && + print_opts.warnings) + { + Mpi::Print( + comm, + "{}FGMRES residual at restart ({:.6e}) is far from the residual norm estimate " + "from the recursion formula ({:.6e}) (initial residual = {:.6e})\n", + std::string(tab_width, ' '), true_beta, beta, initial_res); + } + beta = true_beta; + if (beta < eps) + { + converged = true; + break; + } + + V[0] = 0.0; + V[0].Add(1.0 / beta, Z[0]); + std::fill(s.begin(), s.end(), 0.0); + s[0] = beta; + + int j = 0; + for (;; j++, it++) + { + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); + } + VecType &w = V[j + 1]; + if (w.Size() == 0) + { + Update(j); + } + ApplyBA(PrecSide::RIGHT, A, B, V[j], w, Z[j]); + + ScalarType *Hj = H.data() + j * (max_dim + 1); + OrthogonalizeIteration(orthog_type, comm, V, w, Hj, j); + Hj[j + 1] = 
linalg::Norml2(comm, w); + w *= 1.0 / Hj[j + 1]; + + for (int k = 0; k < j; k++) + { + ApplyPlaneRotation(Hj[k], Hj[k + 1], cs[k], sn[k]); + } + GeneratePlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(Hj[j], Hj[j + 1], cs[j], sn[j]); + ApplyPlaneRotation(s[j], s[j + 1], cs[j], sn[j]); + + beta = std::abs(s[j + 1]); + CheckDot(beta, "FGMRES residual norm is not valid: beta = "); + converged = (beta < eps); + if (converged || j + 1 == max_dim || it + 1 == max_it) + { + it++; + break; + } + } + + // Reconstruct the solution (for restart or due to convergence or maximum iterations). + for (int i = j; i >= 0; i--) + { + ScalarType *Hi = H.data() + i * (max_dim + 1); + s[i] /= Hi[i]; + for (int k = i - 1; k >= 0; k--) + { + s[k] -= Hi[k] * s[i]; + } + } + for (int k = 0; k <= j; k++) + { + x.Add(s[k], Z[k]); + } + if (converged) + { + break; + } + } + if (print_opts.iterations) + { + Mpi::Print(comm, "{}{:{}d} (restart {:d}) KSP residual norm {:.6e}\n", + std::string(tab_width, ' '), it, int_width, restart, beta); + } + if (print_opts.summary || (print_opts.warnings && !converged)) + { + Mpi::Print(comm, "{}FGMRES solver {} in {:d} iteration{}", std::string(tab_width, ' '), + converged ? "converged" : "did NOT converge", it, (it == 1) ? "" : "s"); + if (it > 0) + { + Mpi::Print(comm, " (avg. reduction factor: {:.3e})\n", + std::pow(beta / initial_res, 1.0 / it)); + } + else + { + Mpi::Print(comm, "\n"); + } + } + final_res = beta; + final_it = it; +} + +template class IterativeSolver; +template class IterativeSolver; +template class CgSolver; +template class CgSolver; +template class GmresSolver; +template class GmresSolver; +template class FgmresSolver; +template class FgmresSolver; + +} // namespace palace diff --git a/palace/linalg/iterative.hpp b/palace/linalg/iterative.hpp new file mode 100644 index 000000000..1ab2b8c75 --- /dev/null +++ b/palace/linalg/iterative.hpp @@ -0,0 +1,279 @@ +// Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_ITERATIVE_HPP +#define PALACE_LINALG_ITERATIVE_HPP + +#include +#include +#include +#include "linalg/operator.hpp" +#include "linalg/solver.hpp" +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// Iterative solvers based on Krylov subspace methods with optional preconditioning, for +// real- or complex-valued systems. +// + +// Base class for iterative solvers based on Krylov subspace methods with optional +// preconditioning. +template +class IterativeSolver : public Solver +{ +protected: + using RealType = double; + using ScalarType = + typename std::conditional::value, + std::complex, RealType>::type; + + // MPI communicator associated with the solver. + MPI_Comm comm; + + // Control level of printing during solves. + mfem::IterativeSolver::PrintLevel print_opts; + int int_width, tab_width; + + // Relative and absolute tolerances. + double rel_tol, abs_tol; + + // Limit for the number of solver iterations. + int max_it; + + // Operator and (optional) preconditioner associated with the iterative solver (not + // owned). + const OperType *A; + const Solver *B; + + // Variables set during solve to capture solve statistics. + mutable bool converged; + mutable double initial_res, final_res; + mutable int final_it; + +public: + IterativeSolver(MPI_Comm comm, int print); + + // Set an indentation for all log printing. + void SetTabWidth(int width) { tab_width = width; } + + // Set the relative convergence tolerance. + void SetTol(double tol) { SetRelTol(tol); } + void SetRelTol(double tol) { rel_tol = tol; } + + // Set the absolute convergence tolerance. + void SetAbsTol(double tol) { abs_tol = tol; } + + // Set the maximum number of iterations. + void SetMaxIter(int its) + { + max_it = its; + int_width = 1 + static_cast(std::log10(its)); + } + + // Set the operator for the solver. 
+ void SetOperator(const OperType &op) override { A = &op; } + + // Set the preconditioner for the solver. + void SetPreconditioner(const Solver &pc) { B = &pc; } + + // Returns if the previous solve converged or not. + bool GetConverged() const { return converged; } + + // Returns the initial (absolute) residual for the previous solve. + double GetInitialRes() const { return initial_res; } + + // Returns the final (absolute) residual for the previous solve, which may be an estimate + // to the true residual. + double GetFinalRes() const { return final_res; } + + // Returns the number of iterations for the previous solve. + int GetNumIterations() const { return final_it; } + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return comm; } +}; + +// Preconditioned Conjugate Gradient (CG) method for SPD linear systems. +template +class CgSolver : public IterativeSolver +{ +protected: + using VecType = typename Solver::VecType; + using RealType = typename IterativeSolver::RealType; + using ScalarType = typename IterativeSolver::ScalarType; + + using IterativeSolver::comm; + using IterativeSolver::print_opts; + using IterativeSolver::int_width; + using IterativeSolver::tab_width; + + using IterativeSolver::rel_tol; + using IterativeSolver::abs_tol; + using IterativeSolver::max_it; + + using IterativeSolver::A; + using IterativeSolver::B; + + using IterativeSolver::converged; + using IterativeSolver::initial_res; + using IterativeSolver::final_res; + using IterativeSolver::final_it; + + // Temporary workspace for solve. + mutable VecType r, z, p; + +public: + CgSolver(MPI_Comm comm, int print) : IterativeSolver(comm, print) {} + + void Mult(const VecType &b, VecType &x) const override; +}; + +// Preconditioned Generalized Minimum Residual Method (GMRES) for general nonsymmetric +// linear systems. 
+template +class GmresSolver : public IterativeSolver +{ +public: + enum class OrthogType + { + MGS, + CGS, + CGS2 + }; + + enum class PrecSide + { + LEFT, + RIGHT + }; + +protected: + using VecType = typename Solver::VecType; + using RealType = typename IterativeSolver::RealType; + using ScalarType = typename IterativeSolver::ScalarType; + + using IterativeSolver::comm; + using IterativeSolver::print_opts; + using IterativeSolver::int_width; + using IterativeSolver::tab_width; + + using IterativeSolver::rel_tol; + using IterativeSolver::abs_tol; + using IterativeSolver::max_it; + + using IterativeSolver::A; + using IterativeSolver::B; + + using IterativeSolver::converged; + using IterativeSolver::initial_res; + using IterativeSolver::final_res; + using IterativeSolver::final_it; + + // Maximum subspace dimension for restarted GMRES. + mutable int max_dim; + + // Orthogonalization method for orthonormalizing a newly computed vector against a basis + // at each iteration. + OrthogType orthog_type; + + // Use left or right preconditioning. + PrecSide pc_side; + + // Temporary workspace for solve. + mutable std::vector V; + mutable VecType r; + mutable std::vector H; + mutable std::vector s, sn; + mutable std::vector cs; + + // Allocate storage for solve. + virtual void Initialize() const; + virtual void Update(int j) const; + +public: + GmresSolver(MPI_Comm comm, int print) + : IterativeSolver(comm, print), max_dim(-1), orthog_type(OrthogType::MGS), + pc_side(PrecSide::LEFT) + { + } + + // Set the dimension for restart. + void SetRestartDim(int dim) { max_dim = dim; } + + // Set the orthogonalization method. + void SetOrthogonalization(OrthogType type) { orthog_type = type; } + + // Set the side for preconditioning. 
+ virtual void SetPrecSide(PrecSide side) { pc_side = side; } + + void Mult(const VecType &b, VecType &x) const override; +}; + +// Preconditioned Flexible Generalized Minimum Residual Method (FGMRES) for general +// nonsymmetric linear systems with a non-constant preconditioner. +template +class FgmresSolver : public GmresSolver +{ +public: + using OrthogType = typename GmresSolver::OrthogType; + using PrecSide = typename GmresSolver::PrecSide; + +protected: + using VecType = typename GmresSolver::VecType; + using RealType = typename GmresSolver::RealType; + using ScalarType = typename GmresSolver::ScalarType; + + using GmresSolver::comm; + using GmresSolver::print_opts; + using GmresSolver::int_width; + using GmresSolver::tab_width; + + using GmresSolver::rel_tol; + using GmresSolver::abs_tol; + using GmresSolver::max_it; + + using GmresSolver::A; + using GmresSolver::B; + + using GmresSolver::converged; + using GmresSolver::initial_res; + using GmresSolver::final_res; + using GmresSolver::final_it; + + using GmresSolver::max_dim; + using GmresSolver::orthog_type; + using GmresSolver::pc_side; + using GmresSolver::V; + using GmresSolver::H; + using GmresSolver::s; + using GmresSolver::sn; + using GmresSolver::cs; + + // Temporary workspace for solve. + mutable std::vector Z; + + // Allocate storage for solve. 
+ void Initialize() const override; + void Update(int j) const override; + +public: + FgmresSolver(MPI_Comm comm, int print) : GmresSolver(comm, print) + { + pc_side = PrecSide::RIGHT; + } + + void SetPrecSide(PrecSide side) override + { + MFEM_VERIFY(side == PrecSide::RIGHT, + "FGMRES solver only supports right preconditioning!"); + } + + void Mult(const VecType &b, VecType &x) const override; +}; + +} // namespace palace + +#endif // PALACE_LINALG_ITERATIVE_HPP diff --git a/palace/linalg/jacobi.cpp b/palace/linalg/jacobi.cpp new file mode 100644 index 000000000..f6f003f1a --- /dev/null +++ b/palace/linalg/jacobi.cpp @@ -0,0 +1,31 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include "jacobi.hpp" + +#include + +namespace palace +{ + +void JacobiSmoother::SetOperator(const Operator &op) +{ + height = op.Height(); + width = op.Width(); + dinv.SetSize(height); + op.AssembleDiagonal(dinv); + dinv.Reciprocal(); +} + +void JacobiSmoother::Mult(const Vector &x, Vector &y) const +{ + MFEM_ASSERT(!iterative_mode, + "JacobiSmoother is not implemented for iterative_mode = true!"); + const int N = height; + const auto *DI = dinv.Read(); + const auto *X = x.Read(); + auto *Y = y.Write(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = DI[i] * X[i]; }); +} + +} // namespace palace diff --git a/palace/linalg/jacobi.hpp b/palace/linalg/jacobi.hpp new file mode 100644 index 000000000..25e4735f0 --- /dev/null +++ b/palace/linalg/jacobi.hpp @@ -0,0 +1,36 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_JACOBI_SMOOTHER_HPP +#define PALACE_LINALG_JACOBI_SMOOTHER_HPP + +#include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// Simple Jacobi smoother using the diagonal vector from Operator::AssembleDiagonal(), +// which allows for (approximate) diagonal construction for matrix-free operators. +// +class JacobiSmoother : public mfem::Solver +{ +private: + // Inverse diagonal scaling of the operator. + Vector dinv; + +public: + JacobiSmoother() : mfem::Solver() {} + + void SetOperator(const Operator &op) override; + + void Mult(const Vector &x, Vector &y) const override; + + void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); } +}; + +} // namespace palace + +#endif // PALACE_LINALG_JACOBI_SMOOTHER_HPP diff --git a/palace/linalg/ksp.cpp b/palace/linalg/ksp.cpp index 4a0946f72..98d40f1d7 100644 --- a/palace/linalg/ksp.cpp +++ b/palace/linalg/ksp.cpp @@ -3,397 +3,298 @@ #include "ksp.hpp" -#include #include -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/amg.hpp" +#include "linalg/ams.hpp" +#include "linalg/gmg.hpp" +#include "linalg/mumps.hpp" +#include "linalg/strumpack.hpp" +#include "linalg/superlu.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" namespace palace { -KspSolver::KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix) - : clcustom(false), print(iodata.problem.verbose), print_opts(true), check_final(true), - solve(0) +namespace { - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); - Configure(iodata); - ConfigureVerbose(print, prefix); -} - -KspSolver::KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) - : clcustom(false), print(print_lvl), print_opts(true), check_final(true), solve(0) -{ - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOptionsPrefix(ksp, prefix.c_str())); - 
ConfigureVerbose(print, prefix); -} -KspSolver::~KspSolver() +template +std::unique_ptr> ConfigureKrylovSolver(MPI_Comm comm, + const IoData &iodata) { - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(ksp), &comm)); - PalacePetscCall(KSPDestroy(&ksp)); -} + // Configure solver settings as needed based on inputs. + config::LinearSolverData::KspType type = iodata.solver.linear.ksp_type; + if (type == config::LinearSolverData::KspType::DEFAULT) + { + if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || + iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC || + iodata.problem.type == config::ProblemData::Type::TRANSIENT) + { + type = config::LinearSolverData::KspType::CG; + } + else + { + type = config::LinearSolverData::KspType::GMRES; + } + } -void KspSolver::Configure(const IoData &iodata) -{ - // Configure the Krylov solver. GMRES is the default solver for frequency domain - // problems. - switch (iodata.solver.linear.ksp_type) + // Create the solver. 
+ std::unique_ptr> ksp; + switch (type) { case config::LinearSolverData::KspType::CG: - SetType(Type::CG); - break; - case config::LinearSolverData::KspType::CGSYM: - SetType(Type::CGSYM); - break; - case config::LinearSolverData::KspType::FCG: - SetType(Type::FCG); - break; - case config::LinearSolverData::KspType::MINRES: - SetType(Type::MINRES); + ksp = std::make_unique>(comm, iodata.problem.verbose); break; case config::LinearSolverData::KspType::GMRES: - case config::LinearSolverData::KspType::DEFAULT: - SetType(Type::GMRES); - SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); + { + auto gmres = std::make_unique>(comm, iodata.problem.verbose); + gmres->SetRestartDim(iodata.solver.linear.max_size); + ksp = std::move(gmres); + } break; case config::LinearSolverData::KspType::FGMRES: - SetType(Type::FGMRES); - SetGMRESOptions(iodata.solver.linear.max_size, iodata.solver.linear.orthog_mgs, - iodata.solver.linear.orthog_cgs2); - break; - case config::LinearSolverData::KspType::BCGS: - SetType(Type::BCGS); - break; - case config::LinearSolverData::KspType::BCGSL: - SetType(Type::BCGSL); - break; - case config::LinearSolverData::KspType::FBCGS: - SetType(Type::FBCGS); - break; - case config::LinearSolverData::KspType::QMRCGS: - SetType(Type::QMRCGS); - break; - case config::LinearSolverData::KspType::TFQMR: - SetType(Type::TFQMR); + { + auto fgmres = + std::make_unique>(comm, iodata.problem.verbose); + fgmres->SetRestartDim(iodata.solver.linear.max_size); + ksp = std::move(fgmres); + } break; + case config::LinearSolverData::KspType::MINRES: + case config::LinearSolverData::KspType::BICGSTAB: + case config::LinearSolverData::KspType::DEFAULT: case config::LinearSolverData::KspType::INVALID: - MFEM_ABORT("Unexpected type for KspSolver configuration!"); + MFEM_ABORT("Unexpected solver type for Krylov solver configuration!"); break; } - SetTol(iodata.solver.linear.tol); - 
SetMaxIter(iodata.solver.linear.max_it); - - // Reuse previous solution as guess for later solves if desired. - SetNonzeroInitialGuess(iodata.solver.linear.ksp_initial_guess); + ksp->SetInitialGuess(iodata.solver.linear.initial_guess); + ksp->SetRelTol(iodata.solver.linear.tol); + ksp->SetMaxIter(iodata.solver.linear.max_it); - // Optionally use left or right preconditioning (otherwise use PETSc default for the given - // solver). - if (iodata.solver.linear.pc_side_type == config::LinearSolverData::SideType::LEFT) + // Configure preconditioning side (only for GMRES). + if (iodata.solver.linear.pc_side_type != config::LinearSolverData::SideType::DEFAULT) { - PalacePetscCall(KSPSetPCSide(ksp, PC_LEFT)); + if (type != config::LinearSolverData::KspType::GMRES) + { + Mpi::Warning( + comm, "Preconditioner side will be ignored for non-GMRES iterative solvers!\n"); + } + else + { + auto *gmres = static_cast *>(ksp.get()); + switch (iodata.solver.linear.pc_side_type) + { + case config::LinearSolverData::SideType::LEFT: + gmres->SetPrecSide(GmresSolver::PrecSide::LEFT); + break; + case config::LinearSolverData::SideType::RIGHT: + gmres->SetPrecSide(GmresSolver::PrecSide::RIGHT); + break; + case config::LinearSolverData::SideType::DEFAULT: + case config::LinearSolverData::SideType::INVALID: + MFEM_ABORT("Unexpected side for configuring preconditioning!"); + break; + } + } } - else if (iodata.solver.linear.pc_side_type == config::LinearSolverData::SideType::RIGHT) + + // Configure orthogonalization method for GMRES/FGMRES. + if (type == config::LinearSolverData::KspType::GMRES || + type == config::LinearSolverData::KspType::FGMRES) { - PalacePetscCall(KSPSetPCSide(ksp, PC_RIGHT)); + // Because FGMRES inherits from GMRES, this is OK. 
+ auto *gmres = static_cast *>(ksp.get()); + switch (iodata.solver.linear.gs_orthog_type) + { + case config::LinearSolverData::OrthogType::MGS: + gmres->SetOrthogonalization(GmresSolver::OrthogType::MGS); + break; + case config::LinearSolverData::OrthogType::CGS: + gmres->SetOrthogonalization(GmresSolver::OrthogType::CGS); + break; + case config::LinearSolverData::OrthogType::CGS2: + gmres->SetOrthogonalization(GmresSolver::OrthogType::CGS2); + break; + case config::LinearSolverData::OrthogType::INVALID: + MFEM_ABORT("Unexpected orthogonalization type for Krylov solver configuration!"); + break; + } } + + return ksp; } -void KspSolver::ConfigureVerbose(int print, const std::string &prefix) +template +std::unique_ptr> +ConfigurePreconditionerSolver(MPI_Comm comm, const IoData &iodata, + mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) { - // Manage debugging output. - if (print > 0) + // Configure solver settings as needed based on inputs. 
+ config::LinearSolverData::Type type = iodata.solver.linear.type; + if (type == config::LinearSolverData::Type::DEFAULT) { - std::string opts = "-ksp_converged_reason"; - if (print > 1) - { - opts.append(" -ksp_monitor"); - } - if (print > 3) + if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || + (iodata.problem.type == config::ProblemData::Type::TRANSIENT && + iodata.solver.transient.type == config::TransientSolverData::Type::CENTRAL_DIFF)) { - opts.append(" -ksp_view"); + type = config::LinearSolverData::Type::BOOMER_AMG; } - if (prefix.length() > 0) + else if (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC || + iodata.problem.type == config::ProblemData::Type::TRANSIENT) { - PetscOptionsPrefixPush(nullptr, prefix.c_str()); + type = config::LinearSolverData::Type::AMS; } - PetscOptionsInsertString(nullptr, opts.c_str()); - if (prefix.length() > 0) + else { - PetscOptionsPrefixPop(nullptr); + // Prefer sparse direct solver for frequency domain problems if available. +#if defined(MFEM_USE_SUPERLU) + type = config::LinearSolverData::Type::SUPERLU; +#elif defined(MFEM_USE_STRUMPACK) + type = config::LinearSolverData::Type::STRUMPACK; +#elif defined(MFEM_USE_MUMPS) + type = config::LinearSolverData::Type::MUMPS; +#else + type = config::LinearSolverData::Type::AMS; +#endif } } -} + int print = iodata.problem.verbose - 1; -void KspSolver::SetType(KspSolver::Type type, bool piped) -{ + // Create the real-valued solver first. + std::unique_ptr pc0; switch (type) { - case Type::CG: - PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); - PalacePetscCall(KSPCGSetType(ksp, KSP_CG_HERMITIAN)); - break; - case Type::CGSYM: - PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPECG) : KSPSetType(ksp, KSPCG)); - PalacePetscCall(KSPCGSetType(ksp, KSP_CG_SYMMETRIC)); - break; - case Type::FCG: - PalacePetscCall(KSPSetType(ksp, KSPFCG)); - break; - case Type::GMRES: - PalacePetscCall((piped) ? 
KSPSetType(ksp, KSPPGMRES) : KSPSetType(ksp, KSPGMRES)); - break; - case Type::FGMRES: - PalacePetscCall((piped) ? KSPSetType(ksp, KSPPIPEFGMRES) - : KSPSetType(ksp, KSPFGMRES)); - break; - case Type::MINRES: - PalacePetscCall(KSPSetType(ksp, KSPMINRES)); - break; - case Type::BCGS: - PalacePetscCall(KSPSetType(ksp, KSPBCGS)); + case config::LinearSolverData::Type::AMS: + // Can either be the coarse solve for geometric multigrid or the solver at the finest + // space (in which case fespaces.GetNumLevels() == 1). + MFEM_VERIFY(aux_fespaces, "AMS solver relies on both primary space " + "and auxiliary spaces for construction!"); + pc0 = std::make_unique(iodata, fespaces.GetFESpaceAtLevel(0), + aux_fespaces->GetFESpaceAtLevel(0), print); break; - case Type::BCGSL: - PalacePetscCall(KSPSetType(ksp, KSPBCGSL)); - PalacePetscCall(KSPBCGSLSetEll(ksp, 2)); // PETSc default + case config::LinearSolverData::Type::BOOMER_AMG: + pc0 = std::make_unique(iodata, print); break; - case Type::FBCGS: - PalacePetscCall(KSPSetType(ksp, KSPFBCGS)); + case config::LinearSolverData::Type::SUPERLU: +#if defined(MFEM_USE_SUPERLU) + pc0 = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a " + "different solver!"); +#endif break; - case Type::QMRCGS: - PalacePetscCall(KSPSetType(ksp, KSPQMRCGS)); + case config::LinearSolverData::Type::STRUMPACK: +#if defined(MFEM_USE_STRUMPACK) + pc0 = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a " + "different solver!"); +#endif break; - case Type::TFQMR: - PalacePetscCall(KSPSetType(ksp, KSPTFQMR)); + case config::LinearSolverData::Type::STRUMPACK_MP: +#if defined(MFEM_USE_STRUMPACK) + pc0 = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT("Solver was not built with STRUMPACK support, please choose a " + "different solver!"); +#endif break; - case Type::CHOLESKY: - { - PC pc; - 
PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCCHOLESKY)); - SetCheckFinal(false); - } + case config::LinearSolverData::Type::MUMPS: +#if defined(MFEM_USE_MUMPS) + pc0 = std::make_unique(comm, iodata, print); +#else + MFEM_ABORT( + "Solver was not built with MUMPS support, please choose a different solver!"); +#endif break; - case Type::LU: - { - PC pc; - PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCLU)); - SetCheckFinal(false); - } + case config::LinearSolverData::Type::DEFAULT: + case config::LinearSolverData::Type::INVALID: + MFEM_ABORT("Unexpected solver type for preconditioner configuration!"); break; } -} - -void KspSolver::SetTol(PetscReal tol) -{ - PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT)); -} - -void KspSolver::SetAbsTol(PetscReal tol) -{ - PalacePetscCall(KSPSetTolerances(ksp, PETSC_DEFAULT, tol, PETSC_DEFAULT, PETSC_DEFAULT)); -} - -void KspSolver::SetMaxIter(PetscInt maxits) -{ - PalacePetscCall( - KSPSetTolerances(ksp, PETSC_DEFAULT, PETSC_DEFAULT, PETSC_DEFAULT, maxits)); -} -void KspSolver::SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2) -{ - PalacePetscCall(KSPGMRESSetRestart(ksp, maxsize)); - if (mgs) + // Construct the actual solver, which has the right value type. + auto pc = std::make_unique>(std::move(pc0)); + if (iodata.solver.linear.pc_mg) { - PalacePetscCall( - KSPGMRESSetOrthogonalization(ksp, KSPGMRESModifiedGramSchmidtOrthogonalization)); + // This will construct the multigrid hierarchy using pc as the coarse solver + // (ownership of pc is transferred to the GeometricMultigridSolver). When a special + // auxiliary space smoother for pre-/post-smoothing is not desired, the auxiliary + // space is a nullptr here. 
+ if (iodata.solver.linear.mg_smooth_aux) + { + MFEM_VERIFY(aux_fespaces, "Multigrid with auxiliary space smoothers requires both " + "primary space and auxiliary spaces for construction!"); + return std::make_unique>(iodata, std::move(pc), + fespaces, aux_fespaces); + } + else + { + return std::make_unique>(iodata, std::move(pc), + fespaces, nullptr); + } } - else if (cgs2) + else { - PalacePetscCall(KSPGMRESSetCGSRefinementType(ksp, KSP_GMRES_CGS_REFINE_ALWAYS)); + return pc; } } -void KspSolver::SetTabLevel(PetscInt l) -{ - PalacePetscCall(PetscObjectSetTabLevel(reinterpret_cast(ksp), l)); -} - -void KspSolver::SetNonzeroInitialGuess(bool guess) -{ - PalacePetscCall(KSPSetInitialGuessNonzero(ksp, guess ? PETSC_TRUE : PETSC_FALSE)); -} - -void KspSolver::SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix) -{ - // If A is the same as before, PETSc will reuse things like symbolic factorizations - // automatically. - PalacePetscCall(KSPSetOperators(ksp, A, A)); - if (copy_prefix) - { - // Set Mat prefix to be the same as KSP to enable setting command-line options. - const char *prefix; - PalacePetscCall(KSPGetOptionsPrefix(ksp, &prefix)); - PalacePetscCall(MatSetOptionsPrefix(A, prefix)); - } -} +} // namespace -void KspSolver::SetPreconditioner(const KspPreconditioner &op) +template +BaseKspSolver::BaseKspSolver(const IoData &iodata, + mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) + : BaseKspSolver( + ConfigureKrylovSolver(fespaces.GetFinestFESpace().GetComm(), iodata), + ConfigurePreconditionerSolver(fespaces.GetFinestFESpace().GetComm(), + iodata, fespaces, aux_fespaces)) { - // The PETSc shell preconditioner does not take ownership of the preconditioner object. 
- PC pc; - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetContext(pc, (void *)&op)); - PalacePetscCall(PCShellSetSetUp(pc, KspPreconditioner::PCSetUp)); - PalacePetscCall(PCShellSetApply(pc, KspPreconditioner::PCApply)); - PalacePetscCall(PCShellSetDestroy(pc, KspPreconditioner::PCDestroy)); } -void KspSolver::Customize() const +template +BaseKspSolver::BaseKspSolver(std::unique_ptr> &&ksp, + std::unique_ptr> &&pc) + : ksp(std::move(ksp)), pc(std::move(pc)), ksp_mult(0), ksp_mult_it(0) { - if (!clcustom) - { - PalacePetscCall(KSPSetFromOptions(ksp)); - if (print > 0 && print_opts) - { - PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); - Mpi::Print(GetComm(), "\n"); - } - clcustom = true; - } + this->ksp->SetPreconditioner(*this->pc); } -void KspSolver::Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const +template +void BaseKspSolver::SetOperators(const OperType &op, const OperType &pc_op) { - KSPConvergedReason reason; - PetscReal norm0 = 1.0, norm; - if (check_final) + ksp->SetOperator(op); + const auto *mg_op = dynamic_cast *>(&pc_op); + const auto *mg_pc = dynamic_cast *>(pc.get()); + if (mg_op && !mg_pc) { - norm0 = b.Norml2(); + pc->SetOperator(mg_op->GetFinestOperator()); } - Customize(); - PalacePetscCall(KSPSolve(ksp, b, x)); - PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); - if (check_final && reason < 0) + else { - Mat A; - Vec r; - PalacePetscCall(VecDuplicate(b, &r)); - PalacePetscCall(KSPGetOperators(ksp, &A, nullptr)); - PalacePetscCall(MatMult(A, x, r)); - PalacePetscCall(VecAXPY(r, -1.0, b)); - PalacePetscCall(VecNorm(r, NORM_2, &norm)); - PalacePetscCall(VecDestroy(&r)); - Mpi::Warning(GetComm(), - "Linear solver did not converge, " - "norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", - norm / norm0, norm0); + pc->SetOperator(pc_op); } - solve++; -} - -void KspSolver::Reset() -{ - PalacePetscCall(KSPReset(ksp)); } -PetscInt KspSolver::GetTotalNumMult() 
const +template +void BaseKspSolver::Mult(const VecType &x, VecType &y) const { - return solve; -} - -PetscInt KspSolver::GetNumIter() const -{ - PetscInt num_it; - PalacePetscCall(KSPGetIterationNumber(ksp, &num_it)); - return num_it; -} - -PetscInt KspSolver::GetTotalNumIter() const -{ - PetscInt num_it; - PalacePetscCall(KSPGetTotalIterations(ksp, &num_it)); - return num_it; -} - -MPI_Comm KspSolver::GetComm() const -{ - return ksp ? PetscObjectComm(reinterpret_cast(ksp)) : MPI_COMM_NULL; -} - -void KspSolver::SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym, PetscReal tol, - PetscInt max_it) -{ - MPI_Comm comm; - KSP ksp; - PC pc; - KSPConvergedReason reason; - - comm = A.GetComm(); - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOperators(ksp, A, A)); - PalacePetscCall(KSPSetType(ksp, (sym == 1) ? KSPCG : KSPGMRES)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - PalacePetscCall(PCSetType(pc, PCJACOBI)); - PalacePetscCall(PCJacobiSetFixDiagonal(pc, PETSC_TRUE)); - PalacePetscCall(KSPSetTolerances(ksp, tol, PETSC_DEFAULT, PETSC_DEFAULT, max_it)); - // std::string opts = "-ksp_converged_reason -ksp_monitor"; - // PetscOptionsInsertString(nullptr, opts.c_str()); - // PalacePetscCall(KSPSetFromOptions(ksp)); - x.SetZero(); - PalacePetscCall(KSPSolve(ksp, b, x)); - PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); - MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); - PalacePetscCall(KSPDestroy(&ksp)); -} - -void KspSolver::SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym) -{ - MPI_Comm comm; - KSP ksp; - PC pc; - KSPConvergedReason reason; - - comm = A.GetComm(); - PalacePetscCall(KSPCreate(comm, &ksp)); - PalacePetscCall(KSPSetOperators(ksp, A, A)); - PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); - PalacePetscCall(KSPGetPC(ksp, &pc)); -#if defined(PETSC_HAVE_MUMPS) || defined(PETSC_HAVE_SUPERLU_DIST) - 
PalacePetscCall(PCSetType(pc, (sym > 0) ? PCCHOLESKY : PCLU)); -#if defined(PETSC_HAVE_MUMPS) - PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERMUMPS)); -#elif defined(PETSC_HAVE_SUPERLU_DIST) - PalacePetscCall(PCFactorSetMatSolverType(pc, MATSOLVERSUPERLU_DIST)); -#endif -#else - // Use PETSc default serial direct solver. - PalacePetscCall(PCSetType(pc, PCREDUNDANT)); - PalacePetscCall(PCRedundantSetNumber(pc, Mpi::Size(comm))); + ksp->Mult(x, y); + if (!ksp->GetConverged()) { - KSP ksp_in; - PC pc_in; - PalacePetscCall(PCRedundantGetKSP(pc, &ksp_in)); - PalacePetscCall(KSPGetPC(ksp_in, &pc_in)); - PalacePetscCall(PCSetType(pc_in, (sym > 0) ? PCCHOLESKY : PCLU)); + Mpi::Warning( + ksp->GetComm(), + "Linear solver did not converge, norm(Ax-b)/norm(b) = {:.3e} (norm(b) = {:.3e})!\n", + ksp->GetFinalRes() / ksp->GetInitialRes(), ksp->GetInitialRes()); } -#endif - x.SetZero(); - PalacePetscCall(KSPSolve(ksp, b, x)); - PalacePetscCall(KSPGetConvergedReason(ksp, &reason)); - MFEM_VERIFY(reason > 0, "PETSc KSP did not converge!"); - PalacePetscCall(KSPDestroy(&ksp)); + ksp_mult++; + ksp_mult_it += ksp->GetNumIterations(); } +template class BaseKspSolver; +template class BaseKspSolver; + } // namespace palace diff --git a/palace/linalg/ksp.hpp b/palace/linalg/ksp.hpp index ef236afe7..b9580e3bc 100644 --- a/palace/linalg/ksp.hpp +++ b/palace/linalg/ksp.hpp @@ -1,147 +1,66 @@ // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
// SPDX-License-Identifier: Apache-2.0 -#ifndef PALACE_LINALG_KSP_SOLVER_HPP -#define PALACE_LINALG_KSP_SOLVER_HPP +#ifndef PALACE_LINALG_KSP_HPP +#define PALACE_LINALG_KSP_HPP -#include -#include "linalg/petsc.hpp" +#include +#include +#include "linalg/iterative.hpp" +#include "linalg/operator.hpp" +#include "linalg/solver.hpp" -namespace palace +namespace mfem { -class IoData; -class KspPreconditioner; +class ParFiniteElementSpaceHierarchy; -namespace petsc -{ +} // namespace mfem -class PetscParMatrix; -class PetscParVector; +namespace palace +{ -} // namespace petsc +class IoData; // -// A wrapper of PETSc's KSP class for solving linear systems. +// Linear solver class composing an iterative solver and preconditioner object. // -class KspSolver +template +class BaseKspSolver { -public: - enum class Type - { - CG, - CGSYM, - FCG, - MINRES, - GMRES, - FGMRES, - BCGS, - BCGSL, - FBCGS, - QMRCGS, - TFQMR, - CHOLESKY, - LU - }; - -private: - // The actual PETSc object. - KSP ksp; - - // Boolean to handle SetFromOptions calls. - mutable bool clcustom; - - // Control print level for debugging. - int print; - - // Print PETSc options database prior to solve. - bool print_opts; - - // Check for final residual if not converged. Defaults to true. - bool check_final; - - // Counter for number of calls to Mult method for a linear solve. - mutable PetscInt solve; - - // Set up debugging output and configure the solver based on user specified parameters. - void Configure(const IoData &iodata); - void ConfigureVerbose(int print, const std::string &prefix); - - // Customize object with command line options set. - void Customize() const; - -public: - // Calls PETSc's KSPCreate. - KspSolver(MPI_Comm comm, const IoData &iodata, const std::string &prefix = std::string()); - KspSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = std::string()); - - // Calls PETSc's KSPDestroy. - ~KspSolver(); - - // Sets the solver type. 
- void SetType(Type type, bool piped = false); - - // Set solver tolerance. - void SetTol(PetscReal tol); + static_assert(std::is_same::value || + std::is_same::value, + "Solver can only be defined for OperType = Operator or ComplexOperator!"); - // Set solver tolerance. - void SetAbsTol(PetscReal tol); + using VecType = typename std::conditional::value, + ComplexVector, Vector>::type; - // Set maximum number of iterations. - void SetMaxIter(PetscInt maxits); +protected: + // The actual solver and preconditioner objects. + std::unique_ptr> ksp; + std::unique_ptr> pc; - // Set options specific to GMRES and FGMRES solvers. - void SetGMRESOptions(PetscInt maxsize, bool mgs, bool cgs2); + // Counters for number of calls to Mult method for linear solves, and cumulative number + // of iterations. + mutable int ksp_mult, ksp_mult_it; - // Sets the tab level for KSP output. - void SetTabLevel(PetscInt l); - - // Set flag to print PETSc options database at start of solve. - void SetPrintOptions(bool opts) { print_opts = opts; } - - // Set flag to check final residual if unconverged. - void SetCheckFinal(bool check) { check_final = check; } - - // Set an initial vector for the solution subspace. - void SetNonzeroInitialGuess(bool guess); - - // Sets the MVP and preconditioner matrix. - void SetOperator(const petsc::PetscParMatrix &A, bool copy_prefix = true); - - // Configures a shell preconditioner based on the given preconditioner object. - void SetPreconditioner(const KspPreconditioner &op); - - // Application of the solver. - void Mult(const petsc::PetscParVector &b, petsc::PetscParVector &x) const; - - // Call KSPReset, for example if the operator dimension has changed. - void Reset(); - - // Get number of solver calls. - PetscInt GetTotalNumMult() const; - - // Get number of solver iterations. - PetscInt GetNumIter() const; - PetscInt GetTotalNumIter() const; - - // Get the associated MPI communicator. 
- MPI_Comm GetComm() const; +public: + BaseKspSolver(const IoData &iodata, mfem::ParFiniteElementSpaceHierarchy &fespaces, + mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); + BaseKspSolver(std::unique_ptr> &&ksp, + std::unique_ptr> &&pc); - // Conversion function to PETSc's KSP type. - operator KSP() const { return ksp; } + int NumTotalMult() const { return ksp_mult; } + int NumTotalMultIterations() const { return ksp_mult_it; } - // Typecasting to PETSc object. - operator PetscObject() const { return reinterpret_cast(ksp); } + void SetOperators(const OperType &op, const OperType &pc_op); - // Simple static linear solve methods. The sym variable defines the matrix type: 0 for - // general, 1 for SPD, 2 for symmetric indefinite (definitions from MUMPS). - static void SolveJacobi(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym, double PetscReal = 1.0e-9, - PetscInt max_it = 5000); - static void SolveDirect(const petsc::PetscParMatrix &A, const petsc::PetscParVector &b, - petsc::PetscParVector &x, PetscInt sym); + void Mult(const VecType &x, VecType &y) const; }; +using KspSolver = BaseKspSolver; +using ComplexKspSolver = BaseKspSolver; + } // namespace palace -#endif // PALACE_LINALG_KSP_SOLVER_HPP +#endif // PALACE_LINALG_KSP_HPP diff --git a/palace/linalg/mumps.cpp b/palace/linalg/mumps.cpp index 548643c8c..8026eb803 100644 --- a/palace/linalg/mumps.cpp +++ b/palace/linalg/mumps.cpp @@ -5,6 +5,8 @@ #if defined(MFEM_USE_MUMPS) +#include "linalg/rap.hpp" + namespace palace { @@ -34,7 +36,8 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym, } else { - SetReorderingStrategy(mfem::MUMPSSolver::AUTOMATIC); // MUMPS should have good defaults + // SetReorderingStrategy(mfem::MUMPSSolver::AUTOMATIC); // Should have good default + SetReorderingStrategy(mfem::MUMPSSolver::PORD); } SetReorderingReuse(true); // Repeated calls use same sparsity pattern if (blr_tol > 0.0) @@ -43,6 
+46,19 @@ MumpsSolver::MumpsSolver(MPI_Comm comm, mfem::MUMPSSolver::MatType sym, } } +void MumpsSolver::SetOperator(const Operator &op) +{ + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + mfem::MUMPSSolver::SetOperator(PtAP->ParallelAssemble()); + } + else + { + mfem::MUMPSSolver::SetOperator(op); + } +} + } // namespace palace #endif diff --git a/palace/linalg/mumps.hpp b/palace/linalg/mumps.hpp index fa10193f8..f98bd0266 100644 --- a/palace/linalg/mumps.hpp +++ b/palace/linalg/mumps.hpp @@ -8,6 +8,7 @@ #if defined(MFEM_USE_MUMPS) +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -23,7 +24,7 @@ class MumpsSolver : public mfem::MUMPSSolver config::LinearSolverData::SymFactType reorder, double blr_tol, int print); MumpsSolver(MPI_Comm comm, const IoData &iodata, int print) : MumpsSolver(comm, - (iodata.solver.linear.mat_shifted || + (iodata.solver.linear.pc_mat_shifted || iodata.problem.type == config::ProblemData::Type::TRANSIENT || iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC) @@ -37,6 +38,8 @@ class MumpsSolver : public mfem::MUMPSSolver print) { } + + void SetOperator(const Operator &op) override; }; } // namespace palace diff --git a/palace/linalg/operator.cpp b/palace/linalg/operator.cpp new file mode 100644 index 000000000..a463d3687 --- /dev/null +++ b/palace/linalg/operator.cpp @@ -0,0 +1,632 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "operator.hpp" + +#include +#include "linalg/slepc.hpp" +#include "utils/communication.hpp" + +namespace palace +{ + +bool ComplexOperator::IsReal() const +{ + MFEM_ABORT("IsReal() is not implemented for base class ComplexOperator!"); + return false; +} + +bool ComplexOperator::IsImag() const +{ + MFEM_ABORT("IsImag() is not implemented for base class ComplexOperator!"); + return false; +} + +bool ComplexOperator::HasReal() const +{ + MFEM_ABORT("HasReal() is not implemented for base class ComplexOperator!"); + return false; +} + +bool ComplexOperator::HasImag() const +{ + MFEM_ABORT("HasImag() is not implemented for base class ComplexOperator!"); + return false; +} + +const Operator *ComplexOperator::Real() const +{ + MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +Operator *ComplexOperator::Real() +{ + MFEM_ABORT("Real() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +const Operator *ComplexOperator::Imag() const +{ + MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +Operator *ComplexOperator::Imag() +{ + MFEM_ABORT("Imag() is not implemented for base class ComplexOperator!"); + return nullptr; +} + +void ComplexOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement MultTranspose!"); +} + +void ComplexOperator::MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement MultHermitianTranspose!"); +} + +void ComplexOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement AddMult!"); +} + +void ComplexOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + MFEM_ABORT("Base class ComplexOperator 
does not implement AddMultTranspose!"); +} + +void ComplexOperator::AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + MFEM_ABORT("Base class ComplexOperator does not implement AddMultHermitianTranspose!"); +} + +ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&dAr, + std::unique_ptr &&dAi, + Operator *pAr, Operator *pAi) + : ComplexOperator(), data_Ar(std::move(dAr)), data_Ai(std::move(dAi)), + Ar((data_Ar != nullptr) ? data_Ar.get() : pAr), + Ai((data_Ai != nullptr) ? data_Ai.get() : pAi) +{ + MFEM_VERIFY(Ar || Ai, "Cannot construct ComplexWrapperOperator from an empty matrix!"); + MFEM_VERIFY((!Ar || !Ai) || (Ar->Height() == Ai->Height() && Ar->Width() == Ai->Width()), + "Mismatch in dimension of real and imaginary matrix parts!"); + height = Ar ? Ar->Height() : Ai->Height(); + width = Ar ? Ar->Width() : Ai->Width(); +} + +ComplexWrapperOperator::ComplexWrapperOperator(std::unique_ptr &&Ar, + std::unique_ptr &&Ai) + : ComplexWrapperOperator(std::move(Ar), std::move(Ai), nullptr, nullptr) +{ +} + +ComplexWrapperOperator::ComplexWrapperOperator(Operator *Ar, Operator *Ai) + : ComplexWrapperOperator(nullptr, nullptr, Ar, Ai) +{ +} + +void ComplexWrapperOperator::Mult(const ComplexVector &x, ComplexVector &y) const +{ + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); + if (Ar) + { + if (!zero_real) + { + Ar->Mult(xr, yr); + } + if (!zero_imag) + { + Ar->Mult(xi, yi); + } + } + else + { + yr = 0.0; + yi = 0.0; + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMult(xi, yr, -1.0); + } + if (!zero_real) + { + Ai->AddMult(xr, yi, 1.0); + } + } +} + +void ComplexWrapperOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const +{ + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); 
+ Vector &yr = y.Real(); + Vector &yi = y.Imag(); + if (Ar) + { + if (!zero_real) + { + Ar->MultTranspose(xr, yr); + } + if (!zero_imag) + { + Ar->MultTranspose(xi, yi); + } + } + else + { + yr = 0.0; + yi = 0.0; + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, -1.0); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, 1.0); + } + } +} + +void ComplexWrapperOperator::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const +{ + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); + if (Ar) + { + if (!zero_real) + { + Ar->MultTranspose(xr, yr); + } + if (!zero_imag) + { + Ar->MultTranspose(xi, yi); + } + } + else + { + yr = 0.0; + yi = 0.0; + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, 1.0); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, -1.0); + } + } +} + +void ComplexWrapperOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); + if (a.real() != 0.0 && a.imag() != 0.0) + { + ty.SetSize(height); + Mult(x, ty); + const int N = height; + const double ar = a.real(); + const double ai = a.imag(); + const auto *TYR = ty.Real().Read(); + const auto *TYI = ty.Imag().Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TYR[i] - ai * TYI[i]; + YI[i] += ai * TYR[i] + ar * TYI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar) + { + if (!zero_real) + { + Ar->AddMult(xr, yr, a.real()); + } + if (!zero_imag) + { + Ar->AddMult(xi, yi, a.real()); + } + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMult(xi, yr, -a.real()); + } + if (!zero_real) + { + Ai->AddMult(xr, yi, 
a.real()); + } + } + } + else if (a.imag() != 0.0) + { + if (Ar) + { + if (!zero_real) + { + Ar->AddMult(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar->AddMult(xi, yr, -a.imag()); + } + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMult(xi, yi, -a.imag()); + } + if (!zero_real) + { + Ai->AddMult(xr, yr, -a.imag()); + } + } + } +} + +void ComplexWrapperOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); + if (a.real() != 0.0 && a.imag() != 0.0) + { + tx.SetSize(width); + MultTranspose(x, tx); + const int N = width; + const double ar = a.real(); + const double ai = a.imag(); + const auto *TXR = tx.Real().Read(); + const auto *TXI = tx.Imag().Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TXR[i] - ai * TXI[i]; + YI[i] += ai * TXR[i] + ar * TXI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar) + { + if (!zero_real) + { + Ar->AddMultTranspose(xr, yr, a.real()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yi, a.real()); + } + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, -a.real()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, a.real()); + } + } + } + else if (a.imag() != 0.0) + { + if (Ar) + { + if (!zero_real) + { + Ar->AddMultTranspose(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yr, -a.imag()); + } + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yi, -a.imag()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yr, -a.imag()); + } + } + } +} + +void ComplexWrapperOperator::AddMultHermitianTranspose(const ComplexVector &x, + ComplexVector &y, + const std::complex a) const +{ + constexpr bool zero_real = false; + constexpr bool zero_imag = false; + 
const Vector &xr = x.Real(); + const Vector &xi = x.Imag(); + Vector &yr = y.Real(); + Vector &yi = y.Imag(); + if (a.real() != 0.0 && a.imag() != 0.0) + { + tx.SetSize(width); + MultHermitianTranspose(x, tx); + const int N = width; + const double ar = a.real(); + const double ai = a.imag(); + const auto *TXR = tx.Real().Read(); + const auto *TXI = tx.Imag().Read(); + auto *YR = yr.ReadWrite(); + auto *YI = yi.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * TXR[i] - ai * TXI[i]; + YI[i] += ai * TXR[i] + ar * TXI[i]; + }); + } + else if (a.real() != 0.0) + { + if (Ar) + { + if (!zero_real) + { + Ar->AddMultTranspose(xr, yr, a.real()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yi, a.real()); + } + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yr, a.real()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yi, -a.real()); + } + } + } + else if (a.imag() != 0.0) + { + if (Ar) + { + if (!zero_real) + { + Ar->AddMultTranspose(xr, yi, a.imag()); + } + if (!zero_imag) + { + Ar->AddMultTranspose(xi, yr, -a.imag()); + } + } + if (Ai) + { + if (!zero_imag) + { + Ai->AddMultTranspose(xi, yi, a.imag()); + } + if (!zero_real) + { + Ai->AddMultTranspose(xr, yr, a.imag()); + } + } + } +} + +SumOperator::SumOperator(const Operator &op, double c) : Operator(op.Height(), op.Width()) +{ + AddOperator(op, c); +} + +void SumOperator::AddOperator(const Operator &op, double c) +{ + MFEM_VERIFY(op.Height() == height && op.Width() == width, + "Invalid Operator dimensions for SumOperator!"); + ops.emplace_back(&op, c); +} + +void SumOperator::Mult(const Vector &x, Vector &y) const +{ + if (ops.size() == 1) + { + ops.front().first->Mult(x, y); + if (ops.front().second != 1.0) + { + y *= ops.front().second; + } + return; + } + y = 0.0; + AddMult(x, y); +} + +void SumOperator::MultTranspose(const Vector &x, Vector &y) const +{ + if (ops.size() == 1) + { + ops.front().first->MultTranspose(x, y); + if (ops.front().second != 1.0) + 
{ + y *= ops.front().second; + } + return; + } + y = 0.0; + AddMultTranspose(x, y); +} + +void SumOperator::AddMult(const Vector &x, Vector &y, const double a) const +{ + for (const auto &[op, c] : ops) + { + op->AddMult(x, y, a * c); + } +} + +void SumOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +{ + for (const auto &[op, c] : ops) + { + op->AddMultTranspose(x, y, a * c); + } +} + +template <> +void BaseDiagonalOperator::Mult(const Vector &x, Vector &y) const +{ + const int N = this->height; + const auto *D = d.Read(); + const auto *X = x.Read(); + auto *Y = y.Write(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { Y[i] = D[i] * X[i]; }); +} + +template <> +void BaseDiagonalOperator::Mult(const ComplexVector &x, + ComplexVector &y) const +{ + const int N = this->height; + const auto *DR = d.Real().Read(); + const auto *DI = d.Imag().Read(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = y.Real().Write(); + auto *YI = y.Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] = DR[i] * XR[i] - DI[i] * XI[i]; + YI[i] = DI[i] * XR[i] + DR[i] * XI[i]; + }); +} + +template <> +void DiagonalOperatorHelper, + ComplexOperator>::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const +{ + const ComplexVector &d = + static_cast *>(this)->d; + const int N = this->height; + const auto *DR = d.Real().Read(); + const auto *DI = d.Imag().Read(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = y.Real().Write(); + auto *YI = y.Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] = DR[i] * XR[i] + DI[i] * XI[i]; + YI[i] = -DI[i] * XR[i] + DR[i] * XI[i]; + }); +} + +namespace linalg +{ + +double SpectralNorm(MPI_Comm comm, const Operator &A, bool sym, double tol, int max_it) +{ + ComplexWrapperOperator Ar(const_cast(&A), nullptr); // Non-owning constructor + return SpectralNorm(comm, Ar, sym, tol, max_it); +} + +double 
SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm, double tol, + int max_it) +{ + // XX TODO: Use ARPACK or SLEPc for this when configured. +#if defined(PALACE_WITH_SLEPC) + return slepc::GetMaxSingularValue(comm, A, herm, tol, max_it); +#else + // Power iteration loop: ||A||₂² = λₙ(Aᴴ A). + int it = 0; + double res = 0.0; + double l, l0 = 0.0; + ComplexVector u(A.Height()), v(A.Height()); + SetRandom(comm, u); + Normalize(comm, u); + while (it < max_it) + { + A.Mult(u, v); + if (herm) + { + u = v; + } + else + { + A.MultHermitianTranspose(v, u); + } + l = Normalize(comm, u); + if (it > 0) + { + res = std::abs(l - l0) / l0; + if (res < tol) + { + break; + } + } + l0 = l; + it++; + } + if (it >= max_it) + { + Mpi::Warning(comm, + "Power iteration did not converge in {:d} iterations, res = {:.3e}, " + "lambda = {:.3e}!\n", + it, res, l); + } + return herm ? l : std::sqrt(l); +#endif +} + +} // namespace linalg + +} // namespace palace diff --git a/palace/linalg/operator.hpp b/palace/linalg/operator.hpp new file mode 100644 index 000000000..d7e8882dd --- /dev/null +++ b/palace/linalg/operator.hpp @@ -0,0 +1,348 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_OPERATOR_HPP +#define PALACE_LINALG_OPERATOR_HPP + +#include +#include +#include +#include +#include +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// Functionality extending mfem::Operator from MFEM. +// + +// Abstract base class for complex-valued operators. +class ComplexOperator +{ +protected: + // The size of the complex-valued operator. + int height, width; + +public: + ComplexOperator(int s = 0) : height(s), width(s) {} + ComplexOperator(int h, int w) : height(h), width(w) {} + virtual ~ComplexOperator() = default; + + // Get the height (size of output) of the operator. + int Height() const { return height; } + + // Get the width (size of input) of the operator. 
+ int Width() const { return width; } + + // Test whether or not the operator is purely real or imaginary. + virtual bool IsReal() const; + virtual bool IsImag() const; + + // Test whether or not we can access the real and imaginary operator parts. + virtual bool HasReal() const; + virtual bool HasImag() const; + + // Get access to the real and imaginary operator parts. + virtual const Operator *Real() const; + virtual Operator *Real(); + virtual const Operator *Imag() const; + virtual Operator *Imag(); + + // Operator application. + virtual void Mult(const ComplexVector &x, ComplexVector &y) const = 0; + + virtual void MultTranspose(const ComplexVector &x, ComplexVector &y) const; + + virtual void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const; + + virtual void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const; + + virtual void AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const; + + virtual void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const; +}; + +// A complex-valued operator represented using a block 2 x 2 equivalent-real formulation: +// [ yr ] = [ Ar -Ai ] [ xr ] +// [ yi ] [ Ai Ar ] [ xi ] . +class ComplexWrapperOperator : public ComplexOperator +{ +private: + // Storage and access for real and imaginary parts of the operator. + std::unique_ptr data_Ar, data_Ai; + Operator *Ar, *Ai; + + // Temporary storage for operator application. + mutable ComplexVector tx, ty; + + ComplexWrapperOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, + Operator *pAr, Operator *pAi); + +public: + // Construct a complex operator which inherits ownership of the input real and imaginary + // parts. + ComplexWrapperOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai); + + // Non-owning constructor. 
+ ComplexWrapperOperator(Operator *Ar, Operator *Ai); + + bool IsReal() const override { return Ai == nullptr; } + bool IsImag() const override { return Ar == nullptr; } + bool HasReal() const override { return Ar != nullptr; } + bool HasImag() const override { return Ai != nullptr; } + const Operator *Real() const override { return Ar; } + Operator *Real() override { return Ar; } + const Operator *Imag() const override { return Ai; } + Operator *Imag() override { return Ai; } + + void Mult(const ComplexVector &x, ComplexVector &y) const override; + + void MultTranspose(const ComplexVector &x, ComplexVector &y) const override; + + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override; + + void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; + + void AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; + + void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; +}; + +// Wrap a sequence of operators of the same dimensions and optional coefficients. +class SumOperator : public Operator +{ +private: + std::vector> ops; + +public: + SumOperator(int s) : Operator(s) {} + SumOperator(int h, int w) : Operator(h, w) {} + SumOperator(const Operator &op, double c = 1.0); + + void AddOperator(const Operator &op, double c = 1.0); + + void Mult(const Vector &x, Vector &y) const override; + + void MultTranspose(const Vector &x, Vector &y) const override; + + void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override; + + void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override; +}; + +// Wraps two operators such that: (AB)ᵀ = BᵀAᵀ and, for complex symmetric operators, the +// Hermitian transpose operation is (AB)ᴴ = BᴴAᴴ. 
+template +class ProductOperatorHelper : public OperType +{ +}; + +template +class ProductOperatorHelper : public Operator +{ +public: + ProductOperatorHelper(int h, int w) : Operator(h, w) {} +}; + +template +class ProductOperatorHelper : public ComplexOperator +{ +public: + ProductOperatorHelper(int h, int w) : ComplexOperator(h, w) {} + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override + { + const ComplexOperator &A = static_cast(this)->A; + const ComplexOperator &B = static_cast(this)->B; + ComplexVector &z = static_cast(this)->z; + A.MultHermitianTranspose(x, z); + B.MultHermitianTranspose(z, y); + } +}; + +template +class BaseProductOperator + : public ProductOperatorHelper, OperType> +{ + friend class ProductOperatorHelper, OperType>; + + using VecType = typename std::conditional::value, + ComplexVector, Vector>::type; + +private: + const OperType &A, &B; + mutable VecType z; + +public: + BaseProductOperator(const OperType &A, const OperType &B) + : ProductOperatorHelper, OperType>(A.Height(), B.Width()), + A(A), B(B), z(B.Height()) + { + } + + void Mult(const VecType &x, VecType &y) const override + { + B.Mult(x, z); + A.Mult(z, y); + } + + void MultTranspose(const VecType &x, VecType &y) const override + { + A.MultTranspose(x, z); + B.MultTranspose(z, y); + } +}; + +using ProductOperator = BaseProductOperator; +using ComplexProductOperator = BaseProductOperator; + +// Applies the simple, symmetric but not necessarily Hermitian, operator: diag(d). 
+template +class DiagonalOperatorHelper : public OperType +{ +}; + +template +class DiagonalOperatorHelper : public Operator +{ +public: + DiagonalOperatorHelper(int s) : Operator(s) {} +}; + +template +class DiagonalOperatorHelper : public ComplexOperator +{ +public: + DiagonalOperatorHelper(int s) : ComplexOperator(s) {} + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override; +}; + +template +class BaseDiagonalOperator + : public DiagonalOperatorHelper, OperType> +{ + friend class DiagonalOperatorHelper, OperType>; + + using VecType = typename std::conditional::value, + ComplexVector, Vector>::type; + +private: + const VecType &d; + +public: + BaseDiagonalOperator(const VecType &d) + : DiagonalOperatorHelper, OperType>(d.Size()), d(d) + { + } + + void Mult(const VecType &x, VecType &y) const override; + + void MultTranspose(const VecType &x, VecType &y) const override { Mult(x, y); } +}; + +using DiagonalOperator = BaseDiagonalOperator; +using ComplexDiagonalOperator = BaseDiagonalOperator; + +// A container for a sequence of operators corresponding to a multigrid hierarchy. +// Optionally includes operators for the auxiliary space at each level as well. The +// Operators are stored from coarsest to finest level. The height and width of this operator +// are never set. 
+template +class BaseMultigridOperator : public OperType +{ + using VecType = typename std::conditional::value, + ComplexVector, Vector>::type; + +private: + std::vector> ops, aux_ops; + +public: + BaseMultigridOperator(int l) : OperType(0) + { + ops.reserve(l); + aux_ops.reserve(l); + } + + void AddOperator(std::unique_ptr &&op) + { + ops.push_back(std::move(op)); + this->height = ops.back()->Height(); + this->width = ops.back()->Width(); + } + + void AddAuxiliaryOperator(std::unique_ptr &&aux_op) + { + aux_ops.push_back(std::move(aux_op)); + } + + bool HasAuxiliaryOperators() const { return !aux_ops.empty(); } + + int GetNumLevels() const { return static_cast(ops.size()); } + int GetNumAuxiliaryLevels() const { return static_cast(aux_ops.size()); } + + const OperType &GetFinestOperator() const { return *ops.back(); } + const OperType &GetFinestAuxiliaryOperator() const { return *aux_ops.back(); } + + const OperType &GetOperatorAtLevel(int l) const + { + MFEM_ASSERT(l >= 0 && l < GetNumLevels(), + "Out of bounds multigrid level operator requested!"); + return *ops[l]; + } + const OperType &GetAuxiliaryOperatorAtLevel(int l) const + { + MFEM_ASSERT(l < GetNumAuxiliaryLevels(), + "Out of bounds multigrid level auxiliary operator requested!"); + return *aux_ops[l]; + } + + void Mult(const VecType &x, VecType &y) const override { GetFinestOperator().Mult(x, y); } + void MultTranspose(const VecType &x, VecType &y) const override + { + GetFinestOperator().MultTranspose(x, y); + } +}; + +using MultigridOperator = BaseMultigridOperator; +using ComplexMultigridOperator = BaseMultigridOperator; + +namespace linalg +{ + +// Estimate operator 2-norm (spectral norm) using power iteration. Assumes the operator is +// not symmetric or Hermitian unless specified. 
+double SpectralNorm(MPI_Comm comm, const Operator &A, bool sym = false, double tol = 1.0e-4, + int max_it = 200); +double SpectralNorm(MPI_Comm comm, const ComplexOperator &A, bool herm = false, + double tol = 1.0e-4, int max_it = 200); + +} // namespace linalg + +} // namespace palace + +namespace mfem +{ + +// A symmetric bilinear form operator which replaces *MultTranspose with *Mult. +class SymmetricBilinearForm : public BilinearForm +{ +public: + using BilinearForm::BilinearForm; + + void MultTranspose(const Vector &x, Vector &y) const override { Mult(x, y); } + void AddMultTranspose(const Vector &x, Vector &y, double c = 1.0) const override + { + AddMult(x, y, c); + } +}; + +} // namespace mfem + +#endif // PALACE_LINALG_OPERATOR_HPP diff --git a/palace/linalg/orthog.hpp b/palace/linalg/orthog.hpp new file mode 100644 index 000000000..aded6ebd6 --- /dev/null +++ b/palace/linalg/orthog.hpp @@ -0,0 +1,69 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_ORTHOG_HPP +#define PALACE_LINALG_ORTHOG_HPP + +#include +#include "linalg/vector.hpp" +#include "utils/communication.hpp" + +namespace palace::linalg +{ + +// +// Orthogonalization functions for orthogonalizing a vector against a number of basis +// vectors using modified or classical Gram-Schmidt. 
+// + +template +inline void OrthogonalizeColumnMGS(MPI_Comm comm, const std::vector &V, VecType &w, + ScalarType *H, int m) +{ + MFEM_ASSERT(static_cast(m) <= V.size(), + "Out of bounds number of columns for MGS orthogonalization!"); + for (int j = 0; j < m; j++) + { + H[j] = linalg::Dot(comm, w, V[j]); // Global inner product + w.Add(-H[j], V[j]); + } +} + +template +inline void OrthogonalizeColumnCGS(MPI_Comm comm, const std::vector &V, VecType &w, + ScalarType *H, int m, bool refine = false) +{ + MFEM_ASSERT(static_cast(m) <= V.size(), + "Out of bounds number of columns for CGS orthogonalization!"); + if (m == 0) + { + return; + } + for (int j = 0; j < m; j++) + { + H[j] = w * V[j]; // Local inner product + } + Mpi::GlobalSum(m, H, comm); + for (int j = 0; j < m; j++) + { + w.Add(-H[j], V[j]); + } + if (refine) + { + std::vector dH(m); + for (int j = 0; j < m; j++) + { + dH[j] = w * V[j]; // Local inner product + } + Mpi::GlobalSum(m, dH.data(), comm); + for (int j = 0; j < m; j++) + { + H[j] += dH[j]; + w.Add(-dH[j], V[j]); + } + } +} + +} // namespace palace::linalg + +#endif // PALACE_LINALG_ORTHOG_HPP diff --git a/palace/linalg/pc.cpp b/palace/linalg/pc.cpp deleted file mode 100644 index 0cbe9cb5a..000000000 --- a/palace/linalg/pc.cpp +++ /dev/null @@ -1,217 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#include "pc.hpp" - -#include -#include "linalg/amg.hpp" -#include "linalg/ams.hpp" -#include "linalg/gmg.hpp" -#include "linalg/mumps.hpp" -#include "linalg/strumpack.hpp" -#include "linalg/superlu.hpp" -#include "utils/iodata.hpp" - -namespace palace -{ - -std::unique_ptr -ConfigurePreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces) -{ - config::LinearSolverData::Type type = iodata.solver.linear.type; - if (type == config::LinearSolverData::Type::DEFAULT) - { - if (iodata.problem.type == config::ProblemData::Type::ELECTROSTATIC || - (iodata.problem.type == config::ProblemData::Type::TRANSIENT && - iodata.solver.transient.type == config::TransientSolverData::Type::CENTRAL_DIFF)) - { - type = config::LinearSolverData::Type::BOOMER_AMG; - } - else if (iodata.problem.type == config::ProblemData::Type::MAGNETOSTATIC || - iodata.problem.type == config::ProblemData::Type::TRANSIENT) - { - type = config::LinearSolverData::Type::AMS; - } - else - { - // Prefer sparse direct solver for frequency domain problems if available. -#if defined(MFEM_USE_SUPERLU) - type = config::LinearSolverData::Type::SUPERLU; -#elif defined(MFEM_USE_STRUMPACK) - type = config::LinearSolverData::Type::STRUMPACK; -#elif defined(MFEM_USE_MUMPS) - type = config::LinearSolverData::Type::MUMPS; -#else - type = config::LinearSolverData::Type::AMS; -#endif - } - } - int print = iodata.problem.verbose - 1; - MPI_Comm comm = fespaces.GetFESpaceAtLevel(0).GetComm(); - std::unique_ptr pc; - switch (type) - { - case config::LinearSolverData::Type::AMS: - // Can either be the coarse solve for geometric multigrid or the solver at the finest - // space (in which case fespaces.GetNumLevels() == 1). - pc = std::make_unique( - iodata, fespaces.GetFESpaceAtLevel(0), - aux_fespaces ? 
&aux_fespaces->GetFESpaceAtLevel(0) : nullptr, print); - break; - case config::LinearSolverData::Type::BOOMER_AMG: - pc = std::make_unique(iodata, print); - break; - case config::LinearSolverData::Type::SUPERLU: -#if defined(MFEM_USE_SUPERLU) - pc = std::make_unique(comm, iodata, print); -#else - MFEM_ABORT("Solver was not built with SuperLU_DIST support, please choose a " - "different solver!"); -#endif - break; - case config::LinearSolverData::Type::STRUMPACK: -#if defined(MFEM_USE_STRUMPACK) - pc = std::make_unique(comm, iodata, print); - break; -#endif - case config::LinearSolverData::Type::STRUMPACK_MP: -#if defined(MFEM_USE_STRUMPACK) && \ - (STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1) - pc = std::make_unique(comm, iodata, print); -#else - MFEM_ABORT("Solver was not built with STRUMPACK support or uses STRUMPACK older than " - "6.3.1 which does not include mixed-precision support, please choose a " - "different solver!"); -#endif - break; - case config::LinearSolverData::Type::MUMPS: -#if defined(MFEM_USE_MUMPS) - pc = std::make_unique(comm, iodata, print); -#else - MFEM_ABORT( - "Solver was not built with MUMPS support, please choose a different solver!"); -#endif - break; - case config::LinearSolverData::Type::DEFAULT: - case config::LinearSolverData::Type::INVALID: - MFEM_ABORT("Unexpected type for KspPreconditioner configuration!"); - break; - } - if (iodata.solver.linear.mat_gmg) - { - // This will construct the multigrid hierarchy using pc as the coarse solver - // (ownership of pc is transfered to the GeometricMultigridSolver). When a special - // auxiliary space smoother for pre-/post-smoothing is not desired, h1_fespace is just - // a nullptr. - return std::make_unique(iodata, std::move(pc), dbc_marker, - fespaces, aux_fespaces); - } - else - { - return pc; - } -} - -void KspPreconditioner::Init(int n) -{ - // Set up temporary vector storage. 
-#if defined(PETSC_USE_COMPLEX) - if (x_.Size() == 2 * n && y_.Size() == 2 * n) - { - return; - } - x_.SetSize(2 * n); - y_.SetSize(2 * n); -#else - if (x_.Size() == n && y_.Size() == n) - { - return; - } - x_.SetSize(n); - y_.SetSize(n); -#endif -} - -void KspPreconditioner::SetOperator(const mfem::Operator &op) -{ - pc_->SetOperator(op); - Init(op.Height()); -} - -void KspPreconditioner::SetOperator( - const std::vector> &ops, - const std::vector> *aux_ops) -{ - auto *gmg = dynamic_cast(pc_.get()); - if (gmg) - { - gmg->SetOperator(ops, aux_ops); - Init(ops.back()->Height()); - } - else - { - SetOperator(*ops.back()); - } -} - -void KspPreconditioner::Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const -{ -#if defined(PETSC_USE_COMPLEX) - mfem::Vector xr_, xi_, yr_, yi_; - mfem::Array X(2); - mfem::Array Y(2); - xr_.MakeRef(x_, 0, x_.Size() / 2); - xi_.MakeRef(x_, x_.Size() / 2, x_.Size() / 2); - yr_.MakeRef(y_, 0, y_.Size() / 2); - yi_.MakeRef(y_, y_.Size() / 2, y_.Size() / 2); - X[0] = &xr_; - X[1] = &xi_; - Y[0] = &yr_; - Y[1] = &yi_; - // yr_ = 0.0; - // yi_ = 0.0; - x.GetToVectors(xr_, xi_); - pc_->ArrayMult(X, Y); - y.SetFromVectors(yr_, yi_); -#else - // y_ = 0.0; - x.GetToVector(x_); - pc_->Mult(x_, y_); - y.SetFromVector(y_); -#endif -} - -PetscErrorCode KspPreconditioner::PCSetUp(PC pc) -{ - // The preconditioner operators are set up outside of the linear solve by the user, so - // this method does nothing. - PetscFunctionBeginUser; - PetscFunctionReturn(0); -} - -PetscErrorCode KspPreconditioner::PCApply(PC pc, Vec x, Vec y) -{ - // Apply the preconditioner. If PETSc is compiled with complex number support, the real - // preconditioner applied in block diagonal form. 
- KspPreconditioner *op; - petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(PCShellGetContext(pc, (void **)&op)); - MFEM_VERIFY(op, "Invalid PETSc shell PC context!"); - op->Mult(xx, yy); - PetscFunctionReturn(0); -} - -PetscErrorCode KspPreconditioner::PCDestroy(PC pc) -{ - // Ownership of the preconditioner context is not inherited by the shell preconditioner, - // so this does nothing. - PetscFunctionBeginUser; - PetscFunctionReturn(0); -} - -} // namespace palace diff --git a/palace/linalg/pc.hpp b/palace/linalg/pc.hpp deleted file mode 100644 index dbb49a388..000000000 --- a/palace/linalg/pc.hpp +++ /dev/null @@ -1,68 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. -// SPDX-License-Identifier: Apache-2.0 - -#ifndef PALACE_LINALG_KSP_PRECONDITIONER_HPP -#define PALACE_LINALG_KSP_PRECONDITIONER_HPP - -#include -#include -#include -#include "linalg/petsc.hpp" - -namespace palace -{ - -class IoData; - -// Global method for preconditioner configuration and construction. -std::unique_ptr -ConfigurePreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr); - -// -// Class for preconditioning with interfaces to PETSc linear solvers or those from -// MFEM/Hypre. -// -class KspPreconditioner -{ -private: - // The actual preconditioner solver. - std::unique_ptr pc_; - - // Temporary vectors for preconditioner application. - mutable mfem::Vector x_, y_; - - // Helper function for setup. 
- void Init(int n); - -public: - KspPreconditioner(std::unique_ptr &&pc) : pc_(std::move(pc)) {} - KspPreconditioner(const IoData &iodata, const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpaceHierarchy &fespaces, - mfem::ParFiniteElementSpaceHierarchy *aux_fespaces = nullptr) - : pc_(ConfigurePreconditioner(iodata, dbc_marker, fespaces, aux_fespaces)) - { - if (pc_->Height()) - { - Init(pc_->Height()); - } - } - - // Sets the matrix from which to contruct a preconditioner. - void SetOperator(const mfem::Operator &op); - void SetOperator(const std::vector> &ops, - const std::vector> *aux_ops = nullptr); - - // Application of the preconditioner. - void Mult(const petsc::PetscParVector &x, petsc::PetscParVector &y) const; - - // Wrapper functions for PETSc PCSHELL. - static PetscErrorCode PCSetUp(PC pc); - static PetscErrorCode PCApply(PC pc, Vec x, Vec y); - static PetscErrorCode PCDestroy(PC pc); -}; - -} // namespace palace - -#endif // PALACE_LINALG_KSP_PRECONDITIONER_HPP diff --git a/palace/linalg/petsc.cpp b/palace/linalg/petsc.cpp deleted file mode 100644 index 0b556b188..000000000 --- a/palace/linalg/petsc.cpp +++ /dev/null @@ -1,2554 +0,0 @@ -// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
-// SPDX-License-Identifier: Apache-2.0 - -#include "petsc.hpp" - -#include -#include -#include -// #include "linalg/hypre.hpp" -#include "linalg/slepc.hpp" -#include "utils/communication.hpp" - -static PetscErrorCode __mat_shell_init(Mat); -static PetscErrorCode __mat_shell_destroy(Mat); -static PetscErrorCode __mat_shell_apply(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_transpose(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_add(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_transpose_add(Mat, Vec, Vec); -static PetscErrorCode __mat_shell_apply_hermitian_transpose_add(Mat, Vec, Vec); -#if defined(PETSC_USE_COMPLEX) -static PetscErrorCode __mat_shell_apply(Mat, const mfem::Vector &, Vec); -static PetscErrorCode __mat_shell_apply_transpose(Mat, const mfem::Vector &, Vec); -static PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat, const mfem::Vector &, Vec); -#endif -static PetscErrorCode __mat_shell_get_diagonal(Mat, Vec); -// static PetscErrorCode __mat_shell_shift(Mat, PetscScalar); -// static PetscErrorCode __mat_shell_scale(Mat, PetscScalar); -// static PetscErrorCode __mat_shell_conj(Mat); -// static PetscErrorCode __mat_shell_axpy(Mat, PetscScalar, Mat, MatStructure); -// static PetscErrorCode __mat_shell_norm(Mat, NormType, PetscReal *); -static PetscErrorCode __mat_shell_real_part(Mat); -static PetscErrorCode __mat_shell_imag_part(Mat); -static PetscErrorCode __mat_convert_hypreParCSR_AIJ(hypre_ParCSRMatrix *, Mat *); -static PetscErrorCode __array_container_destroy(void *); - -namespace palace::petsc -{ - -using mfem::ForallWrap; - -void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]) -{ - PalacePetscCall(PetscInitialize(&argc, &argv, rc_file, help)); -} - -void Finalize() -{ - PalacePetscCall(PetscFinalize()); -} - -// PetscScatter methods -PetscScatter::PetscScatter(PetscScatter::Type type, const 
PetscParVector &x, - std::unique_ptr &y) -{ - Vec yy; - if (type == Type::TO_ZERO) - { - PalacePetscCall(VecScatterCreateToZero(x, &ctx, &yy)); - } - else // type == Type::TO_ALL - { - PalacePetscCall(VecScatterCreateToAll(x, &ctx, &yy)); - } - y = std::make_unique(yy, false); -} - -PetscScatter::~PetscScatter() -{ - PalacePetscCall(VecScatterDestroy(&ctx)); -} - -void PetscScatter::Forward(const PetscParVector &x, PetscParVector &y) -{ - PalacePetscCall(VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD)); - PalacePetscCall(VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_FORWARD)); -} - -void PetscScatter::Reverse(const PetscParVector &x, PetscParVector &y) -{ - PalacePetscCall(VecScatterBegin(ctx, x, y, INSERT_VALUES, SCATTER_REVERSE)); - PalacePetscCall(VecScatterEnd(ctx, x, y, INSERT_VALUES, SCATTER_REVERSE)); -} - -// PetscParVector methods - -PetscParVector::PetscParVector(const PetscParMatrix &A, bool transpose) -{ - if (!transpose) - { - PalacePetscCall(MatCreateVecs(A, &x, nullptr)); - } - else - { - PalacePetscCall(MatCreateVecs(A, nullptr, &x)); - } -} - -PetscParVector::PetscParVector(MPI_Comm comm, const mfem::Vector &y) -{ - PalacePetscCall(VecCreate(comm, &x)); - PalacePetscCall(VecSetSizes(x, y.Size(), PETSC_DECIDE)); - PalacePetscCall(VecSetType(x, VECSTANDARD)); - SetFromVector(y); -} - -PetscParVector::PetscParVector(const mfem::Vector &y) -{ - PalacePetscCall(VecCreateSeq(PETSC_COMM_SELF, y.Size(), &x)); - SetFromVector(y); -} - -#if defined(PETSC_USE_COMPLEX) -PetscParVector::PetscParVector(MPI_Comm comm, const mfem::Vector &yr, - const mfem::Vector &yi) -{ - MFEM_VERIFY(yr.Size() == yi.Size(), - "Mismatch in size of real and imaginary vector parts!"); - PalacePetscCall(VecCreate(comm, &x)); - PalacePetscCall(VecSetSizes(x, yr.Size(), PETSC_DECIDE)); - PalacePetscCall(VecSetType(x, VECSTANDARD)); - SetFromVectors(yr, yi); -} - -PetscParVector::PetscParVector(const mfem::Vector &yr, const mfem::Vector &yi) -{ - MFEM_VERIFY(yr.Size() == 
yi.Size(), - "Mismatch in size of real and imaginary vector parts!"); - PalacePetscCall(VecCreateSeq(PETSC_COMM_SELF, yr.Size(), &x)); - SetFromVectors(yr, yi); -} -#endif - -PetscParVector::PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N) -{ - PalacePetscCall(VecCreateMPI(comm, n, N, &x)); -} - -// PetscParVector::PetscParVector(PetscInt n) -// { -// PalacePetscCall(VecCreateSeq(PETSC_COMM_SELF, n, &x)); -// } - -PetscParVector::PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N, PetscScalar *data) -{ - PalacePetscCall(VecCreateMPIWithArray(comm, 1, n, N, data, &x)); -} - -PetscParVector::PetscParVector(PetscInt n, PetscScalar *data) -{ - PalacePetscCall(VecCreateSeqWithArray(PETSC_COMM_SELF, 1, n, data, &x)); -} - -PetscParVector::PetscParVector(const PetscParVector &y) -{ - PalacePetscCall(VecDuplicate(y, &x)); - Copy(y); -} - -PetscParVector::PetscParVector(Vec y, bool ref) -{ - x = y; - if (ref) - { - PalacePetscCall(PetscObjectReference(reinterpret_cast(y))); - } -} - -PetscParVector::~PetscParVector() -{ - PalacePetscCall(VecDestroy(&x)); -} - -void PetscParVector::Copy(const PetscParVector &y) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - PalacePetscCall(VecCopy(y, x)); -} - -void PetscParVector::GetToVector(mfem::Vector &v, PetscInt start, PetscInt end) const -{ - const PetscScalar *xv; - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetSize(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetSize() && v.Size() == end - start, - "Invalid start/end indices for vector extraction!"); - PalacePetscCall(VecGetArrayRead(x, &xv)); - auto vv = v.Write(); -#if defined(PETSC_USE_COMPLEX) - MFEM_FORALL(i, end - start, { vv[i] = PetscRealPart(xv[i + start]); }); -#else - MFEM_FORALL(i, end - start, { vv[i] = xv[i + start]; }); -#endif - PalacePetscCall(VecRestoreArrayRead(x, &xv)); -} - -void PetscParVector::SetFromVector(const mfem::Vector &v) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == v.Size(), "Invalid 
size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vv = v.Read(); - MFEM_FORALL(i, GetSize(), { xv[i] = vv[i]; }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -void PetscParVector::AddFromVector(const mfem::Vector &v) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == v.Size(), "Invalid size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vv = v.Read(); - MFEM_FORALL(i, GetSize(), { xv[i] += vv[i]; }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParVector::GetToVectors(mfem::Vector &vr, mfem::Vector &vi, PetscInt start, - PetscInt end) const -{ - const PetscScalar *xv; - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetSize(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetSize() && vr.Size() == end - start && - vi.Size() == end - start, - "Invalid start/end indices for vector extraction!"); - PalacePetscCall(VecGetArrayRead(x, &xv)); - auto vvr = vr.Write(); - auto vvi = vi.Write(); - MFEM_FORALL(i, end - start, { - vvr[i] = PetscRealPart(xv[i + start]); - vvi[i] = PetscImaginaryPart(xv[i + start]); - }); - PalacePetscCall(VecRestoreArrayRead(x, &xv)); -} - -void PetscParVector::SetFromVectors(const mfem::Vector &vr, const mfem::Vector &vi) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == vr.Size() || GetSize() == vi.Size(), "Invalid size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vvr = vr.Read(); - const auto vvi = vi.Read(); - MFEM_FORALL(i, GetSize(), { - // xv[i] = vvr[i] + PETSC_i * vvi[i]; - reinterpret_cast(&xv[i])[0] = vvr[i]; - reinterpret_cast(&xv[i])[1] = vvi[i]; - }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -void PetscParVector::AddFromVectors(const mfem::Vector &vr, const mfem::Vector &vi) -{ - PetscScalar *xv; - MFEM_VERIFY(GetSize() == vr.Size() || GetSize() == vi.Size(), "Invalid size!"); - PalacePetscCall(VecGetArray(x, &xv)); - const auto vvr = vr.Read(); - const auto vvi = vi.Read(); - MFEM_FORALL(i, GetSize(), { - // xv[i] 
+= vvr[i] + PETSC_i * vvi[i]; - reinterpret_cast(&xv[i])[0] += vvr[i]; - reinterpret_cast(&xv[i])[1] += vvi[i]; - }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} -#endif - -PetscScalar *PetscParVector::GetArray() -{ - PetscScalar *data; - PalacePetscCall(VecGetArray(x, &data)); - return data; -} - -const PetscScalar *PetscParVector::GetArrayRead() const -{ - const PetscScalar *data; - PalacePetscCall(VecGetArrayRead(x, &data)); - return data; -} - -void PetscParVector::RestoreArray(PetscScalar *data) -{ - PalacePetscCall(VecRestoreArray(x, &data)); -} - -void PetscParVector::RestoreArrayRead(const PetscScalar *data) const -{ - PalacePetscCall(VecRestoreArrayRead(x, &data)); -} - -void PetscParVector::PlaceArray(const PetscScalar *data) -{ - PalacePetscCall(VecPlaceArray(x, data)); -} - -void PetscParVector::ResetArray() -{ - PalacePetscCall(VecResetArray(x)); -} - -PetscInt PetscParVector::GetSize() const -{ - PetscInt n; - PalacePetscCall(VecGetLocalSize(x, &n)); - return n; -} - -PetscInt PetscParVector::GetGlobalSize() const -{ - PetscInt N; - PalacePetscCall(VecGetSize(x, &N)); - return N; -} - -void PetscParVector::Resize(PetscInt n, bool copy) -{ - Vec y; - const PetscScalar *xv; - PetscScalar *yv; - PetscInt n0 = GetSize(); - VecType type; - if (n0 == n) - { - return; - } - PalacePetscCall(VecGetType(x, &type)); - PalacePetscCall(VecCreate(GetComm(), &y)); - PalacePetscCall(VecSetSizes(y, n, PETSC_DECIDE)); - PalacePetscCall(VecSetType(y, type)); - if (copy) - { - PalacePetscCall(VecGetArrayRead(x, &xv)); - PalacePetscCall(VecGetArray(y, &yv)); - MFEM_FORALL(i, std::min(n, n0), { yv[i] = xv[i]; }); - PalacePetscCall(VecRestoreArrayRead(x, &xv)); - PalacePetscCall(VecRestoreArray(y, &yv)); - } - PalacePetscCall(VecDestroy(&x)); - x = y; -} - -void PetscParVector::SetZero() -{ - PalacePetscCall(VecZeroEntries(x)); -} - -void PetscParVector::SetRandom() -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); 
-#if defined(PETSC_USE_COMPLEX) - PalacePetscCall(PetscRandomSetInterval(rand, -1.0 - PETSC_i, 1.0 + PETSC_i)); -#else - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); -#endif - PalacePetscCall(VecSetRandom(x, rand)); - PalacePetscCall(PetscRandomDestroy(&rand)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParVector::SetRandomReal() -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); - PalacePetscCall(VecSetRandom(x, rand)); - PalacePetscCall(PetscRandomDestroy(&rand)); -} -#endif - -void PetscParVector::SetRandomSign(bool init) -{ - PetscScalar *xv; - if (!init) - { - SetRandomReal(); - } - PalacePetscCall(VecGetArray(x, &xv)); - MFEM_FORALL(i, GetSize(), { - // Leave zeros alone. - xv[i] = - (PetscRealPart(xv[i]) > 0.0) ? 1.0 : ((PetscRealPart(xv[i]) < 0.0) ? -1.0 : 0.0); - }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -PetscParVector &PetscParVector::operator=(PetscScalar s) -{ - PalacePetscCall(VecSet(x, s)); - return *this; -} - -void PetscParVector::Scale(PetscScalar s) -{ - PalacePetscCall(VecScale(x, s)); -} - -void PetscParVector::Shift(PetscScalar s) -{ - PalacePetscCall(VecShift(x, s)); -} - -void PetscParVector::Abs() -{ - PalacePetscCall(VecAbs(x)); -} - -void PetscParVector::SqrtAbs() -{ - PalacePetscCall(VecSqrtAbs(x)); -} - -void PetscParVector::Inv() -{ - PalacePetscCall(VecReciprocal(x)); -} - -void PetscParVector::InvSqrt() -{ - PalacePetscCall(VecPow(x, -0.5)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParVector::Conj() -{ - PalacePetscCall(VecConjugate(x)); -} - -void PetscParVector::GetRealPart() -{ - PalacePetscCall(VecRealPart(x)); -} - -void PetscParVector::GetImagPart() -{ - PalacePetscCall(VecImaginaryPart(x)); -} -#endif - -PetscReal PetscParVector::Normalize() -{ - PetscReal norm; - PalacePetscCall(VecNormalize(x, &norm)); - return norm; -} - -PetscReal PetscParVector::Normalize(const PetscParMatrix 
&B, PetscParVector &Bx) -{ - B.Mult(*this, Bx); - PetscReal norm = - PetscSqrtReal(PetscAbsScalar(Bx.Dot(*this))); // For SPD B, xᴴ B x is real - Scale(1.0 / norm); - return norm; -} - -PetscReal PetscParVector::Norml2() const -{ - PetscReal norm; - PalacePetscCall(VecNorm(x, NORM_2, &norm)); - return norm; -} - -PetscReal PetscParVector::Normlinf() const -{ - PetscReal norm; - PalacePetscCall(VecNorm(x, NORM_INFINITY, &norm)); - return norm; -} - -void PetscParVector::ZeroRows(const mfem::Array &rows) -{ - PetscScalar *xv; - PalacePetscCall(VecGetArray(x, &xv)); - MFEM_FORALL(i, rows.Size(), { xv[rows[i]] = 0.0; }); - PalacePetscCall(VecRestoreArray(x, &xv)); -} - -void PetscParVector::PointwiseMult(const PetscParVector &y, bool replace_zeros) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - if (replace_zeros) - { - PetscScalar *yv; - PalacePetscCall(VecGetArray(y, &yv)); - MFEM_FORALL(i, GetSize(), { - if (yv[i] == 0.0) - { - yv[i] = 1.0; - } - }); - PalacePetscCall(VecRestoreArray(y, &yv)); - } - PalacePetscCall(VecPointwiseMult(x, x, y)); -} - -void PetscParVector::AXPY(PetscScalar alpha, const PetscParVector &y) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - PalacePetscCall(VecAXPY(x, alpha, y)); -} - -void PetscParVector::AXPBY(PetscScalar alpha, const PetscParVector &y, PetscScalar beta) -{ - MFEM_VERIFY(GetSize() == y.GetSize(), "Invalid size!"); - PalacePetscCall(VecAXPBY(x, alpha, beta, y)); -} - -void PetscParVector::AXPBYPCZ(PetscScalar alpha, const PetscParVector &y, PetscScalar beta, - const PetscParVector &z, PetscScalar gamma) -{ - MFEM_VERIFY(GetSize() == y.GetSize() && GetSize() == z.GetSize(), "Invalid size!"); - PalacePetscCall(VecAXPBYPCZ(x, alpha, beta, gamma, y, z)); -} - -PetscScalar PetscParVector::Dot(const PetscParVector &y) const -{ - PetscScalar val; - PalacePetscCall(VecDot(x, y, &val)); - return val; -} - -PetscScalar PetscParVector::TransposeDot(const PetscParVector &y) const -{ - PetscScalar val; - 
PalacePetscCall(VecTDot(x, y, &val)); - return val; -} - -void PetscParVector::Print(const char *fname, bool binary) const -{ - if (fname) - { - PetscViewer view; - if (binary) - { - PalacePetscCall( - PetscViewerBinaryOpen(PetscObjectComm(reinterpret_cast(x)), fname, - FILE_MODE_WRITE, &view)); - } - else - { - PalacePetscCall(PetscViewerASCIIOpen( - PetscObjectComm(reinterpret_cast(x)), fname, &view)); - } - PalacePetscCall(VecView(x, view)); - PalacePetscCall(PetscViewerDestroy(&view)); - } - else - { - PalacePetscCall(VecView(x, nullptr)); - } -} - -MPI_Comm PetscParVector::GetComm() const -{ - return x ? PetscObjectComm(reinterpret_cast(x)) : MPI_COMM_NULL; -} - -// PetscParMatrix methods - -PetscParMatrix::PetscParMatrix(const PetscParMatrix &B) -{ - PalacePetscCall(MatDuplicate(B, MAT_COPY_VALUES, &A)); -} - -PetscParMatrix::PetscParMatrix(Mat B, bool ref) -{ - A = B; - if (ref) - { - PalacePetscCall(PetscObjectReference(reinterpret_cast(B))); - } -} - -PetscParMatrix::~PetscParMatrix() -{ - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(A), &comm)); - PalacePetscCall(MatDestroy(&A)); -} - -void PetscParMatrix::SetSymmetric(bool sym) -{ - PalacePetscCall(MatSetOption(A, MAT_SYMMETRIC, sym ? PETSC_TRUE : PETSC_FALSE)); - PalacePetscCall(MatSetOption(A, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); -} - -void PetscParMatrix::SetHermitian(bool herm) -{ - PalacePetscCall(MatSetOption(A, MAT_HERMITIAN, herm ? 
PETSC_TRUE : PETSC_FALSE)); - PalacePetscCall(MatSetOption(A, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); -} - -bool PetscParMatrix::GetSymmetric() const -{ - PetscBool flg, sym; - PalacePetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - return (flg == PETSC_TRUE && sym == PETSC_TRUE); -} - -bool PetscParMatrix::GetHermitian() const -{ - PetscBool flg, herm; - PalacePetscCall(MatIsHermitianKnown(A, &flg, &herm)); - return (flg == PETSC_TRUE && herm == PETSC_TRUE); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParMatrix::SetRealSymmetric() -{ - PalacePetscCall(MatSetOption(A, MAT_SYMMETRIC, PETSC_TRUE)); - PalacePetscCall(MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE)); - PalacePetscCall(MatSetOption(A, MAT_SYMMETRY_ETERNAL, PETSC_TRUE)); -} -#endif - -void PetscParMatrix::CopySymmetry(const PetscParMatrix &B) -{ - PalacePetscCall(MatPropagateSymmetryOptions(B, A)); -} - -PetscInt PetscParMatrix::GetNumRows() const -{ - PetscInt m; - PalacePetscCall(MatGetLocalSize(A, &m, nullptr)); - return m; -} - -PetscInt PetscParMatrix::GetNumCols() const -{ - PetscInt n; - PalacePetscCall(MatGetLocalSize(A, nullptr, &n)); - return n; -} - -PetscInt PetscParMatrix::GetGlobalNumRows() const -{ - PetscInt M; - PalacePetscCall(MatGetSize(A, &M, nullptr)); - return M; -} - -PetscInt PetscParMatrix::GetGlobalNumCols() const -{ - PetscInt N; - PalacePetscCall(MatGetSize(A, nullptr, &N)); - return N; -} - -PetscInt PetscParMatrix::NNZ() const -{ - MatInfo info; - PalacePetscCall(MatGetInfo(A, MAT_GLOBAL_SUM, &info)); - return (PetscInt)info.nz_used; -} - -PetscReal PetscParMatrix::NormF() const -{ - PetscReal norm; - PalacePetscCall(MatNorm(A, NORM_FROBENIUS, &norm)); - return norm; -} - -PetscReal PetscParMatrix::NormInf() const -{ - PetscReal norm; - PalacePetscCall(MatNorm(A, NORM_INFINITY, &norm)); - return norm; -} - -PetscReal PetscParMatrix::Norm2(PetscReal tol, PetscInt maxits) const -{ - // XX TODO: Add separate if condition using ARPACK estimate before reverting to power - // iteration. 
- if (tol == PETSC_DEFAULT) - { - tol = 1.0e-4; - } - if (maxits == PETSC_DEFAULT) - { - maxits = 100; - } -#if defined(PALACE_WITH_SLEPC) - return slepc::GetMaxSingularValue(*this, tol, maxits); -#else - // Power iteration loop: ||A||₂² = λₙ(Aᴴ A) . - PetscInt it = 0; - PetscReal res = 0.0; - PetscReal l, l0 = 0.0; - PetscParVector u(*this), v(*this); - u.SetRandom(); - u.Normalize(); - while (it < maxits) - { - Mult(u, v); - if (GetHermitian()) - { - u.Copy(v); - } - else - { - MultHermitianTranspose(v, u); - } - l = u.Normalize(); - if (it > 0) - { - res = PetscAbsReal(l - l0) / PetscAbsReal(l0); - if (res < tol) - { - break; - } - } - l0 = l; - it++; - } - if (it >= maxits) - { - Mpi::Warning(GetComm(), - "Power iteration did not converge in {:d} " - "iterations, res = {:.3e}, lambda = {:.3e}!\n", - it, res, l); - } - return GetHermitian() ? l : PetscSqrtReal(l); -#endif -} - -void PetscParMatrix::Scale(PetscScalar s) -{ - PalacePetscCall(MatScale(A, s)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParMatrix::Conj() -{ - PalacePetscCall(MatConjugate(A)); -} - -void PetscParMatrix::GetRealPart() -{ - PalacePetscCall(MatRealPart(A)); -} - -void PetscParMatrix::GetImagPart() -{ - PalacePetscCall(MatImaginaryPart(A)); -} -#endif - -void PetscParMatrix::AXPY(PetscScalar alpha, const PetscParMatrix &B, - PetscParMatrix::NNZStructure struc) -{ - switch (struc) - { - case NNZStructure::DIFFERENT: - PalacePetscCall(MatAXPY(A, alpha, B, DIFFERENT_NONZERO_PATTERN)); - break; - case NNZStructure::SAME: - PalacePetscCall(MatAXPY(A, alpha, B, SAME_NONZERO_PATTERN)); - break; - case NNZStructure::SUBSET: - PalacePetscCall(MatAXPY(A, alpha, B, SUBSET_NONZERO_PATTERN)); - break; - } -} - -void PetscParMatrix::Mult(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(::MatMult(A, x, y)); -} - -void PetscParMatrix::MultAdd(const 
PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultAdd(A, x, y, y)); -} - -void PetscParMatrix::MultTranspose(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(::MatMultTranspose(A, (Vec)x, (Vec)y)); -} - -void PetscParMatrix::MultTransposeAdd(const PetscParVector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultTransposeAdd(A, x, y, y)); -} - -void PetscParMatrix::MultHermitianTranspose(const PetscParVector &x, - PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultHermitianTranspose(A, x, y)); -} - -void PetscParMatrix::MultHermitianTransposeAdd(const PetscParVector &x, - PetscParVector &y) const -{ - MFEM_VERIFY(x.GetSize() == GetNumRows() && y.GetSize() == GetNumCols(), - "Incorrect vector sizes for matrix-vector product!"); - PalacePetscCall(MatMultHermitianTransposeAdd(A, x, y, y)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscParMatrix::Mult(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PetscParVector xx(GetComm(), x); - Mult(xx, y); -} - -void PetscParMatrix::MultTranspose(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PetscParVector xx(GetComm(), x); - MultTranspose(xx, y); -} - -void PetscParMatrix::MultHermitianTranspose(const 
mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - PetscParVector xx(GetComm(), x); - MultHermitianTranspose(xx, y); -} -#endif - -void PetscParMatrix::Print(const char *fname, bool binary) const -{ - if (fname) - { - PetscViewer view; - if (binary) - { - PalacePetscCall( - PetscViewerBinaryOpen(PetscObjectComm(reinterpret_cast(A)), fname, - FILE_MODE_WRITE, &view)); - } - else - { - PalacePetscCall(PetscViewerASCIIOpen( - PetscObjectComm(reinterpret_cast(A)), fname, &view)); - } - PalacePetscCall(MatView(A, view)); - PalacePetscCall(PetscViewerDestroy(&view)); - } - else - { - PalacePetscCall(MatView(A, nullptr)); - } -} - -std::unique_ptr -#if defined(PETSC_USE_COMPLEX) -PetscParMatrix::GetHypreParMatrix(PetscParMatrix::ExtractStructure struc) const -#else -PetscParMatrix::GetHypreParMatrix() const -#endif -{ - HYPRE_BigInt M = GetGlobalNumRows(); - HYPRE_BigInt N = GetGlobalNumCols(); - std::unique_ptr rows, cols; - if (HYPRE_AssumedPartitionCheck()) - { - PetscInt start, end; - rows = std::make_unique(2); - PalacePetscCall(MatGetOwnershipRange(A, &start, &end)); - rows[0] = start; - rows[1] = end; - if (M != N) - { - cols = std::make_unique(2); - PalacePetscCall(MatGetOwnershipRangeColumn(A, &start, &end)); - cols[0] = start; - cols[1] = end; - } - } - else - { - PetscMPIInt comm_size; - const PetscInt *ranges; - MPI_Comm_size(GetComm(), &comm_size); - rows = std::make_unique(comm_size + 1); - PalacePetscCall(MatGetOwnershipRanges(A, &ranges)); - for (PetscMPIInt i = 0; i < comm_size + 1; i++) - { - rows[i] = ranges[i]; - } - if (M != N) - { - cols = std::make_unique(comm_size + 1); - PalacePetscCall(MatGetOwnershipRangesColumn(A, &ranges)); - for (PetscMPIInt i = 0; i < comm_size + 1; i++) - { - cols[i] = ranges[i]; - } - } - } - - // Count nonzeros. 
- MatInfo info; - PalacePetscCall(MatGetInfo(A, MAT_LOCAL, &info)); - PetscInt nnz = (PetscInt)info.nz_used; - - // Copy local CSR block of rows (columns in global numbering). - PetscInt rstart, rend, n; - const PetscInt *jj; - const PetscScalar *vals; - PalacePetscCall(MatGetOwnershipRange(A, &rstart, &rend)); - - int m = rend - rstart; - std::unique_ptr II = std::make_unique(m + 1); - std::unique_ptr JJ = std::make_unique(nnz); - std::unique_ptr data = std::make_unique(nnz); - nnz = 0; - - for (PetscInt i = rstart; i < rend; i++) - { - PalacePetscCall(MatGetRow(A, i, &n, &jj, &vals)); - II[i - rstart] = nnz; - for (PetscInt j = 0; j < n; j++) - { -#if defined(PETSC_USE_COMPLEX) - if (struc == ExtractStructure::REAL) - { - data[nnz] = PetscRealPart(vals[j]); - } - else if (struc == ExtractStructure::IMAGINARY) - { - data[nnz] = PetscImaginaryPart(vals[j]); - } - else // struc == ExtractStructure::SUM - { - data[nnz] = PetscRealPart(vals[j]) + PetscImaginaryPart(vals[j]); - } -#else - data[nnz] = vals[j]; -#endif - JJ[nnz++] = jj[j]; - } - PalacePetscCall(MatRestoreRow(A, i, &n, &jj, &vals)); - } - II[m] = nnz; - - // Create the HypreParMatrix (copies all inputs so memory of local variables is released - // after return). - if (M == N) - { - return std::make_unique(GetComm(), m, M, N, II.get(), JJ.get(), - data.get(), rows.get(), rows.get()); - } - else - { - return std::make_unique(GetComm(), m, M, N, II.get(), JJ.get(), - data.get(), rows.get(), cols.get()); - } -} - -PetscErrorCode Convert_Array_IS(MPI_Comm comm, bool islist, const mfem::Array &list, - PetscInt start, IS *is) -{ - // Converts from a list (or a marked Array if islist is false) to an IS. The offset where - // to start numbering is given as start. 
- PetscInt n = list.Size(), *idxs; - const auto *data = list.HostRead(); - PetscFunctionBeginUser; - - if (islist) - { - PetscCall(PetscMalloc1(n, &idxs)); - for (PetscInt i = 0; i < n; i++) - { - idxs[i] = data[i] + start; - } - } - else - { - PetscInt cum = 0; - for (PetscInt i = 0; i < n; i++) - { - if (data[i]) - { - cum++; - } - } - PetscCall(PetscMalloc1(cum, &idxs)); - cum = 0; - for (PetscInt i = 0; i < n; i++) - { - if (data[i]) - { - idxs[cum++] = i + start; - } - } - n = cum; - } - PetscCall(ISCreateGeneral(comm, n, idxs, PETSC_OWN_POINTER, is)); - PetscFunctionReturn(0); -} - -std::unique_ptr PetscParMatrix::GetSubMatrix(const mfem::Array &rows, - const mfem::Array &cols) -{ - PetscInt rst, cst; - IS row_is, col_is; - Mat B; - PalacePetscCall(MatSetOption(A, MAT_NO_OFF_PROC_ZERO_ROWS, PETSC_TRUE)); - // Rows need to be in global numbering. - PalacePetscCall(MatGetOwnershipRange(A, &rst, nullptr)); - PalacePetscCall(MatGetOwnershipRange(A, &cst, nullptr)); - PalacePetscCall(Convert_Array_IS(GetComm(), true, rows, rst, &row_is)); - PalacePetscCall(Convert_Array_IS(GetComm(), true, cols, cst, &col_is)); - PalacePetscCall(MatCreateSubMatrix(A, row_is, col_is, MAT_INITIAL_MATRIX, &B)); - PalacePetscCall(ISDestroy(&row_is)); - PalacePetscCall(ISDestroy(&col_is)); - return std::make_unique(B, false); -} - -std::unique_ptr PetscParMatrix::GetSequentialMatrix(bool create) -{ - IS row_is, col_is; - PetscInt nmat = create ? 1 : 0; - Mat *pB = nullptr, B = nullptr; - if (create) - { - PetscInt M = GetGlobalNumRows(), N = GetGlobalNumCols(); - PalacePetscCall(ISCreateStride(PETSC_COMM_SELF, M, 0, 1, &row_is)); - PalacePetscCall(ISCreateStride(PETSC_COMM_SELF, N, 0, 1, &col_is)); - } - PalacePetscCall(MatCreateSubMatrices(A, nmat, &row_is, &col_is, MAT_INITIAL_MATRIX, &pB)); - if (create) - { - PalacePetscCall(ISDestroy(&row_is)); - PalacePetscCall(ISDestroy(&col_is)); - B = pB[0]; - } - PalacePetscCall(PetscFree(pB)); - return (B) ? 
std::make_unique(B, false) : nullptr; -} - -MPI_Comm PetscParMatrix::GetComm() const -{ - return A ? PetscObjectComm(reinterpret_cast(A)) : MPI_COMM_NULL; -} - -PetscShellMatrix::PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&B) -{ - // Wrap the MFEM Operator as a PETSc shell, which inherets the underlying matrix storage - // (when the PETSc matrix is destroyed, so is the Hypre one). - MFEM_VERIFY(B, "Cannot construct PETSc shell from an empty matrix!"); - PetscInt m = (PetscInt)B->Height(); - PetscInt n = (PetscInt)B->Width(); - - PetscMatShellCtx *ctx = new PetscMatShellCtx; - ctx->Ar = std::move(B); -#if defined(PETSC_USE_COMPLEX) - ctx->Ai = nullptr; - ctx->x.SetSize(2 * n); - ctx->y.SetSize(2 * m); -#else - ctx->x.SetSize(n); - ctx->y.SetSize(m); -#endif - - PalacePetscCall(MatCreateShell(comm, m, n, PETSC_DECIDE, PETSC_DECIDE, (void *)ctx, &A)); - __mat_shell_init(A); -} - -#if defined(PETSC_USE_COMPLEX) -PetscShellMatrix::PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&Br, - std::unique_ptr &&Bi) -{ - MFEM_VERIFY(Br || Bi, "Cannot construct PETSc shell from an empty matrix!"); - MFEM_VERIFY((!Br || !Bi) || (Br->Height() == Bi->Height() && Br->Width() == Bi->Width()), - "Mismatch in dimension of real and imaginary matrix parts!"); - PetscInt m, n; - if (Br) - { - m = (PetscInt)Br->Height(); - n = (PetscInt)Br->Width(); - } - else - { - m = (PetscInt)Bi->Height(); - n = (PetscInt)Bi->Width(); - } - - PetscMatShellCtx *ctx = new PetscMatShellCtx; - ctx->Ar = std::move(Br); - ctx->Ai = std::move(Bi); - ctx->x.SetSize(2 * n); - ctx->y.SetSize(2 * m); - - PalacePetscCall(MatCreateShell(comm, m, n, PETSC_DECIDE, PETSC_DECIDE, (void *)ctx, &A)); - __mat_shell_init(A); -} -#endif - -PetscMatShellCtx *PetscShellMatrix::GetContext() const -{ - PetscMatShellCtx *ctx; - PalacePetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - return ctx; -} - -PetscInt PetscShellMatrix::NNZ() const -{ - HYPRE_BigInt nnz; - 
PetscMatShellCtx *ctx = GetContext(); -#if defined(PETSC_USE_COMPLEX) - MFEM_VERIFY(!(ctx->Ar && ctx->Ai), "Use NNZReal/NNZImag methods for complex matrices!"); - nnz = (ctx->Ar) ? dynamic_cast(*ctx->Ar).NNZ() - : ((ctx->Ai) ? dynamic_cast(*ctx->Ai).NNZ() : 0); -#else - nnz = (ctx->Ar) ? dynamic_cast(*ctx->Ar).NNZ() : 0; -#endif - return (PetscInt)nnz; -} - -#if defined(PETSC_USE_COMPLEX) -PetscInt PetscShellMatrix::NNZReal() const -{ - HYPRE_BigInt nnz; - PetscMatShellCtx *ctx = GetContext(); - nnz = (ctx->Ar) ? dynamic_cast(*ctx->Ar).NNZ() : 0; - return (PetscInt)nnz; -} - -PetscInt PetscShellMatrix::NNZImag() const -{ - HYPRE_BigInt nnz; - PetscMatShellCtx *ctx = GetContext(); - nnz = (ctx->Ai) ? dynamic_cast(*ctx->Ai).NNZ() : 0; - return (PetscInt)nnz; -} -#endif - -#if defined(PETSC_USE_COMPLEX) -PetscReal PetscShellMatrix::NormFReal() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - norm = (ctx->Ar) ? hypre_ParCSRMatrixFnorm(dynamic_cast(*ctx->Ar)) - : 0.0; - return norm; -} - -PetscReal PetscShellMatrix::NormFImag() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - norm = (ctx->Ai) ? 
hypre_ParCSRMatrixFnorm(dynamic_cast(*ctx->Ai)) - : 0.0; - return norm; -} - -PetscReal PetscShellMatrix::NormInfReal() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ar) - { - hypre_ParCSRMatrixInfNorm(dynamic_cast(*ctx->Ar), &norm); - } - else - { - norm = 0.0; - } - return norm; -} - -PetscReal PetscShellMatrix::NormInfImag() const -{ - HYPRE_Real norm; - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ai) - { - hypre_ParCSRMatrixInfNorm(dynamic_cast(*ctx->Ai), &norm); - } - else - { - norm = 0.0; - } - return norm; -} -#endif - -#if defined(PETSC_USE_COMPLEX) -void PetscShellMatrix::Mult(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - __mat_shell_apply(A, x, y); -} - -void PetscShellMatrix::MultTranspose(const mfem::Vector &x, PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - __mat_shell_apply_transpose(A, x, y); -} - -void PetscShellMatrix::MultHermitianTranspose(const mfem::Vector &x, - PetscParVector &y) const -{ - MFEM_VERIFY(x.Size() == GetNumCols() && y.GetSize() == GetNumRows(), - "Incorrect vector sizes for matrix-vector product!"); - __mat_shell_apply_hermitian_transpose(A, x, y); -} -#endif - -void PetscShellMatrix::Print(const char *fname, bool binary) const -{ - MFEM_VERIFY( - fname && !binary, - "PetscShellMatrix::Print only works with a specified filename and binary = false!") - PetscMatShellCtx *ctx = GetContext(); -#if defined(PETSC_USE_COMPLEX) - MFEM_VERIFY(!(ctx->Ar && ctx->Ai), - "Use PrintReal/PrintImag methods for complex matrices!"); - if (ctx->Ar) - { - dynamic_cast(*ctx->Ar).Print(fname); - } - else if (ctx->Ai) - { - dynamic_cast(*ctx->Ai).Print(fname); - } -#else - if (ctx->Ar) - { - dynamic_cast(*ctx->Ar).Print(fname); - } -#endif -} - -#if defined(PETSC_USE_COMPLEX) 
-void PetscShellMatrix::PrintReal(const char *fname) const -{ - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ar) - { - dynamic_cast(*ctx->Ar).Print(fname); - } -} - -void PetscShellMatrix::PrintImag(const char *fname) const -{ - PetscMatShellCtx *ctx = GetContext(); - if (ctx->Ai) - { - dynamic_cast(*ctx->Ai).Print(fname); - } -} -#endif - -#if defined(PETSC_USE_COMPLEX) -bool PetscShellMatrix::HasReal() const -{ - PetscMatShellCtx *ctx = GetContext(); - return (ctx->Ar != nullptr); -} - -bool PetscShellMatrix::HasImag() const -{ - PetscMatShellCtx *ctx = GetContext(); - return (ctx->Ai != nullptr); -} -#endif - -const mfem::Operator * -#if defined(PETSC_USE_COMPLEX) -PetscShellMatrix::GetOperator(PetscParMatrix::ExtractStructure struc) const -#else -PetscShellMatrix::GetOperator() const -#endif -{ - PetscMatShellCtx *ctx = GetContext(); -#if defined(PETSC_USE_COMPLEX) - if (struc == ExtractStructure::REAL) - { - MFEM_VERIFY(ctx->Ar, "Invalid use of GetOperator, no real matrix component defined!"); - return ctx->Ar.get(); - } - else if (struc == ExtractStructure::IMAGINARY) - { - MFEM_VERIFY(ctx->Ai, - "Invalid use of GetOperator, no imaginary matrix component defined!"); - return ctx->Ai.get(); - } - MFEM_ABORT("ExtractStructure::SUM is not implemented for PetscShellMatrix!"); - return nullptr; -#else - MFEM_VERIFY(ctx->Ar, "Invalid use of GetOperator, no matrix defined!"); - return ctx->Ar.get(); -#endif -} - -PetscAijMatrix::PetscAijMatrix(const mfem::Operator &B) -{ - auto hB = dynamic_cast(&B); - MFEM_VERIFY(hB, "PetscAijMatrix constructor requires Operator of type HypreParMatrix!"); - PalacePetscCall(__mat_convert_hypreParCSR_AIJ(*hB, &A)); -} - -#if defined(PETSC_USE_COMPLEX) -PetscAijMatrix::PetscAijMatrix(const mfem::Operator &Br, const mfem::Operator &Bi) -{ - Mat Ai; - auto hBr = dynamic_cast(&Br); - auto hBi = dynamic_cast(&Bi); - MFEM_VERIFY(hBr && hBi, - "PetscAijMatrix constructor requires Operator of type HypreParMatrix!"); - 
PalacePetscCall(__mat_convert_hypreParCSR_AIJ(*hBr, &A)); - PalacePetscCall(__mat_convert_hypreParCSR_AIJ(*hBi, &Ai)); - PalacePetscCall(MatAXPY(A, PETSC_i, Ai, UNKNOWN_NONZERO_PATTERN)); - PalacePetscCall(MatDestroy(&Ai)); -} -#endif - -PetscDenseMatrix::PetscDenseMatrix(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, - PetscInt N, PetscScalar *data) -{ - PalacePetscCall(MatCreateDense(comm, m, n, M, N, data, &A)); -} - -PetscDenseMatrix::PetscDenseMatrix(PetscInt m, PetscInt n, PetscScalar *data) -{ - PalacePetscCall(MatCreateSeqDense(PETSC_COMM_SELF, m, n, data, &A)); -} - -void PetscDenseMatrix::Resize(PetscInt m, PetscInt n, bool copy) -{ - Mat B; - PetscScalar *Aj, *Bj; - PetscInt m0 = GetNumRows(), n0 = GetNumCols(); - if (m0 == m && n0 == n) - { - return; - } - PalacePetscCall(MatCreateDense(GetComm(), m, n, PETSC_DECIDE, PETSC_DECIDE, nullptr, &B)); - if (copy) - { - for (PetscInt j = 0; j < std::min(n, n0); j++) - { - PalacePetscCall(MatDenseGetColumn(A, j, &Aj)); - PalacePetscCall(MatDenseGetColumn(B, j, &Bj)); - for (PetscInt i = 0; i < std::min(m, m0); i++) - { - Bj[i] = Aj[i]; - } - PalacePetscCall(MatDenseRestoreColumn(A, &Aj)); - PalacePetscCall(MatDenseRestoreColumn(B, &Bj)); - } - } - PalacePetscCall(MatPropagateSymmetryOptions(A, B)); - PalacePetscCall(MatDestroy(&A)); - A = B; -} - -PetscParVector PetscDenseMatrix::GetColumn(PetscInt j) -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec v; - PalacePetscCall(MatDenseGetColumnVec(A, j, &v)); - return PetscParVector(v, true); -} - -const PetscParVector PetscDenseMatrix::GetColumnRead(PetscInt j) const -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec v; - PalacePetscCall(MatDenseGetColumnVecRead(A, j, &v)); - return PetscParVector(v, true); -} - -void PetscDenseMatrix::RestoreColumn(PetscInt j, PetscParVector &v) -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec u = v; - 
PalacePetscCall(MatDenseRestoreColumnVec(A, j, &u)); -} - -void PetscDenseMatrix::RestoreColumnRead(PetscInt j, const PetscParVector &v) const -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - Vec u = v; - PalacePetscCall(MatDenseRestoreColumnVecRead(A, j, &u)); -} - -PetscScalar *PetscDenseMatrix::GetArray() -{ - PetscScalar *data; - PalacePetscCall(MatDenseGetArray(A, &data)); - return data; -} - -const PetscScalar *PetscDenseMatrix::GetArrayRead() const -{ - const PetscScalar *data; - PalacePetscCall(MatDenseGetArrayRead(A, &data)); - return data; -} - -void PetscDenseMatrix::RestoreArray(PetscScalar *data) -{ - PalacePetscCall(MatDenseRestoreArray(A, &data)); -} - -void PetscDenseMatrix::RestoreArrayRead(const PetscScalar *data) const -{ - PalacePetscCall(MatDenseRestoreArrayRead(A, &data)); -} - -void PetscDenseMatrix::SetRandom(PetscInt start, PetscInt end) -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); -#if defined(PETSC_USE_COMPLEX) - PalacePetscCall(PetscRandomSetInterval(rand, -1.0 - PETSC_i, 1.0 + PETSC_i)); -#else - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); -#endif - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetGlobalNumCols(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetGlobalNumCols(), - "Invalid start/end columns for SetRandom!"); - for (PetscInt j = start; j < end; j++) - { - PetscParVector v = GetColumn(j); - PalacePetscCall(VecSetRandom(v, rand)); - RestoreColumn(j, v); - } - PalacePetscCall(PetscRandomDestroy(&rand)); -} - -#if defined(PETSC_USE_COMPLEX) -void PetscDenseMatrix::SetRandomReal(PetscInt start, PetscInt end) -{ - PetscRandom rand; - MPI_Comm comm = GetComm(); - PalacePetscCall(PetscRandomCreate(comm, &rand)); - PalacePetscCall(PetscRandomSetInterval(rand, -1.0, 1.0)); - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetGlobalNumCols(); - } - MFEM_VERIFY(0 <= start && start <= end 
&& end <= GetGlobalNumCols(), - "Invalid start/end columns for SetRandom!"); - for (PetscInt j = start; j < end; j++) - { - PetscParVector v = GetColumn(j); - PalacePetscCall(VecSetRandom(v, rand)); - RestoreColumn(j, v); - } - PalacePetscCall(PetscRandomDestroy(&rand)); -} -#endif - -void PetscDenseMatrix::SetRandomSign(PetscInt start, PetscInt end, bool init) -{ - if (start < 0) - { - start = 0; - } - if (end < 0) - { - end = GetGlobalNumCols(); - } - MFEM_VERIFY(0 <= start && start <= end && end <= GetGlobalNumCols(), - "Invalid start/end columns for SetRandom!"); - if (!init) - { - SetRandomReal(start, end); - } - for (PetscInt j = start; j < end; j++) - { - PetscParVector v = GetColumn(j); - v.SetRandomSign(true); - RestoreColumn(j, v); - } -} - -PetscReal PetscDenseMatrix::OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2) -{ - auto Dot = [](const PetscParVector &v, const PetscParVector &w) -> PetscScalar - { return v.Dot(w); }; - auto VecDot = [](const PetscParVector &v, const PetscParMatrix &A, - PetscParVector &dot) -> void { A.MultHermitianTranspose(v, dot); }; - auto Normalize = [](PetscParVector &v) -> PetscReal { return v.Normalize(); }; - return OrthonormalizeColumnInternal(j, mgs, cgs2, Dot, VecDot, Normalize); -} - -PetscReal PetscDenseMatrix::OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2, - const PetscParMatrix &B, - PetscParVector &Bv) -{ - MFEM_VERIFY(Bv.GetSize() == B.GetNumRows(), - "Workspace error for B-matrix orthonormalization!"); - auto Dot = [&B, &Bv](const PetscParVector &v, const PetscParVector &w) -> PetscScalar - { - B.Mult(v, Bv); - return Bv.Dot(w); - }; - auto VecDot = [&B, &Bv](const PetscParVector &v, const PetscParMatrix &A, - PetscParVector &dot) -> void - { - B.Mult(v, Bv); - A.MultHermitianTranspose(Bv, dot); - }; - auto Normalize = [&B, &Bv](PetscParVector &v) -> PetscReal { return v.Normalize(B, Bv); }; - return OrthonormalizeColumnInternal(j, mgs, cgs2, Dot, VecDot, Normalize); -} - -PetscReal 
PetscDenseMatrix::OrthonormalizeColumnInternal( - PetscInt j, bool mgs, bool cgs2, - const std::function &Dot, - const std::function - &VecDot, - const std::function &Normalize) -{ - MFEM_VERIFY(j >= 0 && j < GetGlobalNumCols(), "Column index out of range!"); - PetscParVector v = GetColumn(j); - if (j > 0) - { - if (mgs) - { - // We can't call GetColumn twice. - PetscScalar *pA = GetArray(); - for (int i = 0; i < j; i++) - { - PetscParVector w(GetComm(), GetNumRows(), PETSC_DECIDE, pA + i * GetNumRows()); - PetscScalar dot = Dot(v, w); - v.AXPY(-dot, w); - } - RestoreArray(pA); - } - else - { - int refine = (cgs2) ? 2 : 1; - PetscScalar *pA = GetArray(); - for (int l = 0; l < refine; l++) - { - PetscDenseMatrix Aj(GetComm(), GetNumRows(), PETSC_DECIDE, PETSC_DECIDE, j, pA); - PetscParVector dot(Aj); - VecDot(v, Aj, dot); - dot.Scale(-1.0); - Aj.MultAdd(dot, v); - } - RestoreArray(pA); - } - } - PetscReal norm = Normalize(v); - MFEM_VERIFY(norm > 0.0, - "Linearly dependent column encountered during vector orthonormalization!"); - RestoreColumn(j, v); - // { - // // Debug - // Mpi::Print(GetComm(), "Orthogonality error (j = {:d}):\n", j); - // for (int ii = 0; ii <= j; ii++) - // { - // PetscParVector vv = GetColumn(ii); - // PetscScalar err = Dot(vv, vv); - // Mpi::Print(GetComm(), " ({:d}, {:d}): {:e}{:+e}i\n", ii, ii, PetscRealPart(err), - // PetscImaginaryPart(err)); - // PetscScalar *pA = GetArray(); - // for (int jj = ii + 1; jj <= j; jj++) - // { - // // We can't call GetColumn twice. 
- // PetscParVector ww(GetComm(), GetNumRows(), PETSC_DECIDE, pA + jj * GetNumRows()); - // err = Dot(vv, ww); - // Mpi::Print(GetComm(), " ({:d}, {:d}): {:e}{:+e}i\n", ii, jj, PetscRealPart(err), - // PetscImaginaryPart(err)); - // } - // RestoreArray(pA); - // RestoreColumn(ii, vv); - // } - // } - return norm; -} - -void PetscDenseMatrix::MatMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const -{ - MFEM_VERIFY(X.GetNumRows() == GetNumCols() && Y.GetNumRows() == GetNumRows(), - "Incorrect matrix sizes for matrix-matrix product!"); - MFEM_VERIFY(Mpi::Size(GetComm()) == 1, - "PetscDenseMatrix::MatMult is only implemented for sequential " - "matrices!"); - const PetscScalar *pA, *pX; - PetscScalar *pY; - PetscInt lda; - PetscBLASInt m, k, n, ldaA, ldaX, ldaY; - PetscScalar One = 1.0, Zero = 0.0; - PetscBLASIntCast(Y.GetNumRows(), &m); - PetscBLASIntCast(Y.GetNumCols(), &n); - PetscBLASIntCast(GetNumCols(), &k); - - PalacePetscCall(MatDenseGetLDA(A, &lda)); - PetscBLASIntCast(lda, &ldaA); - PalacePetscCall(MatDenseGetLDA(X, &lda)); - PetscBLASIntCast(lda, &ldaX); - PalacePetscCall(MatDenseGetLDA(Y, &lda)); - PetscBLASIntCast(lda, &ldaY); - - PalacePetscCall(MatDenseGetArrayRead(A, &pA)); - PalacePetscCall(MatDenseGetArrayRead(X, &pX)); - PalacePetscCall(MatDenseGetArrayWrite(Y, &pY)); - BLASgemm_("N", "N", &m, &n, &k, &One, pA, &ldaA, pX, &ldaX, &Zero, pY, &ldaY); - PalacePetscCall(MatDenseRestoreArrayRead(A, &pA)); - PalacePetscCall(MatDenseRestoreArrayRead(X, &pX)); - PalacePetscCall(MatDenseRestoreArrayWrite(Y, &pY)); -} - -void PetscDenseMatrix::MatMultTranspose(const PetscDenseMatrix &X, - PetscDenseMatrix &Y) const -{ - MFEM_VERIFY(X.GetNumCols() == GetNumCols() && Y.GetNumRows() == GetNumRows(), - "Incorrect matrix sizes for matrix-matrix product!"); - MFEM_VERIFY(Mpi::Size(GetComm()) == 1, - "PetscDenseMatrix::MatMultTranspose is only implemented for " - "sequential matrices!"); - const PetscScalar *pA, *pX; - PetscScalar *pY; - PetscInt lda; - 
PetscBLASInt m, k, n, ldaA, ldaX, ldaY; - PetscScalar One = 1.0, Zero = 0.0; - PetscBLASIntCast(Y.GetNumRows(), &m); - PetscBLASIntCast(Y.GetNumCols(), &n); - PetscBLASIntCast(GetNumCols(), &k); - - PalacePetscCall(MatDenseGetLDA(A, &lda)); - PetscBLASIntCast(lda, &ldaA); - PalacePetscCall(MatDenseGetLDA(X, &lda)); - PetscBLASIntCast(lda, &ldaX); - PalacePetscCall(MatDenseGetLDA(Y, &lda)); - PetscBLASIntCast(lda, &ldaY); - - PalacePetscCall(MatDenseGetArrayRead(A, &pA)); - PalacePetscCall(MatDenseGetArrayRead(X, &pX)); - PalacePetscCall(MatDenseGetArrayWrite(Y, &pY)); - BLASgemm_("N", "T", &m, &n, &k, &One, pA, &ldaA, pX, &ldaX, &Zero, pY, &ldaY); - PalacePetscCall(MatDenseRestoreArrayRead(A, &pA)); - PalacePetscCall(MatDenseRestoreArrayRead(X, &pX)); - PalacePetscCall(MatDenseRestoreArrayWrite(Y, &pY)); -} - -void PetscDenseMatrix::MatTransposeMult(const PetscDenseMatrix &X, - PetscDenseMatrix &Y) const -{ - MFEM_VERIFY(X.GetNumRows() == GetNumRows() && Y.GetNumRows() == GetNumCols(), - "Incorrect matrix sizes for matrix-matrix product!"); - MFEM_VERIFY(Mpi::Size(GetComm()) == 1, - "PetscDenseMatrix::MatTransposeMult is only implemented for " - "sequential matrices!"); - const PetscScalar *pA, *pX; - PetscScalar *pY; - PetscInt lda; - PetscBLASInt m, k, n, ldaA, ldaX, ldaY; - PetscScalar One = 1.0, Zero = 0.0; - PetscBLASIntCast(Y.GetNumRows(), &m); - PetscBLASIntCast(Y.GetNumCols(), &n); - PetscBLASIntCast(GetNumRows(), &k); - - PalacePetscCall(MatDenseGetLDA(A, &lda)); - PetscBLASIntCast(lda, &ldaA); - PalacePetscCall(MatDenseGetLDA(X, &lda)); - PetscBLASIntCast(lda, &ldaX); - PalacePetscCall(MatDenseGetLDA(Y, &lda)); - PetscBLASIntCast(lda, &ldaY); - - PalacePetscCall(MatDenseGetArrayRead(A, &pA)); - PalacePetscCall(MatDenseGetArrayRead(X, &pX)); - PalacePetscCall(MatDenseGetArrayWrite(Y, &pY)); - BLASgemm_("T", "N", &m, &n, &k, &One, pA, &ldaA, pX, &ldaX, &Zero, pY, &ldaY); - PalacePetscCall(MatDenseRestoreArrayRead(A, &pA)); - 
PalacePetscCall(MatDenseRestoreArrayRead(X, &pX)); - PalacePetscCall(MatDenseRestoreArrayWrite(Y, &pY)); -} - -} // namespace palace::petsc - -PetscErrorCode __mat_shell_init(Mat A) -{ - PetscFunctionBeginUser; - - PalacePetscCall(MatShellSetManageScalingShifts(A)); - PalacePetscCall(MatShellSetOperation(A, MATOP_DESTROY, (void (*)())__mat_shell_destroy)); - PetscCall(MatShellSetOperation( - A, MATOP_MULT, - (void (*)()) static_cast(&__mat_shell_apply))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_TRANSPOSE, - (void (*)()) static_cast( - &__mat_shell_apply_transpose))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANSPOSE, - (void (*)()) static_cast( - &__mat_shell_apply_hermitian_transpose))); - PetscCall(MatShellSetOperation( - A, MATOP_MULT_ADD, - (void (*)()) static_cast(&__mat_shell_apply_add))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_TRANSPOSE_ADD, - (void (*)()) static_cast( - &__mat_shell_apply_transpose_add))); - PetscCall( - MatShellSetOperation(A, MATOP_MULT_HERMITIAN_TRANS_ADD, - (void (*)()) static_cast( - &__mat_shell_apply_hermitian_transpose_add))); - PetscCall( - MatShellSetOperation(A, MATOP_GET_DIAGONAL, (void (*)())__mat_shell_get_diagonal)); - // PetscCall(MatShellSetOperation(A, MATOP_SHIFT, (void (*)())__mat_shell_shift)); - // PetscCall(MatShellSetOperation(A, MATOP_SCALE, (void (*)())__mat_shell_scale)); - // PetscCall(MatShellSetOperation(A, MATOP_CONJUGATE, (void (*)())__mat_shell_conj)); - // PetscCall(MatShellSetOperation(A, MATOP_AXPY, (void (*)())__mat_shell_axpy)); - // PetscCall(MatShellSetOperation(A, MATOP_NORM, (void (*)())__mat_shell_norm)); - PetscCall(MatShellSetOperation(A, MATOP_REAL_PART, (void (*)())__mat_shell_real_part)); - PetscCall( - MatShellSetOperation(A, MATOP_IMAGINARY_PART, (void (*)())__mat_shell_imag_part)); - PetscCall(MatSetUp(A)); - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_destroy(Mat A) -{ - palace::petsc::PetscMatShellCtx *ctx; - PetscFunctionBeginUser; - 
- PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - delete ctx; - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_add(Mat A, Vec x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - { - mfem::Vector xr, xi, yr, yi; - xr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - xi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - yr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - yi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - xx.GetToVectors(xr, xi); - if (ctx->Ar) - { - ctx->Ar->Mult(xr, yr); - ctx->Ar->Mult(xi, yi); - } - else - { - yr = 0.0; - yi = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->AddMult(xi, yr, -1.0); - ctx->Ai->AddMult(xr, yi, 1.0); - } - yy.AddFromVectors(yr, yi); - } -#else - { - xx.GetToVector(ctx->x); - if (ctx->Ar) - { - ctx->Ar->Mult(ctx->x, ctx->y); - } - else - { - ctx->y = 0.0; - } - yy.AddFromVector(ctx->y); - } -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply(Mat A, Vec x, Vec y) -{ - PetscCall(VecZeroEntries(y)); - return __mat_shell_apply_add(A, x, y); -} - -PetscErrorCode __mat_shell_apply_transpose_add(Mat A, Vec x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply_add(A, x, y)); - PetscFunctionReturn(0); - } -#if defined(PETSC_USE_COMPLEX) - { - mfem::Vector xr, xi, yr, yi; - xr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - xi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - 
yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - xx.GetToVectors(xr, xi); - if (ctx->Ar) - { - ctx->Ar->MultTranspose(xr, yr); - ctx->Ar->MultTranspose(xi, yi); - } - else - { - yr = 0.0; - yi = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->AddMultTranspose(xi, yr, -1.0); - ctx->Ai->AddMultTranspose(xr, yi, 1.0); - } - yy.AddFromVectors(yr, yi); - } -#else - { - xx.GetToVector(ctx->y); - if (ctx->Ar) - { - ctx->Ar->MultTranspose(ctx->y, ctx->x); - } - else - { - ctx->x = 0.0; - } - yy.AddFromVector(ctx->x); - } -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_transpose(Mat A, Vec x, Vec y) -{ - PetscCall(VecZeroEntries(y)); - return __mat_shell_apply_transpose_add(A, x, y); -} - -PetscErrorCode __mat_shell_apply_hermitian_transpose_add(Mat A, Vec x, Vec y) -{ -#if defined(PETSC_USE_COMPLEX) - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsHermitianKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply_add(A, x, y)); - PetscFunctionReturn(0); - } - if (!ctx->Ai) - { - PetscCall(__mat_shell_apply_transpose_add(A, x, y)); - PetscFunctionReturn(0); - } - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - { - mfem::Vector xr, xi, yr, yi; - xr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - xi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - xx.GetToVectors(xr, xi); - if (ctx->Ar) - { - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ar->Mult(xr, yr); - ctx->Ar->Mult(xi, yi); - } - else - { - ctx->Ar->MultTranspose(xr, yr); - ctx->Ar->MultTranspose(xi, yi); - } - } - else - { - yr = 0.0; - yi = 0.0; - } - if (ctx->Ai) - { - 
if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ai->AddMult(xi, yr, 1.0); - ctx->Ai->AddMult(xr, yi, -1.0); - } - else - { - ctx->Ai->AddMultTranspose(xi, yr, 1.0); - ctx->Ai->AddMultTranspose(xr, yi, -1.0); - } - } - yy.AddFromVectors(yr, yi); - } -#else - PetscCall(__mat_shell_apply_transpose_add(A, x, y)); -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat A, Vec x, Vec y) -{ - PetscCall(VecZeroEntries(y)); - return __mat_shell_apply_hermitian_transpose_add(A, x, y); -} - -#if defined(PETSC_USE_COMPLEX) -PetscErrorCode __mat_shell_apply(Mat A, const mfem::Vector &x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector yy(y, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - { - mfem::Vector yr, yi; - yr.MakeRef(ctx->y, 0, ctx->y.Size() / 2); - yi.MakeRef(ctx->y, ctx->y.Size() / 2, ctx->y.Size() / 2); - if (ctx->Ar) - { - ctx->Ar->Mult(x, yr); - } - else - { - yr = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->Mult(x, yi); - } - else - { - yi = 0.0; - } - yy.SetFromVectors(yr, yi); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_transpose(Mat A, const mfem::Vector &x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply(A, x, y)); - PetscFunctionReturn(0); - } - { - mfem::Vector yr, yi; - yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - if (ctx->Ar) - { - ctx->Ar->MultTranspose(x, yr); - } - else - { - yr = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->MultTranspose(x, yi); - } - else - { - yi = 0.0; - 
} - yy.SetFromVectors(yr, yi); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_apply_hermitian_transpose(Mat A, const mfem::Vector &x, Vec y) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector yy(y, true); - PetscBool flg, sym; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); - PetscCall(MatIsHermitianKnown(A, &flg, &sym)); - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - PetscCall(__mat_shell_apply(A, x, y)); - PetscFunctionReturn(0); - } - if (!ctx->Ai) - { - PetscCall(__mat_shell_apply_transpose(A, x, y)); - PetscFunctionReturn(0); - } - { - mfem::Vector yr, yi; - yr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - yi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - PetscCall(MatIsSymmetricKnown(A, &flg, &sym)); - if (ctx->Ar) - { - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ar->Mult(x, yr); - } - else - { - ctx->Ar->MultTranspose(x, yr); - } - } - else - { - yr = 0.0; - } - if (ctx->Ai) - { - if (flg == PETSC_TRUE && sym == PETSC_TRUE) - { - ctx->Ai->Mult(x, yi); - } - else - { - ctx->Ai->MultTranspose(x, yi); - } - yi.Neg(); - } - else - { - yi = 0.0; - } - yy.SetFromVectors(yr, yi); - } - PetscFunctionReturn(0); -} -#endif - -PetscErrorCode __mat_shell_get_diagonal(Mat A, Vec diag) -{ - palace::petsc::PetscMatShellCtx *ctx; - palace::petsc::PetscParVector ddiag(diag, true); - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(A, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - { - mfem::Vector xr, xi; - xr.MakeRef(ctx->x, 0, ctx->x.Size() / 2); - xi.MakeRef(ctx->x, ctx->x.Size() / 2, ctx->x.Size() / 2); - if (ctx->Ar) - { - ctx->Ar->AssembleDiagonal(xr); - } - else - { - xr = 0.0; - } - if (ctx->Ai) - { - ctx->Ai->AssembleDiagonal(xi); - } - else - { - xi = 0.0; - } - ddiag.SetFromVectors(xr, xi); - } -#else - { - if (ctx->Ar) - { - 
ctx->Ar->AssembleDiagonal(ctx->x); - } - else - { - ctx->x = 0.0; - } - ddiag.SetFromVector(ctx->x); - } -#endif - PetscFunctionReturn(0); -} - -// PetscErrorCode __mat_shell_shift(Mat Y, PetscScalar a) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// HYPRE_Real as; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(Y, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// as = PetscRealPart(a); // Does nothing if not PETSC_USE_COMPLEX -// if (std::abs(as) > 0.0) -// { -// auto hAr = dynamic_cast(ctx->Ar.get()); -// MFEM_VERIFY(hAr, "Invalid real shift with no real matrix part!"); -// int n = hAr->Height(); -// const hypre_ParCSRMatrix *A = *hAr; -// const HYPRE_Int *A_diag_i = A->diag->i; -// HYPRE_Real *A_diag_d = A->diag->data; -// for (int j = 0; j < n; j++) -// { -// A_diag_d[A_diag_i[j]] += as; -// } -// } -// #if defined(PETSC_USE_COMPLEX) -// as = PetscImaginaryPart(a); -// if (std::abs(as) > 0.0) -// { -// auto hAi = dynamic_cast(ctx->Ai.get()); -// MFEM_VERIFY(hAi, "Invalid imaginary shift with no imaginary matrix part!"); -// int n = hAi->Height(); -// const hypre_ParCSRMatrix *A = *hAi; -// const HYPRE_Int *A_diag_i = A->diag->i; -// HYPRE_Real *A_diag_d = A->diag->data; -// for (int j = 0; j < n; j++) -// { -// A_diag_d[A_diag_i[j]] += as; -// } -// } -// #endif -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_scale(Mat Y, PetscScalar a) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(Y, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// if (a == 0.0) -// { -// ctx->Ar.reset(); -// #if defined(PETSC_USE_COMPLEX) -// ctx->Ai.reset(); -// #endif -// } -// else -// { -// #if defined(PETSC_USE_COMPLEX) -// HYPRE_Real ar, ai; -// ar = PetscRealPart(a); -// ai = PetscImaginaryPart(a); -// if (std::abs(ar) > 0.0 && std::abs(ai) == 0.0) -// { -// if (ctx->Ar) -// { -// *ctx->Ar *= ar; -// } -// if (ctx->Ai) 
-// { -// *ctx->Ai *= ar; -// } -// } -// else if (std::abs(ai) > 0.0 && std::abs(ar) == 0.0) -// { -// ctx->Ar.swap(ctx->Ai); -// if (ctx->Ar) -// { -// *ctx->Ar *= -ai; -// } -// if (ctx->Ai) -// { -// *ctx->Ai *= ai; -// } -// } -// else -// { -// // General complex coefficient case. -// mfem::HypreParMatrix *aYr, *aYi; -// if (ctx->Ar && ctx->Ai) -// { -// aYr = mfem::Add(ar, *ctx->Ar, -ai, *ctx->Ai); -// aYi = mfem::Add(ai, *ctx->Ar, ar, *ctx->Ai); -// ctx->Ar.reset(aYr); -// ctx->Ai.reset(aYi); -// } -// else if (!ctx->Ar) -// { -// ctx->Ar = std::make_unique(*ctx->Ai); -// *ctx->Ar *= -ai; -// *ctx->Ai *= ar; -// } -// else // !ctx->Ai -// { -// ctx->Ai = std::make_unique(*ctx->Ar); -// *ctx->Ar *= ar; -// *ctx->Ai *= ai; -// } -// } -// #else -// if (ctx->Ar) -// { -// *ctx->Ar *= a; -// } -// #endif -// } -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_conj(Mat Y) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(Y, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// #if defined(PETSC_USE_COMPLEX) -// if (ctx->Ai) -// { -// *ctx->Ai *= -1.0; -// } -// #endif -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_axpy(Mat Y, PetscScalar a, Mat X, MatStructure str) -// { -// palace::petsc::PetscMatShellCtx *ctxY, *ctxX; -// #if defined(PETSC_USE_COMPLEX) -// HYPRE_Real ar, ai; -// #endif -// PetscFunctionBeginUser; - -// auto Add = [&str](std::unique_ptr &Y, HYPRE_Real a, -// const std::unique_ptr &X) -// { -// if (Y) -// { -// if (str == SAME_NONZERO_PATTERN) -// { -// Y->Add(a, *X); -// } -// else -// { -// Y.reset(mfem::Add(1.0, *Y, a, *X)); -// } -// } -// else -// { -// Y = std::unique_ptr(*X); -// *Y *= a; -// } -// }; -// PetscCall(MatShellGetContext(Y, (void **)&ctxY)); -// PetscCall(MatShellGetContext(X, (void **)&ctxX)); -// MFEM_VERIFY(ctxY && ctxX, "Invalid PETSc shell matrix contexts!"); -// #if defined(PETSC_USE_COMPLEX) -// 
ar = PetscRealPart(a); -// ai = PetscImaginaryPart(a); -// if (std::abs(ar) > 0.0) -// { -// if (ctxX->Ar) -// { -// Add(ctxY->Ar, ar, ctxX->Ar); -// } -// if (ctxX->Ai) -// { -// Add(ctxY->Ai, ar, ctxX->Ai); -// } -// } -// else if (std::abs(ai) > 0.0) -// { -// if (ctxX->Ai) -// { -// Add(ctxY->Ar, -ai, ctxX->Ai); -// } -// if (ctxX->Ar) -// { -// Add(ctxY->Ai, ai, ctxX->Ar); -// } -// } -// #else -// if (std::abs(a) > 0.0 && ctxX->Ar) -// { -// Add(ctxY->Ar, a, ctxX->Ar); -// } -// #endif -// PetscFunctionReturn(0); -// } - -// PetscErrorCode __mat_shell_norm(Mat A, NormType type, PetscReal *norm) -// { -// palace::petsc::PetscMatShellCtx *ctx; -// PetscFunctionBeginUser; - -// PetscCall(MatShellGetContext(A, (void **)&ctx)); -// MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -// switch (type) -// { -// case NORM_FROBENIUS: -// #if defined(PETSC_USE_COMPLEX) -// *norm = std::hypot((ctx->Ar) ? hypre_ParCSRMatrixFnorm(*ctx->Ar) : 0.0, -// (ctx->Ai) ? hypre_ParCSRMatrixFnorm(*ctx->Ai) : 0.0); -// #else -// *norm = (ctx->Ar) ? hypre_ParCSRMatrixFnorm(*ctx->Ar) : 0.0; -// #endif -// break; -// case NORM_INFINITY: // Max absolute row sum -// #if defined(PETSC_USE_COMPLEX) -// if (!ctx->Ar && !ctx->Ai) -// { -// *norm = 0.0; -// } -// else if (ctx->Ar && !ctx->Ai) -// { -// hypre_ParCSRMatrixInfNorm(*ctx->Ar, norm); -// } -// else if (ctx->Ai && !ctx->Ar) -// { -// hypre_ParCSRMatrixInfNorm(*ctx->Ai, norm); -// } -// else -// { -// // Need to consider both real and imaginary parts of the matrix. 
-// hypre::hypreParCSRInfNorm(*ctx->Ar, *ctx->Ai, norm); -// } -// #else -// if (ctx->Ar) -// { -// hypre_ParCSRMatrixInfNorm(*ctx->Ar, norm); -// } -// else -// { -// *norm = 0.0; -// } -// #endif -// break; -// case NORM_1: // Max absolute column sum (not supported yet) -// MFEM_ABORT("Unsupported matrix norm type!"); -// } -// PetscFunctionReturn(0); -// } - -PetscErrorCode __mat_shell_real_part(Mat Y) -{ - palace::petsc::PetscMatShellCtx *ctx; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(Y, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - ctx->Ai.reset(); -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_shell_imag_part(Mat Y) -{ - palace::petsc::PetscMatShellCtx *ctx; - PetscFunctionBeginUser; - - PetscCall(MatShellGetContext(Y, (void **)&ctx)); - MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context!"); -#if defined(PETSC_USE_COMPLEX) - ctx->Ar = std::move(ctx->Ai); -#endif - PetscFunctionReturn(0); -} - -PetscErrorCode __mat_convert_hypreParCSR_AIJ(hypre_ParCSRMatrix *hA, Mat *pA) -{ - // Same as PETSc's MatConvert_HYPRE_AIJ function with mtype == MATAIJ, reuse == - // MAT_INITIAL_MATRIX, and sameint = true. Slightly modified to allow for using real - // Hypre matrices (double type) to construct a PETSc matrix with general complex entires - // (if PETSC_USE_COMPLEX is enabled). See also MFEM's MatConvert_hypreParCSR_AIJ which is - // a copy of the PETSc version. 
- hypre_CSRMatrix *hdiag, *hoffd; - MPI_Comm comm; - HYPRE_Int dnnz, onnz, m, n; - PetscScalar *da, *oa, *aptr; - PetscInt *dii, *djj, *oii, *ojj, *iptr; - PetscInt i; - PetscMPIInt size; - PetscBool sameint = (PetscBool)(sizeof(PetscInt) == sizeof(HYPRE_Int)); - PetscFunctionBeginUser; - - comm = hypre_ParCSRMatrixComm(hA); - MPI_Comm_size(comm, &size); - hdiag = hypre_ParCSRMatrixDiag(hA); - hoffd = hypre_ParCSRMatrixOffd(hA); - m = hypre_CSRMatrixNumRows(hdiag); - n = hypre_CSRMatrixNumCols(hdiag); - dnnz = hypre_CSRMatrixNumNonzeros(hdiag); - onnz = hypre_CSRMatrixNumNonzeros(hoffd); - PetscCall(PetscMalloc1(m + 1, &dii)); - PetscCall(PetscMalloc1(dnnz, &djj)); - PetscCall(PetscMalloc1(dnnz, &da)); - // MFEM_VERIFY(sizeof(HYPRE_Int) == sizeof(PetscInt), - // "Index size mismatch inf Hypre-PETSc MatConvert!"); - if (sameint) - { - PetscCall(PetscArraycpy(dii, hypre_CSRMatrixI(hdiag), m + 1)); - PetscCall(PetscArraycpy(djj, hypre_CSRMatrixJ(hdiag), dnnz)); - } - else - { - for (i = 0; i < m + 1; i++) - { - dii[i] = (PetscInt)(hypre_CSRMatrixI(hdiag)[i]); - } - for (i = 0; i < dnnz; i++) - { - djj[i] = (PetscInt)(hypre_CSRMatrixJ(hdiag)[i]); - } - } - // This loop replaces the call to PetscArraycpy to convert HYPRE_Complex to PetscScalar - // values. 
- for (i = 0; i < dnnz; i++) - { - da[i] = (PetscScalar)(hypre_CSRMatrixData(hdiag)[i]); - } - iptr = djj; - aptr = da; - for (i = 0; i < m; i++) - { - PetscInt nc = dii[i + 1] - dii[i]; - PetscCall(PetscSortIntWithScalarArray(nc, iptr, aptr)); - iptr += nc; - aptr += nc; - } - if (size > 1) - { - HYPRE_BigInt *coffd; - PetscCall(PetscMalloc1(m + 1, &oii)); - PetscCall(PetscMalloc1(onnz, &ojj)); - PetscCall(PetscMalloc1(onnz, &oa)); - if (sameint) - { - PetscCall(PetscArraycpy(oii, hypre_CSRMatrixI(hoffd), m + 1)); - } - else - { - for (i = 0; i < m + 1; i++) - { - oii[i] = (PetscInt)(hypre_CSRMatrixI(hoffd)[i]); - } - } - coffd = hypre_ParCSRMatrixColMapOffd(hA); - for (i = 0; i < onnz; i++) - { - ojj[i] = (PetscInt)coffd[hypre_CSRMatrixJ(hoffd)[i]]; - } - for (i = 0; i < onnz; i++) - { - oa[i] = (PetscScalar)(hypre_CSRMatrixData(hoffd)[i]); - } - iptr = ojj; - aptr = oa; - for (i = 0; i < m; i++) - { - PetscInt nc = oii[i + 1] - oii[i]; - PetscCall(PetscSortIntWithScalarArray(nc, iptr, aptr)); - iptr += nc; - aptr += nc; - } - PetscCall(MatCreateMPIAIJWithSplitArrays(comm, m, n, PETSC_DECIDE, PETSC_DECIDE, dii, - djj, da, oii, ojj, oa, pA)); - } - else - { - oii = ojj = nullptr; - oa = nullptr; - PetscCall(MatCreateSeqAIJWithArrays(comm, m, n, dii, djj, da, pA)); - } - // We are responsible to free the CSR arrays. However, since we can take references of a - // PetscParMatrix but we cannot take reference of PETSc arrays, we need to create a - // PetscContainer object to take reference of these arrays in reference objects. 
- void *ptrs[6] = {dii, djj, da, oii, ojj, oa}; - const char *names[6] = {"_csr_dii", "_csr_djj", "_csr_da", - "_csr_oii", "_csr_ojj", "_csr_oa"}; - for (i = 0; i < 6; i++) - { - PetscContainer c; - PetscCall(PetscContainerCreate(comm, &c)); - PetscCall(PetscContainerSetPointer(c, ptrs[i])); - PetscCall(PetscContainerSetUserDestroy(c, __array_container_destroy)); - PetscCall(PetscObjectCompose(reinterpret_cast(*pA), names[i], - reinterpret_cast(c))); - PetscCall(PetscContainerDestroy(&c)); - } - PetscFunctionReturn(0); -} - -PetscErrorCode __array_container_destroy(void *ptr) -{ - PetscFunctionBeginUser; - - PetscCall(PetscFree(ptr)); - PetscFunctionReturn(0); -} diff --git a/palace/linalg/petsc.hpp b/palace/linalg/petsc.hpp index fb9eddf2d..b62f5e3fb 100644 --- a/palace/linalg/petsc.hpp +++ b/palace/linalg/petsc.hpp @@ -4,16 +4,18 @@ #ifndef PALACE_LINALG_PETSC_HPP #define PALACE_LINALG_PETSC_HPP +#if defined(PALACE_WITH_SLEPC) + #include #include +#if !defined(PETSC_USE_REAL_DOUBLE) +#error "PETSc should be compiled with double precision!" +#endif #if defined(PETSC_HAVE_HYPRE) #error \ "PETSc should be built without Hypre to avoid conflicts with MFEM's Hypre dependency!" #endif -#if !defined(PETSC_USE_REAL_DOUBLE) -#error "PETSc should be compiled with double precision!" -#endif #if defined(PETSC_USE_64BIT_INDICES) && !(defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT)) #warning "Mismatch between big HYPRE (32bit) and PETSc (64bit) integer types!" #endif @@ -21,592 +23,14 @@ #warning "Mismatch between big HYPRE (64bit) and PETSc (32bit) integer types!" #endif -#include -#include - // Forward declarations of PETSc objects. -typedef struct _p_PetscSF *VecScatter; typedef struct _p_Vec *Vec; typedef struct _p_Mat *Mat; -typedef struct _p_KSP *KSP; -typedef struct _p_PC *PC; // Error handling similar to Petsc's PetscCallAbort but always aborts on the global // PETSC_COMM_WORLD communicator. #define PalacePetscCall(...) 
PetscCallAbort(PETSC_COMM_WORLD, __VA_ARGS__) -namespace palace::petsc -{ - -// -// A minimal implementation of MFEM's PETSc wrappers to support PETSc built with complex -// numbers. -// - -class PetscParMatrix; -class PetscParVector; - -// Wrappers for PetscInitialize/PetscFinalize. -void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]); -void Finalize(); - -// Wrapper for PETSc's vector scatter class. -class PetscScatter -{ -public: - enum class Type - { - TO_ZERO, - TO_ALL - }; - -private: - // The actual PETSc object. - VecScatter ctx; - -public: - // Creates a scatter context that copies all entries from the parallel vector to either - // all processes or to the root process. Allocates the - PetscScatter(Type type, const PetscParVector &x, std::unique_ptr &y); - - // Calls PETSc's destroy function. - ~PetscScatter(); - - // Routines for forward/reverse scattering. - void Forward(const PetscParVector &x, PetscParVector &y); - void Reverse(const PetscParVector &x, PetscParVector &y); -}; - -// Wrapper for PETSc's vector class. -class PetscParVector -{ -private: - // The actual PETSc object. - Vec x; - -public: - // Creates vector compatible with (i.e. in the domain of) A or Aᵀ. - PetscParVector(const PetscParMatrix &A, bool transpose = false); - - // Parallel and serial copy constructors from MFEM's Vector object. - PetscParVector(MPI_Comm comm, const mfem::Vector &y); - PetscParVector(const mfem::Vector &y); -#if defined(PETSC_USE_COMPLEX) - PetscParVector(MPI_Comm comm, const mfem::Vector &yr, const mfem::Vector &yi); - PetscParVector(const mfem::Vector &yr, const mfem::Vector &yi); -#endif - - // Create a parallel or sequential PETSc vector with the provided dimension. - PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N); - // PetscParVector(PetscInt n); - - // Create a parallel or sequential PETSc vector with a data array. 
- PetscParVector(MPI_Comm comm, PetscInt n, PetscInt N, PetscScalar *data); - PetscParVector(PetscInt n, PetscScalar *data); - - // Copy constructor, calls VecDuplicate. - PetscParVector(const PetscParVector &y); - - // Constructor which wraps an existing PETSc Vec object and takes over ownership unless - // ref is true. - PetscParVector(Vec y, bool ref); - - // Calls PETSc's destroy function. - virtual ~PetscParVector(); - - // Copy to/from MFEM's Vector type. - void GetToVector(mfem::Vector &v, PetscInt start = -1, PetscInt end = -1) const; - void SetFromVector(const mfem::Vector &v); - void AddFromVector(const mfem::Vector &v); -#if defined(PETSC_USE_COMPLEX) - void GetToVectors(mfem::Vector &vr, mfem::Vector &vi, PetscInt start = -1, - PetscInt end = -1) const; - void SetFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); - void AddFromVectors(const mfem::Vector &vr, const mfem::Vector &vi); -#endif - - // Access the data array of the vector. - PetscScalar *GetArray(); - const PetscScalar *GetArrayRead() const; - void RestoreArray(PetscScalar *data); - void RestoreArrayRead(const PetscScalar *data) const; - - // Temporarily replace the data array of the vector. - void PlaceArray(const PetscScalar *data); - void ResetArray(); - - // Copy entries of y to x. - void Copy(const PetscParVector &y); - - // Returns the local vector size. - PetscInt GetSize() const; - - // Returns the global vector size. - PetscInt GetGlobalSize() const; - - // Set the (local) vector dimension to n, copying previous contents to the upper block. - void Resize(PetscInt n, bool copy = false); - - // Zero all entries of the vector. - void SetZero(); - - // Sets all entries of the vector to random numbers sampled from the range[-1-i, 1+i], or - // [-1, 1]. - void SetRandom(); -#if defined(PETSC_USE_COMPLEX) - void SetRandomReal(); -#else - void SetRandomReal() { SetRandom(); } #endif - void SetRandomSign(bool init = false); - - // Set all entries to s. 
- PetscParVector &operator=(PetscScalar s); - - // Scale all entries by s. - void Scale(PetscScalar s); - - // Shift all entries by +s. - void Shift(PetscScalar s); - - // Compute pointwise |x|. - void Abs(); - - // Compute pointwise sqrt(|x|). - void SqrtAbs(); - - // Compute pointwise 1/x. - void Inv(); - - // Compute pointwise 1/sqrt(x). - void InvSqrt(); - -#if defined(PETSC_USE_COMPLEX) - // Replace entries with complex conjugate. - void Conj(); - - // Zero the imaginary part of the vector. - void GetRealPart(); - - // Move the imaginary part to the real part of the vector. - void GetImagPart(); -#endif - - // Normalize the vector. - PetscReal Normalize(); - PetscReal Normalize(const PetscParMatrix &B, PetscParVector &Bv); - - // Calculate the vector 2-norm. - PetscReal Norml2() const; - - // Calculate the vector infinity-norm. - PetscReal Normlinf() const; - - // Zero specified (local) rows of the vector. - void ZeroRows(const mfem::Array &rows); - - // Pointwise multiplication x *= y. - void PointwiseMult(const PetscParVector &y, bool replace_zeros); - - // In-place addition x += alpha * y. - void AXPY(PetscScalar alpha, const PetscParVector &y); - - // In-place addition x = alpha * y + beta * x. - void AXPBY(PetscScalar alpha, const PetscParVector &y, PetscScalar beta); - - // In-place addition x = alpha * y + beta * z + gamma * x. - void AXPBYPCZ(PetscScalar alpha, const PetscParVector &y, PetscScalar beta, - const PetscParVector &z, PetscScalar gamma); - - // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. - PetscScalar Dot(const PetscParVector &y) const; - PetscScalar TransposeDot(const PetscParVector &y) const; - - // Prints the vector (to stdout if fname is nullptr). - void Print(const char *fname = nullptr, bool binary = false) const; - - // Get the associated MPI communicator. - MPI_Comm GetComm() const; - - // Typecasting to PETSc's Vec type. - operator Vec() const { return x; } - - // Typecasting to PETSc object. 
- operator PetscObject() const { return reinterpret_cast(x); } -}; - -// Base wrapper for PETSc's matrix class. -class PetscParMatrix -{ -public: - enum class NNZStructure - { - DIFFERENT, - SAME, - SUBSET - }; - -#if defined(PETSC_USE_COMPLEX) - enum class ExtractStructure - { - REAL, - IMAGINARY, - SUM - }; -#endif - -protected: - // The actual PETSc object. - Mat A; - - // Default constructor for derived classes. - PetscParMatrix() : A(nullptr) {} - -public: - // Copy constructor, calls MatDuplicate. - PetscParMatrix(const PetscParMatrix &B); - - // Constructor which wraps an existing PETSc Mat object and takes over ownership unless - // ref is true. - PetscParMatrix(Mat B, bool ref); - - // Calls PETSc's destroy function. - virtual ~PetscParMatrix(); - - // Get/set symmetric or Hermitian flags for the matrix. When setting the flags, it is - // assumed the structure does not change for the lifetime of the matrix(unless explicitly - // set again). - void SetSymmetric(bool sym = true); - void SetHermitian(bool herm = true); - bool GetSymmetric() const; - bool GetHermitian() const; -#if defined(PETSC_USE_COMPLEX) - void SetRealSymmetric(); -#endif - void CopySymmetry(const PetscParMatrix &B); - - // Returns the local number of rows. - PetscInt GetNumRows() const; - PetscInt Height() const { return GetNumRows(); } - - // Returns the local number of columns. - PetscInt GetNumCols() const; - PetscInt Width() const { return GetNumCols(); } - - // Returns the global number of rows. - PetscInt GetGlobalNumRows() const; - - // Returns the global number of columns. - PetscInt GetGlobalNumCols() const; - - // Returns the number of nonzeros. 
- virtual PetscInt NNZ() const; -#if defined(PETSC_USE_COMPLEX) - virtual PetscInt NNZReal() const - { - MFEM_ABORT("NNZReal is not supported for base class PetscParMatrix!"); - return 0; - } - virtual PetscInt NNZImag() const - { - MFEM_ABORT("NNZImag is not supported for base class PetscParMatrix!"); - return 0; - } -#endif - - // Calculate matrix Frobenius and infinity norms. - PetscReal NormF() const; - PetscReal NormInf() const; -#if defined(PETSC_USE_COMPLEX) - virtual PetscReal NormFReal() const - { - MFEM_ABORT("NormFReal is not supported for base class PetscParMatrix!"); - return 0.0; - } - virtual PetscReal NormFImag() const - { - MFEM_ABORT("NormFImag is not supported for base class PetscParMatrix!"); - return 0.0; - } - virtual PetscReal NormInfReal() const - { - MFEM_ABORT("NormInfReal is not supported for base class PetscParMatrix!"); - return 0.0; - } - virtual PetscReal NormInfImag() const - { - MFEM_ABORT("NormInfImag is not supported for base class PetscParMatrix!"); - return 0.0; - } -#endif - - // Estimate matrix 2-norm (spectral norm) using power iteration. - PetscReal Norm2(PetscReal tol = PETSC_DEFAULT, PetscInt maxits = PETSC_DEFAULT) const; - - // Scale all entries by s. - void Scale(PetscScalar s); - -#if defined(PETSC_USE_COMPLEX) - // Replace entries with complex conjugate. - void Conj(); - - // Zero the imaginary part of the matrix. - void GetRealPart(); - - // Move the imaginary part to the real part of the matrix. - void GetImagPart(); -#endif - - // In-place addition A += alpha * B. - void AXPY(PetscScalar alpha, const PetscParMatrix &B, NNZStructure struc); - - // Matrix-vector multiplication. 
- void Mult(const PetscParVector &x, PetscParVector &y) const; - void MultAdd(const PetscParVector &x, PetscParVector &y) const; - void MultTranspose(const PetscParVector &x, PetscParVector &y) const; - void MultTransposeAdd(const PetscParVector &x, PetscParVector &y) const; - void MultHermitianTranspose(const PetscParVector &x, PetscParVector &y) const; - void MultHermitianTransposeAdd(const PetscParVector &x, PetscParVector &y) const; - -#if defined(PETSC_USE_COMPLEX) - // Multiplication with a real-valued vector. - virtual void Mult(const mfem::Vector &x, PetscParVector &y) const; - virtual void MultTranspose(const mfem::Vector &x, PetscParVector &y) const; - virtual void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const; -#endif - - // Prints the matrix (to stdout if fname is nullptr). - virtual void Print(const char *fname = nullptr, bool binary = false) const; -#if defined(PETSC_USE_COMPLEX) - virtual void PrintReal(const char *fname) const - { - MFEM_ABORT("PrintReal is not supported for base class PetscParMatrix!"); - } - virtual void PrintImag(const char *fname) const - { - MFEM_ABORT("PrintImag is not supported for base class PetscParMatrix!"); - } -#endif - - // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex - // scalars are used, the parameter controls which part of the matrix to extract. -#if defined(PETSC_USE_COMPLEX) - virtual const mfem::Operator *GetOperator(ExtractStructure struc) const -#else - virtual const mfem::Operator *GetOperator() const -#endif - { - MFEM_ABORT("GetOperator is not supported for base class PetscParMatrix!"); - return nullptr; - } - - // Test whether or not a shell matrix has a real or imaginary parts. 
-#if defined(PETSC_USE_COMPLEX) - virtual bool HasReal() const - { - MFEM_ABORT("HasReal is not supported for base class PetscParMatrix!"); - return false; - } - virtual bool HasImag() const - { - MFEM_ABORT("HasImag is not supported for base class PetscParMatrix!"); - return false; - } -#endif - - // Constructs a (real) HypreParMatrix from the PETSc matrix data. When complex scalars - // are used, the parameter controls which part of the matrix to extract. -#if defined(PETSC_USE_COMPLEX) - virtual std::unique_ptr - GetHypreParMatrix(ExtractStructure struc) const; -#else - virtual std::unique_ptr GetHypreParMatrix() const; -#endif - - // Create a submatrix on the same number of processors as the original matrix, - // corresponding to the provided rows and columns which are the selected(local) indices. - virtual std::unique_ptr GetSubMatrix(const mfem::Array &rows, - const mfem::Array &cols); - - // Create a sequential gathered matrix corresponding to the parallel matrix. All processes - // on the original communicator must call this function, but if the argument is false, no - // matrix is created (returned pointer is nullptr). - virtual std::unique_ptr GetSequentialMatrix(bool create); - - // Get the associated MPI communicator. - MPI_Comm GetComm() const; - - // Typecasting to PETSc's Mat type. - operator Mat() const { return A; } - - // Typecasting to PETSc object. - operator PetscObject() const { return reinterpret_cast(A); } -}; - -// Context data for PETSc shell matrices. These store complex matrices as -// Ar + i Ai and perform matrix-vector products. -struct PetscMatShellCtx -{ - std::unique_ptr Ar; - mfem::Vector x, y; -#if defined(PETSC_USE_COMPLEX) - std::unique_ptr Ai; -#endif -}; - -// Wrapper for PETSc's MATSHELL matrix class. -class PetscShellMatrix : public PetscParMatrix -{ -private: - // Returns the shell matrix context. - PetscMatShellCtx *GetContext() const; - -public: - // Create a PETSc shell matrix wrapping an MFEM Operator. 
Ownership of the operator is - // transfered to the PETSc shell. When PETSc is compiled with complex numbers support, - // the shell matrix wraps the real and imaginary parts to act on complex PETSc Vec - // objects. - PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&B); -#if defined(PETSC_USE_COMPLEX) - PetscShellMatrix(MPI_Comm comm, std::unique_ptr &&Br, - std::unique_ptr &&Bi); -#endif - - // Returns the number of nonzeros. - PetscInt NNZ() const override; -#if defined(PETSC_USE_COMPLEX) - PetscInt NNZReal() const override; - PetscInt NNZImag() const override; -#endif - - // Calculate matrix Frobenius and infinity norms. -#if defined(PETSC_USE_COMPLEX) - PetscReal NormFReal() const override; - PetscReal NormFImag() const override; - PetscReal NormInfReal() const override; - PetscReal NormInfImag() const override; -#endif - -#if defined(PETSC_USE_COMPLEX) - // Multiplication with a real-valued vector. - void Mult(const mfem::Vector &x, PetscParVector &y) const override; - void MultTranspose(const mfem::Vector &x, PetscParVector &y) const override; - void MultHermitianTranspose(const mfem::Vector &x, PetscParVector &y) const override; -#endif - - // Prints the locally owned matrix rows in parallel. - void Print(const char *fname = nullptr, bool binary = false) const override; -#if defined(PETSC_USE_COMPLEX) - void PrintReal(const char *fname) const override; - void PrintImag(const char *fname) const override; -#endif - - // Test whether or not a shell matrix has a real or imaginary parts. -#if defined(PETSC_USE_COMPLEX) - bool HasReal() const override; - bool HasImag() const override; -#endif - - // Returns a (real) MFEM Operator from the underlying shell matrix data. When complex - // scalars are used, the parameter controls which part of the matrix to extract. 
-#if defined(PETSC_USE_COMPLEX) - const mfem::Operator *GetOperator(ExtractStructure struc) const override; -#else - const mfem::Operator *GetOperator() const override; -#endif - - // These methods are not supported for MATSHELL. -#if defined(PETSC_USE_COMPLEX) - std::unique_ptr - GetHypreParMatrix(ExtractStructure struc) const override -#else - std::unique_ptr GetHypreParMatrix() const override -#endif - { - MFEM_ABORT("GetHypreParMatrix is not supported for PetscShellMatrix!"); - return {}; - } - std::unique_ptr GetSubMatrix(const mfem::Array &, - const mfem::Array &) override - { - MFEM_ABORT("GetSubMatrix is not supported for PetscShellMatrix!"); - return {}; - } - std::unique_ptr GetSequentialMatrix(bool) override - { - MFEM_ABORT("GetSequentialMatrix is not supported for PetscShellMatrix!"); - return {}; - } -}; - -// Wrapper for PETSc's MATIJ matrix class. -class PetscAijMatrix : public PetscParMatrix -{ -public: - // Create a PETSc matrix explicitly converted from an MFEM Operator. - PetscAijMatrix(const mfem::Operator &B); -#if defined(PETSC_USE_COMPLEX) - PetscAijMatrix(const mfem::Operator &Br, const mfem::Operator &Bi); -#endif -}; - -// Wrapper for PETSc's MATDENSE matrix class. -class PetscDenseMatrix : public PetscParMatrix -{ -private: - // Helper method for column orthonormalization. - PetscReal OrthonormalizeColumnInternal( - PetscInt j, bool mgs, bool cgs2, - const std::function &Dot, - const std::function - &VecDot, - const std::function &Normalize); - -public: - // Create a parallel or sequential PETSc dense matrix. Option to specify an existing data - // array. - PetscDenseMatrix(MPI_Comm comm, PetscInt m, PetscInt n, PetscInt M, PetscInt N, - PetscScalar *data); - PetscDenseMatrix(PetscInt m, PetscInt n, PetscScalar *data); - - // Set the (local) matrix dimensions to m x n, copying previous contents to the upper-left - // block. - void Resize(PetscInt m, PetscInt n, bool copy = false); - - // Access methods for columns of the dense matrix. 
- PetscParVector GetColumn(PetscInt j); - const PetscParVector GetColumnRead(PetscInt j) const; - void RestoreColumn(PetscInt j, PetscParVector &v); - void RestoreColumnRead(PetscInt j, const PetscParVector &v) const; - - // Access the data array of the dense matrix. - PetscScalar *GetArray(); - const PetscScalar *GetArrayRead() const; - void RestoreArray(PetscScalar *data); - void RestoreArrayRead(const PetscScalar *data) const; - - // Sets all entries of the vector to random numbers sampled from the range[-1-i, 1+i], or - // [-1, 1]. - void SetRandom(PetscInt start = -1, PetscInt end = -1); -#if defined(PETSC_USE_COMPLEX) - void SetRandomReal(PetscInt start = -1, PetscInt end = -1); -#else - void SetRandomReal(PetscInt start = -1, PetscInt end = -1) { SetRandom(start, end); } -#endif - void SetRandomSign(PetscInt start = -1, PetscInt end = -1, bool init = false); - - // Orthonormalize column j of the matrix against the preceeding columns, using classical - // or modified Gram-Schmidt. - PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2); - PetscReal OrthonormalizeColumn(PetscInt j, bool mgs, bool cgs2, const PetscParMatrix &B, - PetscParVector &Bv); - - // Dense matrix-matrix multiplication. - void MatMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; - void MatMultTranspose(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; - void MatTransposeMult(const PetscDenseMatrix &X, PetscDenseMatrix &Y) const; -}; - -} // namespace palace::petsc #endif // PALACE_LINALG_PETSC_HPP diff --git a/palace/linalg/rap.cpp b/palace/linalg/rap.cpp new file mode 100644 index 000000000..5ca522a16 --- /dev/null +++ b/palace/linalg/rap.cpp @@ -0,0 +1,809 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "rap.hpp" + +#include + +namespace palace +{ + +ParOperator::ParOperator(std::unique_ptr &&dA, Operator *pA, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : Operator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), + data_A(std::move(dA)), A((data_A != nullptr) ? data_A.get() : pA), + trial_fespace(trial_fespace), test_fespace(test_fespace), use_R(test_restrict), + dbc_tdof_list(nullptr), diag_policy(DiagonalPolicy::DIAG_ONE), RAP(nullptr) +{ + MFEM_VERIFY(A, "Cannot construct ParOperator from an empty matrix!"); + lx.SetSize(A->Width()); + ly.SetSize(A->Height()); + ty.SetSize(width); +} + +ParOperator::ParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ParOperator(std::move(A), nullptr, trial_fespace, test_fespace, test_restrict) +{ +} + +ParOperator::ParOperator(Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ParOperator(nullptr, &A, trial_fespace, test_fespace, test_restrict) +{ +} + +const Operator &ParOperator::LocalOperator() const +{ + MFEM_ASSERT(A, "No local matrix available for ParOperator::LocalOperator!"); + return *A; +} + +Operator &ParOperator::LocalOperator() +{ + MFEM_ASSERT(A, "No local matrix available for ParOperator::LocalOperator!"); + return *A; +} + +void ParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, + DiagonalPolicy policy) +{ + MFEM_VERIFY(policy == DiagonalPolicy::DIAG_ONE || policy == DiagonalPolicy::DIAG_ZERO, + "Essential boundary condition true dof elimination for ParOperator supports " + "only DiagonalPolicy::DIAG_ONE or DiagonalPolicy::DIAG_ZERO!"); + MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " + "for rectangular ParOperator!"); + 
dbc_tdof_list = &tdof_list; + diag_policy = policy; +} + +void ParOperator::AssembleDiagonal(Vector &diag) const +{ + // For an AMR mesh, a convergent diagonal is assembled with |P|ᵀ dₗ, where |P| has + // entry-wise absolute values of the conforming prolongation operator. + MFEM_VERIFY(&trial_fespace == &test_fespace, + "Diagonal assembly is only available for square ParOperator!"); + if (auto *bfA = dynamic_cast(A)) + { + if (bfA->HasSpMat()) + { + bfA->SpMat().GetDiag(ly); + } + else if (bfA->HasExt()) + { + bfA->Ext().AssembleDiagonal(ly); + } + else + { + MFEM_ABORT("Unable to assemble the local operator diagonal of BilinearForm!"); + } + } + else if (auto *sA = dynamic_cast(A)) + { + sA->GetDiag(ly); + } + else + { + MFEM_ABORT("ParOperator::AssembleDiagonal requires A as a BilinearForm or " + "SparseMatrix!"); + } + + // Parallel assemble and eliminate essential true dofs. + const Operator *P = test_fespace.GetProlongationMatrix(); + if (const auto *hP = dynamic_cast(P)) + { + hP->AbsMultTranspose(1.0, ly, 0.0, diag); + } + else + { + P->MultTranspose(ly, diag); + } + if (dbc_tdof_list) + { + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(diag, *dbc_tdof_list, 1.0); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(diag, *dbc_tdof_list, 0.0); + } + } +} + +mfem::HypreParMatrix &ParOperator::ParallelAssemble() const +{ + if (RAP) + { + return *RAP; + } + + // XX TODO: For mfem::AssemblyLevel::PARTIAL, we cannot use CeedOperatorFullAssemble for + // a ND space with p > 1. We should throw an error here that the user needs to + // use AssemblyLevel::LEGACY in this case. + + // Build the square or rectangular RAP HypreParMatrix. 
+ if (&trial_fespace == &test_fespace) + { + mfem::SparseMatrix *lA; + bool own_lA = false; + if (auto *bfA = dynamic_cast(A)) + { +#ifdef MFEM_USE_CEED + if (bfA->HasSpMat()) + { + lA = &bfA->SpMat(); + } + else if (bfA->HasExt()) + { + lA = mfem::ceed::CeedOperatorFullAssemble(*bfA); + own_lA = true; + } + else + { + MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " + "BilinearForm!"); + lA = nullptr; + } +#else + MFEM_VERIFY(bfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of BilinearForm!"); + lA = &bfA->SpMat(); +#endif + } + else if (auto *sA = dynamic_cast(A)) + { + lA = sA; + } + else + { + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a BilinearForm or " + "SparseMatrix!"); + lA = nullptr; + } + mfem::HypreParMatrix *hA = + new mfem::HypreParMatrix(trial_fespace.GetComm(), trial_fespace.GlobalVSize(), + trial_fespace.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + RAP = std::make_unique(hypre_ParCSRMatrixRAP(*P, *hA, *P), true); + delete hA; + if (own_lA) + { + delete lA; + } + } + else + { + mfem::SparseMatrix *lA; + bool own_lA = false; + if (auto *mbfA = dynamic_cast(A)) + { +#ifdef MFEM_USE_CEED + if (mbfA->HasSpMat()) + { + lA = &mbfA->SpMat(); + } + else if (mbfA->HasExt()) + { + lA = mfem::ceed::CeedOperatorFullAssemble(*mbfA); + own_lA = true; + } + else + { + MFEM_ABORT("Unable to assemble the local operator for parallel assembly of " + "MixedBilinearForm!"); + lA = nullptr; + } +#else + MFEM_VERIFY( + mbfA->HasSpMat(), + "Missing assembled SparseMatrix for parallel assembly of MixedBilinearForm!"); + lA = &mbfA->SpMat(); +#endif + } + else if (auto *sA = dynamic_cast(A)) + { + lA = sA; + } + else + { + MFEM_ABORT("ParOperator::ParallelAssemble requires A as a MixedBilinearForm or " + "SparseMatrix!"); + lA = nullptr; + } + mfem::HypreParMatrix *hA = new mfem::HypreParMatrix( + trial_fespace.GetComm(), test_fespace.GlobalVSize(), 
trial_fespace.GlobalVSize(), + test_fespace.GetDofOffsets(), trial_fespace.GetDofOffsets(), lA); + const mfem::HypreParMatrix *P = trial_fespace.Dof_TrueDof_Matrix(); + if (!use_R) + { + const mfem::HypreParMatrix *Rt = test_fespace.Dof_TrueDof_Matrix(); + RAP = + std::make_unique(hypre_ParCSRMatrixRAP(*Rt, *hA, *P), true); + } + else + { + mfem::SparseMatrix *sRt = mfem::Transpose(*test_fespace.GetRestrictionMatrix()); + mfem::HypreParMatrix *hRt = new mfem::HypreParMatrix( + test_fespace.GetComm(), test_fespace.GlobalVSize(), + test_fespace.GlobalTrueVSize(), test_fespace.GetDofOffsets(), + test_fespace.GetTrueDofOffsets(), sRt); + RAP = std::make_unique(hypre_ParCSRMatrixRAP(*hRt, *hA, *P), + true); + delete sRt; + delete hRt; + } + delete hA; + if (own_lA) + { + delete lA; + } + } + hypre_ParCSRMatrixSetNumNonzeros(*RAP); + + // Eliminate boundary conditions on the assembled (square) matrix. + if (dbc_tdof_list) + { + RAP->EliminateBC(*dbc_tdof_list, diag_policy); + } + return *RAP; +} + +void ParOperator::EliminateRHS(const Vector &x, Vector &b) const +{ + if (!dbc_tdof_list) + { + return; + } + + MFEM_VERIFY(A, "No local matrix available for ParOperator::EliminateRHS!"); + ty = 0.0; + linalg::SetSubVector(ty, *dbc_tdof_list, x); + trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + + // Apply the unconstrained operator. 
+ A->Mult(lx, ly); + + RestrictionMatrixAddMult(ly, b, -1.0); + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(b, *dbc_tdof_list, x); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(b, *dbc_tdof_list, 0.0); + } +} + +void ParOperator::Mult(const Vector &x, Vector &y) const +{ + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ParOperator::Mult!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + } + else + { + trial_fespace.GetProlongationMatrix()->Mult(x, lx); + } + + // Apply the operator on the L-vector. + A->Mult(lx, ly); + + RestrictionMatrixMult(ly, y); + if (dbc_tdof_list) + { + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(y, *dbc_tdof_list, x); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + } + } +} + +void ParOperator::AddMult(const Vector &x, Vector &y, const double a) const +{ + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ParOperator::AddMult!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(ty, lx); + } + else + { + trial_fespace.GetProlongationMatrix()->Mult(x, lx); + } + + // Apply the operator on the L-vector. 
+ A->Mult(lx, ly); + + if (dbc_tdof_list) + { + RestrictionMatrixMult(ly, ty); + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(ty, *dbc_tdof_list, x); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y.Add(a, ty); + } + else + { + RestrictionMatrixAddMult(ly, y, a); + } +} + +void ParOperator::MultTranspose(const Vector &x, Vector &y) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ParOperator::MultTranspose!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(ty, ly); + } + else + { + RestrictionMatrixMultTranspose(x, ly); + } + + // Apply the operator on the L-vector. + A->MultTranspose(ly, lx); + + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, y); + if (dbc_tdof_list) + { + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(y, *dbc_tdof_list, x); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + } + } +} + +void ParOperator::AddMultTranspose(const Vector &x, Vector &y, const double a) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ParOperator::AddMultTranspose!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(ty, ly); + } + else + { + RestrictionMatrixMultTranspose(x, ly); + } + + // Apply the operator on the L-vector. 
+ A->MultTranspose(ly, lx); + + if (dbc_tdof_list) + { + trial_fespace.GetProlongationMatrix()->MultTranspose(lx, ty); + if (diag_policy == DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(ty, *dbc_tdof_list, x); + } + else if (diag_policy == DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y.Add(a, ty); + } + else + { + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx, y, a); + } +} + +void ParOperator::RestrictionMatrixMult(const Vector &ly, Vector &ty) const +{ + if (!use_R) + { + test_fespace.GetProlongationMatrix()->MultTranspose(ly, ty); + } + else + { + test_fespace.GetRestrictionMatrix()->Mult(ly, ty); + } +} + +void ParOperator::RestrictionMatrixAddMult(const Vector &ly, Vector &ty, + const double a) const +{ + if (!use_R) + { + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly, ty, a); + } + else + { + test_fespace.GetRestrictionMatrix()->AddMult(ly, ty, a); + } +} + +void ParOperator::RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const +{ + if (!use_R) + { + test_fespace.GetProlongationMatrix()->Mult(ty, ly); + } + else + { + test_fespace.GetRestrictionMatrix()->MultTranspose(ty, ly); + } +} + +ComplexParOperator::ComplexParOperator(std::unique_ptr &&dAr, + std::unique_ptr &&dAi, Operator *pAr, + Operator *pAi, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexOperator(test_fespace.GetTrueVSize(), trial_fespace.GetTrueVSize()), + data_A((dAr != nullptr || dAi != nullptr) + ? std::make_unique(std::move(dAr), std::move(dAi)) + : std::make_unique(pAr, pAi)), + A(data_A.get()), trial_fespace(trial_fespace), test_fespace(test_fespace), + use_R(test_restrict), dbc_tdof_list(nullptr), + diag_policy(Operator::DiagonalPolicy::DIAG_ONE), + RAPr(A->HasReal() + ? std::make_unique(*A->Real(), trial_fespace, test_fespace, use_R) + : nullptr), + RAPi(A->HasImag() + ? 
std::make_unique(*A->Imag(), trial_fespace, test_fespace, use_R) + : nullptr) +{ + // We use the non-owning constructors for real and imaginary part ParOperators. We know A + // is a ComplexWrapperOperator which has separate access to the real and imaginary + // components. + lx.SetSize(A->Width()); + ly.SetSize(A->Height()); + ty.SetSize(width); +} + +ComplexParOperator::ComplexParOperator(std::unique_ptr &&Ar, + std::unique_ptr &&Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexParOperator(std::move(Ar), std::move(Ai), nullptr, nullptr, trial_fespace, + test_fespace, test_restrict) +{ +} + +ComplexParOperator::ComplexParOperator(Operator *Ar, Operator *Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, + bool test_restrict) + : ComplexParOperator(nullptr, nullptr, Ar, Ai, trial_fespace, test_fespace, test_restrict) +{ +} + +const ComplexOperator &ComplexParOperator::LocalOperator() const +{ + MFEM_ASSERT(A, "No local matrix available for ComplexParOperator::LocalOperator!"); + return *A; +} + +ComplexOperator &ComplexParOperator::LocalOperator() +{ + MFEM_ASSERT(A, "No local matrix available for ComplexParOperator::LocalOperator!"); + return *A; +} + +void ComplexParOperator::SetEssentialTrueDofs(const mfem::Array &tdof_list, + Operator::DiagonalPolicy policy) +{ + MFEM_VERIFY(policy == Operator::DiagonalPolicy::DIAG_ONE || + policy == Operator::DiagonalPolicy::DIAG_ZERO, + "Essential boundary condition true dof elimination for ComplexParOperator " + "supports only DiagonalPolicy::DIAG_ONE or DiagonalPolicy::DIAG_ZERO!"); + MFEM_VERIFY( + policy != Operator::DiagonalPolicy::DIAG_ONE || RAPr, + "DiagonalPolicy::DIAG_ONE specified for ComplexParOperator with no real part!"); + MFEM_VERIFY(height == width, "Set essential true dofs for both test and trial spaces " + "for rectangular ComplexParOperator!"); + dbc_tdof_list = 
&tdof_list; + diag_policy = policy; + if (RAPr) + { + RAPr->SetEssentialTrueDofs(tdof_list, policy); + } + if (RAPi) + { + RAPi->SetEssentialTrueDofs(tdof_list, Operator::DiagonalPolicy::DIAG_ZERO); + } +} + +void ComplexParOperator::Mult(const ComplexVector &x, ComplexVector &y) const +{ + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ComplexParOperator::Mult!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag()); + } + else + { + trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag()); + } + + // Apply the operator on the L-vector. + A->Mult(lx, ly); + + RestrictionMatrixMult(ly, y); + if (dbc_tdof_list) + { + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(y, *dbc_tdof_list, x); + } + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + } + } +} + +void ComplexParOperator::AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + MFEM_ASSERT(x.Size() == width && y.Size() == height, + "Incompatible dimensions for ComplexParOperator::AddMult!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + trial_fespace.GetProlongationMatrix()->Mult(ty.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(ty.Imag(), lx.Imag()); + } + else + { + trial_fespace.GetProlongationMatrix()->Mult(x.Real(), lx.Real()); + trial_fespace.GetProlongationMatrix()->Mult(x.Imag(), lx.Imag()); + } + + // Apply the operator on the L-vector. 
+ ly = 0.0; + A->AddMult(lx, ly, a); + + if (dbc_tdof_list) + { + RestrictionMatrixMult(ly, ty); + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(ty, *dbc_tdof_list, x); + } + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y += ty; + } + else + { + RestrictionMatrixAddMult(ly, y, 1.0); + } +} + +void ComplexParOperator::MultTranspose(const ComplexVector &x, ComplexVector &y) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ComplexParOperator::MultTranspose!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(ty, ly); + } + else + { + RestrictionMatrixMultTranspose(x, ly); + } + + // Apply the operator on the L-vector. + A->MultTranspose(ly, lx); + + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); + if (dbc_tdof_list) + { + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(y, *dbc_tdof_list, x); + } + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + } + } +} + +void ComplexParOperator::AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ComplexParOperator::AddMultTranspose!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(ty, ly); + } + else + { + RestrictionMatrixMultTranspose(x, ly); + } + + // Apply the operator on the L-vector. 
+ lx = 0.0; + A->AddMultTranspose(ly, lx, a); + + if (dbc_tdof_list) + { + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(ty, *dbc_tdof_list, x); + } + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y += ty; + } + else + { + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); + } +} + +void ComplexParOperator::MultHermitianTranspose(const ComplexVector &x, + ComplexVector &y) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ComplexParOperator::MultHermitianTranspose!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(ty, ly); + } + else + { + RestrictionMatrixMultTranspose(x, ly); + } + + // Apply the operator on the L-vector. 
+ A->MultHermitianTranspose(ly, lx); + + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), y.Imag()); + if (dbc_tdof_list) + { + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(y, *dbc_tdof_list, x); + } + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(y, *dbc_tdof_list, 0.0); + } + } +} + +void ComplexParOperator::AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a) const +{ + MFEM_ASSERT(x.Size() == height && y.Size() == width, + "Incompatible dimensions for ComplexParOperator::AddMultHermitianTranspose!"); + if (dbc_tdof_list) + { + ty = x; + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + RestrictionMatrixMultTranspose(ty, ly); + } + else + { + RestrictionMatrixMultTranspose(x, ly); + } + + // Apply the operator on the L-vector. + lx = 0.0; + A->AddMultHermitianTranspose(ly, lx, a); + + if (dbc_tdof_list) + { + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Real(), ty.Real()); + trial_fespace.GetProlongationMatrix()->MultTranspose(lx.Imag(), ty.Imag()); + if (diag_policy == Operator::DiagonalPolicy::DIAG_ONE) + { + linalg::SetSubVector(ty, *dbc_tdof_list, x); + } + else if (diag_policy == Operator::DiagonalPolicy::DIAG_ZERO) + { + linalg::SetSubVector(ty, *dbc_tdof_list, 0.0); + } + y += ty; + } + else + { + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Real(), y.Real()); + trial_fespace.GetProlongationMatrix()->AddMultTranspose(lx.Imag(), y.Imag()); + } +} + +void ComplexParOperator::RestrictionMatrixMult(const ComplexVector &ly, + ComplexVector &ty) const +{ + if (!use_R) + { + test_fespace.GetProlongationMatrix()->MultTranspose(ly.Real(), ty.Real()); + test_fespace.GetProlongationMatrix()->MultTranspose(ly.Imag(), ty.Imag()); + } + else + { + test_fespace.GetRestrictionMatrix()->Mult(ly.Real(), ty.Real()); + 
test_fespace.GetRestrictionMatrix()->Mult(ly.Imag(), ty.Imag()); + } +} + +void ComplexParOperator::RestrictionMatrixAddMult(const ComplexVector &ly, + ComplexVector &ty, const double a) const +{ + if (!use_R) + { + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Real(), ty.Real(), a); + test_fespace.GetProlongationMatrix()->AddMultTranspose(ly.Imag(), ty.Imag(), a); + } + else + { + test_fespace.GetRestrictionMatrix()->AddMult(ly.Real(), ty.Real(), a); + test_fespace.GetRestrictionMatrix()->AddMult(ly.Imag(), ty.Imag(), a); + } +} + +void ComplexParOperator::RestrictionMatrixMultTranspose(const ComplexVector &ty, + ComplexVector &ly) const +{ + if (!use_R) + { + test_fespace.GetProlongationMatrix()->Mult(ty.Real(), ly.Real()); + test_fespace.GetProlongationMatrix()->Mult(ty.Imag(), ly.Imag()); + } + else + { + test_fespace.GetRestrictionMatrix()->MultTranspose(ty.Real(), ly.Real()); + test_fespace.GetRestrictionMatrix()->MultTranspose(ty.Imag(), ly.Imag()); + } +} + +} // namespace palace diff --git a/palace/linalg/rap.hpp b/palace/linalg/rap.hpp new file mode 100644 index 000000000..62c4b2b62 --- /dev/null +++ b/palace/linalg/rap.hpp @@ -0,0 +1,212 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_RAP_HPP +#define PALACE_LINALG_RAP_HPP + +#include +#include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// A parallel operator represented by RAP constructed through the actions of R, A, and P, +// usually with R = Pᵀ, and with possible eliminated essential BC. Here R and P are the +// parallel restriction and prolongation matrices. +// + +// Real-valued RAP operator. +class ParOperator : public Operator +{ +private: + // Storage and access for the local operator. + std::unique_ptr data_A; + Operator *A; + + // Finite element spaces for parallel prolongation and restriction. 
+ const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const bool use_R; + + // Lists of constrained essential boundary true dofs for elimination. + const mfem::Array *dbc_tdof_list; + + // Diagonal policy for constrained true dofs. + DiagonalPolicy diag_policy; + + // Assembled operator as a parallel Hypre matrix. If assembled, the local operator is not + // deleted. + mutable std::unique_ptr RAP; + + // Temporary storage for operator application. + mutable Vector lx, ly, ty; + + // Helper methods for operator application. + void RestrictionMatrixMult(const Vector &ly, Vector &ty) const; + void RestrictionMatrixAddMult(const Vector &ly, Vector &ty, const double a) const; + void RestrictionMatrixMultTranspose(const Vector &ty, Vector &ly) const; + + ParOperator(std::unique_ptr &&dA, Operator *pA, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + +public: + // Construct the parallel operator, inheriting ownership of the local operator. + ParOperator(std::unique_ptr &&A, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(std::unique_ptr &&A, const mfem::ParFiniteElementSpace &fespace) + : ParOperator(std::move(A), fespace, fespace, false) + { + } + + // Non-owning constructors. + ParOperator(Operator &A, const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ParOperator(Operator &A, const mfem::ParFiniteElementSpace &fespace) + : ParOperator(A, fespace, fespace, false) + { + } + + // Get access to the underlying local (L-vector) operator. + const Operator &LocalOperator() const; + Operator &LocalOperator(); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return trial_fespace.GetComm(); } + + // Set essential boundary condition true dofs for square operators. 
+ void SetEssentialTrueDofs(const mfem::Array &tdof_list, DiagonalPolicy policy); + + // Get the essential boundary condition true dofs associated with the operator. May be + // nullptr. + const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } + + // Assemble the diagonal for the parallel operator. + void AssembleDiagonal(Vector &diag) const override; + + // Assemble the operator as a parallel sparse matrix. The memory associated with the + // local operator is not freed. + mfem::HypreParMatrix &ParallelAssemble() const; + + // Steal the assembled parallel sparse matrix. + std::unique_ptr StealParallelAssemble() const + { + ParallelAssemble(); + return std::move(RAP); + } + + // Eliminate essential true dofs from the RHS vector b, using the essential boundary + // condition values in x. + void EliminateRHS(const Vector &x, Vector &b) const; + + void Mult(const Vector &x, Vector &y) const override; + + void MultTranspose(const Vector &x, Vector &y) const override; + + void AddMult(const Vector &x, Vector &y, const double a = 1.0) const override; + + void AddMultTranspose(const Vector &x, Vector &y, const double a = 1.0) const override; +}; + +// Complex-valued RAP operator. +class ComplexParOperator : public ComplexOperator +{ +private: + // Storage and access for the local operator. + std::unique_ptr data_A; + ComplexWrapperOperator *A; + + // Finite element spaces for parallel prolongation and restriction. + const mfem::ParFiniteElementSpace &trial_fespace, &test_fespace; + const bool use_R; + + // Lists of constrained essential boundary true dofs for elimination. + mutable const mfem::Array *dbc_tdof_list; + + // Diagonal policy for constrained true dofs. + Operator::DiagonalPolicy diag_policy; + + // Real and imaginary parts of the operator as non-owning ParOperator objects. + std::unique_ptr RAPr, RAPi; + + // Temporary storage for operator application. + mutable ComplexVector lx, ly, ty; + + // Helper methods for operator application. 
+ void RestrictionMatrixMult(const ComplexVector &ly, ComplexVector &ty) const; + void RestrictionMatrixAddMult(const ComplexVector &ly, ComplexVector &ty, + const double a) const; + void RestrictionMatrixMultTranspose(const ComplexVector &ty, ComplexVector &ly) const; + + ComplexParOperator(std::unique_ptr &&dAr, std::unique_ptr &&dAi, + Operator *pAr, Operator *pAi, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + +public: + // Construct the complex-valued parallel operator from the separate real and imaginary + // parts, inheriting ownership of the local operator. + ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ComplexParOperator(std::unique_ptr &&Ar, std::unique_ptr &&Ai, + const mfem::ParFiniteElementSpace &fespace) + : ComplexParOperator(std::move(Ar), std::move(Ai), fespace, fespace, false) + { + } + + // Non-owning constructors. + ComplexParOperator(Operator *Ar, Operator *Ai, + const mfem::ParFiniteElementSpace &trial_fespace, + const mfem::ParFiniteElementSpace &test_fespace, bool test_restrict); + ComplexParOperator(Operator *Ar, Operator *Ai, const mfem::ParFiniteElementSpace &fespace) + : ComplexParOperator(Ar, Ai, fespace, fespace, false) + { + } + + // Get access to the underlying local (L-vector) operator. + const ComplexOperator &LocalOperator() const; + ComplexOperator &LocalOperator(); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return trial_fespace.GetComm(); } + + // Set essential boundary condition true dofs for square operators. + void SetEssentialTrueDofs(const mfem::Array &tdof_list, + Operator::DiagonalPolicy policy); + + // Get the essential boundary condition true dofs associated with the operator. May be + // nullptr. 
+ const mfem::Array *GetEssentialTrueDofs() const { return dbc_tdof_list; } + + bool IsReal() const override { return A->IsReal(); } + bool IsImag() const override { return A->IsImag(); } + bool HasReal() const override { return RAPr != nullptr; } + bool HasImag() const override { return RAPi != nullptr; } + const Operator *Real() const override { return RAPr.get(); } + Operator *Real() override { return RAPr.get(); } + const Operator *Imag() const override { return RAPi.get(); } + Operator *Imag() override { return RAPi.get(); } + + void Mult(const ComplexVector &x, ComplexVector &y) const override; + + void MultTranspose(const ComplexVector &x, ComplexVector &y) const override; + + void MultHermitianTranspose(const ComplexVector &x, ComplexVector &y) const override; + + void AddMult(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; + + void AddMultTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; + + void AddMultHermitianTranspose(const ComplexVector &x, ComplexVector &y, + const std::complex a = 1.0) const override; +}; + +} // namespace palace + +#endif // PALACE_LINALG_RAP_HPP diff --git a/palace/linalg/slepc.cpp b/palace/linalg/slepc.cpp index b118e8667..fd3fa1ef5 100644 --- a/palace/linalg/slepc.cpp +++ b/palace/linalg/slepc.cpp @@ -5,14 +5,15 @@ #if defined(PALACE_WITH_SLEPC) +#include #include #include #include #include "linalg/divfree.hpp" -#include "linalg/ksp.hpp" #include "utils/communication.hpp" -static PetscErrorCode __mat_apply_EPS_A(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_EPS_A0(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_EPS_A1(Mat, Vec, Vec); static PetscErrorCode __mat_apply_EPS_B(Mat, Vec, Vec); static PetscErrorCode __pc_apply_EPS(PC, Vec, Vec); static PetscErrorCode __mat_apply_PEPLinear_L0(Mat, Vec, Vec); @@ -22,11 +23,120 @@ static PetscErrorCode __pc_apply_PEPLinear(PC, Vec, Vec); static PetscErrorCode __mat_apply_PEP_A0(Mat, Vec, Vec); 
static PetscErrorCode __mat_apply_PEP_A1(Mat, Vec, Vec); static PetscErrorCode __mat_apply_PEP_A2(Mat, Vec, Vec); +static PetscErrorCode __mat_apply_PEP_B(Mat, Vec, Vec); static PetscErrorCode __pc_apply_PEP(PC, Vec, Vec); namespace palace::slepc { +namespace +{ + +struct MatShellContext +{ + const ComplexOperator &A; + ComplexVector &x, &y; +}; + +PetscErrorCode __mat_apply_shell(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + MatShellContext *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->A.Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_transpose_shell(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + MatShellContext *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->A.MultTranspose(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_hermitian_transpose_shell(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + MatShellContext *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + 
ctx->A.MultHermitianTranspose(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +}; + +void ConfigurePCShell(ST st, void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Vec)) +{ + KSP ksp; + PC pc; + PalacePetscCall(STGetKSP(st, &ksp)); + PalacePetscCall(KSPGetPC(ksp, &pc)); + PalacePetscCall(PCSetType(pc, PCSHELL)); + PalacePetscCall(PCShellSetContext(pc, ctx)); + PalacePetscCall(PCShellSetApply(pc, __pc_apply)); +} + +void ConfigureRG(RG rg, PetscReal lr, PetscReal ur, PetscReal li, PetscReal ui, + bool complement = false) +{ + PalacePetscCall(RGSetType(rg, RGINTERVAL)); + PalacePetscCall(RGIntervalSetEndpoints(rg, lr, ur, li, ui)); + if (complement) + { + PalacePetscCall(RGSetComplement(rg, PETSC_TRUE)); + } +} + +} // namespace + void Initialize(int &argc, char **&argv, const char rc_file[], const char help[]) { PalacePetscCall(SlepcInitialize(&argc, &argv, rc_file, help)); @@ -42,119 +152,136 @@ void Finalize() PalacePetscCall(SlepcFinalize()); } -PetscReal GetMaxSingularValue(const petsc::PetscParMatrix &A, PetscReal tol, - PetscInt maxits) +PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm, + PetscReal tol, PetscInt max_it) { // This method assumes the provided operator has the required operations for SLEPc's EPS // or SVD solvers, namely MATOP_MULT and MATOP_MULT_HERMITIAN_TRANSPOSE (if the matrix // is not Hermitian). 
- PetscInt nconv; - PetscReal sigma; - if (A.GetHermitian()) // Returns true if symmetric and not PETSC_USE_COMPLEX + Mat A0; + PetscInt n = A.Height(); + ComplexVector x(n), y(n); + MatShellContext ctx = {A, x, y}; + PalacePetscCall( + MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)&ctx, &A0)); + PalacePetscCall(MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_shell)); + if (herm) { EPS eps; + PetscInt num_conv; PetscScalar eig; - PalacePetscCall(EPSCreate(A.GetComm(), &eps)); - PalacePetscCall(EPSSetOperators(eps, A, nullptr)); + PalacePetscCall(EPSCreate(comm, &eps)); + PalacePetscCall(EPSSetOperators(eps, A0, nullptr)); PalacePetscCall(EPSSetProblemType(eps, EPS_HEP)); PalacePetscCall(EPSSetWhichEigenpairs(eps, EPS_LARGEST_MAGNITUDE)); PalacePetscCall(EPSSetDimensions(eps, 1, PETSC_DEFAULT, PETSC_DEFAULT)); - PalacePetscCall(EPSSetTolerances(eps, tol, maxits)); + PalacePetscCall(EPSSetTolerances(eps, tol, max_it)); PalacePetscCall(EPSSolve(eps)); - PalacePetscCall(EPSGetConverged(eps, &nconv)); - if (nconv < 1) + PalacePetscCall(EPSGetConverged(eps, &num_conv)); + if (num_conv < 1) + { + Mpi::Warning(comm, "SLEPc EPS solve did not converge for maximum singular value!\n"); + eig = 0.0; + } + else { - Mpi::Warning(A.GetComm(), - "SLEPc EPS solve did not converge for maximum singular value!\n"); - PalacePetscCall(EPSDestroy(&eps)); - return 0.0; + PalacePetscCall(EPSGetEigenvalue(eps, 0, &eig, nullptr)); + MFEM_VERIFY(PetscImaginaryPart(eig) == 0.0, + "Unexpected complex eigenvalue for Hermitian matrix (λ = " << eig + << ")!"); } - MFEM_VERIFY(nconv >= 1, " "); - PalacePetscCall(EPSGetEigenvalue(eps, 0, &eig, nullptr)); PalacePetscCall(EPSDestroy(&eps)); - MFEM_VERIFY(PetscImaginaryPart(eig) == 0.0, - "Unexpected complex eigenvalue for Hermitian matrix (λ = " << eig << ")!"); - sigma = PetscAbsScalar(eig); + PalacePetscCall(MatDestroy(&A0)); + return PetscAbsScalar(eig); } else { + PalacePetscCall(MatShellSetOperation(A0, 
MATOP_MULT_TRANSPOSE, + (void (*)(void))__mat_apply_transpose_shell)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT_HERMITIAN_TRANSPOSE, + (void (*)(void))__mat_apply_hermitian_transpose_shell)); + SVD svd; - PalacePetscCall(SVDCreate(A.GetComm(), &svd)); - PalacePetscCall(SVDSetOperators(svd, A, nullptr)); + PetscInt num_conv; + PetscReal sigma; + PalacePetscCall(SVDCreate(comm, &svd)); + PalacePetscCall(SVDSetOperators(svd, A0, nullptr)); PalacePetscCall(SVDSetProblemType(svd, SVD_STANDARD)); PalacePetscCall(SVDSetWhichSingularTriplets(svd, SVD_LARGEST)); PalacePetscCall(SVDSetDimensions(svd, 1, PETSC_DEFAULT, PETSC_DEFAULT)); - PalacePetscCall(SVDSetTolerances(svd, tol, maxits)); + PalacePetscCall(SVDSetTolerances(svd, tol, max_it)); PalacePetscCall(SVDSolve(svd)); - PalacePetscCall(SVDGetConverged(svd, &nconv)); - if (nconv < 1) + PalacePetscCall(SVDGetConverged(svd, &num_conv)); + if (num_conv < 1) { - Mpi::Warning(A.GetComm(), - "SLEPc SVD solve did not converge for maximum singular value!\n"); - PalacePetscCall(SVDDestroy(&svd)); - return 0.0; + Mpi::Warning(comm, "SLEPc SVD solve did not converge for maximum singular value!\n"); + sigma = 0.0; + } + else + { + PalacePetscCall(SVDGetSingularTriplet(svd, 0, &sigma, nullptr, nullptr)); } - MFEM_VERIFY(nconv >= 1, " "); - PalacePetscCall(SVDGetSingularTriplet(svd, 0, &sigma, nullptr, nullptr)); PalacePetscCall(SVDDestroy(&svd)); + PalacePetscCall(MatDestroy(&A0)); + return sigma; } - return sigma; } // Eigensolver base class methods -SlepcEigenSolver::SlepcEigenSolver(int print_lvl) : clcustom(false), print(print_lvl) +SlepcEigenvalueSolver::SlepcEigenvalueSolver(int print) : print(print) { sinvert = false; region = true; sigma = 0.0; gamma = delta = 1.0; - res = nullptr; - v0 = r0 = nullptr; opInv = nullptr; opProj = nullptr; + opB = nullptr; + + B0 = nullptr; + v0 = nullptr; + + cl_custom = false; } -SlepcEigenSolver::~SlepcEigenSolver() +SlepcEigenvalueSolver::~SlepcEigenvalueSolver() { - 
delete[] res; - delete v0; - delete r0; + PalacePetscCall(MatDestroy(&B0)); + PalacePetscCall(VecDestroy(&v0)); } -void SlepcEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - MFEM_ABORT("SetOperators not defined for base class SlepcEigenSolver!"); + MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!"); } -void SlepcEigenSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcEigenvalueSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { - MFEM_ABORT("SetOperators not defined for base class SlepcEigenSolver!"); + MFEM_ABORT("SetOperators not defined for base class SlepcEigenvalueSolver!"); } -void SlepcEigenSolver::SetLinearSolver(const KspSolver &ksp) +void SlepcEigenvalueSolver::SetLinearSolver(const ComplexKspSolver &ksp) { opInv = &ksp; } -void SlepcEigenSolver::SetProjector(const DivFreeSolver &divfree) +void SlepcEigenvalueSolver::SetDivFreeProjector(const DivFreeSolver &divfree) { opProj = &divfree; } -void SlepcEigenSolver::SetBMat(const petsc::PetscParMatrix &B) +void SlepcEigenvalueSolver::SetBMat(const Operator &B) { - BV bv = GetBV(); - PalacePetscCall(BVSetMatrix(bv, B, PETSC_FALSE)); + opB = &B; } -void SlepcEigenSolver::SetShiftInvert(double tr, double ti, bool precond) +void SlepcEigenvalueSolver::SetShiftInvert(PetscScalar s, bool precond) { ST st = GetST(); if (precond) @@ -167,46 +294,43 @@ void SlepcEigenSolver::SetShiftInvert(double tr, double ti, bool precond) } PalacePetscCall(STSetTransform(st, PETSC_TRUE)); PalacePetscCall(STSetMatMode(st, ST_MATMODE_SHELL)); - sigma = tr + PETSC_i * ti; // Wait until solve time to 
call EPS/PEPSetTarget + sigma = s; // Wait until solve time to call EPS/PEPSetTarget sinvert = true; } -void SlepcEigenSolver::SetOrthogonalization(bool mgs, bool cgs2) +void SlepcEigenvalueSolver::SetOrthogonalization(bool mgs, bool cgs2) { + // The SLEPc default is CGS with refinement if needed. if (mgs || cgs2) { + BV bv = GetBV(); BVOrthogType type; BVOrthogRefineType refine; - PetscReal eta; - BVOrthogBlockType btype; - BV bv = GetBV(); if (mgs) { type = BV_ORTHOG_MGS; - PalacePetscCall(BVGetOrthogonalization(bv, nullptr, &refine, &eta, &btype)); + refine = BV_ORTHOG_REFINE_NEVER; } else // cgs2 { type = BV_ORTHOG_CGS; refine = BV_ORTHOG_REFINE_ALWAYS; - eta = 1.0; - PalacePetscCall(BVGetOrthogonalization(bv, nullptr, nullptr, nullptr, &btype)); } - PalacePetscCall(BVSetOrthogonalization(bv, type, refine, eta, btype)); + PalacePetscCall(BVSetOrthogonalization(bv, type, refine, 1.0, BV_ORTHOG_BLOCK_GS)); } } -void SlepcEigenSolver::Customize() +void SlepcEigenvalueSolver::Customize() { // Configure the KSP object for non-preconditioned spectral transformations. 
PetscBool precond; - KSP ksp; ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); PalacePetscCall( PetscObjectTypeCompare(reinterpret_cast(st), STPRECOND, &precond)); if (!precond) { + KSP ksp; + PalacePetscCall(STGetKSP(st, &ksp)); PalacePetscCall(KSPSetType(ksp, KSPPREONLY)); } @@ -215,103 +339,57 @@ void SlepcEigenSolver::Customize() { if (PetscImaginaryPart(sigma) == 0.0) { - if (PetscRealPart(sigma) > 0.0) + PetscReal sr = PetscRealPart(sigma); + if (sr > 0.0) { - SetRegion(PetscRealPart(sigma) / gamma, mfem::infinity(), -mfem::infinity(), - mfem::infinity()); + ConfigureRG(GetRG(), sr / gamma, mfem::infinity(), -mfem::infinity(), + mfem::infinity()); } - else if (PetscRealPart(sigma) < 0.0) + else if (sr < 0.0) { - SetRegion(-mfem::infinity(), PetscRealPart(sigma) / gamma, -mfem::infinity(), - mfem::infinity()); + ConfigureRG(GetRG(), -mfem::infinity(), sr / gamma, -mfem::infinity(), + mfem::infinity()); } } else if (PetscRealPart(sigma) == 0.0) { - if (PetscImaginaryPart(sigma) > 0.0) + PetscReal si = PetscImaginaryPart(sigma); + if (si > 0.0) { - SetRegion(-mfem::infinity(), mfem::infinity(), PetscImaginaryPart(sigma) / gamma, - mfem::infinity()); + ConfigureRG(GetRG(), -mfem::infinity(), mfem::infinity(), si / gamma, + mfem::infinity()); } - else if (PetscImaginaryPart(sigma) < 0.0) + else if (si < 0.0) { - SetRegion(-mfem::infinity(), mfem::infinity(), -mfem::infinity(), - PetscImaginaryPart(sigma) / gamma); + ConfigureRG(GetRG(), -mfem::infinity(), mfem::infinity(), -mfem::infinity(), + si / gamma); } } else { - MFEM_ABORT("Shift-and-invert with general complex eigenvalue target " - "is unsupported!"); + MFEM_ABORT("Shift-and-invert with general complex eigenvalue target is unsupported!"); } } } -void SlepcEigenSolver::SetPCShell(void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Vec)) +PetscReal SlepcEigenvalueSolver::GetError(int i, EigenvalueSolver::ErrorType type) const { - // Configure linear solver for generalized problem or spectral 
transformation. This also - // allows use of the divergence-free projector as a linear solve side-effect. - KSP ksp; - PC pc; - ST st = GetST(); - PalacePetscCall(STGetKSP(st, &ksp)); - PalacePetscCall(KSPGetPC(ksp, &pc)); - - // Configure the linear solver as a shell preconditioner. - PalacePetscCall(PCSetType(pc, PCSHELL)); - PalacePetscCall(PCShellSetContext(pc, ctx)); - PalacePetscCall(PCShellSetApply(pc, __pc_apply)); -} - -void SlepcEigenSolver::SetRegion(PetscReal rminr, PetscReal rmaxr, PetscReal rmini, - PetscReal rmaxi, bool complement) -{ - RG rg = GetRG(); - PalacePetscCall(RGSetType(rg, RGINTERVAL)); - PalacePetscCall(RGIntervalSetEndpoints(rg, rminr, rmaxr, rmini, rmaxi)); - if (complement) - { - PalacePetscCall(RGSetComplement(rg, PETSC_TRUE)); - } -} - -void SlepcEigenSolver::GetBackTransform(PetscScalar eig, PetscReal &eigr, - PetscReal &eigi) const -{ - eigr = gamma * PetscRealPart(eig); - eigi = gamma * PetscImaginaryPart(eig); -} - -void SlepcEigenSolver::GetError(int i, EigenSolverBase::ErrorType type, double &err) const -{ - PetscReal eigr, eigi; - GetEigenvalue(i, eigr, eigi); - PetscScalar eig = eigr + PETSC_i * eigi; - if (res[i] <= 0.0) - { - GetEigenvector(i, *v0); - GetResidual(eig, *v0, *r0); - res[i] = r0->Norml2() / v0->Norml2(); - } switch (type) { case ErrorType::ABSOLUTE: - err = res[i]; - break; + return res.get()[i]; case ErrorType::RELATIVE: - err = res[i] / PetscAbsScalar(eig); - break; + return res.get()[i] / PetscAbsScalar(GetEigenvalue(i)); case ErrorType::BACKWARD: - err = res[i] / GetBackwardScaling(eig); - break; + return res.get()[i] / GetBackwardScaling(GetEigenvalue(i)); } + return 0.0; } // EPS specific methods -SlepcEPSSolverBase::SlepcEPSSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix) - : SlepcEigenSolver(print_lvl) +SlepcEPSSolverBase::SlepcEPSSolverBase(MPI_Comm comm, int print, const std::string &prefix) + : SlepcEigenvalueSolver(print) { PalacePetscCall(EPSCreate(comm, &eps)); 
PalacePetscCall(EPSSetOptionsPrefix(eps, prefix.c_str())); @@ -332,25 +410,23 @@ SlepcEPSSolverBase::SlepcEPSSolverBase(MPI_Comm comm, int print_lvl, PetscOptionsPrefixPop(nullptr); } } - A = B = nullptr; + A0 = A1 = nullptr; } SlepcEPSSolverBase::~SlepcEPSSolverBase() { - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(eps), &comm)); PalacePetscCall(EPSDestroy(&eps)); - delete A; - delete B; + PalacePetscCall(MatDestroy(&A0)); + PalacePetscCall(MatDestroy(&A1)); } -void SlepcEPSSolverBase::SetNumModes(int numeig, int numvec) +void SlepcEPSSolverBase::SetNumModes(int num_eig, int num_vec) { - PalacePetscCall( - EPSSetDimensions(eps, numeig, (numvec > 0) ? numvec : PETSC_DEFAULT, PETSC_DEFAULT)); + PalacePetscCall(EPSSetDimensions(eps, num_eig, (num_vec > 0) ? num_vec : PETSC_DEFAULT, + PETSC_DEFAULT)); } -void SlepcEPSSolverBase::SetTol(double tol) +void SlepcEPSSolverBase::SetTol(PetscReal tol) { PalacePetscCall(EPSSetTolerances(eps, tol, PETSC_DEFAULT)); PalacePetscCall(EPSSetConvergenceTest(eps, EPS_CONV_REL)); @@ -358,13 +434,13 @@ void SlepcEPSSolverBase::SetTol(double tol) // PalacePetscCall(EPSSetTrueResidual(eps, PETSC_TRUE)); } -void SlepcEPSSolverBase::SetMaxIter(int maxits) +void SlepcEPSSolverBase::SetMaxIter(int max_it) { PalacePetscCall( - EPSSetTolerances(eps, PETSC_DEFAULT, (maxits > 0) ? maxits : PETSC_DEFAULT)); + EPSSetTolerances(eps, PETSC_DEFAULT, (max_it > 0) ? 
max_it : PETSC_DEFAULT)); } -void SlepcEPSSolverBase::SetWhichEigenpairs(EigenSolverBase::WhichType type) +void SlepcEPSSolverBase::SetWhichEigenpairs(EigenvalueSolver::WhichType type) { switch (type) { @@ -401,7 +477,7 @@ void SlepcEPSSolverBase::SetWhichEigenpairs(EigenSolverBase::WhichType type) } } -void SlepcEPSSolverBase::SetProblemType(SlepcEigenSolver::ProblemType type) +void SlepcEPSSolverBase::SetProblemType(SlepcEigenvalueSolver::ProblemType type) { switch (type) { @@ -428,7 +504,7 @@ void SlepcEPSSolverBase::SetProblemType(SlepcEigenSolver::ProblemType type) } } -void SlepcEPSSolverBase::SetType(SlepcEigenSolver::Type type) +void SlepcEPSSolverBase::SetType(SlepcEigenvalueSolver::Type type) { switch (type) { @@ -453,46 +529,54 @@ void SlepcEPSSolverBase::SetType(SlepcEigenSolver::Type type) } } -void SlepcEPSSolverBase::SetInitialSpace(const petsc::PetscParVector &v) +void SlepcEPSSolverBase::SetInitialSpace(const ComplexVector &v) { + MFEM_VERIFY( + A0 && A1, + "Must call SetOperators before using SetInitialSpace for SLEPc eigenvalue solver!"); if (!v0) { - v0 = new petsc::PetscParVector(v); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - else - { - MFEM_VERIFY(v.GetSize() == v0->GetSize(), - "Invalid modification of eigenvalue problem size!"); - v0->Copy(v); - } - Vec is[1]; - is[0] = *v0; + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(v.Size() == n, "Invalid size mismatch for provided initial space vector!"); + + PetscScalar *pv0; + PalacePetscCall(VecGetArrayWrite(v0, &pv0)); + v.Get(pv0, n); + PalacePetscCall(VecRestoreArrayWrite(v0, &pv0)); + + Vec is[1] = {v0}; PalacePetscCall(EPSSetInitialSpace(eps, 1, is)); } void SlepcEPSSolverBase::Customize() { - SlepcEigenSolver::Customize(); + SlepcEigenvalueSolver::Customize(); PalacePetscCall(EPSSetTarget(eps, sigma / gamma)); - if (!clcustom) + if (!cl_custom) { PalacePetscCall(EPSSetFromOptions(eps)); - // if (print > 0) // These are printed by PETSc linear solver - 
// { - // PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); - // Mpi::Print(GetComm(), "\n"); - // } - clcustom = true; + if (print > 0) + { + PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); + Mpi::Print(GetComm(), "\n"); + } + cl_custom = true; } } int SlepcEPSSolverBase::Solve() { - MFEM_VERIFY(A && B && opInv, "Operators are not set for SlepcEPSSolverBase!"); - PetscInt numconv; + MFEM_VERIFY(A0 && A1 && opInv, "Operators are not set for SlepcEPSSolverBase!"); + + // Solve the eigenvalue problem. + PetscInt num_conv; Customize(); PalacePetscCall(EPSSolve(eps)); - PalacePetscCall(EPSGetConverged(eps, &numconv)); + PalacePetscCall(EPSGetConverged(eps, &num_conv)); if (print > 0) { Mpi::Print(GetComm(), "\n"); @@ -500,27 +584,40 @@ int SlepcEPSSolverBase::Solve() Mpi::Print(GetComm(), " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - opInv->GetTotalNumMult(), opInv->GetTotalNumIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIterations()); } - delete[] res; - res = new PetscReal[numconv]; - for (PetscInt i = 0; i < numconv; i++) + + // Compute and store the eigenpair residuals. 
+ res = std::make_unique(num_conv); + for (int i = 0; i < num_conv; i++) { - res[i] = -1.0; + res.get()[i] = GetResidualNorm(i); } - return (int)numconv; + return (int)num_conv; } -void SlepcEPSSolverBase::GetEigenvalue(int i, double &eigr, double &eigi) const +PetscScalar SlepcEPSSolverBase::GetEigenvalue(int i) const { - PetscScalar eig; - PalacePetscCall(EPSGetEigenvalue(eps, i, &eig, nullptr)); - GetBackTransform(eig, eigr, eigi); + PetscScalar l; + PalacePetscCall(EPSGetEigenvalue(eps, i, &l, nullptr)); + return l * gamma; } -void SlepcEPSSolverBase::GetEigenvector(int i, petsc::PetscParVector &v) const +void SlepcEPSSolverBase::GetEigenvector(int i, ComplexVector &x) const { - PalacePetscCall(EPSGetEigenvector(eps, i, v, nullptr)); + MFEM_VERIFY( + v0, + "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); + PalacePetscCall(EPSGetEigenvector(eps, i, v0, nullptr)); + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(x.Size() == n, "Invalid size mismatch for provided eigenvector!"); + + const PetscScalar *pv0; + PalacePetscCall(VecGetArrayRead(v0, &pv0)); + x.Set(pv0, n); + PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); } BV SlepcEPSSolverBase::GetBV() const @@ -544,51 +641,40 @@ RG SlepcEPSSolverBase::GetRG() const return rg; } -MPI_Comm SlepcEPSSolverBase::GetComm() const -{ - return eps ? 
PetscObjectComm(reinterpret_cast(eps)) : MPI_COMM_NULL; -} - -SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) - : SlepcEPSSolverBase(comm, print_lvl, prefix) +SlepcEPSSolver::SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix) + : SlepcEPSSolverBase(comm, print, prefix) { opK = opM = nullptr; normK = normM = 0.0; } -void SlepcEPSSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcEPSSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { // Construct shell matrices for the scaled operators which define the generalized // eigenvalue problem. bool first = (opK == nullptr); + opK = &K; + opM = &M; + + if (first) { - Mat A_, B_; - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); + PetscInt n = opK->Height(); PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B_)); - PalacePetscCall(MatShellSetOperation( - A_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_EPS_A))); - PalacePetscCall(MatShellSetOperation( - B_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_EPS_B))); - delete A; - delete B; - A = new petsc::PetscParMatrix(A_, false); // Inherits the PETSc Mat - B = new petsc::PetscParMatrix(B_, false); + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); + PalacePetscCall( + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_EPS_A1)); + PalacePetscCall(EPSSetOperators(eps, A0, A1)); } - PalacePetscCall(EPSSetOperators(eps, *A, *B)); - opK = &K; - opM = &M; + if (first && type != ScaleType::NONE) { - normK = 
opK->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normM >= 0.0, "Invalid matrix norms for EPS scaling!"); if (normK > 0 && normM > 0.0) { @@ -600,103 +686,95 @@ void SlepcEPSSolver::SetOperators(const petsc::PetscParMatrix &K, // Set up workspace. if (!v0) { - v0 = new petsc::PetscParVector(K); - } - if (!r0) - { - r0 = new petsc::PetscParVector(K); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } + x.SetSize(opK->Height()); + y.SetSize(opK->Height()); - // Configure linear solver. + // Configure linear solver for generalized problem or spectral transformation. This also + // allows use of the divergence-free projector as a linear solve side-effect. if (first) { - SetPCShell((void *)this, __pc_apply_EPS); + ConfigurePCShell(GetST(), (void *)this, __pc_apply_EPS); } } -void SlepcEPSSolver::GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const +void SlepcEPSSolver::SetBMat(const Operator &B) { - // r = (K - λ M) v for eigenvalue λ. - opM->Mult(v, r); - r.Scale(-eig); - opK->MultAdd(v, r); + SlepcEigenvalueSolver::SetBMat(B); + + PetscInt n = B.Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)(void))__mat_apply_EPS_B)); + + BV bv = GetBV(); + PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -PetscReal SlepcEPSSolver::GetBackwardScaling(PetscScalar eig) const +PetscReal SlepcEPSSolver::GetResidualNorm(int i) const +{ + // Compute the i-th eigenpair residual: || (K - λ M) x ||₂ for eigenvalue λ. 
+ PetscScalar l = GetEigenvalue(i); + GetEigenvector(i, x); + opK->Mult(x, y); + opM->AddMult(x, y, -l); + return linalg::Norml2(GetComm(), y); +} + +PetscReal SlepcEPSSolver::GetBackwardScaling(PetscScalar l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc typically uses ||.||∞, not the 2-norm. if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); } - return normK + PetscAbsScalar(eig) * normM; + return normK + PetscAbsScalar(l) * normM; } -SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print_lvl, +SlepcPEPLinearSolver::SlepcPEPLinearSolver(MPI_Comm comm, int print, const std::string &prefix) - : SlepcEPSSolverBase(comm, print_lvl, prefix) + : SlepcEPSSolverBase(comm, print, prefix) { opK = opC = opM = nullptr; normK = normC = normM = 0.0; - B0 = nullptr; - opB = nullptr; - x1 = x2 = y1 = y2 = z = nullptr; } -SlepcPEPLinearSolver::~SlepcPEPLinearSolver() -{ - delete B0; - delete x1; - delete x2; - delete y1; - delete y2; - delete z; -} - -void SlepcPEPLinearSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcPEPLinearSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { // Construct shell matrices for the scaled linearized operators which define the block 2x2 // eigenvalue problem. 
bool first = (opK == nullptr); + opK = &K; + opC = &C; + opM = &M; + + if (first) { - Mat A_, B_; - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A_)); + PetscInt n = opK->Height(); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &A0)); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &A1)); PalacePetscCall( - MatCreateShell(comm, 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B_)); + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L0)); PalacePetscCall( - MatShellSetOperation(A_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_PEPLinear_L0))); - PalacePetscCall( - MatShellSetOperation(B_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_PEPLinear_L1))); - delete A; - delete B; - A = new petsc::PetscParMatrix(A_, false); // Inherits the PETSc Mat - B = new petsc::PetscParMatrix(B_, false); + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_L1)); + PalacePetscCall(EPSSetOperators(eps, A0, A1)); } - PalacePetscCall(EPSSetOperators(eps, *A, *B)); - opK = &K; - opC = &C; - opM = &M; + if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0, "Invalid matrix norms for PEP scaling!"); if (normK > 0 && normC > 0.0 && normM > 0.0) @@ -709,175 +787,122 @@ void SlepcPEPLinearSolver::SetOperators(const petsc::PetscParMatrix &K, // Set up workspace. 
if (!v0) { - v0 = new petsc::PetscParVector(K); - } - if (!r0) - { - r0 = new petsc::PetscParVector(K); - } - if (!z) - { - z = new petsc::PetscParVector(*A); - } - if (!x1) - { - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); - delete x1; - delete x2; - delete y1; - delete y2; - x1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - x2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y1 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); - y2 = new petsc::PetscParVector(comm, n, PETSC_DECIDE, nullptr); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } + x1.SetSize(opK->Height()); + x2.SetSize(opK->Height()); + y1.SetSize(opK->Height()); + y2.SetSize(opK->Height()); // Configure linear solver. if (first) { - SetPCShell((void *)this, __pc_apply_PEPLinear); + ConfigurePCShell(GetST(), (void *)this, __pc_apply_PEPLinear); } } -void SlepcPEPLinearSolver::SetBMat(const petsc::PetscParMatrix &B) +void SlepcPEPLinearSolver::SetBMat(const Operator &B) { - // Construct an SPD linearized mass matrix for weighted inner products. 
- Mat B0_; - MPI_Comm comm = GetComm(); - PetscInt n = B.GetNumRows(); - PalacePetscCall( - MatCreateShell(comm, 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0_)); + SlepcEigenvalueSolver::SetBMat(B); + + PetscInt n = B.Height(); + PalacePetscCall(MatCreateShell(GetComm(), 2 * n, 2 * n, PETSC_DECIDE, PETSC_DECIDE, + (void *)this, &B0)); PalacePetscCall( - MatShellSetOperation(B0_, MATOP_MULT, - (void (*)()) static_cast( - &__mat_apply_PEPLinear_B))); - delete B0; - B0 = new petsc::PetscParMatrix(B0_, false); // Inherits the PETSc Mat - opB = &B; - SlepcEigenSolver::SetBMat(*B0); + MatShellSetOperation(B0, MATOP_MULT, (void (*)(void))__mat_apply_PEPLinear_B)); + + BV bv = GetBV(); + PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -void SlepcPEPLinearSolver::SetInitialSpace(const petsc::PetscParVector &v) +void SlepcPEPLinearSolver::SetInitialSpace(const ComplexVector &v) { - if (!z) - { - z = new petsc::PetscParVector(v.GetComm(), 2 * v.GetSize(), PETSC_DECIDE); - } - else - { - MFEM_VERIFY(2 * v.GetSize() == z->GetSize(), - "Invalid modification of eigenvalue problem size!"); - } + MFEM_VERIFY( + A0 && A1, + "Must call SetOperators before using SetInitialSpace for SLEPc eigenvalue solver!"); + if (!v0) { - PetscScalar *pz = GetBlocks(*z, *x1, *x2); - x1->Copy(v); - x2->SetZero(); - RestoreBlocks(pz, *z, *x1, *x2); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - Vec is[1]; - is[0] = *z; + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(2 * v.Size() == n, + "Invalid size mismatch for provided initial space vector!"); + + PetscScalar *pv0; + PalacePetscCall(VecGetArrayWrite(v0, &pv0)); + v.Get(pv0, n / 2); + std::fill(pv0 + n / 2, pv0 + n, 0.0); + PalacePetscCall(VecRestoreArrayWrite(v0, &pv0)); + + Vec is[1] = {v0}; PalacePetscCall(EPSSetInitialSpace(eps, 1, is)); } -void SlepcPEPLinearSolver::GetEigenvector(int i, petsc::PetscParVector &v) const +void SlepcPEPLinearSolver::GetEigenvector(int i, ComplexVector &x) 
const { - // Select the most accurate v for z = [v₁; v₂] from the linearized eigenvalue problem. - PalacePetscCall(EPSGetEigenvector(eps, i, *z, nullptr)); - const PetscScalar *pz = GetBlocksRead(*z, *x1, *x2); + // Select the most accurate x for y = [x₁; x₂] from the linearized eigenvalue problem. Or, + // just take x = x₁. + MFEM_VERIFY( + v0, + "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); + PalacePetscCall(EPSGetEigenvector(eps, i, v0, nullptr)); + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(2 * x.Size() == n, "Invalid size mismatch for provided eigenvector!"); + + const PetscScalar *pv0; + PalacePetscCall(VecGetArrayRead(v0, &pv0)); + x.Set(pv0, n / 2); + PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); + + if (opB) { - if (opB) - { - x1->Normalize(*opB, *r0); - } - else - { - x1->Normalize(); - } - v.Copy(*x1); + linalg::Normalize(GetComm(), x, *opB, y1); + } + else + { + linalg::Normalize(GetComm(), x); } - RestoreBlocksRead(pz, *z, *x1, *x2); } -void SlepcPEPLinearSolver::GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const +PetscReal SlepcPEPLinearSolver::GetResidualNorm(int i) const { - // r = P(λ) v = (K + λ C + λ² M) v for eigenvalue λ. - opM->Mult(v, r); - r.Scale(eig); - opC->MultAdd(v, r); - r.Scale(eig); - opK->MultAdd(v, r); + // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for + // eigenvalue λ. + PetscScalar l = GetEigenvalue(i); + GetEigenvector(i, x1); + opK->Mult(x1, y1); + opC->AddMult(x1, y1, l); + opM->AddMult(x1, y1, l * l); + return linalg::Norml2(GetComm(), y1); } -PetscReal SlepcPEPLinearSolver::GetBackwardScaling(PetscScalar eig) const +PetscReal SlepcPEPLinearSolver::GetBackwardScaling(PetscScalar l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc typically uses ||.||∞, not the 2-norm. 
if (normK <= 0.0) { - normK = opK->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); } if (normC <= 0.0) { - normC = opC->Norm2(); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); } if (normM <= 0.0) { - normM = opM->Norm2(); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); } - PetscReal t = PetscAbsScalar(eig); + PetscReal t = PetscAbsScalar(l); return normK + t * normC + t * t * normM; } -PetscScalar *SlepcPEPLinearSolver::GetBlocks(petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - PetscInt n1 = v1.GetSize(), n2 = v2.GetSize(); - MFEM_VERIFY(n1 + n2 == v.GetSize(), "Unexpected size in PEP linearization!"); - PetscScalar *pv = v.GetArray(); - v1.PlaceArray(pv); - v2.PlaceArray(pv + n1); - return pv; -} - -const PetscScalar *SlepcPEPLinearSolver::GetBlocksRead(const petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - PetscInt n1 = v1.GetSize(), n2 = v2.GetSize(); - MFEM_VERIFY(n1 + n2 == v.GetSize(), "Unexpected size in PEP linearization!"); - const PetscScalar *pv = v.GetArrayRead(); - v1.PlaceArray(pv); - v2.PlaceArray(pv + n1); - return pv; -} - -void SlepcPEPLinearSolver::RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - v1.ResetArray(); - v2.ResetArray(); - v.RestoreArray(pv); -} - -void SlepcPEPLinearSolver::RestoreBlocksRead(const PetscScalar *pv, - const petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const -{ - v1.ResetArray(); - v2.ResetArray(); - v.RestoreArrayRead(pv); -} - // PEP specific methods -SlepcPEPSolverBase::SlepcPEPSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix) - : SlepcEigenSolver(print_lvl) +SlepcPEPSolverBase::SlepcPEPSolverBase(MPI_Comm comm, int print, const std::string &prefix) + : SlepcEigenvalueSolver(print) { PalacePetscCall(PEPCreate(comm, &pep)); 
PalacePetscCall(PEPSetOptionsPrefix(pep, prefix.c_str())); @@ -903,34 +928,32 @@ SlepcPEPSolverBase::SlepcPEPSolverBase(MPI_Comm comm, int print_lvl, SlepcPEPSolverBase::~SlepcPEPSolverBase() { - MPI_Comm comm; - PalacePetscCall(PetscObjectGetComm(reinterpret_cast(pep), &comm)); PalacePetscCall(PEPDestroy(&pep)); - delete A0; - delete A1; - delete A2; + PalacePetscCall(MatDestroy(&A0)); + PalacePetscCall(MatDestroy(&A1)); + PalacePetscCall(MatDestroy(&A2)); } -void SlepcPEPSolverBase::SetNumModes(int numeig, int numvec) +void SlepcPEPSolverBase::SetNumModes(int num_eig, int num_vec) { - PalacePetscCall( - PEPSetDimensions(pep, numeig, (numvec > 0) ? numvec : PETSC_DEFAULT, PETSC_DEFAULT)); + PalacePetscCall(PEPSetDimensions(pep, num_eig, (num_vec > 0) ? num_vec : PETSC_DEFAULT, + PETSC_DEFAULT)); } -void SlepcPEPSolverBase::SetTol(double tol) +void SlepcPEPSolverBase::SetTol(PetscReal tol) { PalacePetscCall(PEPSetTolerances(pep, tol, PETSC_DEFAULT)); PalacePetscCall(PEPSetConvergenceTest(pep, PEP_CONV_REL)); // PalacePetscCall(PEPSetTrackAll(pep, PETSC_TRUE)); } -void SlepcPEPSolverBase::SetMaxIter(int maxits) +void SlepcPEPSolverBase::SetMaxIter(int max_it) { PalacePetscCall( - PEPSetTolerances(pep, PETSC_DEFAULT, (maxits > 0) ? maxits : PETSC_DEFAULT)); + PEPSetTolerances(pep, PETSC_DEFAULT, (max_it > 0) ? 
max_it : PETSC_DEFAULT)); } -void SlepcPEPSolverBase::SetWhichEigenpairs(EigenSolverBase::WhichType type) +void SlepcPEPSolverBase::SetWhichEigenpairs(EigenvalueSolver::WhichType type) { switch (type) { @@ -967,7 +990,7 @@ void SlepcPEPSolverBase::SetWhichEigenpairs(EigenSolverBase::WhichType type) } } -void SlepcPEPSolverBase::SetProblemType(SlepcEigenSolver::ProblemType type) +void SlepcPEPSolverBase::SetProblemType(SlepcEigenvalueSolver::ProblemType type) { switch (type) { @@ -989,7 +1012,7 @@ void SlepcPEPSolverBase::SetProblemType(SlepcEigenSolver::ProblemType type) } } -void SlepcPEPSolverBase::SetType(SlepcEigenSolver::Type type) +void SlepcPEPSolverBase::SetType(SlepcEigenvalueSolver::Type type) { switch (type) { @@ -1014,46 +1037,54 @@ void SlepcPEPSolverBase::SetType(SlepcEigenSolver::Type type) } } -void SlepcPEPSolverBase::SetInitialSpace(const petsc::PetscParVector &v) +void SlepcPEPSolverBase::SetInitialSpace(const ComplexVector &v) { + MFEM_VERIFY( + A0 && A1 && A2, + "Must call SetOperators before using SetInitialSpace for SLEPc eigenvalue solver!"); if (!v0) { - v0 = new petsc::PetscParVector(v); - } - else - { - MFEM_VERIFY(v.GetSize() == v0->GetSize(), - "Invalid modification of eigenvalue problem size!"); - v0->Copy(v); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } - Vec is[1]; - is[0] = *v0; + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(v.Size() == n, "Invalid size mismatch for provided initial space vector!"); + + PetscScalar *pv0; + PalacePetscCall(VecGetArrayWrite(v0, &pv0)); + v.Get(pv0, n); + PalacePetscCall(VecRestoreArrayWrite(v0, &pv0)); + + Vec is[1] = {v0}; PalacePetscCall(PEPSetInitialSpace(pep, 1, is)); } void SlepcPEPSolverBase::Customize() { - SlepcEigenSolver::Customize(); + SlepcEigenvalueSolver::Customize(); PalacePetscCall(PEPSetTarget(pep, sigma / gamma)); - if (!clcustom) + if (!cl_custom) { PalacePetscCall(PEPSetFromOptions(pep)); - // if (print > 0) // These are printed by PETSc linear 
solver - // { - // PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); - // Mpi::Print(GetComm(), "\n"); - // } - clcustom = true; + if (print > 0) + { + PetscOptionsView(nullptr, PETSC_VIEWER_STDOUT_(GetComm())); + Mpi::Print(GetComm(), "\n"); + } + cl_custom = true; } } int SlepcPEPSolverBase::Solve() { MFEM_VERIFY(A0 && A1 && A2 && opInv, "Operators are not set for SlepcPEPSolverBase!"); - PetscInt numconv; + + // Solve the eigenvalue problem. + PetscInt num_conv; Customize(); PalacePetscCall(PEPSolve(pep)); - PalacePetscCall(PEPGetConverged(pep, &numconv)); + PalacePetscCall(PEPGetConverged(pep, &num_conv)); if (print > 0) { Mpi::Print(GetComm(), "\n"); @@ -1061,27 +1092,40 @@ int SlepcPEPSolverBase::Solve() Mpi::Print(GetComm(), " Total number of linear systems solved: {:d}\n" " Total number of linear solver iterations: {:d}\n", - opInv->GetTotalNumMult(), opInv->GetTotalNumIter()); + opInv->NumTotalMult(), opInv->NumTotalMultIterations()); } - delete[] res; - res = new PetscReal[numconv]; - for (PetscInt i = 0; i < numconv; i++) + + // Compute and store the eigenpair residuals. 
+ res = std::make_unique(num_conv); + for (int i = 0; i < num_conv; i++) { - res[i] = -1.0; + res.get()[i] = GetResidualNorm(i); } - return (int)numconv; + return (int)num_conv; } -void SlepcPEPSolverBase::GetEigenvalue(int i, double &eigr, double &eigi) const +PetscScalar SlepcPEPSolverBase::GetEigenvalue(int i) const { - PetscScalar eig; - PalacePetscCall(PEPGetEigenpair(pep, i, &eig, nullptr, nullptr, nullptr)); - GetBackTransform(eig, eigr, eigi); + PetscScalar l; + PalacePetscCall(PEPGetEigenpair(pep, i, &l, nullptr, nullptr, nullptr)); + return l * gamma; } -void SlepcPEPSolverBase::GetEigenvector(int i, petsc::PetscParVector &v) const +void SlepcPEPSolverBase::GetEigenvector(int i, ComplexVector &x) const { - PalacePetscCall(PEPGetEigenpair(pep, i, nullptr, nullptr, v, nullptr)); + MFEM_VERIFY( + v0, + "Must call SetOperators before using GetEigenvector for SLEPc eigenvalue solver!"); + PalacePetscCall(PEPGetEigenpair(pep, i, nullptr, nullptr, v0, nullptr)); + + PetscInt n; + PalacePetscCall(VecGetLocalSize(v0, &n)); + MFEM_VERIFY(x.Size() == n, "Invalid size mismatch for provided eigenvector!"); + + const PetscScalar *pv0; + PalacePetscCall(VecGetArrayRead(v0, &pv0)); + x.Set(pv0, n); + PalacePetscCall(VecRestoreArrayRead(v0, &pv0)); } BV SlepcPEPSolverBase::GetBV() const @@ -1105,62 +1149,48 @@ RG SlepcPEPSolverBase::GetRG() const return rg; } -MPI_Comm SlepcPEPSolverBase::GetComm() const -{ - return pep ? 
PetscObjectComm(reinterpret_cast(pep)) : MPI_COMM_NULL; -} - -SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print_lvl, const std::string &prefix) - : SlepcPEPSolverBase(comm, print_lvl, prefix) +SlepcPEPSolver::SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix) + : SlepcPEPSolverBase(comm, print, prefix) { opK = opC = opM = nullptr; normK = normC = normM = 0.0; } -void SlepcPEPSolver::SetOperators(const petsc::PetscParMatrix &K, - const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, - EigenSolverBase::ScaleType type) +void SlepcPEPSolver::SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, + EigenvalueSolver::ScaleType type) { // Construct shell matrices for the scaled operators which define the quadratic polynomial // eigenvalue problem. bool first = (opK == nullptr); + opK = &K; + opC = &C; + opM = &M; + + if (first) { - Mat A0_, A1_, A2_; - MPI_Comm comm = GetComm(); - PetscInt n = K.GetNumRows(); + PetscInt n = opK->Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0)); PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A0_)); + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1)); PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A1_)); + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2)); PalacePetscCall( - MatCreateShell(comm, n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &A2_)); - PalacePetscCall(MatShellSetOperation( - A0_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_PEP_A0))); - PalacePetscCall(MatShellSetOperation( - A1_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_PEP_A1))); - PalacePetscCall(MatShellSetOperation( - A2_, MATOP_MULT, - (void (*)()) static_cast(&__mat_apply_PEP_A2))); - delete A0; - delete A1; - delete A2; - A0 = new petsc::PetscParMatrix(A0_, false); // 
Inherits the PETSc Mat - A1 = new petsc::PetscParMatrix(A1_, false); - A2 = new petsc::PetscParMatrix(A2_, false); + MatShellSetOperation(A0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A0)); + PalacePetscCall( + MatShellSetOperation(A1, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A1)); + PalacePetscCall( + MatShellSetOperation(A2, MATOP_MULT, (void (*)(void))__mat_apply_PEP_A2)); + Mat A[3] = {A0, A1, A2}; + PalacePetscCall(PEPSetOperators(pep, 3, A)); } - Mat A[3] = {*A0, *A1, *A2}; - PalacePetscCall(PEPSetOperators(pep, 3, A)); - opK = &K; - opC = &C; - opM = &M; + if (first && type != ScaleType::NONE) { - normK = opK->Norm2(); - normC = opC->Norm2(); - normM = opM->Norm2(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); MFEM_VERIFY(normK >= 0.0 && normC >= 0.0 && normM >= 0.0, "Invalid matrix norms for PEP scaling!"); if (normK > 0 && normC > 0.0 && normM > 0.0) @@ -1173,349 +1203,498 @@ void SlepcPEPSolver::SetOperators(const petsc::PetscParMatrix &K, // Set up workspace. if (!v0) { - v0 = new petsc::PetscParVector(K); - } - if (!r0) - { - r0 = new petsc::PetscParVector(K); + PalacePetscCall(MatCreateVecs(A0, nullptr, &v0)); } + x.SetSize(opK->Height()); + y.SetSize(opK->Height()); // Configure linear solver. if (first) { - SetPCShell((void *)this, __pc_apply_PEP); + ConfigurePCShell(GetST(), (void *)this, __pc_apply_PEP); } } -void SlepcPEPSolver::GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const +void SlepcPEPSolver::SetBMat(const Operator &B) { - // r = P(λ) v = (K + λ C + λ² M) v for eigenvalue λ. 
- opM->Mult(v, r); - r.Scale(eig); - opC->MultAdd(v, r); - r.Scale(eig); - opK->MultAdd(v, r); + SlepcEigenvalueSolver::SetBMat(B); + + PetscInt n = B.Height(); + PalacePetscCall( + MatCreateShell(GetComm(), n, n, PETSC_DECIDE, PETSC_DECIDE, (void *)this, &B0)); + PalacePetscCall(MatShellSetOperation(B0, MATOP_MULT, (void (*)(void))__mat_apply_PEP_B)); + + BV bv = GetBV(); + PalacePetscCall(BVSetMatrix(bv, B0, PETSC_FALSE)); } -PetscReal SlepcPEPSolver::GetBackwardScaling(PetscScalar eig) const +PetscReal SlepcPEPSolver::GetResidualNorm(int i) const +{ + // Compute the i-th eigenpair residual: || P(λ) x ||₂ = || (K + λ C + λ² M) x ||₂ for + // eigenvalue λ. + PetscScalar l = GetEigenvalue(i); + GetEigenvector(i, x); + opK->Mult(x, y); + opC->AddMult(x, y, l); + opM->AddMult(x, y, l * l); + return linalg::Norml2(GetComm(), y); +} + +PetscReal SlepcPEPSolver::GetBackwardScaling(PetscScalar l) const { // Make sure not to use norms from scaling as this can be confusing if they are different. // Note that SLEPc typically uses ||.||∞, not Frobenius. if (normK <= 0.0) { - normK = opK->NormInf(); + normK = linalg::SpectralNorm(GetComm(), *opK, opK->IsReal()); } if (normC <= 0.0) { - normC = opC->NormInf(); + normC = linalg::SpectralNorm(GetComm(), *opC, opC->IsReal()); } if (normM <= 0.0) { - normM = opM->NormInf(); + normM = linalg::SpectralNorm(GetComm(), *opM, opM->IsReal()); } - PetscReal t = PetscAbsScalar(eig); + PetscReal t = PetscAbsScalar(l); return normK + t * normC + t * t * normM; } } // namespace palace::slepc -PetscErrorCode __mat_apply_EPS_A(Mat A, Vec x, Vec y) +PetscErrorCode __mat_apply_EPS_A0(Mat A, Vec x, Vec y) { - // Apply the operator: K (no transform) or M . 
- palace::slepc::SlepcEPSSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opK->Mult(ctx->x, ctx->y); + ctx->y *= ctx->delta; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_EPS_A1(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opM->Mult(ctx->x, ctx->y); + ctx->y *= ctx->delta * ctx->gamma; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpK()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta()); - } PetscFunctionReturn(0); } PetscErrorCode __mat_apply_EPS_B(Mat A, Vec x, Vec y) { - // Apply the operator: M (no transform) or (K - σ M) . 
- palace::slepc::SlepcEPSSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opB->Mult(ctx->x.Real(), ctx->y.Real()); + ctx->opB->Mult(ctx->x.Imag(), ctx->y.Imag()); + ctx->y *= ctx->delta * ctx->gamma; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpM()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma()); - } PetscFunctionReturn(0); } PetscErrorCode __pc_apply_EPS(PC pc, Vec x, Vec y) { - // Solve the linear system associated with the generalized eigenvalue problem: y = M⁻¹ x , - // or shift-and-invert spectral transformation: y =(K - σ M)⁻¹ x . Enforces the + // Solve the linear system associated with the generalized eigenvalue problem: y = + // M⁻¹ x, or shift-and-invert spectral transformation: y = (K - σ M)⁻¹ x . Enforces the // divergence-free constraint using the supplied projector. 
- palace::slepc::SlepcEPSSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); PetscFunctionBeginUser; + palace::slepc::SlepcEPSSolver *ctx; + PetscCall(PCShellGetContext(pc, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); - PetscCall(PCShellGetContext(pc, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell PC context for SLEPc!"); - slepc->GetKspSolver()->Mult(xx, yy); - if (!slepc->IsShiftInvert()) + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opInv->Mult(ctx->x, ctx->y); + if (!ctx->sinvert) { - yy.Scale(1.0 / (slepc->GetScalingDelta() * slepc->GetScalingGamma())); + ctx->y *= 1.0 / (ctx->delta * ctx->gamma); } else { - yy.Scale(1.0 / slepc->GetScalingDelta()); + ctx->y *= 1.0 / ctx->delta; } - - // Debug - // Mpi::Print(" Before projection: {:e}\n", yy.Norml2()); - - if (slepc->GetDivFreeSolver()) + if (ctx->opProj) { - slepc->GetDivFreeSolver()->Mult(yy); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); + ctx->opProj->Mult(ctx->y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); } - // Debug - // Mpi::Print(" After projection: {:e}\n", yy.Norml2()); + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); PetscFunctionReturn(0); } PetscErrorCode __mat_apply_PEPLinear_L0(Mat A, Vec x, Vec y) { - // Apply the linearized operator: L₀ (no transform) or L₁ . With: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . - palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); + // Apply the linearized operator L₀ = [ 0 I ] + // [ -K -C ] . 
+ PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->y1 = ctx->x2; + ctx->opC->Mult(ctx->x2, ctx->y2); + ctx->y2 *= ctx->gamma; + ctx->opK->AddMult(ctx->x1, ctx->y2, std::complex(1.0, 0.0)); + ctx->y2 *= -ctx->delta; + PetscScalar *py; - PetscFunctionBeginUser; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - { - slepc->GetY1()->Copy(*slepc->GetX2()); - slepc->GetOpC()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(slepc->GetScalingGamma()); - slepc->GetOpK()->MultAdd(*slepc->GetX1(), *slepc->GetY2()); - slepc->GetY2()->Scale(-slepc->GetScalingDelta()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); PetscFunctionReturn(0); } PetscErrorCode __mat_apply_PEPLinear_L1(Mat A, Vec x, Vec y) { - // Apply the linearized operator: L₁ (no transform) or (L₀ - σ L₁) . With: - // L₀ = [ 0 I ] L₁ = [ I 0 ] - // [ -K -C ] , [ 0 M ] . - palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); + // Apply the linearized operator L₁ = [ I 0 ] + // [ 0 M ] . 
+ PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->y1 = ctx->x1; + ctx->opM->Mult(ctx->x2, ctx->y2); + ctx->y2 *= ctx->delta * ctx->gamma * ctx->gamma; + PetscScalar *py; - PetscFunctionBeginUser; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - { - slepc->GetY1()->Copy(*slepc->GetX1()); - slepc->GetOpM()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); PetscFunctionReturn(0); } PetscErrorCode __mat_apply_PEPLinear_B(Mat A, Vec x, Vec y) { - // Apply the linearized mass matrix L₁ using the supplied SPD mass matrix. 
- palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); + PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opB->Mult(ctx->x1.Real(), ctx->y1.Real()); + ctx->opB->Mult(ctx->x1.Imag(), ctx->y1.Imag()); + ctx->opB->Mult(ctx->x2.Real(), ctx->y2.Real()); + ctx->opB->Mult(ctx->x2.Imag(), ctx->y2.Imag()); + ctx->y1 *= ctx->delta * ctx->gamma * ctx->gamma; + ctx->y2 *= ctx->delta * ctx->gamma * ctx->gamma; + PetscScalar *py; - PetscFunctionBeginUser; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - { - slepc->GetY1()->Copy(*slepc->GetX1()); - slepc->GetOpB()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma()); - } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); PetscFunctionReturn(0); } PetscErrorCode __pc_apply_PEPLinear(PC pc, Vec x, Vec y) { // Solve the linear system associated with the generalized eigenvalue problem after - // linearization: y = L₁⁻¹ x , or with the shift-and-invert spectral transformation: y = - // (L₀ - σ L₁)⁻¹ x . Enforces the divergence-free constraint using the supplied - // projectors. 
- palace::slepc::SlepcPEPLinearSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); - const PetscScalar *px; - PetscScalar *py; - PetscScalar sigma; - PetscReal gamma, delta; + // linearization: y = L₁⁻¹ x, or with the shift-and-invert spectral transformation: + // y = (L₀ - σ L₁)⁻¹ x, with: + // L₀ = [ 0 I ] L₁ = [ I 0 ] + // [ -K -C ] , [ 0 M ] . + // Enforces the divergence-free constraint using the supplied projector. PetscFunctionBeginUser; + palace::slepc::SlepcPEPLinearSolver *ctx; + PetscCall(PCShellGetContext(pc, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); - PetscCall(PCShellGetContext(pc, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell PC context for SLEPc!"); - sigma = slepc->GetTarget(); - gamma = slepc->GetScalingGamma(); - delta = slepc->GetScalingDelta(); - px = slepc->GetBlocksRead(xx, *slepc->GetX1(), *slepc->GetX2()); - py = slepc->GetBlocks(yy, *slepc->GetY1(), *slepc->GetY2()); - if (!slepc->IsShiftInvert()) + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x1.Set(px, n / 2); + ctx->x2.Set(px + n / 2, n / 2); + PetscCall(VecRestoreArrayRead(x, &px)); + + if (!ctx->sinvert) { - slepc->GetKspSolver()->Mult(*slepc->GetX2(), *slepc->GetY2()); - slepc->GetY2()->Scale(1.0 / (delta * gamma * gamma)); - if (slepc->GetDivFreeSolver()) + ctx->y1 = ctx->x1; + if (ctx->opProj) { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY2()); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + ctx->opProj->Mult(ctx->y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); } - slepc->GetY1()->Copy(*slepc->GetX1()); - if (slepc->GetDivFreeSolver()) + + ctx->opInv->Mult(ctx->x2, ctx->y2); + ctx->y2 *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); + if (ctx->opProj) { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY1()); + // Mpi::Print(" Before projection: 
{:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + ctx->opProj->Mult(ctx->y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); } } else { - slepc->GetY1()->AXPBY(-sigma / (delta * gamma), *slepc->GetX2(), 0.0); // Temporarily - slepc->GetOpK()->MultAdd(*slepc->GetX1(), *slepc->GetY1()); - slepc->GetKspSolver()->Mult(*slepc->GetY1(), *slepc->GetY2()); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", slepc->GetY2()->Norml2()); - - if (slepc->GetDivFreeSolver()) + ctx->y1.AXPBY(-ctx->sigma / (ctx->delta * ctx->gamma), ctx->x2, 0.0); // Temporarily + ctx->opK->AddMult(ctx->x1, ctx->y1, std::complex(1.0, 0.0)); + ctx->opInv->Mult(ctx->y1, ctx->y2); + if (ctx->opProj) { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY2()); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); + ctx->opProj->Mult(ctx->y2); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y2)); } - // Debug - // Mpi::Print(" After projection: {:e}\n", slepc->GetY2()->Norml2()); - - slepc->GetY1()->AXPBYPCZ(gamma / sigma, *slepc->GetY2(), -gamma / sigma, - *slepc->GetX1(), 0.0); - - // Debug - // Mpi::Print(" Before projection: {:e}\n", slepc->GetY1()->Norml2()); - - if (slepc->GetDivFreeSolver()) + ctx->y1.AXPBYPCZ(ctx->gamma / ctx->sigma, ctx->y2, -ctx->gamma / ctx->sigma, ctx->x1, + 0.0); + if (ctx->opProj) { - slepc->GetDivFreeSolver()->Mult(*slepc->GetY1()); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); + ctx->opProj->Mult(ctx->y1); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y1)); } - - // Debug - // Mpi::Print(" After projection: {:e}\n", slepc->GetY1()->Norml2()); } - slepc->RestoreBlocksRead(px, xx, *slepc->GetX1(), *slepc->GetX2()); - slepc->RestoreBlocks(py, yy, *slepc->GetY1(), *slepc->GetY2()); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y1.Get(py, n / 2); + ctx->y2.Get(py + n / 
2, n / 2); + PetscCall(VecRestoreArrayWrite(y, &py)); + PetscFunctionReturn(0); } PetscErrorCode __mat_apply_PEP_A0(Mat A, Vec x, Vec y) { - // Apply the operator: K (no transform) or M . - palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opK->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpK()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta()); - } PetscFunctionReturn(0); } PetscErrorCode __mat_apply_PEP_A1(Mat A, Vec x, Vec y) { - // Apply the operator: C (no transform) or (C + 2σ M) . 
- palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opC->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpC()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma()); - } PetscFunctionReturn(0); } PetscErrorCode __mat_apply_PEP_A2(Mat A, Vec x, Vec y) { - // Apply the operator: M (no transform) or (K + σ C + σ² M) . 
- palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opM->Mult(ctx->x, ctx->y); + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); + + PetscFunctionReturn(0); +} + +PetscErrorCode __mat_apply_PEP_B(Mat A, Vec x, Vec y) +{ + PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(MatShellGetContext(A, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell matrix context for SLEPc!"); + + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); + + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opB->Mult(ctx->x.Real(), ctx->y.Real()); + ctx->opB->Mult(ctx->x.Imag(), ctx->y.Imag()); + ctx->y *= ctx->delta * ctx->gamma; + + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); - PetscCall(MatShellGetContext(A, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell matrix context for SLEPc!"); - { - slepc->GetOpM()->Mult(xx, yy); - yy.Scale(slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma()); - } PetscFunctionReturn(0); } PetscErrorCode __pc_apply_PEP(PC pc, Vec x, Vec y) { - // Solve the linear system associated with the generalized eigenvalue problem: y = M⁻¹ x , - // or shift-and-invert spectral transformation: y = P(σ)⁻¹ x . Enforces the - // divergence-free constraint using the supplied projector. 
- palace::slepc::SlepcPEPSolver *slepc; - palace::petsc::PetscParVector xx(x, true), yy(y, true); + // Solve the linear system associated with the generalized eigenvalue problem: y = M⁻¹ x, + // or shift-and-invert spectral transformation: y = P(σ)⁻¹ x . Enforces the divergence- + // free constraint using the supplied projector. + PetscFunctionBeginUser; + palace::slepc::SlepcPEPSolver *ctx; + PetscCall(PCShellGetContext(pc, (void **)&ctx)); + MFEM_VERIFY(ctx, "Invalid PETSc shell PC context for SLEPc!"); + PetscFunctionBeginUser; + PetscInt n; + PetscCall(VecGetLocalSize(x, &n)); - PetscCall(PCShellGetContext(pc, (void **)&slepc)); - MFEM_VERIFY(slepc, "Invalid PETSc shell PC context for SLEPc!"); - slepc->GetKspSolver()->Mult(xx, yy); - if (!slepc->IsShiftInvert()) + const PetscScalar *px; + PetscCall(VecGetArrayRead(x, &px)); + ctx->x.Set(px, n); + PetscCall(VecRestoreArrayRead(x, &px)); + + ctx->opInv->Mult(ctx->x, ctx->y); + if (!ctx->sinvert) { - yy.Scale(1.0 / (slepc->GetScalingDelta() * slepc->GetScalingGamma() * - slepc->GetScalingGamma())); + ctx->y *= 1.0 / (ctx->delta * ctx->gamma * ctx->gamma); } else { - yy.Scale(1.0 / slepc->GetScalingDelta()); + ctx->y *= 1.0 / ctx->delta; } - - // Debug - // Mpi::Print(" Before projection: {:e}\n", yy); - - if (slepc->GetDivFreeSolver()) + if (ctx->opProj) { - slepc->GetDivFreeSolver()->Mult(yy); + // Mpi::Print(" Before projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); + ctx->opProj->Mult(ctx->y); + // Mpi::Print(" After projection: {:e}\n", linalg::Norml2(ctx->GetComm(), ctx->y)); } - // Debug - // Mpi::Print(" After projection: {:e}\n", yy); + PetscScalar *py; + PetscCall(VecGetArrayWrite(y, &py)); + ctx->y.Get(py, n); + PetscCall(VecRestoreArrayWrite(y, &py)); PetscFunctionReturn(0); } diff --git a/palace/linalg/slepc.hpp b/palace/linalg/slepc.hpp index ba266662b..a6f3a2ba6 100644 --- a/palace/linalg/slepc.hpp +++ b/palace/linalg/slepc.hpp @@ -9,11 +9,16 @@ #include "linalg/petsc.hpp" #if 
!defined(PETSC_USE_COMPLEX) -#error "SLEPc interface requires PETSc built with complex scalars!" +#error "SLEPc interface requires PETSc compiled with complex scalars!" #endif +#include #include -#include "linalg/eigen.hpp" +#include +#include "linalg/eps.hpp" +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" // Forward declarations of SLEPc objects. typedef struct _p_EPS *EPS; @@ -26,15 +31,6 @@ namespace palace { class DivFreeSolver; -class KspSolver; - -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc namespace slepc { @@ -45,15 +41,16 @@ void Initialize(); void Finalize(); // Compute and return the maximum singular value of the given operator, σₙ² = λₙ(Aᴴ A) . -PetscReal GetMaxSingularValue(const petsc::PetscParMatrix &A, PetscReal tol = PETSC_DEFAULT, - PetscInt maxits = PETSC_DEFAULT); +PetscReal GetMaxSingularValue(MPI_Comm comm, const ComplexOperator &A, bool herm = false, + PetscReal tol = PETSC_DEFAULT, + PetscInt max_it = PETSC_DEFAULT); // // A wrapper for the SLEPc library for generalized linear eigenvalue problems or quadratic // polynomial eigenvalue problems. Shift-and-invert spectral transformations can be used to // compute interior eigenvalues. // -class SlepcEigenSolver : public EigenSolverBase +class SlepcEigenvalueSolver : public EigenvalueSolver { public: enum class ProblemType @@ -79,9 +76,6 @@ class SlepcEigenSolver : public EigenSolverBase }; protected: - // Boolean to handle SetFromOptions calls. - mutable bool clcustom; - // Control print level for debugging. int print; @@ -93,92 +87,67 @@ class SlepcEigenSolver : public EigenSolverBase bool sinvert, region; // Storage for computed residual norms. - mutable PetscReal *res; - - // Workspace vectors for initial space and residual calculations. 
- mutable petsc::PetscParVector *v0, *r0; + std::unique_ptr res; // Reference to linear solver used for operator action for M⁻¹ (with no spectral // transformation) or (K - σ M)⁻¹ (generalized EVP with shift-and- invert) or P(σ)⁻¹ // (polynomial with shift-and-invert) (not owned). - const KspSolver *opInv; + const ComplexKspSolver *opInv; // Reference to solver for projecting an intermediate vector onto a divergence-free space // (not owned). const DivFreeSolver *opProj; - // Customize object with command line options set. - virtual void Customize(); + // Reference to matrix used for weighted inner products (not owned). May be nullptr, in + // which case identity is used. + const Operator *opB; - // Configure KSP object associated with the spectral transformation. - void SetPCShell(void *ctx, PetscErrorCode (*__pc_apply)(PC, Vec, Vec)); + // Workspace objects for eigenvalue calculations. + Mat B0; + Vec v0; - // Specify rectangular region of the complex plane, bounded by[rminr, rmaxr] x - // [rmini, rmaxi] in which to constrain eigenvalue search. - void SetRegion(PetscReal rminr, PetscReal rmaxr, PetscReal rmini, PetscReal rmaxi, - bool complement = false); + // Boolean to handle SetFromOptions calls. + mutable bool cl_custom; - // Perform the back-transformation from the spectrally transformed eigenvalue back to the - // original problem. - void GetBackTransform(PetscScalar eig, PetscReal &eigr, PetscReal &eigi) const; + // Customize object with command line options set. + virtual void Customize(); // Helper routine for computing the eigenpair residual. - virtual void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const = 0; + virtual PetscReal GetResidualNorm(int i) const = 0; // Helper routine for computing the backward error. 
- virtual PetscReal GetBackwardScaling(PetscScalar eig) const = 0; + virtual PetscReal GetBackwardScaling(PetscScalar l) const = 0; public: - SlepcEigenSolver(int print_lvl); - ~SlepcEigenSolver() override; + SlepcEigenvalueSolver(int print); + ~SlepcEigenvalueSolver() override; // Set operators for the generalized eigenvalue problem or for the quadratic polynomial // eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; // For the linear generalized case, the linear solver should be configured to compute the // action of M⁻¹ (with no spectral transformation) or (K - σ M)⁻¹. For the quadratic // case, the linear solver should be configured to compute the action of M⁻¹ (with no // spectral transformation) or P(σ)⁻¹. - void SetLinearSolver(const KspSolver &ksp) override; + void SetLinearSolver(const ComplexKspSolver &ksp) override; - // Set the projection operator or operators for the divergence-free constraint. - void SetProjector(const DivFreeSolver &divfree) override; + // Set the projection operator for enforcing the divergence-free constraint. + void SetDivFreeProjector(const DivFreeSolver &divfree) override; // Set optional B matrix used for weighted inner products. This must be set explicitly // even for generalized problems, otherwise the identity will be used. - void SetBMat(const petsc::PetscParMatrix &B) override; - - // Get spectral transformation target used by the solver. 
- bool IsShiftInvert() const { return sinvert; } - PetscScalar GetTarget() const { return sigma; } + void SetBMat(const Operator &B) override; // Get scaling factors used by the solver. - double GetScalingGamma() const override { return (double)gamma; } - double GetScalingDelta() const override { return (double)delta; } + PetscReal GetScalingGamma() const override { return gamma; } + PetscReal GetScalingDelta() const override { return delta; } // Set shift-and-invert spectral transformation. - void SetShiftInvert(double tr, double ti, bool precond = false) override; - - // Configure the basis vectors object associated with the eigenvalue solver. - void SetOrthogonalization(bool mgs, bool cgs2); - - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override = 0; - - // Set solver tolerance. - void SetTol(double tol) override = 0; - - // Set maximum number of iterations. - void SetMaxIter(int maxits) override = 0; - - // Set target spectrum for the eigensolver. When a spectral transformation is used, this - // applies to the spectrum of the shifted operator. - void SetWhichEigenpairs(WhichType type) override = 0; + void SetShiftInvert(PetscScalar s, bool precond = false) override; // Set problem type. virtual void SetProblemType(ProblemType type) = 0; @@ -186,20 +155,11 @@ class SlepcEigenSolver : public EigenSolverBase // Set eigenvalue solver. virtual void SetType(Type type) = 0; - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override = 0; - - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. - int Solve() override = 0; - - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override = 0; - - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override = 0; + // Configure the basis vectors object associated with the eigenvalue solver. 
+ void SetOrthogonalization(bool mgs, bool cgs2); // Get the corresponding eigenpair error. - void GetError(int i, ErrorType type, double &err) const override; + PetscReal GetError(int i, ErrorType type) const override; // Get the basis vectors object. virtual BV GetBV() const = 0; @@ -215,285 +175,233 @@ class SlepcEigenSolver : public EigenSolverBase // Conversion function to PetscObject. virtual operator PetscObject() const = 0; - - // Access solver object for callback functions. - const KspSolver *GetKspSolver() const { return opInv; } - - // Access solver object for callback functions. - const DivFreeSolver *GetDivFreeSolver() const { return opProj; } }; // Base class for SLEPc's EPS problem type. -class SlepcEPSSolverBase : public SlepcEigenSolver +class SlepcEPSSolverBase : public SlepcEigenvalueSolver { protected: // SLEPc eigensolver object. Polynomial problems are handled using linearization. EPS eps; // Shell matrices for the generalized eigenvalue problem. - petsc::PetscParMatrix *A, *B; + Mat A0, A1; - // Customize object with command line options set. void Customize() override; public: // Calls SLEPc's EPSCreate. Expects SLEPc to be initialized/finalized externally. - SlepcEPSSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix = std::string()); + SlepcEPSSolverBase(MPI_Comm comm, int print, const std::string &prefix = std::string()); // Call's SLEPc's EPSDestroy. ~SlepcEPSSolverBase() override; - // Set the number of required eigenmodes. - void SetNumModes(int numeig, int numvec = 0) override; + // Conversion function to SLEPc's EPS type. + operator EPS() const { return eps; } + + void SetNumModes(int num_eig, int num_vec = 0) override; - // Set solver tolerance. - void SetTol(double tol) override; + void SetTol(PetscReal tol) override; - // Set maximum number of iterations. - void SetMaxIter(int maxits) override; + void SetMaxIter(int max_it) override; - // Set target spectrum for the eigensolver. 
When a spectral transformation is used, this - // applies to the spectrum of the shifted operator. void SetWhichEigenpairs(WhichType type) override; - // Set problem type. void SetProblemType(ProblemType type) override; - // Set eigenvalue solver. void SetType(Type type) override; - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; + void SetInitialSpace(const ComplexVector &v) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; + PetscScalar GetEigenvalue(int i) const override; - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; + void GetEigenvector(int i, ComplexVector &x) const override; - // Get the basis vectors object. BV GetBV() const override; - // Get the spectral transformation object. ST GetST() const override; - // Get the filtering region object. RG GetRG() const override; - // Get the associated MPI communicator. - MPI_Comm GetComm() const override; - - // Conversion function to SLEPc's EPS type. - operator EPS() const { return eps; } + MPI_Comm GetComm() const override + { + return eps ? PetscObjectComm(reinterpret_cast(eps)) : MPI_COMM_NULL; + } - // Conversion function to PetscObject. operator PetscObject() const override { return reinterpret_cast(eps); }; }; // Generalized eigenvalue problem solver: K x = λ M x . class SlepcEPSSolver : public SlepcEPSSolverBase { -private: +public: + using SlepcEigenvalueSolver::delta; + using SlepcEigenvalueSolver::gamma; + using SlepcEigenvalueSolver::opB; + using SlepcEigenvalueSolver::opInv; + using SlepcEigenvalueSolver::opProj; + using SlepcEigenvalueSolver::sigma; + using SlepcEigenvalueSolver::sinvert; + // References to matrices defining the generalized eigenvalue problem (not owned). 
- const petsc::PetscParMatrix *opK, *opM; + const ComplexOperator *opK, *opM; + + // Workspace vector for operator applications. + mutable ComplexVector x, y; +private: // Operator norms for scaling. mutable PetscReal normK, normM; protected: - // Helper routine for computing the eigenpair residual. - void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const override; + PetscReal GetResidualNorm(int i) const override; - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig) const override; + PetscReal GetBackwardScaling(PetscScalar l) const override; public: - SlepcEPSSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = std::string()); + SlepcEPSSolver(MPI_Comm comm, int print, const std::string &prefix = std::string()); - // Set operators for the generalized eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &M, + void SetOperators(const ComplexOperator &K, const ComplexOperator &M, ScaleType type) override; - // Access methods for operator application. - const petsc::PetscParMatrix *GetOpK() { return opK; } - const petsc::PetscParMatrix *GetOpM() { return opM; } + void SetBMat(const Operator &B) override; }; // Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 , solved via // linearization: L₀ y = λ L₁ y . class SlepcPEPLinearSolver : public SlepcEPSSolverBase { -private: +public: + using SlepcEigenvalueSolver::delta; + using SlepcEigenvalueSolver::gamma; + using SlepcEigenvalueSolver::opB; + using SlepcEigenvalueSolver::opInv; + using SlepcEigenvalueSolver::opProj; + using SlepcEigenvalueSolver::sigma; + using SlepcEigenvalueSolver::sinvert; + // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; + const ComplexOperator *opK, *opC, *opM; + + // Workspace vectors for operator applications. 
+ mutable ComplexVector x1, x2, y1, y2; +private: // Operator norms for scaling. mutable PetscReal normK, normC, normM; - // Shell matrix used for weighted inner products. May be nullptr, in which case identity - // is used. Also a reference to the original passed in matrix. - petsc::PetscParMatrix *B0; - const petsc::PetscParMatrix *opB; - - // Workspace vectors for operator applications. - mutable petsc::PetscParVector *x1, *x2, *y1, *y2, *z; - protected: - // Helper routine for computing the eigenpair residual. - void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const override; + PetscReal GetResidualNorm(int i) const override; - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig) const override; + PetscReal GetBackwardScaling(PetscScalar l) const override; public: - SlepcPEPLinearSolver(MPI_Comm comm, int print_lvl, - const std::string &prefix = std::string()); - ~SlepcPEPLinearSolver() override; + SlepcPEPLinearSolver(MPI_Comm comm, int print, const std::string &prefix = std::string()); - // Set operators for the quadratic polynomial eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; - // Configure the basis vectors object associated with the eigenvalue solver. - void SetBMat(const petsc::PetscParMatrix &B) override; - - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; - - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; - - // Helper methods for splitting a block vector from the linearized problem into its into - // two parts. 
- PetscScalar *GetBlocks(petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - const PetscScalar *GetBlocksRead(const petsc::PetscParVector &v, - petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - void RestoreBlocks(PetscScalar *pv, petsc::PetscParVector &v, petsc::PetscParVector &v1, - petsc::PetscParVector &v2) const; - void RestoreBlocksRead(const PetscScalar *pv, const petsc::PetscParVector &v, - petsc::PetscParVector &v1, petsc::PetscParVector &v2) const; - - // Access methods for operator application. - const petsc::PetscParMatrix *GetOpK() { return opK; } - const petsc::PetscParMatrix *GetOpC() { return opC; } - const petsc::PetscParMatrix *GetOpM() { return opM; } - const petsc::PetscParMatrix *GetOpB() { return opB; } - petsc::PetscParVector *GetX1() { return x1; } - petsc::PetscParVector *GetX2() { return x2; } - petsc::PetscParVector *GetY1() { return y1; } - petsc::PetscParVector *GetY2() { return y2; } + void SetBMat(const Operator &B) override; + + void SetInitialSpace(const ComplexVector &v) override; + + void GetEigenvector(int i, ComplexVector &x) const override; }; // Base class for SLEPc's PEP problem type. -class SlepcPEPSolverBase : public SlepcEigenSolver +class SlepcPEPSolverBase : public SlepcEigenvalueSolver { protected: // SLEPc eigensolver object. PEP pep; // Shell matrices for the quadratic polynomial eigenvalue problem - petsc::PetscParMatrix *A0, *A1, *A2; + Mat A0, A1, A2; - // Customize object with command line options set. void Customize() override; public: // Calls SLEPc's PEPCreate. Expects SLEPc to be initialized/finalized externally. - SlepcPEPSolverBase(MPI_Comm comm, int print_lvl, - const std::string &prefix = std::string()); + SlepcPEPSolverBase(MPI_Comm comm, int print, const std::string &prefix = std::string()); // Call's SLEPc's PEPDestroy. ~SlepcPEPSolverBase() override; - // Set the number of required eigenmodes. 
- void SetNumModes(int numeig, int numvec = 0) override; + // Conversion function to SLEPc's PEP type. + operator PEP() const { return pep; } + + void SetNumModes(int num_eig, int num_vec = 0) override; - // Set solver tolerance. - void SetTol(double tol) override; + void SetTol(PetscReal tol) override; - // Set maximum number of iterations. - void SetMaxIter(int maxits) override; + void SetMaxIter(int max_it) override; - // Set target spectrum for the eigensolver. When a spectral transformation is used, this - // applies to the spectrum of the shifted operator. void SetWhichEigenpairs(WhichType type) override; - // Set problem type. void SetProblemType(ProblemType type) override; - // Set eigenvalue solver. void SetType(Type type) override; - // Set an initial vector for the solution subspace. - void SetInitialSpace(const petsc::PetscParVector &v) override; + void SetInitialSpace(const ComplexVector &v) override; - // Solve the eigenvalue problem. Returns the number of converged eigenvalues. int Solve() override; - // Get the corresponding eigenvalue. - void GetEigenvalue(int i, double &eigr, double &eigi) const override; + PetscScalar GetEigenvalue(int i) const override; - // Get the corresponding eigenvector. - void GetEigenvector(int i, petsc::PetscParVector &v) const override; + void GetEigenvector(int i, ComplexVector &x) const override; - // Get the basis vectors object. BV GetBV() const override; - // Get the spectral transformation object. ST GetST() const override; - // Get the filtering region object. RG GetRG() const override; - // Get the associated MPI communicator. - MPI_Comm GetComm() const override; - - // Conversion function to SLEPc's PEP type. - operator PEP() const { return pep; } + MPI_Comm GetComm() const override + { + return pep ? PetscObjectComm(reinterpret_cast(pep)) : MPI_COMM_NULL; + } - // Conversion function to PetscObject. 
operator PetscObject() const override { return reinterpret_cast(pep); }; }; // Quadratic eigenvalue problem solver: P(λ) x = (K + λ C + λ² M) x = 0 . class SlepcPEPSolver : public SlepcPEPSolverBase { -private: +public: + using SlepcEigenvalueSolver::delta; + using SlepcEigenvalueSolver::gamma; + using SlepcEigenvalueSolver::opB; + using SlepcEigenvalueSolver::opInv; + using SlepcEigenvalueSolver::opProj; + using SlepcEigenvalueSolver::sigma; + using SlepcEigenvalueSolver::sinvert; + // References to matrices defining the quadratic polynomial eigenvalue problem // (not owned). - const petsc::PetscParMatrix *opK, *opC, *opM; + const ComplexOperator *opK, *opC, *opM; + + // Workspace vector for operator applications. + mutable ComplexVector x, y; +private: // Operator norms for scaling. mutable PetscReal normK, normC, normM; protected: - // Helper routine for computing the eigenpair residual. - void GetResidual(PetscScalar eig, const petsc::PetscParVector &v, - petsc::PetscParVector &r) const override; + PetscReal GetResidualNorm(int i) const override; - // Helper routine for computing the backward error. - PetscReal GetBackwardScaling(PetscScalar eig) const override; + PetscReal GetBackwardScaling(PetscScalar l) const override; public: - SlepcPEPSolver(MPI_Comm comm, int print_lvl, const std::string &prefix = std::string()); + SlepcPEPSolver(MPI_Comm comm, int print, const std::string &prefix = std::string()); - // Set operators for the quadratic polynomial eigenvalue problem. - void SetOperators(const petsc::PetscParMatrix &K, const petsc::PetscParMatrix &C, - const petsc::PetscParMatrix &M, ScaleType type) override; + void SetOperators(const ComplexOperator &K, const ComplexOperator &C, + const ComplexOperator &M, ScaleType type) override; - // Access methods for operator application. 
- const petsc::PetscParMatrix *GetOpK() { return opK; } - const petsc::PetscParMatrix *GetOpC() { return opC; } - const petsc::PetscParMatrix *GetOpM() { return opM; } + void SetBMat(const Operator &B) override; }; } // namespace slepc diff --git a/palace/linalg/solver.cpp b/palace/linalg/solver.cpp new file mode 100644 index 000000000..c78a00c85 --- /dev/null +++ b/palace/linalg/solver.cpp @@ -0,0 +1,42 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#include "solver.hpp" + +namespace palace +{ + +template <> +void WrapperSolver::SetOperator(const Operator &op) +{ + pc->SetOperator(op); +} + +template <> +void WrapperSolver::SetOperator(const ComplexOperator &op) +{ + MFEM_VERIFY(op.IsReal() && op.HasReal(), + "WrapperSolver::SetOperator requires an operator which is purely real for " + "mfem::Solver!"); + pc->SetOperator(*op.Real()); +} + +template <> +void WrapperSolver::Mult(const Vector &x, Vector &y) const +{ + pc->Mult(x, y); +} + +template <> +void WrapperSolver::Mult(const ComplexVector &x, ComplexVector &y) const +{ + mfem::Array X(2); + mfem::Array Y(2); + X[0] = &x.Real(); + X[1] = &x.Imag(); + Y[0] = &y.Real(); + Y[1] = &y.Imag(); + pc->ArrayMult(X, Y); +} + +} // namespace palace diff --git a/palace/linalg/solver.hpp b/palace/linalg/solver.hpp new file mode 100644 index 000000000..54f31a9a3 --- /dev/null +++ b/palace/linalg/solver.hpp @@ -0,0 +1,84 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_SOLVER_HPP +#define PALACE_LINALG_SOLVER_HPP + +#include +#include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +namespace palace +{ + +// +// The base Solver class is a templated version of mfem::Solver for operation with +// real- or complex-valued operators. +// + +// Abstract base class for real-valued or complex-valued solvers. 
+template +class Solver +{ + static_assert(std::is_same::value || + std::is_same::value, + "Solver can only be defined for OperType = Operator or ComplexOperator!"); + +protected: + using VecType = typename std::conditional::value, + ComplexVector, Vector>::type; + + // Whether or not to use the second argument of Mult() as an initial guess. + bool initial_guess; + +public: + Solver(bool initial_guess = false) : initial_guess(initial_guess) {} + virtual ~Solver() = default; + + // Configure whether or not to use an initial guess when applying the solver. + virtual void SetInitialGuess(bool guess) { initial_guess = guess; } + + // Set the operator associated with the solver, or update it if called repeatedly. + virtual void SetOperator(const OperType &op) = 0; + + // Apply the solver. + virtual void Mult(const VecType &x, VecType &y) const = 0; + + // Apply the solver for the transpose problem. + virtual void MultTranspose(const VecType &x, VecType &y) const + { + MFEM_ABORT("MultTranspose() is not implemented for base class Solver!"); + } +}; + +// This solver wraps a real-valued mfem::Solver for application to complex-valued problems +// as a preconditioner inside of a Solver +template +class WrapperSolver : public Solver +{ + using VecType = typename Solver::VecType; + +protected: + std::unique_ptr pc; + +public: + WrapperSolver(std::unique_ptr &&pc) + : Solver(pc->iterative_mode), pc(std::move(pc)) + { + } + + void SetInitialGuess(bool guess) override + { + Solver::SetInitialGuess(guess); + pc->iterative_mode = guess; + } + + void SetOperator(const OperType &op) override; + + void Mult(const VecType &x, VecType &y) const override; +}; + +} // namespace palace + +#endif // PALACE_LINALG_SOLVER_HPP diff --git a/palace/linalg/strumpack.cpp b/palace/linalg/strumpack.cpp index d74a18bb7..9073ead71 100644 --- a/palace/linalg/strumpack.cpp +++ b/palace/linalg/strumpack.cpp @@ -5,6 +5,8 @@ #if defined(MFEM_USE_STRUMPACK) +#include "linalg/rap.hpp" + namespace palace { 
@@ -47,8 +49,8 @@ StrumpackSolverBase::StrumpackSolverBase( : StrumpackSolverType(comm), comm(comm) { // Configure the solver. - this->SetPrintFactorStatistics((print > 1)); - this->SetPrintSolveStatistics((print > 1)); + this->SetPrintFactorStatistics(print > 1); + this->SetPrintSolveStatistics(print > 1); this->SetKrylovSolver(strumpack::KrylovSolver::DIRECT); // Always as a preconditioner or // direct solver this->SetMatching(strumpack::MatchingJob::NONE); @@ -98,28 +100,65 @@ StrumpackSolverBase::StrumpackSolverBase( this->SetCompressionRelTol(lr_tol); break; case config::LinearSolverData::CompressionType::NONE: - default: + case config::LinearSolverData::CompressionType::INVALID: break; } } template -void StrumpackSolverBase::SetOperator(const mfem::Operator &op) +void StrumpackSolverBase::SetOperator(const Operator &op) { - // Convert the input operator to a distributed STRUMPACK matrix (always use - // symmetric sparsity pattern). Safe to delete the matrix since STRUMPACK - // copies it on input. - mfem::STRUMPACKRowLocMatrix A(op, true); + // Convert the input operator to a distributed STRUMPACK matrix (always assume a symmetric + // sparsity pattern). This is very similar to the MFEM STRUMPACKRowLocMatrix from a + // HypreParMatrix but avoids using the communicator from the Hypre matrix in the case that + // the solver is constructed on a different communicator. + const mfem::HypreParMatrix *hypA; + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + hypA = &PtAP->ParallelAssemble(); + } + else + { + hypA = dynamic_cast(&op); + MFEM_VERIFY(hypA, "StrumpackSolver requires a HypreParMatrix operator!"); + } + auto *parcsr = (hypre_ParCSRMatrix *)const_cast(*hypA); + hypA->HostRead(); + hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr); + hypA->HypreRead(); + + // Create the STRUMPACKRowLocMatrix by taking the internal data from a hypre_CSRMatrix. 
+ HYPRE_Int n_loc = csr->num_rows; + HYPRE_BigInt first_row = parcsr->first_row_index; + HYPRE_Int *I = csr->i; + HYPRE_BigInt *J = csr->big_j; + double *data = csr->data; - // Set up base class. + // Safe to delete the matrix since STRUMPACK copies it on input. Also clean up the Hypre + // data structure once we are done with it. +#if !defined(HYPRE_BIGINT) + mfem::STRUMPACKRowLocMatrix A(comm, n_loc, first_row, hypA->GetGlobalNumRows(), + hypA->GetGlobalNumCols(), I, J, data, true); +#else + int n_loc_int = static_cast(n_loc); + MFEM_ASSERT(n_loc == (HYPRE_Int)n_loc_int, + "Overflow error for local sparse matrix size!"); + mfem::Array II(n_loc_int + 1); + for (int i = 0; i <= n_loc_int; i++) + { + II[i] = static_cast(I[i]); + MFEM_ASSERT(I[i] == (HYPRE_Int)II[i], "Overflow error for local sparse matrix index!"); + } + mfem::STRUMPACKRowLocMatrix A(comm, n_loc_int, first_row, hypA->GetGlobalNumRows(), + hypA->GetGlobalNumCols(), II, J, data, true); +#endif StrumpackSolverType::SetOperator(A); + hypre_CSRMatrixDestroy(csr); } template class StrumpackSolverBase; -#if STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1 template class StrumpackSolverBase; -#endif } // namespace palace diff --git a/palace/linalg/strumpack.hpp b/palace/linalg/strumpack.hpp index 3ffe46e1a..f1d17c979 100644 --- a/palace/linalg/strumpack.hpp +++ b/palace/linalg/strumpack.hpp @@ -8,6 +8,7 @@ #if defined(MFEM_USE_STRUMPACK) +#include "linalg/operator.hpp" #include "utils/iodata.hpp" namespace palace @@ -36,17 +37,13 @@ class StrumpackSolverBase : public StrumpackSolverType { } - // Sets matrix associated with the STRUMPACK solver. 
- void SetOperator(const mfem::Operator &op) override; + void SetOperator(const Operator &op) override; }; using StrumpackSolver = StrumpackSolverBase; -#if STRUMPACK_VERSION_MAJOR >= 6 && STRUMPACK_VERSION_MINOR >= 3 && \ - STRUMPACK_VERSION_PATCH > 1 using StrumpackMixedPrecisionSolver = StrumpackSolverBase; -#endif } // namespace palace diff --git a/palace/linalg/superlu.cpp b/palace/linalg/superlu.cpp index ba601c0bc..66cd59d33 100644 --- a/palace/linalg/superlu.cpp +++ b/palace/linalg/superlu.cpp @@ -5,7 +5,7 @@ #if defined(MFEM_USE_SUPERLU) -#include "linalg/petsc.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" namespace palace @@ -68,23 +68,68 @@ SuperLUSolver::SuperLUSolver(MPI_Comm comm, config::LinearSolverData::SymFactTyp { // Use default } - solver.SetRowPermutation(mfem::superlu::NOROWPERM); + // solver.SetRowPermutation(mfem::superlu::NOROWPERM); solver.SetIterativeRefine(mfem::superlu::NOREFINE); solver.SetSymmetricPattern(true); // Always symmetric sparsity pattern } -void SuperLUSolver::SetOperator(const mfem::Operator &op) +void SuperLUSolver::SetOperator(const Operator &op) { - // We need to save A because SuperLU does not copy the input matrix. For repeated - // factorizations, always reuse the sparsity pattern. + // For repeated factorizations, always reuse the sparsity pattern. This is very similar to + // the MFEM SuperLURowLocMatrix from a HypreParMatrix but avoids using the communicator + // from the Hypre matrix in the case that the solver is constructed on a different + // communicator. 
if (A) { solver.SetFact(mfem::superlu::SamePattern_SameRowPerm); } - A = std::make_unique(op); + const mfem::HypreParMatrix *hypA; + const auto *PtAP = dynamic_cast(&op); + if (PtAP) + { + hypA = &PtAP->ParallelAssemble(); + } + else + { + hypA = dynamic_cast(&op); + MFEM_VERIFY(hypA, "SuperLUSolver requires a HypreParMatrix operator!"); + } + auto *parcsr = (hypre_ParCSRMatrix *)const_cast(*hypA); + hypA->HostRead(); + hypre_CSRMatrix *csr = hypre_MergeDiagAndOffd(parcsr); + hypA->HypreRead(); - // Set up base class. + // Create the SuperLURowLocMatrix by taking the internal data from a hypre_CSRMatrix. + HYPRE_Int n_loc = csr->num_rows; + HYPRE_BigInt first_row = parcsr->first_row_index; + HYPRE_Int *I = csr->i; + HYPRE_BigInt *J = csr->big_j; + double *data = csr->data; + + // We need to save A because SuperLU does not copy the input matrix. Also clean up the + // Hypre data structure once we are done with it. +#if !defined(HYPRE_BIGINT) + A = std::make_unique(comm, n_loc, first_row, + hypA->GetGlobalNumRows(), + hypA->GetGlobalNumCols(), I, J, data); +#else + int n_loc_int = static_cast(n_loc); + MFEM_ASSERT(n_loc == (HYPRE_Int)n_loc_int, + "Overflow error for local sparse matrix size!"); + mfem::Array II(n_loc_int + 1); + for (int i = 0; i <= n_loc_int; i++) + { + II[i] = static_cast(I[i]); + MFEM_ASSERT(I[i] == (HYPRE_Int)II[i], "Overflow error for local sparse matrix index!"); + } + A = std::make_unique(comm, n_loc_int, first_row, + hypA->GetGlobalNumRows(), + hypA->GetGlobalNumCols(), II, J, data); +#endif solver.SetOperator(*A); + height = solver.Height(); + width = solver.Width(); + hypre_CSRMatrixDestroy(csr); } } // namespace palace diff --git a/palace/linalg/superlu.hpp b/palace/linalg/superlu.hpp index 1daf86631..51febe601 100644 --- a/palace/linalg/superlu.hpp +++ b/palace/linalg/superlu.hpp @@ -9,6 +9,8 @@ #if defined(MFEM_USE_SUPERLU) #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "utils/iodata.hpp" namespace palace 
@@ -33,22 +35,22 @@ class SuperLUSolver : public mfem::Solver { } - // Sets matrix associated with the SuperLU solver. - void SetOperator(const mfem::Operator &op) override; + mfem::SuperLUSolver &GetSolver() { return solver; } - // Application of the solver. - void Mult(const mfem::Vector &x, mfem::Vector &y) const override { solver.Mult(x, y); } - void ArrayMult(const mfem::Array &X, - mfem::Array &Y) const override + void SetOperator(const Operator &op) override; + + void Mult(const Vector &x, Vector &y) const override { solver.Mult(x, y); } + void ArrayMult(const mfem::Array &X, + mfem::Array &Y) const override { solver.ArrayMult(X, Y); } - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override + void MultTranspose(const Vector &x, Vector &y) const override { solver.MultTranspose(x, y); } - void ArrayMultTranspose(const mfem::Array &X, - mfem::Array &Y) const override + void ArrayMultTranspose(const mfem::Array &X, + mfem::Array &Y) const override { solver.ArrayMultTranspose(X, Y); } diff --git a/palace/linalg/vector.cpp b/palace/linalg/vector.cpp new file mode 100644 index 000000000..5f4b57f34 --- /dev/null +++ b/palace/linalg/vector.cpp @@ -0,0 +1,531 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#include "vector.hpp" + +#include +#include +#include + +namespace palace +{ + +ComplexVector::ComplexVector(int n) : x(2 * n), xr(x, 0, n), xi(x, n, n) {} + +ComplexVector::ComplexVector(const ComplexVector &y) : ComplexVector(y.Size()) +{ + Set(y); +} + +ComplexVector::ComplexVector(const Vector &yr, const Vector &yi) : ComplexVector(yr.Size()) +{ + MFEM_VERIFY(yr.Size() == yi.Size(), + "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); + Set(yr, yi); +} + +ComplexVector::ComplexVector(const std::complex *py, int n) : ComplexVector(n) +{ + Set(py, n); +} + +void ComplexVector::SetSize(int n) +{ + x.SetSize(2 * n); + xr.MakeRef(x, 0, n); + xi.MakeRef(x, n, n); +} + +void ComplexVector::Set(const Vector &yr, const Vector &yi) +{ + MFEM_VERIFY(yr.Size() == yi.Size() && yr.Size() == Size(), + "Mismatch in dimension of real and imaginary matrix parts in ComplexVector!"); + Real() = yr; + Imag() = yi; +} + +void ComplexVector::Set(const std::complex *py, int n) +{ + MFEM_VERIFY(n == Size(), + "Mismatch in dimension for array of std::complex in ComplexVector!"); + Vector y(reinterpret_cast(const_cast *>(py)), 2 * n); + const int N = n; + const auto *Y = y.Read(); + auto *XR = Real().Write(); + auto *XI = Imag().Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = Y[2 * i]; + XI[i] = Y[2 * i + 1]; + }); +} + +void ComplexVector::Get(std::complex *py, int n) const +{ + MFEM_VERIFY(n == Size(), + "Mismatch in dimension for array of std::complex in ComplexVector!"); + Vector y(reinterpret_cast(py), 2 * n); + const int N = n; + const auto *XR = Real().Read(); + const auto *XI = Imag().Read(); + auto *Y = y.Write(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + Y[2 * i] = XR[i]; + Y[2 * i + 1] = XI[i]; + }); + y.HostReadWrite(); +} + +ComplexVector &ComplexVector::operator=(std::complex s) +{ + Real() = s.real(); + Imag() = s.imag(); + return *this; +} + +ComplexVector 
&ComplexVector::operator*=(std::complex s) +{ + const double sr = s.real(); + const double si = s.imag(); + if (si == 0.0) + { + Real() *= sr; + Imag() *= sr; + } + else + { + const int N = Size(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = si * XR[i] + sr * XI[i]; + XR[i] = sr * XR[i] - si * XI[i]; + XI[i] = t; + }); + } + return *this; +} + +void ComplexVector::Conj() +{ + Imag() *= -1.0; +} + +void ComplexVector::Abs() +{ + const int N = Size(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + XR[i] = std::sqrt(XR[i] * XR[i] + XI[i] * XI[i]); + XI[i] = 0.0; + }); +} + +void ComplexVector::Reciprocal() +{ + const int N = Size(); + auto *XR = Real().ReadWrite(); + auto *XI = Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const std::complex t = 1.0 / std::complex(XR[i], XI[i]); + XR[i] = t.real(); + XI[i] = t.imag(); + }); +} + +std::complex ComplexVector::Dot(const ComplexVector &y) const +{ + return {(Real() * y.Real()) + (Imag() * y.Imag()), + (Imag() * y.Real()) - (Real() * y.Imag())}; +} + +std::complex ComplexVector::TransposeDot(const ComplexVector &y) const +{ + return {(Real() * y.Real()) - (Imag() * y.Imag()), + (Imag() * y.Real()) + (Real() * y.Imag())}; +} + +void ComplexVector::AXPY(std::complex alpha, const ComplexVector &x) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = Real().ReadWrite(); + auto *YI = Imag().ReadWrite(); + if (ai == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YR[i] += ar * XR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YI[i] += ar * XI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] += ar * XR[i] - ai * XI[i]; + YI[i] += ai * XR[i] + ar * XI[i]; + }); + } 
+} + +void ComplexVector::AXPBY(std::complex alpha, const ComplexVector &x, + std::complex beta) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + auto *YR = Real().ReadWrite(); + auto *YI = Imag().ReadWrite(); + if (beta == 0.0) + { + if (ai == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YR[i] = ar * XR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YI[i] = ar * XI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + YR[i] = ar * XR[i] - ai * XI[i]; + YI[i] = ai * XR[i] + ar * XI[i]; + }); + } + } + else + { + const double br = beta.real(); + const double bi = beta.imag(); + if (ai == 0.0 && bi == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YR[i] = ar * XR[i] + br * YR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { YI[i] = ar * XI[i] + br * YI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = bi * YR[i] + br * YI[i]; + YR[i] = ar * XR[i] - ai * XI[i] + br * YR[i] - bi * YI[i]; + YI[i] = ai * XR[i] + ar * XI[i] + t; + }); + } + } +} + +void ComplexVector::AXPBYPCZ(std::complex alpha, const ComplexVector &x, + std::complex beta, const ComplexVector &y, + std::complex gamma) +{ + const int N = Size(); + const double ar = alpha.real(); + const double ai = alpha.imag(); + const double br = beta.real(); + const double bi = beta.imag(); + const auto *XR = x.Real().Read(); + const auto *XI = x.Imag().Read(); + const auto *YR = y.Real().Read(); + const auto *YI = y.Imag().Read(); + auto *ZR = (gamma == 0.0) ? Real().Write() : Real().ReadWrite(); // Read Z if gamma != 0 + auto *ZI = (gamma == 0.0) ? Imag().Write() : Imag().ReadWrite(); + if (gamma == 0.0) + { + if (ai == 0.0 && bi == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZR[i] = ar * XR[i] + br * YR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) { ZI[i] = ar * XI[i] + br * YI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + ZR[i] = ar * XR[i] 
- ai * XI[i] + br * YR[i] - bi * YI[i]; + ZI[i] = ai * XR[i] + ar * XI[i] + bi * YR[i] + br * YI[i]; + }); + } + } + else + { + const double gr = gamma.real(); + const double gi = gamma.imag(); + if (ai == 0.0 && bi == 0.0 && gi == 0.0) + { + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { ZR[i] = ar * XR[i] + br * YR[i] + gr * ZR[i]; }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { ZI[i] = ar * XI[i] + br * YI[i] + gr * ZI[i]; }); + } + else + { + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const double t = gi * ZR[i] + gr * ZI[i]; + ZR[i] = ar * XR[i] - ai * XI[i] + br * YR[i] - bi * YI[i] + + gr * ZR[i] - gi * ZI[i]; + ZI[i] = ai * XR[i] + ar * XI[i] + bi * YR[i] + br * YI[i] + t; + }); + } + } +} + +namespace linalg +{ + +template <> +void SetRandom(MPI_Comm comm, Vector &x, int seed) +{ + if (seed == 0) + { + std::vector seeds(1); + std::seed_seq seed_gen{Mpi::Rank(comm)}; + seed_gen.generate(seeds.begin(), seeds.end()); + seed = static_cast(seeds[0]); + } + x.Randomize(seed); +} + +template <> +void SetRandomReal(MPI_Comm comm, Vector &x, int seed) +{ + SetRandom(comm, x, seed); +} + +template <> +void SetRandomSign(MPI_Comm comm, Vector &x, int seed) +{ + SetRandom(comm, x, seed); + const int N = x.Size(); + auto *X = x.ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { X[i] = (X[i] > 0.0) ? 1.0 : ((X[i] < 0.0) ? 
-1.0 : 0.0); }); +} + +template <> +void SetRandom(MPI_Comm comm, ComplexVector &x, int seed) +{ + if (seed == 0) + { + std::vector seeds(2); + std::seed_seq seed_gen{2 * Mpi::Rank(comm), 2 * Mpi::Rank(comm) + 1}; + seed_gen.generate(seeds.begin(), seeds.end()); + SetRandom(comm, x.Real(), static_cast(seeds[0])); + SetRandom(comm, x.Imag(), static_cast(seeds[1])); + } + else + { + SetRandom(comm, x.Real(), seed); + SetRandom(comm, x.Imag(), seed); + } +} + +template <> +void SetRandomReal(MPI_Comm comm, ComplexVector &x, int seed) +{ + SetRandom(comm, x.Real(), seed); + x.Imag() = 0.0; +} + +template <> +void SetRandomSign(MPI_Comm comm, ComplexVector &x, int seed) +{ + SetRandom(comm, x, seed); + const int N = x.Size(); + auto *XR = x.Real().ReadWrite(); + auto *XI = x.Imag().ReadWrite(); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { XR[i] = (XR[i] > 0.0) ? 1.0 : ((XR[i] < 0.0) ? -1.0 : 0.0); }); + mfem::forall(N, [=] MFEM_HOST_DEVICE(int i) + { XI[i] = (XI[i] > 0.0) ? 1.0 : ((XI[i] < 0.0) ? 
-1.0 : 0.0); }); +} + +template <> +void SetSubVector(Vector &x, const mfem::Array &rows, double s) +{ + const int N = rows.Size(); + const double sr = s; + const auto *idx = rows.Read(); + auto *X = x.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + X[id] = sr; + }); +} + +template <> +void SetSubVector(ComplexVector &x, const mfem::Array &rows, double s) +{ + const int N = rows.Size(); + const double sr = s; + const auto *idx = rows.Read(); + auto *XR = x.Real().ReadWrite(); + auto *XI = x.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + XR[id] = sr; + }); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + XI[id] = 0.0; + }); +} + +template <> +void SetSubVector(Vector &x, const mfem::Array &rows, const Vector &y) +{ + const int N = rows.Size(); + const auto *idx = rows.Read(); + const auto *Y = y.Read(); + auto *X = x.ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + X[id] = Y[id]; + }); +} + +template <> +void SetSubVector(ComplexVector &x, const mfem::Array &rows, const ComplexVector &y) +{ + const int N = rows.Size(); + const auto *idx = rows.Read(); + const auto *YR = y.Real().Read(); + const auto *YI = y.Imag().Read(); + auto *XR = x.Real().ReadWrite(); + auto *XI = x.Imag().ReadWrite(); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + XR[id] = YR[id]; + }); + mfem::forall(N, + [=] MFEM_HOST_DEVICE(int i) + { + const int id = idx[i]; + XI[id] = YI[id]; + }); +} + +template <> +double Norml2(MPI_Comm comm, const Vector &x, const Operator &B, Vector &Bx) +{ + B.Mult(x, Bx); + double dot = Dot(comm, Bx, x); + MFEM_ASSERT(dot > 0.0, + "Non-positive vector norm in normalization (dot = " << dot << ")!"); + return std::sqrt(dot); +} + +template <> +double Norml2(MPI_Comm comm, const ComplexVector &x, const Operator &B, ComplexVector &Bx) +{ + // For SPD B, xᴴ B x is real. 
+ B.Mult(x.Real(), Bx.Real()); + B.Mult(x.Imag(), Bx.Imag()); + std::complex dot = Dot(comm, Bx, x); + MFEM_ASSERT(dot.real() > 0.0 && std::abs(dot.imag()) < 1.0e-9 * dot.real(), + "Non-positive vector norm in normalization (dot = " << dot << ")!"); + return std::sqrt(dot.real()); +} + +template <> +void AXPY(double alpha, const Vector &x, Vector &y) +{ + if (alpha == 1.0) + { + y += x; + } + else + { + y.Add(alpha, x); + } +} + +template <> +void AXPY(double alpha, const ComplexVector &x, ComplexVector &y) +{ + y.AXPY(alpha, x); +} + +template <> +void AXPY(std::complex alpha, const ComplexVector &x, ComplexVector &y) +{ + y.AXPY(alpha, x); +} + +template <> +void AXPBY(double alpha, const Vector &x, double beta, Vector &y) +{ + add(alpha, x, beta, y, y); +} + +template <> +void AXPBY(std::complex alpha, const ComplexVector &x, std::complex beta, + ComplexVector &y) +{ + y.AXPBY(alpha, x, beta); +} + +template <> +void AXPBY(double alpha, const ComplexVector &x, double beta, ComplexVector &y) +{ + y.AXPBY(alpha, x, beta); +} + +template <> +void AXPBYPCZ(double alpha, const Vector &x, double beta, const Vector &y, double gamma, + Vector &z) +{ + if (gamma == 0.0) + { + add(alpha, x, beta, y, z); + } + else + { + AXPBY(alpha, x, gamma, z); + z.Add(beta, y); + } +} + +template <> +void AXPBYPCZ(std::complex alpha, const ComplexVector &x, std::complex beta, + const ComplexVector &y, std::complex gamma, ComplexVector &z) +{ + z.AXPBYPCZ(alpha, x, beta, y, gamma); +} + +template <> +void AXPBYPCZ(double alpha, const ComplexVector &x, double beta, const ComplexVector &y, + double gamma, ComplexVector &z) +{ + z.AXPBYPCZ(alpha, x, beta, y, gamma); +} + +} // namespace linalg + +} // namespace palace diff --git a/palace/linalg/vector.hpp b/palace/linalg/vector.hpp new file mode 100644 index 000000000..8f25fb2d7 --- /dev/null +++ b/palace/linalg/vector.hpp @@ -0,0 +1,195 @@ +// Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef PALACE_LINALG_VECTOR_HPP +#define PALACE_LINALG_VECTOR_HPP + +#include +#include +#include "utils/communication.hpp" + +namespace palace +{ + +using Operator = mfem::Operator; +using Vector = mfem::Vector; + +// +// Functionality extending mfem::Vector from MFEM, including basic functions for parallel +// vectors distributed across MPI processes. +// + +// A complex-valued vector represented as two real vectors, one for each component. +class ComplexVector +{ +private: + Vector x, xr, xi; + +public: + // Create a vector with the given size. + ComplexVector(int n = 0); + + // Copy constructor. + ComplexVector(const ComplexVector &y); + + // Copy constructor from separately provided real and imaginary parts. + ComplexVector(const Vector &yr, const Vector &yi); + + // Copy constructor from an array of complex values. + ComplexVector(const std::complex *py, int n); + + // Return the size of the vector. + int Size() const { return x.Size() / 2; } + + // Set the size of the vector. See the notes for Vector::SetSize for behavior in the cases + // where n is less than or greater than Size() or Capacity(). + void SetSize(int n); + + // Get access to the real and imaginary vector parts. + const Vector &Real() const { return xr; } + Vector &Real() { return xr; } + const Vector &Imag() const { return xi; } + Vector &Imag() { return xi; } + + // Set from a ComplexVector, without resizing. + ComplexVector &operator=(const ComplexVector &y) { return Set(y); } + ComplexVector &Set(const ComplexVector &y) + { + Set(y.Real(), y.Imag()); + return *this; + } + + // Set from separately provided real and imaginary parts, without resizing. + void Set(const Vector &yr, const Vector &yi); + + // Set from an array of complex values, without resizing. + void Set(const std::complex *py, int n); + + // Copy the vector into an array of complex values. + void Get(std::complex *py, int n) const; + + // Set all entries equal to s. 
+ ComplexVector &operator=(std::complex s); + ComplexVector &operator=(double s) + { + *this = std::complex(s, 0.0); + return *this; + } + + // Scale all entries by s. + ComplexVector &operator*=(std::complex s); + + // Replace entries with their complex conjugate. + void Conj(); + + // Replace entries with their absolute value. + void Abs(); + + // Set all entries to their reciprocal. + void Reciprocal(); + + // Vector dot product (yᴴ x) or indefinite dot product (yᵀ x) for complex vectors. + std::complex Dot(const ComplexVector &y) const; + std::complex TransposeDot(const ComplexVector &y) const; + std::complex operator*(const ComplexVector &y) const { return Dot(y); } + + // In-place addition (*this) += alpha * x. + void AXPY(std::complex alpha, const ComplexVector &x); + void Add(std::complex alpha, const ComplexVector &x) { AXPY(alpha, x); } + ComplexVector &operator+=(const ComplexVector &x) + { + AXPY(1.0, x); + return *this; + } + + // In-place addition (*this) = alpha * x + beta * (*this). + void AXPBY(std::complex alpha, const ComplexVector &x, std::complex beta); + + // In-place addition (*this) = alpha * x + beta * y + gamma * (*this). + void AXPBYPCZ(std::complex alpha, const ComplexVector &x, + std::complex beta, const ComplexVector &y, + std::complex gamma); +}; + +namespace linalg +{ + +// Returns the global vector size. +template +inline HYPRE_BigInt GlobalSize(MPI_Comm comm, const VecType &x) +{ + HYPRE_BigInt N = x.Size(); + Mpi::GlobalSum(1, &N, comm); + return N; +} + +// Sets all entries of the vector corresponding to the given indices to the given (real) +// value. +template +void SetSubVector(VecType &x, const mfem::Array &rows, double s); +template +void SetSubVector(VecType &x, const mfem::Array &rows, const VecType &y); + +// Sets all entries of the vector to random numbers sampled from the [-1, 1] or [-1 - 1i, +// 1 + 1i] for complex-valued vectors. 
+template +void SetRandom(MPI_Comm comm, VecType &x, int seed = 0); +template +void SetRandomReal(MPI_Comm comm, VecType &x, int seed = 0); +template +void SetRandomSign(MPI_Comm comm, VecType &x, int seed = 0); + +// Calculate the inner product yᴴ x or yᵀ x. +template +inline auto Dot(MPI_Comm comm, const VecType &x, const VecType &y) +{ + auto dot = x * y; + Mpi::GlobalSum(1, &dot, comm); + return dot; +} + +// Calculate the vector 2-norm. +template +inline double Norml2(MPI_Comm comm, const VecType &x) +{ + return std::sqrt(std::abs(Dot(comm, x, x))); +} +template +double Norml2(MPI_Comm comm, const VecType &x, const Operator &B, VecType &Bx); + +// Normalize the vector, possibly with respect to an SPD matrix B. +template +inline double Normalize(MPI_Comm comm, VecType &x) +{ + double norm = Norml2(comm, x); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} +template +inline double Normalize(MPI_Comm comm, VecType &x, const Operator &B, VecType &Bx) +{ + double norm = Norml2(comm, x, B, Bx); + MFEM_ASSERT(norm > 0.0, "Zero vector norm in normalization!"); + x *= 1.0 / norm; + return norm; +} + +// Addition y += alpha * x. +template +void AXPY(ScalarType alpha, const VecType &x, VecType &y); + +// Addition y = alpha * x + beta * y. +template +void AXPBY(ScalarType alpha, const VecType &x, ScalarType beta, VecType &y); + +// Addition z = alpha * x + beta * y + gamma * z. 
+template +void AXPBYPCZ(ScalarType alpha, const VecType &x, ScalarType beta, const VecType &y, + ScalarType gamma, VecType &z); + +} // namespace linalg + +} // namespace palace + +#endif // PALACE_LINALG_VECTOR_HPP diff --git a/palace/main.cpp b/palace/main.cpp index ed7583a37..b3e11632d 100644 --- a/palace/main.cpp +++ b/palace/main.cpp @@ -12,7 +12,6 @@ #include "drivers/electrostaticsolver.hpp" #include "drivers/magnetostaticsolver.hpp" #include "drivers/transientsolver.hpp" -#include "linalg/petsc.hpp" #include "linalg/slepc.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -130,17 +129,16 @@ int main(int argc, char *argv[]) PrintBanner(world_comm, world_size, num_thread, git_tag); IoData iodata(argv[1], false); - // Initialize Hypre and PETSc, and optionally SLEPc. + // Initialize Hypre and, optionally, SLEPc/PETSc. mfem::Hypre::Init(); - petsc::Initialize(argc, argv, nullptr, nullptr); #if defined(PALACE_WITH_SLEPC) - slepc::Initialize(); -#endif + slepc::Initialize(argc, argv, nullptr, nullptr); if (PETSC_COMM_WORLD != world_comm) { Mpi::Print(world_comm, "Error: Problem during MPI initialization!\n\n"); return 1; } +#endif // Initialize the problem driver. std::unique_ptr solver; @@ -186,11 +184,10 @@ int main(int argc, char *argv[]) solver->SaveMetadata(timer); Mpi::Print(world_comm, "\n"); - // Finalize PETSc. + // Finalize SLEPc/PETSc. 
#if defined(PALACE_WITH_SLEPC) slepc::Finalize(); #endif - petsc::Finalize(); return 0; } diff --git a/palace/models/curlcurloperator.cpp b/palace/models/curlcurloperator.cpp index 801c27bff..0c142a1a5 100644 --- a/palace/models/curlcurloperator.cpp +++ b/palace/models/curlcurloperator.cpp @@ -6,7 +6,7 @@ #include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "fem/multigrid.hpp" -#include "fem/operator.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -70,22 +70,29 @@ mfem::Array SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe CurlCurlOperator::CurlCurlOperator(const IoData &iodata, const std::vector> &mesh) - : dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), skip_zeros(0), - pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + : assembly_level(iodata.solver.linear.mat_pa ? mfem::AssemblyLevel::PARTIAL + : mfem::AssemblyLevel::LEGACY), + skip_zeros(0), pc_mg(iodata.solver.linear.pc_mg), print_hdr(true), + dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), - h1_fec(iodata.solver.order, mesh.back()->Dimension()), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), + h1_fecs(utils::ConstructFECollections( + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), - nd_fespaces( - pc_gmg - ? utils::ConstructFiniteElementSpaceHierarchy(mesh, nd_fecs, dbc_marker) - : utils::ConstructFiniteElementSpaceHierarchy(*mesh.back(), *nd_fecs.back())), - h1_fespace(mesh.back().get(), &h1_fec), rt_fespace(mesh.back().get(), &rt_fec), - mat_op(iodata, *mesh.back()), surf_j_op(iodata, h1_fespace) + nd_fespaces(pc_mg ? 
utils::ConstructFiniteElementSpaceHierarchy( + mesh, nd_fecs, &dbc_marker, &dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *nd_fecs.back(), &dbc_marker, + &dbc_tdof_lists.emplace_back())), + h1_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back())), + rt_fespace(mesh.back().get(), &rt_fec), mat_op(iodata, *mesh.back()), + surf_j_op(iodata, GetH1Space()) { // Finalize setup. CheckBoundaryProperties(); - nd_fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker, dbc_tdof_list); // Print essential BC information. if (dbc_marker.Max() > 0) @@ -106,79 +113,78 @@ void CurlCurlOperator::CheckBoundaryProperties() } } -void CurlCurlOperator::PrintHeader() +std::unique_ptr CurlCurlOperator::GetStiffnessMatrix() { if (print_hdr) { - Mpi::Print("\nConfiguring system matrices, number of global unknowns: {:d}\n", - nd_fespaces.GetFinestFESpace().GlobalTrueVSize()); - print_hdr = false; + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " H1: {:d}, ND: {:d}, RT: {:d}\n", + GetH1Space().GlobalTrueVSize(), GetNDSpace().GlobalTrueVSize(), + GetRTSpace().GlobalTrueVSize()); + Mpi::Print("\nAssembling multigrid hierarchy:\n"); } -} - -void CurlCurlOperator::GetStiffnessMatrix(std::vector> &K) -{ - K.clear(); - K.reserve(nd_fespaces.GetNumLevels()); + auto K = std::make_unique(nd_fespaces.GetNumLevels()); for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) { auto &nd_fespace_l = nd_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - nd_fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParBilinearForm k(&nd_fespace_l); - k.AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); - // k.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - k.Assemble(skip_zeros); - k.Finalize(skip_zeros); - mfem::HypreParMatrix *hK = k.ParallelAssemble(); 
- hK->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - PrintHeader(); + constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); + auto k = std::make_unique(&nd_fespace_l); + k->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); + k->SetAssemblyLevel(assembly_level); + k->Assemble(skip_zeros); + k->Finalize(skip_zeros); + if (print_hdr) { - std::string str = ""; - if (pc_gmg) + Mpi::Print(" Level {:d}: {:d} unknowns", l, nd_fespace_l.GlobalTrueVSize()); + if (assembly_level == mfem::AssemblyLevel::LEGACY) + { + HYPRE_BigInt nnz = k->SpMat().NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, nd_fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ\n", nnz); + } + else { - str = - fmt::format(" (Level {:d}, {:d} unknowns)", l, nd_fespace_l.GlobalTrueVSize()); + Mpi::Print("\n"); } - Mpi::Print(" K{}: NNZ = {:d}, norm = {:e}\n", str, hK->NNZ(), - hypre_ParCSRMatrixFnorm(*hK)); } - K.emplace_back(hK); + auto K_l = std::make_unique(std::move(k), nd_fespace_l); + K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + K->AddOperator(std::move(K_l)); } + print_hdr = false; + return K; } -std::unique_ptr CurlCurlOperator::GetCurlMatrix() +std::unique_ptr CurlCurlOperator::GetCurlMatrix() { - mfem::ParDiscreteLinearOperator curl(&nd_fespaces.GetFinestFESpace(), &rt_fespace); - curl.AddDomainInterpolator(new mfem::CurlInterpolator); - // curl.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - curl.Assemble(); - curl.Finalize(); - return std::unique_ptr(curl.ParallelAssemble()); + auto curl = std::make_unique(&GetNDSpace(), &GetRTSpace()); + curl->AddDomainInterpolator(new mfem::CurlInterpolator); + curl->SetAssemblyLevel(assembly_level); + curl->Assemble(); + curl->Finalize(); + return std::make_unique(std::move(curl), GetNDSpace(), GetRTSpace(), true); } -void CurlCurlOperator::GetExcitationVector(int idx, mfem::Vector &RHS) +void 
CurlCurlOperator::GetExcitationVector(int idx, Vector &RHS) { // Assemble the surface current excitation +J. The SurfaceCurrentOperator assembles -J // (meant for time or frequency domain Maxwell discretization, so we multiply by -1 to // retrieve +J). - SumVectorCoefficient fb(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()); + SumVectorCoefficient fb(GetNDSpace().GetParMesh()->SpaceDimension()); surf_j_op.AddExcitationBdrCoefficients(idx, fb); - RHS.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); + RHS.SetSize(GetNDSpace().GetTrueVSize()); RHS = 0.0; if (fb.empty()) { return; } - mfem::ParLinearForm rhs(&nd_fespaces.GetFinestFESpace()); + mfem::LinearForm rhs(&GetNDSpace()); rhs.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - rhs.UseFastAssembly(true); + rhs.UseFastAssembly(false); rhs.Assemble(); - rhs.ParallelAssemble(RHS); - RHS.Neg(); - RHS.SetSubVector(dbc_tdof_list, 0.0); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs, RHS, -1.0); + linalg::SetSubVector(RHS, dbc_tdof_lists.back(), 0.0); } } // namespace palace diff --git a/palace/models/curlcurloperator.hpp b/palace/models/curlcurloperator.hpp index 26a65e865..d9577131f 100644 --- a/palace/models/curlcurloperator.hpp +++ b/palace/models/curlcurloperator.hpp @@ -7,6 +7,8 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/materialoperator.hpp" #include "models/surfacecurrentoperator.hpp" @@ -21,26 +23,26 @@ class IoData; class CurlCurlOperator { private: - // Essential boundary condition markers. - mfem::Array dbc_marker, dbc_tdof_list; - void CheckBoundaryProperties(); + const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators + const int skip_zeros; // Skip zeros during full assembly of operators + const bool pc_mg; // Use geometric multigrid in preconditioning - // Options for system matrix assembly. 
- const int skip_zeros; // Whether to skip the zeros during assembly of operators - const bool pc_gmg; // Whether to use geometric multigrid in preconditioning - - // Helper variable and function for log file printing. + // Helper variable for log file printing. bool print_hdr; - void PrintHeader(); + + // Essential boundary condition markers. + mfem::Array dbc_marker; + std::vector> dbc_tdof_lists; + void CheckBoundaryProperties(); // Objects defining the finite element spaces for the magnetic vector potential // (Nedelec) and magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces // are used for various purposes throughout the code including postprocessing. std::vector> nd_fecs; - mfem::H1_FECollection h1_fec; + std::vector> h1_fecs; mfem::RT_FECollection rt_fec; - mfem::ParFiniteElementSpaceHierarchy nd_fespaces; - mfem::ParFiniteElementSpace h1_fespace, rt_fespace; + mfem::ParFiniteElementSpaceHierarchy nd_fespaces, h1_fespaces; + mfem::ParFiniteElementSpace rt_fespace; // Operator for domain material properties. MaterialOperator mat_op; @@ -52,10 +54,6 @@ class CurlCurlOperator CurlCurlOperator(const IoData &iodata, const std::vector> &mesh); - // Returns array marking Dirichlet BC attributes and local subdomain vdofs. - const auto &GetDbcMarker() const { return dbc_marker; } - const auto &GetDbcTDofList() const { return dbc_tdof_list; } - // Return material operator for postprocessing. const MaterialOperator &GetMaterialOp() const { return mat_op; } @@ -65,19 +63,26 @@ class CurlCurlOperator // Return the parallel finite element space objects. 
auto &GetNDSpaces() { return nd_fespaces; } auto &GetNDSpace() { return nd_fespaces.GetFinestFESpace(); } - auto &GetH1Space() { return h1_fespace; } + const auto &GetNDSpace() const { return nd_fespaces.GetFinestFESpace(); } + auto &GetH1Spaces() { return h1_fespaces; } + auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } + const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); } auto &GetRTSpace() { return rt_fespace; } + const auto &GetRTSpace() const { return rt_fespace; } // Construct and return system matrix representing discretized curl-curl operator for // Ampere's law. - void GetStiffnessMatrix(std::vector> &K); + std::unique_ptr GetStiffnessMatrix(); // Construct and return the discrete curl matrix. - std::unique_ptr GetCurlMatrix(); + std::unique_ptr GetCurlMatrix(); // Assemble the right-hand side source term vector for a current source applied on // specified excited boundaries. - void GetExcitationVector(int idx, mfem::Vector &RHS); + void GetExcitationVector(int idx, Vector &RHS); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return GetNDSpace().GetComm(); } }; } // namespace palace diff --git a/palace/models/domainpostoperator.cpp b/palace/models/domainpostoperator.cpp index 321c9a886..7a7a404de 100644 --- a/palace/models/domainpostoperator.cpp +++ b/palace/models/domainpostoperator.cpp @@ -12,157 +12,27 @@ namespace palace { -DomainPostOperatorMF::DomainPostOperatorMF(const IoData &iodata, - const MaterialOperator &mat, - mfem::ParFiniteElementSpace &h1_fespace) - : mat_op(mat), ones(&h1_fespace) -{ - // Define a constant 1 function on the scalar finite element space for computing volume - // integrals. - ones.mfem::Vector::operator=(1.0); - - // Use the provided domain postprocessing indices to group for postprocessing bulk - // dielectric loss. 
- for (const auto &[idx, data] : iodata.domains.postpro.dielectric) - { - mfem::Array &attr_marker = - attr_markers.emplace(idx, h1_fespace.GetParMesh()->attributes.Max()).first->second; - attr_marker = 0; - for (auto attr : data.attributes) - { - attr_marker[attr - 1] = 1; - } - } -} - -double -DomainPostOperatorMF::GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const -{ - // Compute the electric field energy integral as: E_elec = 1/2 Re{∫_Ω Dᴴ E dV}. - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func); -} - -double DomainPostOperatorMF::GetElectricFieldEnergy(const mfem::ParGridFunction &E) const -{ - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func); -} - -double -DomainPostOperatorMF::GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B) const -{ - // Compute the magnetic field energy integral as: E_mag = 1/2 Re{∫_Ω Bᴴ H dV}. - std::map dummy_l2s; - EnergyDensityCoefficient - um_func(B, mat_op, dummy_l2s); - return GetVolumeIntegral(um_func); -} - -double DomainPostOperatorMF::GetMagneticFieldEnergy(const mfem::ParGridFunction &B) const -{ - std::map dummy_l2s; - EnergyDensityCoefficient - um_func(B, mat_op, dummy_l2s); - return GetVolumeIntegral(um_func); -} - -double DomainPostOperatorMF::GetDomainElectricFieldEnergy( - int idx, const mfem::ParComplexGridFunction &E) const -{ - // Compute the electric field energy integral for only a portion of the domain. 
- auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func, it->second); -} - -double -DomainPostOperatorMF::GetDomainElectricFieldEnergy(int idx, - const mfem::ParGridFunction &E) const -{ - auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - ue_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(ue_func, it->second); -} - -double DomainPostOperatorMF::GetDomainElectricFieldEnergyLoss( - int idx, const mfem::ParComplexGridFunction &E) const -{ - // Compute the electric field energy integral for only a portion of the domain. - auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - uei_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(uei_func, it->second); -} - -double -DomainPostOperatorMF::GetDomainElectricFieldEnergyLoss(int idx, - const mfem::ParGridFunction &E) const -{ - auto it = attr_markers.find(idx); - MFEM_VERIFY(it != attr_markers.end(), - "Invalid domain index when postprocessing bulk dielectric loss!"); - std::map dummy_l2s; - EnergyDensityCoefficient - uei_func(E, mat_op, dummy_l2s); - return GetVolumeIntegral(uei_func, it->second); -} - -double DomainPostOperatorMF::GetVolumeIntegral(mfem::Coefficient &f) const -{ - // Integrate the coefficient over the entire domain. 
- mfem::ParLinearForm s(ones.ParFESpace()); - s.AddDomainIntegrator(new DomainLFIntegrator(f)); - s.UseFastAssembly(true); - s.Assemble(); - return s(ones); -} - -double DomainPostOperatorMF::GetVolumeIntegral(mfem::Coefficient &f, - mfem::Array &attr_marker) const -{ - // Integrate the coefficient over the domain attributes making up this domain index. - mfem::ParLinearForm s(ones.ParFESpace()); - s.AddDomainIntegrator(new DomainLFIntegrator(f), attr_marker); - s.UseFastAssembly(true); - s.Assemble(); - return s(ones); -} - DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op, mfem::ParFiniteElementSpace *nd_fespace, mfem::ParFiniteElementSpace *rt_fespace) - : m0ND(nd_fespace ? std::optional(nd_fespace) : std::nullopt), - m0RT(rt_fespace ? std::optional(rt_fespace) : std::nullopt) + : M_ND(nd_fespace ? std::optional(nd_fespace) : std::nullopt), + M_RT(rt_fespace ? std::optional(rt_fespace) : std::nullopt) { - if (m0ND.has_value()) + if (M_ND.has_value()) { // Construct ND mass matrix to compute the electric field energy integral as: // E_elec = 1/2 Re{∫_Ω Dᴴ E dV} as (M_eps * e)ᴴ e. // Only the real part of the permeability contributes to the energy (imaginary part // cancels out in the inner product due to symmetry). - MaterialPropertyCoefficient epsilon_func( - mat_op); - m0ND->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); - // m0ND->SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m0ND->Assemble(); - m0ND->Finalize(); + constexpr auto MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; + constexpr auto MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; + MaterialPropertyCoefficient epsilon_func(mat_op); + M_ND->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + // XX TODO: Partial assembly option? 
+ M_ND->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + M_ND->Assemble(0); + M_ND->Finalize(0); + D.SetSize(M_ND->Height()); // Use the provided domain postprocessing indices to group for postprocessing bulk // dielectric loss. @@ -178,47 +48,50 @@ DomainPostOperator::DomainPostOperator(const IoData &iodata, const MaterialOpera SumMatrixCoefficient epsilon_func_r(nd_fespace->GetParMesh()->SpaceDimension()); SumMatrixCoefficient epsilon_func_i(nd_fespace->GetParMesh()->SpaceDimension()); epsilon_func_r.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op), + std::make_unique>(mat_op), attr_marker); epsilon_func_i.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op, - -1.0), + std::make_unique>(mat_op, -1.0), attr_marker); - auto &m0 = m0NDi.emplace(idx, std::make_pair(nd_fespace, nd_fespace)).first->second; - mfem::ParBilinearForm &m0r = m0.first; - mfem::ParBilinearForm &m0i = m0.second; - m0r.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_r)); - m0i.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_i)); - // m0r.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - // m0i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m0r.Assemble(); - m0i.Assemble(); - m0r.Finalize(); - m0i.Finalize(); + auto &M = M_NDi.emplace(idx, std::make_pair(nd_fespace, nd_fespace)).first->second; + mfem::BilinearForm &Mr = M.first; + mfem::BilinearForm &Mi = M.second; + Mr.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_r)); + Mi.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func_i)); + // XX TODO: Partial assembly option? + Mr.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + Mi.SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + Mr.Assemble(); + Mi.Assemble(); + Mr.Finalize(); + Mi.Finalize(); } } - if (m0RT.has_value()) + if (M_RT.has_value()) { // Construct RT mass matrix to compute the magnetic field energy integral as: // E_mag = 1/2 Re{∫_Ω Bᴴ H dV} as (M_muinv * b)ᴴ b. 
- MaterialPropertyCoefficient muinv_func(mat_op); - m0RT->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func)); - // m0RT->SetAssemblyLevel(mfem::AssemblyLevel::FULL); - m0RT->Assemble(); - m0RT->Finalize(); + constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + MaterialPropertyCoefficient muinv_func(mat_op); + M_RT->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func)); + // XX TODO: Partial assembly option? + M_RT->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + M_RT->Assemble(0); + M_RT->Finalize(0); + H.SetSize(M_RT->Height()); } } double DomainPostOperator::GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const { - if (m0ND.has_value()) + if (M_ND.has_value()) { - double res = m0ND->InnerProduct(E.real(), E.real()); - res += m0ND->InnerProduct(E.imag(), E.imag()); + M_ND->Mult(E.real(), D); + double res = mfem::InnerProduct(E.real(), D); + M_ND->Mult(E.imag(), D); + res += mfem::InnerProduct(E.imag(), D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -229,9 +102,10 @@ DomainPostOperator::GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E double DomainPostOperator::GetElectricFieldEnergy(const mfem::ParGridFunction &E) const { - if (m0ND.has_value()) + if (M_ND.has_value()) { - double res = m0ND->InnerProduct(E, E); + M_ND->Mult(E, D); + double res = mfem::InnerProduct(E, D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -243,10 +117,12 @@ double DomainPostOperator::GetElectricFieldEnergy(const mfem::ParGridFunction &E double DomainPostOperator::GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B) const { - if (m0RT.has_value()) + if (M_RT.has_value()) { - double res = m0RT->InnerProduct(B.real(), B.real()); - res += m0RT->InnerProduct(B.imag(), B.imag()); + M_RT->Mult(B.real(), H); + double res = mfem::InnerProduct(B.real(), H); + M_RT->Mult(B.imag(), H); + res += mfem::InnerProduct(B.imag(), H); Mpi::GlobalSum(1, &res, 
B.ParFESpace()->GetComm()); return 0.5 * res; } @@ -257,9 +133,10 @@ DomainPostOperator::GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B double DomainPostOperator::GetMagneticFieldEnergy(const mfem::ParGridFunction &B) const { - if (m0RT.has_value()) + if (M_RT.has_value()) { - double res = m0RT->InnerProduct(B, B); + M_RT->Mult(B, H); + double res = mfem::InnerProduct(B, H); Mpi::GlobalSum(1, &res, B.ParFESpace()->GetComm()); return 0.5 * res; } @@ -272,11 +149,13 @@ double DomainPostOperator::GetDomainElectricFieldEnergy( int idx, const mfem::ParComplexGridFunction &E) const { // Compute the electric field energy integral for only a portion of the domain. - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.first.InnerProduct(E.real(), E.real()); - res += it->second.first.InnerProduct(E.imag(), E.imag()); + it->second.first.Mult(E.real(), D); + double res = mfem::InnerProduct(E.real(), D); + it->second.first.Mult(E.imag(), D); + res += mfem::InnerProduct(E.imag(), D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -285,10 +164,11 @@ double DomainPostOperator::GetDomainElectricFieldEnergy(int idx, const mfem::ParGridFunction &E) const { - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.first.InnerProduct(E, E); + it->second.first.Mult(E, D); + double res = mfem::InnerProduct(E, D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -297,11 +177,13 @@ double DomainPostOperator::GetDomainElectricFieldEnergyLoss( int idx, const mfem::ParComplexGridFunction &E) const { // Compute the electric field energy integral for only a portion of the domain. 
- auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.second.InnerProduct(E.real(), E.real()); - res += it->second.second.InnerProduct(E.imag(), E.imag()); + it->second.second.Mult(E.real(), D); + double res = mfem::InnerProduct(E.real(), D); + it->second.second.Mult(E.imag(), D); + res += mfem::InnerProduct(E.imag(), D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } @@ -310,10 +192,11 @@ double DomainPostOperator::GetDomainElectricFieldEnergyLoss(int idx, const mfem::ParGridFunction &E) const { - auto it = m0NDi.find(idx); - MFEM_VERIFY(it != m0NDi.end(), + auto it = M_NDi.find(idx); + MFEM_VERIFY(it != M_NDi.end(), "Invalid domain index when postprocessing bulk dielectric loss!"); - double res = it->second.second.InnerProduct(E, E); + it->second.second.Mult(E, D); + double res = mfem::InnerProduct(E, D); Mpi::GlobalSum(1, &res, E.ParFESpace()->GetComm()); return 0.5 * res; } diff --git a/palace/models/domainpostoperator.hpp b/palace/models/domainpostoperator.hpp index 2af2f9f15..b3dd33c31 100644 --- a/palace/models/domainpostoperator.hpp +++ b/palace/models/domainpostoperator.hpp @@ -15,45 +15,6 @@ namespace palace class IoData; class MaterialOperator; -// -// A class handling domain postprocessing (matrix-free). -// -class DomainPostOperatorMF -{ -private: - // Reference to material property operator (not owned). - const MaterialOperator &mat_op; - - // Unit function used for computing volume integrals. - mfem::ParGridFunction ones; - - // Mapping from domain index to marker and loss tangent for postprocessing bulk dielectic - // loss. 
- mutable std::map> attr_markers; - - double GetVolumeIntegral(mfem::Coefficient &f) const; - double GetVolumeIntegral(mfem::Coefficient &f, mfem::Array &attr_marker) const; - -public: - DomainPostOperatorMF(const IoData &iodata, const MaterialOperator &mat, - mfem::ParFiniteElementSpace &h1_fespace); - - // Access underlying bulk loss postprocessing data structures (for keys). - const auto &GetEps() const { return attr_markers; } - auto SizeEps() const { return attr_markers.size(); } - - // Get volume integrals computing bulk electric or magnetic field energy. - double GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const; - double GetElectricFieldEnergy(const mfem::ParGridFunction &E) const; - double GetMagneticFieldEnergy(const mfem::ParComplexGridFunction &B) const; - double GetMagneticFieldEnergy(const mfem::ParGridFunction &B) const; - double GetDomainElectricFieldEnergy(int idx, const mfem::ParComplexGridFunction &E) const; - double GetDomainElectricFieldEnergy(int idx, const mfem::ParGridFunction &E) const; - double GetDomainElectricFieldEnergyLoss(int idx, - const mfem::ParComplexGridFunction &E) const; - double GetDomainElectricFieldEnergyLoss(int idx, const mfem::ParGridFunction &E) const; -}; - // // A class handling domain postprocessing. // @@ -61,8 +22,11 @@ class DomainPostOperator { private: // Bilinear forms for computing field energy integrals over domains. - std::optional m0ND, m0RT; - std::map> m0NDi; + std::optional M_ND, M_RT; + std::map> M_NDi; + + // Temporary vectors for inner product calculations. + mutable mfem::Vector D, H; public: DomainPostOperator(const IoData &iodata, const MaterialOperator &mat_op, @@ -70,8 +34,8 @@ class DomainPostOperator mfem::ParFiniteElementSpace *rt_fespace); // Access underlying bulk loss postprocessing data structures (for keys). 
- const auto &GetEps() const { return m0NDi; } - auto SizeEps() const { return m0NDi.size(); } + const auto &GetEps() const { return M_NDi; } + auto SizeEps() const { return M_NDi.size(); } // Get volume integrals computing bulk electric or magnetic field energy. double GetElectricFieldEnergy(const mfem::ParComplexGridFunction &E) const; diff --git a/palace/models/farfieldboundaryoperator.cpp b/palace/models/farfieldboundaryoperator.cpp index c1a009d84..12881a3d8 100644 --- a/palace/models/farfieldboundaryoperator.cpp +++ b/palace/models/farfieldboundaryoperator.cpp @@ -69,9 +69,10 @@ void FarfieldBoundaryOperator::AddDampingBdrCoefficients(double coef, // First-order absorbing boundary condition. if (farfield_marker.Max() > 0) { + constexpr auto MatType = MaterialPropertyType::INV_Z0; + constexpr auto ElemType = MeshElementType::BDR_ELEMENT; fb.AddCoefficient( - std::make_unique>(mat_op, - coef), + std::make_unique>(mat_op, coef), farfield_marker); } } @@ -88,11 +89,12 @@ void FarfieldBoundaryOperator::AddExtraSystemBdrCoefficients(double omega, // does as well. 
if (farfield_marker.Max() > 0 && order > 1) { + constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY_C0; + constexpr auto ElemType = MeshElementType::BDR_ELEMENT; dfbi.AddCoefficient( std::make_unique( - std::make_unique< - MaterialPropertyCoefficient>( - mat_op, 0.5 / omega)), + std::make_unique>(mat_op, + 0.5 / omega)), farfield_marker); } } diff --git a/palace/models/laplaceoperator.cpp b/palace/models/laplaceoperator.cpp index 5e3a78bdf..3c3f877fe 100644 --- a/palace/models/laplaceoperator.cpp +++ b/palace/models/laplaceoperator.cpp @@ -5,7 +5,7 @@ #include "fem/coefficient.hpp" #include "fem/multigrid.hpp" -#include "fem/operator.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -113,21 +113,21 @@ std::map> ConstructSources(const IoData &iodata) LaplaceOperator::LaplaceOperator(const IoData &iodata, const std::vector> &mesh) - : dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), skip_zeros(0), - pc_gmg(iodata.solver.linear.mat_gmg), print_hdr(true), + : assembly_level(iodata.solver.linear.mat_pa ? mfem::AssemblyLevel::PARTIAL + : mfem::AssemblyLevel::LEGACY), + skip_zeros(0), pc_mg(iodata.solver.linear.pc_mg), print_hdr(true), + dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), h1_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), nd_fec(iodata.solver.order, mesh.back()->Dimension()), - h1_fespaces( - pc_gmg - ? utils::ConstructFiniteElementSpaceHierarchy(mesh, h1_fecs, dbc_marker) - : utils::ConstructFiniteElementSpaceHierarchy(*mesh.back(), *h1_fecs.back())), + h1_fespaces(pc_mg ? 
utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs, &dbc_marker, &dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back(), &dbc_marker, + &dbc_tdof_lists.emplace_back())), nd_fespace(mesh.back().get(), &nd_fec), mat_op(iodata, *mesh.back()), source_attr_lists(ConstructSources(iodata)) { - // Finalize setup. - h1_fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker, dbc_tdof_list); - // Print essential BC information. if (dbc_marker.Max() > 0) { @@ -136,73 +136,64 @@ LaplaceOperator::LaplaceOperator(const IoData &iodata, } } -void LaplaceOperator::PrintHeader() +std::unique_ptr LaplaceOperator::GetStiffnessMatrix() { if (print_hdr) { - Mpi::Print("\nConfiguring system matrices, number of global unknowns: {:d}\n", - h1_fespaces.GetFinestFESpace().GlobalTrueVSize()); - print_hdr = false; + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " H1: {:d}, ND: {:d}\n", + GetH1Space().GlobalTrueVSize(), GetNDSpace().GlobalTrueVSize()); + Mpi::Print("\nAssembling multigrid hierarchy:\n"); } -} - -void LaplaceOperator::GetStiffnessMatrix(std::vector> &K, - std::vector> &Ke) -{ - K.clear(); - Ke.clear(); - K.reserve(h1_fespaces.GetNumLevels()); - Ke.reserve(h1_fespaces.GetNumLevels()); + auto K = std::make_unique(h1_fespaces.GetNumLevels()); for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) { auto &h1_fespace_l = h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - h1_fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - MaterialPropertyCoefficient epsilon_func( - mat_op); - mfem::ParBilinearForm k(&h1_fespace_l); - k.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); - // k.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - k.Assemble(skip_zeros); - k.Finalize(skip_zeros); - mfem::HypreParMatrix *hK = k.ParallelAssemble(); - mfem::HypreParMatrix *hKe = hK->EliminateRowsCols(dbc_tdof_list_l); - PrintHeader(); + constexpr auto MatType = 
MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient epsilon_func(mat_op); + auto k = std::make_unique(&h1_fespace_l); + k->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(epsilon_func)); + k->SetAssemblyLevel(assembly_level); + k->Assemble(skip_zeros); + k->Finalize(skip_zeros); + if (print_hdr) { - std::string str = ""; - if (pc_gmg) + Mpi::Print(" Level {:d}: {:d} unknowns", l, h1_fespace_l.GlobalTrueVSize()); + if (assembly_level == mfem::AssemblyLevel::LEGACY) + { + HYPRE_BigInt nnz = k->SpMat().NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, h1_fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ\n", nnz); + } + else { - str = - fmt::format(" (Level {:d}, {:d} unknowns)", l, h1_fespace_l.GlobalTrueVSize()); + Mpi::Print("\n"); } - Mpi::Print(" K{}: NNZ = {:d}, norm = {:e}\n", str, hK->NNZ(), - hypre_ParCSRMatrixFnorm(*hK)); } - K.emplace_back(hK); - Ke.emplace_back(hKe); + auto K_l = std::make_unique(std::move(k), h1_fespace_l); + K_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + K->AddOperator(std::move(K_l)); } + print_hdr = false; + return K; } -std::unique_ptr LaplaceOperator::GetNegGradMatrix() +std::unique_ptr LaplaceOperator::GetGradMatrix() { - mfem::ParDiscreteLinearOperator grad(&h1_fespaces.GetFinestFESpace(), &nd_fespace); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - std::unique_ptr NegGrad(grad.ParallelAssemble()); - *NegGrad *= -1.0; - return NegGrad; + auto grad = std::make_unique(&GetH1Space(), &GetNDSpace()); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(assembly_level); + grad->Assemble(); + grad->Finalize(); + return std::make_unique(std::move(grad), GetH1Space(), GetNDSpace(), true); } -void LaplaceOperator::GetExcitationVector(int idx, const mfem::Operator &K, - const mfem::Operator &Ke, mfem::Vector &X, - mfem::Vector &RHS) +void 
LaplaceOperator::GetExcitationVector(int idx, const Operator &K, Vector &X, + Vector &RHS) { // Apply the Dirichlet BCs to the solution vector: V = 1 on terminal boundaries with the // given index, V = 0 on all ground and other terminal boundaries. - mfem::ParGridFunction x(&h1_fespaces.GetFinestFESpace()); + mfem::ParGridFunction x(&GetH1Space()); x = 0.0; // Get a marker of all boundary attributes with the given source surface index. @@ -213,13 +204,16 @@ void LaplaceOperator::GetExcitationVector(int idx, const mfem::Operator &K, x.ProjectBdrCoefficient(one, source_marker); // Values are only correct on master // Eliminate the essential BC to get the RHS vector. - X.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); - RHS.SetSize(h1_fespaces.GetFinestFESpace().GetTrueVSize()); + X.SetSize(GetH1Space().GetTrueVSize()); + RHS.SetSize(GetH1Space().GetTrueVSize()); X = 0.0; RHS = 0.0; x.ParallelProject(X); // Restrict to the true dofs - dynamic_cast(K).EliminateBC( - dynamic_cast(Ke), dbc_tdof_list, X, RHS); + const auto *mg_K = dynamic_cast(&K); + const auto *PtAP_K = mg_K ? dynamic_cast(&mg_K->GetFinestOperator()) + : dynamic_cast(&K); + MFEM_VERIFY(PtAP_K, "LaplaceOperator requires ParOperator for RHS elimination!"); + PtAP_K->EliminateRHS(X, RHS); } } // namespace palace diff --git a/palace/models/laplaceoperator.hpp b/palace/models/laplaceoperator.hpp index 30b9b601f..c97e57014 100644 --- a/palace/models/laplaceoperator.hpp +++ b/palace/models/laplaceoperator.hpp @@ -8,6 +8,8 @@ #include #include #include +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/materialoperator.hpp" namespace palace @@ -21,16 +23,16 @@ class IoData; class LaplaceOperator { private: - // Essential boundary condition markers. - mfem::Array dbc_marker, dbc_tdof_list; - - // Options for system matrix assembly. 
- const int skip_zeros; // Whether to skip the zeros during assembly of operators - const bool pc_gmg; // Whether to use geometric multigrid in preconditioning + const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators + const int skip_zeros; // Skip zeros during full assembly of operators + const bool pc_mg; // Use geometric multigrid in preconditioning - // Helper variable and function for log file printing. + // Helper variable for log file printing. bool print_hdr; - void PrintHeader(); + + // Essential boundary condition markers. + mfem::Array dbc_marker; + std::vector> dbc_tdof_lists; // Objects defining the finite element spaces for the electrostatic potential (H1) and // electric field (Nedelec) on the given mesh. @@ -49,10 +51,6 @@ class LaplaceOperator LaplaceOperator(const IoData &iodata, const std::vector> &mesh); - // Returns array marking Dirichlet BC attributes and local subdomain vdofs. - const auto &GetDbcMarker() const { return dbc_marker; } - const auto &GetDbcTDofList() const { return dbc_tdof_list; } - // Return material operator for postprocessing. const MaterialOperator &GetMaterialOp() const { return mat_op; } @@ -62,20 +60,23 @@ class LaplaceOperator // Return the parallel finite element space objects. auto &GetH1Spaces() { return h1_fespaces; } auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } + const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); } auto &GetNDSpace() { return nd_fespace; } + const auto &GetNDSpace() const { return nd_fespace; } // Construct and return system matrix representing discretized Laplace operator for // Gauss's law. - void GetStiffnessMatrix(std::vector> &K, - std::vector> &Ke); + std::unique_ptr GetStiffnessMatrix(); - // Construct and return the discrete negative gradient matrix. - std::unique_ptr GetNegGradMatrix(); + // Construct and return the discrete gradient matrix. 
+ std::unique_ptr GetGradMatrix(); // Assemble the solution boundary conditions and right-hand side vector for a nonzero // prescribed voltage on the specified surface index. - void GetExcitationVector(int idx, const mfem::Operator &K, const mfem::Operator &Ke, - mfem::Vector &X, mfem::Vector &RHS); + void GetExcitationVector(int idx, const Operator &K, Vector &X, Vector &RHS); + + // Get the associated MPI communicator. + MPI_Comm GetComm() const { return GetH1Space().GetComm(); } }; } // namespace palace diff --git a/palace/models/lumpedportoperator.cpp b/palace/models/lumpedportoperator.cpp index acbd9717d..b6309c1db 100644 --- a/palace/models/lumpedportoperator.cpp +++ b/palace/models/lumpedportoperator.cpp @@ -160,13 +160,12 @@ double LumpedPortData::GetExcitationVoltage() const } } -std::complex LumpedPortData::GetSParameter(mfem::ParComplexGridFunction &E) const +void LumpedPortData::InitializeLinearForms(mfem::ParFiniteElementSpace &nd_fespace) const { - // Compute port S-parameter, or the projection of the field onto the port mode: - // (E x H_inc) ⋅ n = E ⋅ (E_inc / Z_s), integrated over the port surface. + // The port S-parameter, or the projection of the field onto the port mode, is computed + // as: (E x H_inc) ⋅ n = E ⋅ (E_inc / Z_s), integrated over the port surface. if (!s) { - auto &nd_fespace = *E.ParFESpace(); SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension()); for (const auto &elem : elems) { @@ -175,17 +174,46 @@ std::complex LumpedPortData::GetSParameter(mfem::ParComplexGridFunction elem->GetGeometryLength() * elems.size()); fb.AddCoefficient(elem->GetModeCoefficient(Hinc), elem->GetMarker()); } - s = std::make_unique(&nd_fespace); + s = std::make_unique(&nd_fespace); s->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - s->UseFastAssembly(true); + s->UseFastAssembly(false); s->Assemble(); } - return {(*s)(E.real()), (*s)(E.imag())}; + + // The voltage across a port is computed using the electric field solution. 
+ // We have: + // V = ∫ E ⋅ l̂ dl = 1/w ∫ E ⋅ l̂ dS (for rectangular ports) + // or, + // V = 1/(2π) ∫ E ⋅ r̂ / r dS (for coaxial ports). + // We compute the surface integral via an inner product between the linear form with the + // averaging function as a vector coefficient and the solution expansion coefficients. + if (!v) + { + SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension()); + for (const auto &elem : elems) + { + fb.AddCoefficient( + elem->GetModeCoefficient(1.0 / (elem->GetGeometryWidth() * elems.size())), + elem->GetMarker()); + } + v = std::make_unique(&nd_fespace); + v->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); + v->UseFastAssembly(false); + v->Assemble(); + } +} + +std::complex LumpedPortData::GetSParameter(mfem::ParComplexGridFunction &E) const +{ + // Compute port S-parameter, or the projection of the field onto the port mode. + InitializeLinearForms(*E.ParFESpace()); + std::complex dot((*s) * E.real(), (*s) * E.imag()); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; } double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction &B, - const MaterialOperator &mat_op, - const std::map &local_to_shared) const + const MaterialOperator &mat_op) const { // Compute port power, (E x H) ⋅ n = E ⋅ (-n x H), integrated over the port surface // using the computed E and H = μ⁻¹ B fields. 
The linear form is reconstructed from @@ -195,21 +223,21 @@ double LumpedPortData::GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension()); for (const auto &elem : elems) { - fb.AddCoefficient( - std::make_unique(B, mat_op, local_to_shared), - elem->GetMarker()); + fb.AddCoefficient(std::make_unique(B, mat_op), + elem->GetMarker()); } - mfem::ParLinearForm p(&nd_fespace); + mfem::LinearForm p(&nd_fespace); p.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - p.UseFastAssembly(true); + p.UseFastAssembly(false); p.Assemble(); - return p(E); + double dot = p * E; + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; } -std::complex -LumpedPortData::GetPower(mfem::ParComplexGridFunction &E, mfem::ParComplexGridFunction &B, - const MaterialOperator &mat_op, - const std::map &local_to_shared) const +std::complex LumpedPortData::GetPower(mfem::ParComplexGridFunction &E, + mfem::ParComplexGridFunction &B, + const MaterialOperator &mat_op) const { // Compute port power, (E x H⋆) ⋅ n = E ⋅ (-n x H⋆), integrated over the port surface // using the computed E and H = μ⁻¹ B fields. 
The linear form is reconstructed from @@ -220,53 +248,40 @@ LumpedPortData::GetPower(mfem::ParComplexGridFunction &E, mfem::ParComplexGridFu SumVectorCoefficient fbi(nd_fespace.GetParMesh()->SpaceDimension()); for (const auto &elem : elems) { - fbr.AddCoefficient( - std::make_unique(B.real(), mat_op, local_to_shared), - elem->GetMarker()); - fbi.AddCoefficient( - std::make_unique(B.imag(), mat_op, local_to_shared), - elem->GetMarker()); + fbr.AddCoefficient(std::make_unique(B.real(), mat_op), + elem->GetMarker()); + fbi.AddCoefficient(std::make_unique(B.imag(), mat_op), + elem->GetMarker()); } - mfem::ParLinearForm pr(&nd_fespace), pi(&nd_fespace); + mfem::LinearForm pr(&nd_fespace), pi(&nd_fespace); pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr)); pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi)); - pr.UseFastAssembly(true); - pi.UseFastAssembly(true); + pr.UseFastAssembly(false); + pi.UseFastAssembly(false); pr.Assemble(); pi.Assemble(); - return {pr(E.real()) + pi(E.imag()), pr(E.imag()) - pi(E.real())}; + std::complex dot((pr * E.real()) + (pi * E.imag()), + (pr * E.imag()) - (pi * E.real())); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; } double LumpedPortData::GetVoltage(mfem::ParGridFunction &E) const { - // Compute the voltage across a port using the electric field solution. - // We have: - // V = ∫ E ⋅ l̂ dl = 1/w ∫ E ⋅ l̂ dS (for rectangular ports) - // or, - // V = 1/(2π) ∫ E ⋅ r̂ / r dS (for coaxial ports). - // We compute the surface integral via an inner product between the linear form with the - // averaging function as a vector coefficient and the solution expansion coefficients. 
- if (!v) - { - auto &nd_fespace = *E.ParFESpace(); - SumVectorCoefficient fb(nd_fespace.GetParMesh()->SpaceDimension()); - for (const auto &elem : elems) - { - fb.AddCoefficient( - elem->GetModeCoefficient(1.0 / (elem->GetGeometryWidth() * elems.size())), - elem->GetMarker()); - } - v = std::make_unique(&nd_fespace); - v->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - v->UseFastAssembly(true); - v->Assemble(); - } - return (*v)(E); + // Compute the average voltage across the port. + InitializeLinearForms(*E.ParFESpace()); + double dot = (*v) * E; + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; } std::complex LumpedPortData::GetVoltage(mfem::ParComplexGridFunction &E) const { - return {GetVoltage(E.real()), GetVoltage(E.imag())}; + // Compute the average voltage across the port. + InitializeLinearForms(*E.ParFESpace()); + std::complex dot((*v) * E.real(), (*v) * E.imag()); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; } LumpedPortOperator::LumpedPortOperator(const IoData &iodata, diff --git a/palace/models/lumpedportoperator.hpp b/palace/models/lumpedportoperator.hpp index 04c905637..8c5fa30d9 100644 --- a/palace/models/lumpedportoperator.hpp +++ b/palace/models/lumpedportoperator.hpp @@ -40,7 +40,8 @@ class LumpedPortData std::vector> elems; // Linear forms for postprocessing integrated quantities on the port. 
- mutable std::unique_ptr s, v; + mutable std::unique_ptr s, v; + void InitializeLinearForms(mfem::ParFiniteElementSpace &nd_fespace) const; public: LumpedPortData(const config::LumpedPortData &data, @@ -69,11 +70,9 @@ class LumpedPortData std::complex GetSParameter(mfem::ParComplexGridFunction &E) const; std::complex GetPower(mfem::ParComplexGridFunction &E, mfem::ParComplexGridFunction &B, - const MaterialOperator &mat_op, - const std::map &local_to_shared) const; + const MaterialOperator &mat_op) const; double GetPower(mfem::ParGridFunction &E, mfem::ParGridFunction &B, - const MaterialOperator &mat_op, - const std::map &local_to_shared) const; + const MaterialOperator &mat_op) const; std::complex GetVoltage(mfem::ParComplexGridFunction &E) const; double GetVoltage(mfem::ParGridFunction &E) const; }; diff --git a/palace/models/materialoperator.cpp b/palace/models/materialoperator.cpp index b56f1f4fa..251feb038 100644 --- a/palace/models/materialoperator.cpp +++ b/palace/models/materialoperator.cpp @@ -279,14 +279,12 @@ mfem::DenseMatrix ToDenseMatrix(const config::SymmetricMatrixData &data) } // namespace -MaterialOperator::MaterialOperator(const IoData &iodata, const mfem::ParMesh &mesh) - : sdim(mesh.SpaceDimension()) +MaterialOperator::MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh) { SetUpMaterialProperties(iodata, mesh); } -void MaterialOperator::SetUpMaterialProperties(const IoData &iodata, - const mfem::ParMesh &mesh) +void MaterialOperator::SetUpMaterialProperties(const IoData &iodata, mfem::ParMesh &mesh) { // Check that material attributes have been specified correctly. 
The mesh attributes may // be non-contiguous and when no material attribute is specified the elements are deleted @@ -314,6 +312,7 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata, // Set up material properties of the different domain regions, represented with piece-wise // constant matrix-valued coefficients for the relative permeability and permittivity, // and other material properties. + const int sdim = mesh.SpaceDimension(); mat_muinv.resize(attr_max, mfem::DenseMatrix(sdim)); mat_epsilon.resize(attr_max, mfem::DenseMatrix(sdim)); mat_epsilon_imag.resize(attr_max, mfem::DenseMatrix(sdim)); @@ -410,6 +409,13 @@ void MaterialOperator::SetUpMaterialProperties(const IoData &iodata, } } + // Construct shared face mapping for boundary coefficients. This is useful to have in one + // place alongside material properties so we construct and store it here. + for (int i = 0; i < mesh.GetNSharedFaces(); i++) + { + local_to_shared[mesh.GetSharedFace(i)] = i; + } + // Mark selected material attributes from the mesh as having certain local properties. mfem::Array losstan_mats, conductivity_mats, london_mats; losstan_mats.Reserve(attr_max); diff --git a/palace/models/materialoperator.hpp b/palace/models/materialoperator.hpp index 90475316a..459dd8729 100644 --- a/palace/models/materialoperator.hpp +++ b/palace/models/materialoperator.hpp @@ -4,6 +4,7 @@ #ifndef PALACE_MODELS_MATERIAL_OPERATOR_HPP #define PALACE_MODELS_MATERIAL_OPERATOR_HPP +#include #include #include @@ -18,9 +19,6 @@ class IoData; class MaterialOperator { private: - // Spatial dimension of the input mesh. Returned DenseMatrix objects are sdim x sdim. - const int sdim; - // Material properties for domain attributes: relative permeability, relative // permittivity, and others (like electrical conductivity and London penetration depth // for superconductors. 
The i-1-th entry of each Vector is the property for mesh domain @@ -29,12 +27,17 @@ class MaterialOperator mat_invz0, mat_c0, mat_sigma, mat_invLondon; std::vector mat_c0_min, mat_c0_max; mfem::Array losstan_marker, conductivity_marker, london_marker; - void SetUpMaterialProperties(const IoData &iodata, const mfem::ParMesh &mesh); + void SetUpMaterialProperties(const IoData &iodata, mfem::ParMesh &mesh); + + // Shared face mapping for boundary coefficients. + std::map local_to_shared; public: - MaterialOperator(const IoData &iodata, const mfem::ParMesh &mesh); + MaterialOperator(const IoData &iodata, mfem::ParMesh &mesh); + + int SpaceDimension() const { return mat_muinv.front().Height(); } - int SpaceDimension() const { return sdim; } + const auto &GetLocalToSharedFaceMap() const { return local_to_shared; } const auto &GetInvPermeability(int attr) const { return mat_muinv[attr - 1]; } const auto &GetPermittivityReal(int attr) const { return mat_epsilon[attr - 1]; } diff --git a/palace/models/postoperator.cpp b/palace/models/postoperator.cpp index 443067841..4ea382fe9 100644 --- a/palace/models/postoperator.cpp +++ b/palace/models/postoperator.cpp @@ -4,7 +4,6 @@ #include "postoperator.hpp" #include "fem/coefficient.hpp" -#include "linalg/petsc.hpp" #include "models/curlcurloperator.hpp" #include "models/laplaceoperator.hpp" #include "models/lumpedportoperator.hpp" @@ -24,18 +23,6 @@ using namespace std::complex_literals; namespace { -auto LocalToShared(const mfem::ParMesh &mesh) -{ - // Construct shared face mapping required for boundary coefficients. 
- std::map l2s; - for (int i = 0; i < mesh.GetNSharedFaces(); i++) - { - int i_local = mesh.GetSharedFace(i); - l2s[i_local] = i; - } - return l2s; -} - auto CreateParaviewPath(const IoData &iodata, const std::string &name) { std::string path = iodata.problem.output; @@ -51,9 +38,8 @@ auto CreateParaviewPath(const IoData &iodata, const std::string &name) PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop, const std::string &name) - : local_to_shared(LocalToShared(*spaceop.GetNDSpace().GetParMesh())), - mat_op(spaceop.GetMaterialOp()), - surf_post_op(iodata, spaceop.GetMaterialOp(), local_to_shared, spaceop.GetH1Space()), + : mat_op(spaceop.GetMaterialOp()), + surf_post_op(iodata, spaceop.GetMaterialOp(), spaceop.GetH1Space()), dom_post_op(iodata, spaceop.GetMaterialOp(), &spaceop.GetNDSpace(), &spaceop.GetRTSpace()), has_imaginary(iodata.problem.type != config::ProblemData::Type::TRANSIENT), @@ -64,31 +50,31 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop, spaceop.GetNDSpace().GetParMesh()), interp_op(iodata, *spaceop.GetNDSpace().GetParMesh()) { - Esr = std::make_unique(E->real(), mat_op, local_to_shared); - Bsr = std::make_unique(B->real(), mat_op, local_to_shared); - Jsr = std::make_unique(B->real(), mat_op, local_to_shared); - Qsr = std::make_unique(E->real(), mat_op, local_to_shared); + Esr = std::make_unique(E->real(), mat_op); + Bsr = std::make_unique(B->real(), mat_op); + Jsr = std::make_unique(B->real(), mat_op); + Qsr = std::make_unique(E->real(), mat_op); if (has_imaginary) { - Esi = std::make_unique(E->imag(), mat_op, local_to_shared); - Bsi = std::make_unique(B->imag(), mat_op, local_to_shared); - Jsi = std::make_unique(B->imag(), mat_op, local_to_shared); - Qsi = std::make_unique(E->imag(), mat_op, local_to_shared); + Esi = std::make_unique(E->imag(), mat_op); + Bsi = std::make_unique(B->imag(), mat_op); + Jsi = std::make_unique(B->imag(), mat_op); + Qsi = std::make_unique(E->imag(), mat_op); Ue = 
std::make_unique>( - *E, mat_op, local_to_shared); + mfem::ParComplexGridFunction>>(*E, + mat_op); Um = std::make_unique>( - *B, mat_op, local_to_shared); + mfem::ParComplexGridFunction>>(*B, + mat_op); } else { - Ue = std::make_unique>( - E->real(), mat_op, local_to_shared); - Um = std::make_unique>( - B->real(), mat_op, local_to_shared); + Ue = std::make_unique< + EnergyDensityCoefficient>( + E->real(), mat_op); + Um = std::make_unique< + EnergyDensityCoefficient>( + B->real(), mat_op); } // Initialize data collection objects and register additional fields associated with wave @@ -96,19 +82,19 @@ PostOperator::PostOperator(const IoData &iodata, SpaceOperator &spaceop, InitializeDataCollection(iodata); for (const auto &[idx, data] : spaceop.GetWavePortOp()) { - paraview_bdr.RegisterVCoeffField("nxH^0_" + std::to_string(idx) + "_real", - data.GetModeCoefficientReal().get()); - paraview_bdr.RegisterVCoeffField("nxH^0_" + std::to_string(idx) + "_imag", - data.GetModeCoefficientImag().get()); + paraview_bdr.RegisterVCoeffField( + "nxH^0_" + std::to_string(idx) + "_real", + const_cast(&data.GetModeCoefficientReal())); + paraview_bdr.RegisterVCoeffField( + "nxH^0_" + std::to_string(idx) + "_imag", + const_cast(&data.GetModeCoefficientImag())); } } PostOperator::PostOperator(const IoData &iodata, LaplaceOperator &laplaceop, const std::string &name) - : local_to_shared(LocalToShared(*laplaceop.GetNDSpace().GetParMesh())), - mat_op(laplaceop.GetMaterialOp()), - surf_post_op(iodata, laplaceop.GetMaterialOp(), local_to_shared, - laplaceop.GetH1Space()), + : mat_op(laplaceop.GetMaterialOp()), + surf_post_op(iodata, laplaceop.GetMaterialOp(), laplaceop.GetH1Space()), dom_post_op(iodata, laplaceop.GetMaterialOp(), &laplaceop.GetNDSpace(), nullptr), has_imaginary(false), E(&laplaceop.GetNDSpace()), B(std::nullopt), V(&laplaceop.GetH1Space()), A(std::nullopt), lumped_port_init(false), @@ -121,12 +107,12 @@ PostOperator::PostOperator(const IoData &iodata, LaplaceOperator 
&laplaceop, // Note: When using this constructor, you should not use any of the magnetic field related // postprocessing functions (magnetic field energy, inductor energy, surface currents, // etc.), since only V and E fields are supplied. - Esr = std::make_unique(E->real(), mat_op, local_to_shared); - Vs = std::make_unique(*V, mat_op, local_to_shared); + Esr = std::make_unique(E->real(), mat_op); + Vs = std::make_unique(*V, mat_op); Ue = std::make_unique< - EnergyDensityCoefficient>( - E->real(), mat_op, local_to_shared); - Qsr = std::make_unique(E->real(), mat_op, local_to_shared); + EnergyDensityCoefficient>( + E->real(), mat_op); + Qsr = std::make_unique(E->real(), mat_op); // Initialize data collection objects. InitializeDataCollection(iodata); @@ -134,10 +120,8 @@ PostOperator::PostOperator(const IoData &iodata, LaplaceOperator &laplaceop, PostOperator::PostOperator(const IoData &iodata, CurlCurlOperator &curlcurlop, const std::string &name) - : local_to_shared(LocalToShared(*curlcurlop.GetNDSpace().GetParMesh())), - mat_op(curlcurlop.GetMaterialOp()), - surf_post_op(iodata, curlcurlop.GetMaterialOp(), local_to_shared, - curlcurlop.GetH1Space()), + : mat_op(curlcurlop.GetMaterialOp()), + surf_post_op(iodata, curlcurlop.GetMaterialOp(), curlcurlop.GetH1Space()), dom_post_op(iodata, curlcurlop.GetMaterialOp(), nullptr, &curlcurlop.GetRTSpace()), has_imaginary(false), E(std::nullopt), B(&curlcurlop.GetRTSpace()), V(std::nullopt), A(&curlcurlop.GetNDSpace()), lumped_port_init(false), wave_port_init(false), @@ -149,12 +133,12 @@ PostOperator::PostOperator(const IoData &iodata, CurlCurlOperator &curlcurlop, // Note: When using this constructor, you should not use any of the electric field related // postprocessing functions (electric field energy, capacitor energy, surface charge, // etc.), since only the B field is supplied. 
- Bsr = std::make_unique(B->real(), mat_op, local_to_shared); - As = std::make_unique(*A, mat_op, local_to_shared); + Bsr = std::make_unique(B->real(), mat_op); + As = std::make_unique(*A, mat_op); Um = std::make_unique< - EnergyDensityCoefficient>( - B->real(), mat_op, local_to_shared); - Jsr = std::make_unique(B->real(), mat_op, local_to_shared); + EnergyDensityCoefficient>( + B->real(), mat_op); + Jsr = std::make_unique(B->real(), mat_op); // Initialize data collection objects. InitializeDataCollection(iodata); @@ -278,66 +262,33 @@ void PostOperator::InitializeDataCollection(const IoData &iodata) } } -void PostOperator::GetBField(std::complex omega, - const petsc::PetscParMatrix &NegCurl, - const petsc::PetscParVector &e, petsc::PetscParVector &b) -{ - // Compute B = -1/(iω) ∇ x E on the true dofs. - MFEM_VERIFY(e.GetSize() == NegCurl.Width() && b.GetSize() == NegCurl.Height(), - "Size mismatch error computing B-field in PostOperator!"); - NegCurl.Mult(e, b); - b.Scale(1.0 / (1i * omega)); -} - -void PostOperator::GetBField(const mfem::Operator &Curl, const mfem::Vector &a, - mfem::Vector &b) -{ - // Compute B = ∇ x A on the true dofs. - MFEM_VERIFY(a.Size() == Curl.Width() && b.Size() == Curl.Height(), - "Size mismatch error computing B-field in PostOperator!"); - Curl.Mult(a, b); -} - -void PostOperator::GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, - mfem::Vector &e) -{ - // Compute E = -∇V on the true dofs. 
- MFEM_VERIFY(v.Size() == NegGrad.Width() && e.Size() == NegGrad.Height(), - "Size mismatch error computing E-field in PostOperator!"); - NegGrad.Mult(v, e); -} - -void PostOperator::SetEGridFunction(const petsc::PetscParVector &e) +void PostOperator::SetEGridFunction(const ComplexVector &e) { MFEM_VERIFY( has_imaginary, "SetEGridFunction for complex-valued output called when has_imaginary == false!"); MFEM_VERIFY(E, "Incorrect usage of PostOperator::SetEGridFunction!"); - mfem::Vector Er(e.GetSize()), Ei(e.GetSize()); - e.GetToVectors(Er, Ei); - E->real().SetFromTrueDofs(Er); // Parallel distribute - E->imag().SetFromTrueDofs(Ei); + E->real().SetFromTrueDofs(e.Real()); // Parallel distribute + E->imag().SetFromTrueDofs(e.Imag()); E->real().ExchangeFaceNbrData(); // Ready for parallel comm on shared faces E->imag().ExchangeFaceNbrData(); lumped_port_init = wave_port_init = false; } -void PostOperator::SetBGridFunction(const petsc::PetscParVector &b) +void PostOperator::SetBGridFunction(const ComplexVector &b) { MFEM_VERIFY( has_imaginary, "SetBGridFunction for complex-valued output called when has_imaginary == false!"); MFEM_VERIFY(B, "Incorrect usage of PostOperator::SetBGridFunction!"); - mfem::Vector Br(b.GetSize()), Bi(b.GetSize()); - b.GetToVectors(Br, Bi); - B->real().SetFromTrueDofs(Br); // Parallel distribute - B->imag().SetFromTrueDofs(Bi); + B->real().SetFromTrueDofs(b.Real()); // Parallel distribute + B->imag().SetFromTrueDofs(b.Imag()); B->real().ExchangeFaceNbrData(); // Ready for parallel comm on shared faces B->imag().ExchangeFaceNbrData(); lumped_port_init = wave_port_init = false; } -void PostOperator::SetEGridFunction(const mfem::Vector &e) +void PostOperator::SetEGridFunction(const Vector &e) { MFEM_VERIFY(!has_imaginary, "SetEGridFunction for real-valued output called when has_imaginary == true!"); @@ -347,7 +298,7 @@ void PostOperator::SetEGridFunction(const mfem::Vector &e) lumped_port_init = wave_port_init = false; } -void 
PostOperator::SetBGridFunction(const mfem::Vector &b) +void PostOperator::SetBGridFunction(const Vector &b) { MFEM_VERIFY(!has_imaginary, "SetBGridFunction for real-valued output called when has_imaginary == true!"); @@ -357,7 +308,7 @@ void PostOperator::SetBGridFunction(const mfem::Vector &b) lumped_port_init = wave_port_init = false; } -void PostOperator::SetVGridFunction(const mfem::Vector &v) +void PostOperator::SetVGridFunction(const Vector &v) { MFEM_VERIFY(!has_imaginary, "SetVGridFunction for real-valued output called when has_imaginary == true!"); @@ -366,7 +317,7 @@ void PostOperator::SetVGridFunction(const mfem::Vector &v) V->ExchangeFaceNbrData(); } -void PostOperator::SetAGridFunction(const mfem::Vector &a) +void PostOperator::SetAGridFunction(const Vector &a) { MFEM_VERIFY(!has_imaginary, "SetAGridFunction for real-valued output called when has_imaginary == true!"); @@ -391,13 +342,13 @@ void PostOperator::UpdatePorts(const LumpedPortOperator &lumped_port_op, double omega > 0.0, "Frequency domain lumped port postprocessing requires nonzero frequency!"); vi.S = data.GetSParameter(*E); - vi.P = data.GetPower(*E, *B, mat_op, local_to_shared); + vi.P = data.GetPower(*E, *B, mat_op); vi.V = data.GetVoltage(*E); vi.Z = data.GetCharacteristicImpedance(omega); } else { - vi.P = data.GetPower(E->real(), B->real(), mat_op, local_to_shared); + vi.P = data.GetPower(E->real(), B->real(), mat_op); vi.V = data.GetVoltage(E->real()); vi.S = vi.Z = 0.0; } @@ -418,7 +369,7 @@ void PostOperator::UpdatePorts(const WavePortOperator &wave_port_op, double omeg "Frequency domain wave port postprocessing requires nonzero frequency!"); auto &vi = wave_port_vi[idx]; vi.S = data.GetSParameter(*E); - vi.P = data.GetPower(*E, *B, mat_op, local_to_shared); + vi.P = data.GetPower(*E, *B, mat_op); vi.V = vi.Z = 0.0; // Not yet implemented (Z = V² / P, I = V / Z) } wave_port_init = true; @@ -646,11 +597,9 @@ double PostOperator::GetInterfaceParticipation(int idx, double Em) const // 
with: // p_mj = 1/2 t_j Re{∫_{Γ_j} (ε_j E_m)ᴴ E_m dS} /(E_elec + E_cap). MFEM_VERIFY(E, "Surface Q not defined, no electric field solution found!"); - double Esurf = surf_post_op.GetInterfaceElectricFieldEnergy(idx, E->real()); - if (has_imaginary) - { - Esurf += surf_post_op.GetInterfaceElectricFieldEnergy(idx, E->imag()); - } + double Esurf = has_imaginary + ? surf_post_op.GetInterfaceElectricFieldEnergy(idx, *E) + : surf_post_op.GetInterfaceElectricFieldEnergy(idx, E->real()); return Esurf / Em; } @@ -661,12 +610,8 @@ double PostOperator::GetSurfaceCharge(int idx) const // for both sides of the surface. This then yields the capacitive coupling to the // excitation as C_jk = Q_j / V_k where V_k is the excitation voltage. MFEM_VERIFY(E, "Surface capacitance not defined, no electric field solution found!"); - double Q = surf_post_op.GetSurfaceElectricCharge(idx, E->real()); - if (has_imaginary) - { - double Qi = surf_post_op.GetSurfaceElectricCharge(idx, E->imag()); - Q = std::copysign(std::sqrt(Q * Q + Qi * Qi), Q); - } + double Q = has_imaginary ? surf_post_op.GetSurfaceElectricCharge(idx, *E) + : surf_post_op.GetSurfaceElectricCharge(idx, E->real()); return Q; } @@ -678,12 +623,8 @@ double PostOperator::GetSurfaceFlux(int idx) const // which are discontinuous at interior boundary elements. MFEM_VERIFY(B, "Surface inductance not defined, no magnetic flux density solution found!"); - double Phi = surf_post_op.GetSurfaceMagneticFlux(idx, B->real()); - if (has_imaginary) - { - double Phii = surf_post_op.GetSurfaceMagneticFlux(idx, B->imag()); - Phi = std::copysign(std::sqrt(Phi * Phi + Phii * Phii), Phi); - } + double Phi = has_imaginary ? 
surf_post_op.GetSurfaceMagneticFlux(idx, *B) + : surf_post_op.GetSurfaceMagneticFlux(idx, B->real()); return Phi; } diff --git a/palace/models/postoperator.hpp b/palace/models/postoperator.hpp index 88586ee97..6daa5f3c9 100644 --- a/palace/models/postoperator.hpp +++ b/palace/models/postoperator.hpp @@ -11,7 +11,9 @@ #include #include #include -#include "fem/interpolation.hpp" +#include "fem/interpolator.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/domainpostoperator.hpp" #include "models/surfacepostoperator.hpp" @@ -27,23 +29,12 @@ class SpaceOperator; class SurfaceCurrentOperator; class WavePortOperator; -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc - // // A class to handle solution postprocessing. // class PostOperator { private: - // Shared face mapping for boundary coefficients. - std::map local_to_shared; - // Reference to material property operator (not owned). const MaterialOperator &mat_op; @@ -86,30 +77,16 @@ class PostOperator bool HasE() const { return E.has_value(); } bool HasB() const { return B.has_value(); } - // Compute the magnetic flux density B in RT space from electric field solution E solution - // in ND space for the time-harmonic case: B = -1/(iω) ∇ x E. - static void GetBField(std::complex omega, const petsc::PetscParMatrix &NegCurl, - const petsc::PetscParVector &e, petsc::PetscParVector &b); - - // Compute the magnetic flux density B in RT space from the magnetic vector potential - // solution A in ND space: B = ∇ x A. - static void GetBField(const mfem::Operator &Curl, const mfem::Vector &a, mfem::Vector &b); - - // Compute the electric field E in ND space from the scalar potential solution V in H1 - // space: E = -∇V. - static void GetEField(const mfem::Operator &NegGrad, const mfem::Vector &v, - mfem::Vector &e); - // Populate the grid function solutions for the E- and B-field using the solution vectors // on the true dofs. 
For the real-valued overload, the electric scalar potential can be // specified too for electrostatic simulations. The output mesh and fields are // nondimensionalized consistently (B ~ E (L₀ ω₀ E₀⁻¹)). - void SetEGridFunction(const petsc::PetscParVector &e); - void SetBGridFunction(const petsc::PetscParVector &b); - void SetEGridFunction(const mfem::Vector &e); - void SetBGridFunction(const mfem::Vector &b); - void SetVGridFunction(const mfem::Vector &v); - void SetAGridFunction(const mfem::Vector &a); + void SetEGridFunction(const ComplexVector &e); + void SetBGridFunction(const ComplexVector &b); + void SetEGridFunction(const Vector &e); + void SetBGridFunction(const Vector &b); + void SetVGridFunction(const Vector &v); + void SetAGridFunction(const Vector &a); // Update cached port voltages and currents for lumped and wave port operators. void UpdatePorts(const LumpedPortOperator &lumped_port_op, diff --git a/palace/models/romoperator.cpp b/palace/models/romoperator.cpp index e369319f6..03d007eca 100644 --- a/palace/models/romoperator.cpp +++ b/palace/models/romoperator.cpp @@ -5,8 +5,8 @@ #include #include -#include "fem/freqdomain.hpp" -#include "fem/operator.hpp" +#include +#include "linalg/orthog.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -16,44 +16,97 @@ namespace palace using namespace std::complex_literals; -RomOperator::RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax) - : spaceop(sp), - engine((unsigned)std::chrono::system_clock::now().time_since_epoch().count()) +namespace +{ + +inline void ProjectMatInternal(MPI_Comm comm, const std::vector &V, + const ComplexOperator &A, Eigen::MatrixXcd &Ar, + ComplexVector &r, int n0) +{ + // Update Ar = Vᴴ A V for the new basis dimension n0 -> n. V is real and thus the result + // is complex symmetric if A is symmetric (which we assume is the case). Ar is replicated + // across all processes as a sequential n x n matrix. 
+ const auto n = Ar.rows(); + MFEM_VERIFY(n0 < n, "Unexpected dimensions in PROM matrix projection!"); + for (int j = n0; j < n; j++) + { + // Fill block of Vᴴ A V = [ | Vᴴ A vj ] . We can optimize the matrix-vector product + // since the columns of V are real. + MFEM_VERIFY(A.HasReal() || A.HasImag(), + "Invalid zero ComplexOperator for PROM matrix projection!"); + if (A.HasReal()) + { + A.Real()->Mult(V[j], r.Real()); + } + if (A.HasImag()) + { + A.Imag()->Mult(V[j], r.Imag()); + } + for (int i = 0; i < n; i++) + { + Ar(i, j).real(A.HasReal() ? V[i] * r.Real() : 0.0); // Local inner product + Ar(i, j).imag(A.HasImag() ? V[i] * r.Imag() : 0.0); + } + } + Mpi::GlobalSum((n - n0) * n, Ar.data() + n0 * n, comm); + + // Fill lower block of Vᴴ A V = [ ____________ | ] + // [ vjᴴ A V[1:n0] | ] . + for (int j = 0; j < n0; j++) + { + for (int i = n0; i < n; i++) + { + Ar(i, j) = Ar(j, i); + } + } +} + +inline void ProjectVecInternal(MPI_Comm comm, const std::vector &V, + const ComplexVector &b, Eigen::VectorXcd &br, int n0) +{ + // Update br = Vᴴ b for the new basis dimension n0 -> n. br is replicated across all + // processes as a sequential n-dimensional vector. + const auto n = br.size(); + MFEM_VERIFY(n0 < n, "Unexpected dimensions in PROM vector projection!"); + for (int i = n0; i < n; i++) + { + br(i).real(V[i] * b.Real()); // Local inner product + br(i).imag(V[i] * b.Imag()); + } + Mpi::GlobalSum(n - n0, br.data() + n0, comm); +} + +} // namespace + +RomOperator::RomOperator(const IoData &iodata, SpaceOperator &spaceop) : spaceop(spaceop) { // Construct the system matrices defining the linear operator. PEC boundaries are handled // simply by setting diagonal entries of the system matrix for the corresponding dofs. // Because the Dirichlet BC is always homogenous, no special elimination is required on // the RHS. The damping matrix may be nullptr. 
- K = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::STIFFNESS, - mfem::Operator::DIAG_ONE); - M = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::MASS, - mfem::Operator::DIAG_ZERO); - C = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::DAMPING, - mfem::Operator::DIAG_ZERO); - - // Set up the linear solver and set operators but don't set the operators yet (this will - // be done during an HDM solve at a given parameter point). The preconditioner for the - // complex linear system is constructed from a real approximation to the complex system - // matrix. - pc0 = std::make_unique(iodata, spaceop.GetDbcMarker(), - spaceop.GetNDSpaces(), &spaceop.GetH1Spaces()); - ksp0 = std::make_unique(K->GetComm(), iodata, "ksp_"); - ksp0->SetPreconditioner(*pc0); + K = spaceop.GetComplexStiffnessMatrix(Operator::DIAG_ONE); + C = spaceop.GetComplexDampingMatrix(Operator::DIAG_ZERO); + M = spaceop.GetComplexMassMatrix(Operator::DIAG_ZERO); + MFEM_VERIFY(K && M, "Invalid empty HDM matrices when constructing PROM!"); // Set up RHS vector (linear in frequency part) for the incident field at port boundaries, // and the vector for the solution, which satisfies the Dirichlet (PEC) BC. - RHS1 = std::make_unique(*K); - if (!spaceop.GetFreqDomainExcitationVector1(*RHS1)) + if (!spaceop.GetExcitationVector1(RHS1)) { - RHS1.reset(); + RHS1.SetSize(0); } - init2 = true; - hasA2 = hasRHS2 = false; + has_A2 = has_RHS2 = true; - // Initialize other data structure and storage. - E0 = std::make_unique(*K); - R0 = std::make_unique(*K); - T0 = std::make_unique(*K); + // Initialize temporary vector storage. + r.SetSize(K->Height()); + w.SetSize(K->Height()); + + // Set up the linear solver but don't set the operators yet (this will + // be done during an HDM solve at a given parameter point). The preconditioner for the + // complex linear system is constructed from a real approximation to the complex system + // matrix.
+ ksp = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); // Initialize solver for inner product solves. The system matrix for the inner product is // real and SPD. This uses the dual norm from https://ieeexplore.ieee.org/document/5313818 @@ -61,304 +114,273 @@ RomOperator::RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax) if (iodata.solver.driven.adaptive_metric_aposteriori) { constexpr int curlcurl_verbose = 0; - kspKM = std::make_unique( - spaceop.GetMaterialOp(), spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - spaceop.GetH1Spaces(), iodata.solver.linear.tol, iodata.solver.linear.max_it, - curlcurl_verbose); - - auto KM = std::make_unique(K->GetNumRows(), K->GetNumCols()); - KM->AddOperator(*K->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - KM->AddOperator(*M->GetOperator(petsc::PetscParMatrix::ExtractStructure::REAL)); - opKM = std::make_unique(K->GetComm(), std::move(KM)); - opKM->SetRealSymmetric(); + kspKM = std::make_unique( + spaceop.GetMaterialOp(), spaceop.GetNDSpaces(), spaceop.GetH1Spaces(), + spaceop.GetNDDbcTDofLists(), spaceop.GetH1DbcTDofLists(), iodata.solver.linear.tol, + iodata.solver.linear.max_it, curlcurl_verbose); } - // Construct initial (empty) basis and ROM operators. Ar = Vᴴ A V when assembled is - // complex symmetric for real V. The provided nmax is the number of sample points(2 basis - // vectors per point). 
- MFEM_VERIFY(K && M, "Invalid empty HDM matrices constructing PROM operators!"); - MFEM_VERIFY(nmax > 0, "Reduced order basis storage must have > 0 columns!"); - dim = 0; - omega_min = delta_omega = 0.0; - V = std::make_unique(K->GetComm(), K->Height(), PETSC_DECIDE, - PETSC_DECIDE, 2 * nmax, nullptr); - - Kr = std::make_unique(dim, dim, nullptr); - Kr->CopySymmetry(*K); - Mr = std::make_unique(dim, dim, nullptr); - Mr->CopySymmetry(*M); - if (C) - { - Cr = std::make_unique(dim, dim, nullptr); - Cr->CopySymmetry(*C); - } - else - { - Cr = nullptr; - } - Ar = std::make_unique(dim, dim, nullptr); - Ar->SetSymmetric(K->GetSymmetric() && M->GetSymmetric() && (!C || C->GetSymmetric())); - - RHS1r = (RHS1) ? std::make_unique(*Ar) : nullptr; - RHSr = std::make_unique(*Ar); - Er = std::make_unique(*Ar); - - // Set up the linear solver (dense sequential on all processors). An indefinite LDLᵀ - // factorization is used when Ar has its symmetry flag set. The default sequential dense - // matrix uses LAPACK for the factorization. - int print = 0; - ksp = std::make_unique(Ar->GetComm(), print, "rom_"); - ksp->SetType(KspSolver::Type::CHOLESKY); // Symmetric indefinite factorization + // The initial PROM basis is empty. Orthogonalization uses MGS by default, else CGS2. + dim_V = 0; + orthog_mgs = + (iodata.solver.linear.gs_orthog_type == config::LinearSolverData::OrthogType::MGS); + + // Seed the random number generator for parameter space sampling. + engine.seed(std::chrono::system_clock::now().time_since_epoch().count()); } -void RomOperator::Initialize(int steps, double start, double delta) +void RomOperator::Initialize(double start, double delta, int num_steps, int max_dim) { // Initialize P = {ω_L, ω_L+δ, ..., ω_R}. Always insert in ascending order. 
- MFEM_VERIFY(Ps.empty(), "RomOperator::Initialize should only be called once!"); - MFEM_VERIFY(steps > 2, "RomOperator adaptive frequency sweep should have more than two " - "frequency steps!"); - Ps.reserve(steps); - PmPs.resize(steps); + MFEM_VERIFY(PS.empty() && P_m_PS.empty(), + "RomOperator::Initialize should only be called once!"); + MFEM_VERIFY( + num_steps > 2, + "RomOperator adaptive frequency sweep should have more than two frequency steps!"); if (delta < 0.0) { - start = start + (steps - 1) * delta; + start = start + (num_steps - 1) * delta; delta = -delta; } - for (int step = 0; step < steps; step++) + auto it = P_m_PS.begin(); + for (int step = 0; step < num_steps; step++) { - PmPs[step] = start + step * delta; + it = P_m_PS.emplace_hint(it, start + step * delta); } - omega_min = start; - delta_omega = delta; - A2.resize(steps); - RHS2.resize(steps); + + // PROM operators Ar = Vᴴ A V when assembled is complex symmetric for real V. The provided + // max_dim is the number of sample points (2 basis vectors per point). + MFEM_VERIFY(max_dim > 0, "Reduced order basis storage must have > 0 columns!"); + V.resize(2 * max_dim, Vector()); } -void RomOperator::SolveHDM(double omega, petsc::PetscParVector &E, bool print) +void RomOperator::SolveHDM(double omega, ComplexVector &e) { - // Compute HDM solution at the given frequency and add solution to the reduced-order - // basis, updating the PROM operators. Update P_S and P\P_S sets. - auto it = std::lower_bound(PmPs.begin(), PmPs.end(), omega); - MFEM_VERIFY(it != PmPs.end(), - "Sample frequency " << omega << " not found in parameter set!"); - PmPs.erase(it); - Ps.push_back(omega); - - // Set up HDM system and solve. The system matrix A = K + iω C - ω² M + A2(ω) is built - // by summing the underlying operator contributions (to save memory). + // Compute HDM solution at the given frequency. The system matrix, A = K + iω C - ω² M + + // A2(ω) is built by summing the underlying operator contributions. 
+ A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); + has_A2 = (A2 != nullptr); + auto A = spaceop.GetSystemMatrix(std::complex(1.0, 0.0), 1i * omega, + std::complex(-omega * omega, 0.0), K.get(), + C.get(), M.get(), A2.get()); + auto P = + spaceop.GetPreconditionerMatrix(1.0, omega, -omega * omega, omega); + ksp->SetOperators(*A, *P); + + // The HDM excitation vector is computed as RHS = iω RHS1 + RHS2(ω). + Mpi::Print("\n"); + if (has_RHS2) { - const auto step = std::lround((omega - omega_min) / delta_omega); - MFEM_VERIFY(step >= 0 && static_cast(step) < A2.size(), - "Invalid out-of-range frequency for PROM solution!"); - std::vector> P, AuxP; - A2[step] = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, - mfem::Operator::DIAG_ZERO, print); - auto A = utils::GetSystemMatrixShell(omega, *K, *M, C.get(), A2[step].get()); - spaceop.GetPreconditionerMatrix(omega, P, AuxP, print); - pc0->SetOperator(P, &AuxP); - ksp0->SetOperator(*A); - - Mpi::Print("\n"); - spaceop.GetFreqDomainExcitationVector(omega, *R0); - E.SetZero(); - ksp0->Mult(*R0, E); + has_RHS2 = spaceop.GetExcitationVector2(omega, r); } + else + { + r = 0.0; + } + if (RHS1.Size()) + { + r.Add(1i * omega, RHS1); + } + ksp->Mult(r, e); +} - double norm = E.Normlinf(), ntol = 1.0e-12; - mfem::Vector Er_(E.GetSize()), Ei_(E.GetSize()); - E.GetToVectors(Er_, Ei_); - bool has_real = (std::sqrt(mfem::InnerProduct(E.GetComm(), Er_, Er_)) > ntol * norm); - bool has_imag = (std::sqrt(mfem::InnerProduct(E.GetComm(), Ei_, Ei_)) > ntol * norm); +void RomOperator::AddHDMSample(double omega, ComplexVector &e) +{ + // Use the given HDM solution at the given frequency to update the reduced-order basis + // updating the PROM operators. 
+ auto it = P_m_PS.lower_bound(omega); + MFEM_VERIFY(it != P_m_PS.end(), + "Sample frequency " << omega << " not found in parameter set!"); + P_m_PS.erase(it); + auto ret = PS.insert(omega); + MFEM_VERIFY(ret.second, "Sample frequency " + << omega << " already exists in the sampled parameter set!"); // Update V. The basis is always real (each complex solution adds two basis vectors if it // has a nonzero real and imaginary parts). - PetscInt nmax = V->GetGlobalNumCols(), dim0 = dim; - dim = (has_real) + (has_imag) + static_cast(dim0); - MFEM_VERIFY(dim <= nmax, "Unable to increase basis storage size, increase maximum number " - "of vectors!"); - bool mgs = false, cgs2 = true; - if (has_real && has_imag) + const double normr = linalg::Norml2(spaceop.GetComm(), e.Real()); + const double normi = linalg::Norml2(spaceop.GetComm(), e.Imag()); + const bool has_real = (normr > 1.0e-12 * std::sqrt(normr * normr + normi * normi)); + const bool has_imag = (normi > 1.0e-12 * std::sqrt(normr * normr + normi * normi)); + MFEM_VERIFY(dim_V + has_real + has_imag <= static_cast(V.size()), + "Unable to increase basis storage size, increase maximum number of vectors!"); + const int dim_V0 = dim_V; + std::vector H(dim_V + 1); + if (has_real) { + V[dim_V] = e.Real(); + if (orthog_mgs) { - petsc::PetscParVector v = V->GetColumn(dim - 2); - v.SetFromVector(Er_); - V->RestoreColumn(dim - 2, v); - if (opKM) - { - V->OrthonormalizeColumn(dim - 2, mgs, cgs2, *opKM, *T0); - } - else - { - V->OrthonormalizeColumn(dim - 2, mgs, cgs2); - } + linalg::OrthogonalizeColumnMGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V); } + else { - petsc::PetscParVector v = V->GetColumn(dim - 1); - v.SetFromVector(Ei_); - V->RestoreColumn(dim - 1, v); - if (opKM) - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2, *opKM, *T0); - } - else - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2); - } + linalg::OrthogonalizeColumnCGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V, true); } + V[dim_V] *= 1.0 / 
linalg::Norml2(spaceop.GetComm(), V[dim_V]); + dim_V++; } - else + if (has_imag) { + V[dim_V] = e.Imag(); + if (orthog_mgs) { - petsc::PetscParVector v = V->GetColumn(dim - 1); - v.Copy(E); - V->RestoreColumn(dim - 1, v); - if (opKM) - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2, *opKM, *T0); - } - else - { - V->OrthonormalizeColumn(dim - 1, mgs, cgs2); - } + linalg::OrthogonalizeColumnMGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V); } + else + { + linalg::OrthogonalizeColumnCGS(spaceop.GetComm(), V, V[dim_V], H.data(), dim_V, true); + } + V[dim_V] *= 1.0 / linalg::Norml2(spaceop.GetComm(), V[dim_V]); + dim_V++; } // Update reduced-order operators. Resize preserves the upper dim0 x dim0 block of each // matrix and first dim0 entries of each vector and the projection uses the values // computed for the unchanged basis vectors. - bool init = (dim0 > 0); - Kr->Resize(dim, dim, init); - Mr->Resize(dim, dim, init); - BVMatProjectInternal(*V, *K, *Kr, *R0, dim0, dim); - BVMatProjectInternal(*V, *M, *Mr, *R0, dim0, dim); + Kr.conservativeResize(dim_V, dim_V); + ProjectMatInternal(spaceop.GetComm(), V, *K, Kr, r, dim_V0); if (C) { - Cr->Resize(dim, dim, init); - BVMatProjectInternal(*V, *C, *Cr, *R0, dim0, dim); + Cr.conservativeResize(dim_V, dim_V); + ProjectMatInternal(spaceop.GetComm(), V, *C, Cr, r, dim_V0); } - if (RHS1) + Mr.conservativeResize(dim_V, dim_V); + ProjectMatInternal(spaceop.GetComm(), V, *M, Mr, r, dim_V0); + Ar.resize(dim_V, dim_V); + if (RHS1.Size()) { - RHS1r->Resize(dim, init); - BVDotVecInternal(*V, *RHS1, *RHS1r, dim0, dim); + RHS1r.conservativeResize(dim_V); + ProjectVecInternal(spaceop.GetComm(), V, RHS1, RHS1r, dim_V0); } - Ar->Resize(dim, dim); - RHSr->Resize(dim); - Er->Resize(dim); - if (init) - { - ksp->Reset(); // Operator size change - } - ksp->SetOperator(*Ar); + RHSr.resize(dim_V); } void RomOperator::AssemblePROM(double omega) { - // Assemble the PROM linear system at the given frequency. 
Do some additional set up at - // the first solve call. The PROM system is defined by the matrix Aᵣ(ω) = Kᵣ + iω Cᵣ - // - ω² Mᵣ + Vᴴ A2ᵣ V(ω) and source vector RHSᵣ(ω) = iω RHS1ᵣ + Vᴴ RHS2ᵣ(ω) V. - const auto step = std::lround((omega - omega_min) / delta_omega); - MFEM_VERIFY(step >= 0 && static_cast(step) < A2.size(), - "Invalid out-of-range frequency for PROM solution!"); - - // Construct A2(ω) and RHS2(ω) if required (only nonzero on boundaries, will be empty - // if not needed). - if (init2) + // Assemble the PROM linear system at the given frequency. The PROM system is defined by + // the matrix Aᵣ(ω) = Kᵣ + iω Cᵣ - ω² Mᵣ + Vᴴ A2 V(ω) and source vector RHSᵣ(ω) = + // iω RHS1ᵣ + Vᴴ RHS2(ω). A2(ω) and RHS2(ω) are constructed only if required and are + // only nonzero on boundaries, will be empty if not needed. + if (has_A2) { - auto tA2 = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, - mfem::Operator::DIAG_ZERO, false); - if (tA2) - { - hasA2 = true; - A2[step] = std::move(tA2); - } - auto tRHS2 = std::make_unique(*K); - if (spaceop.GetFreqDomainExcitationVector2(omega, *tRHS2)) - { - hasRHS2 = true; - RHS2[step] = std::move(tRHS2); - } - init2 = false; + A2 = spaceop.GetComplexExtraSystemMatrix(omega, Operator::DIAG_ZERO); + ProjectMatInternal(spaceop.GetComm(), V, *A2, Ar, r, 0); } - - // Set up PROM linear system. 
- Ar->Scale(0.0); - if (hasA2) + else { - if (!A2[step]) - { - // Debug - // Mpi::Print("Inserting cache value for omega = {:e}\n", omega); - A2[step] = spaceop.GetSystemMatrixPetsc(SpaceOperator::OperatorType::EXTRA, omega, - mfem::Operator::DIAG_ZERO, false); - } - else - { - // Debug - // Mpi::Print("Found cache value for omega = {:e} (step = {:d})\n", omega, step); - } - BVMatProjectInternal(*V, *A2[step], *Ar, *R0, 0, dim); + Ar.setZero(); } - Ar->AXPY(1.0, *Kr, petsc::PetscParMatrix::NNZStructure::SAME); - Ar->AXPY(-omega * omega, *Mr, petsc::PetscParMatrix::NNZStructure::SAME); + Ar += Kr; if (C) { - Ar->AXPY(1i * omega, *Cr, petsc::PetscParMatrix::NNZStructure::SAME); + Ar += (1i * omega) * Cr; } + Ar += (-omega * omega) * Mr; - RHSr->SetZero(); - if (hasRHS2) + if (has_RHS2) { - if (!RHS2[step]) - { - RHS2[step] = std::make_unique(*K); - spaceop.GetFreqDomainExcitationVector2(omega, *RHS2[step]); - } - BVDotVecInternal(*V, *RHS2[step], *RHSr, 0, dim); + spaceop.GetExcitationVector2(omega, RHS2); + ProjectVecInternal(spaceop.GetComm(), V, RHS2, RHSr, 0); + } + else + { + RHSr.setZero(); } - if (RHS1) + if (RHS1.Size()) { - RHSr->AXPY(1i * omega, *RHS1r); + RHSr += (1i * omega) * RHS1r; } } -void RomOperator::SolvePROM(petsc::PetscParVector &E) +void RomOperator::SolvePROM(ComplexVector &e) +{ + // Compute PROM solution at the given frequency and expand into high-dimensional space. + // The PROM is solved on every process so the matrix-vector product for vector expansion + // does not require communication. + RHSr = Ar.partialPivLu().solve(RHSr); + // RHSr = Ar.ldlt().solve(RHSr); + // RHSr = Ar.selfadjointView().ldlt().solve(RHSr); + + e = 0.0; + for (int j = 0; j < dim_V; j++) + { + e.Real().Add(RHSr(j).real(), V[j]); + e.Imag().Add(RHSr(j).imag(), V[j]); + } +} + +double RomOperator::ComputeError(double omega) { - // Compute PROM solution at the given frequency and expand into high- dimensional space. 
- // The PROM is solved on every process so the matrix- vector product for vector expansion - // is sequential. - ksp->Mult(*RHSr, *Er); + // Compute the error metric associated with the approximate PROM solution at the given + // frequency. The HDM residual -r = [K + iω C - ω² M + A2(ω)] x - [iω RHS1 + RHS2(ω)] is + // computed using the most recently computed A2(ω) and RHS2(ω). + AssemblePROM(omega); + SolvePROM(w); + + // Residual error. + r = 0.0; + if (RHS1.Size()) + { + r.Add(-1i * omega, RHS1); + } + if (has_RHS2) + { + r.Add(-1.0, RHS2); + } + double den = !kspKM ? linalg::Norml2(spaceop.GetComm(), r) : 0.0; + + K->AddMult(w, r, 1.0); + if (C) + { + C->AddMult(w, r, 1i * omega); + } + M->AddMult(w, r, -omega * omega); + if (has_A2) { - PetscScalar *pV = V->GetArray(), *pE = E.GetArray(); - petsc::PetscDenseMatrix locV(V->Height(), dim, pV); - petsc::PetscParVector locE(V->Height(), pE); - locV.Mult(*Er, locE); - V->RestoreArray(pV); - E.RestoreArray(pE); + A2->AddMult(w, r, 1.0); } + + double num; + if (!kspKM) + { + num = linalg::Norml2(spaceop.GetComm(), r); + } + else + { + z.SetSize(r.Size()); + kspKM->Mult(r, z); + auto dot = linalg::Dot(spaceop.GetComm(), z, r); + MFEM_ASSERT(dot.real() > 0.0 && std::abs(dot.imag()) < 1.0e-9 * dot.real(), + "Non-positive vector norm in normalization (dot = " << dot << ")!"); + num = std::sqrt(dot.real()); + den = linalg::Norml2(spaceop.GetComm(), w, kspKM->GetOperator(), z); + } + MFEM_VERIFY(den > 0.0, "Unexpected zero denominator in HDM residual!"); + return num / den; } -double RomOperator::ComputeMaxError(int Nc, double &omega_star) +double RomOperator::ComputeMaxError(int num_cand, double &omega_star) { - // Greedy iteration: Find argmax_{ω ∈ P_C} η(E; ω). We sample Nc candidates from P \ P_S. - MPI_Comm comm = K->GetComm(); - Nc = std::min(Nc, static_cast(PmPs.size())); - std::vector Pc; - if (Mpi::Root(comm)) + // Greedy iteration: Find argmax_{ω ∈ P_C} η(e; ω). We sample num_cand candidates from + // P \ P_S. 
+ num_cand = std::min(num_cand, static_cast(P_m_PS.size())); + std::vector PC; + if (Mpi::Root(spaceop.GetComm())) { // Sample with uniform probability. - Pc.reserve(Nc); - std::sample(PmPs.begin(), PmPs.end(), std::back_inserter(Pc), Nc, engine); + PC.reserve(num_cand); + std::sample(P_m_PS.begin(), P_m_PS.end(), std::back_inserter(PC), num_cand, engine); #if 0 // Sample with weighted probability by distance from the set of already sampled // points. - std::vector weights(PmPs.size()); + std::vector weights(P_m_PS.size()); weights = static_cast(weights.Size()); - Pc.reserve(Nc); - for (auto sample : Ps) + PC.reserve(num_cand); + for (auto sample : PS) { - int i = std::distance(PmPs.begin(), - std::lower_bound(PmPs.begin(), PmPs.end(), sample)); + int i = std::distance(P_m_PS.begin(), P_m_PS.lower_bound(sample)); int il = i-1; while (il >= 0) { @@ -372,30 +394,31 @@ double RomOperator::ComputeMaxError(int Nc, double &omega_star) iu++; } } - for (int i = 0; i < Nc; i++) + for (int i = 0; i < num_cand; i++) { std::discrete_distribution dist(weights.begin(), weights.end()); int res = dist(engine); - Pc.push_back(PmPs[res]); + PC.push_back(P_m_PS[res]); weights[res] = 0.0; // No replacement } #endif } else { - Pc.resize(Nc); + PC.resize(num_cand); } - Mpi::Broadcast(Nc, Pc.data(), 0, comm); + Mpi::Broadcast(num_cand, PC.data(), 0, spaceop.GetComm()); // Debug // Mpi::Print("Candidate sampling:\n"); - // Mpi::Print(" P_S: {}", Ps); - // Mpi::Print(" P\\P_S: {}\n", PmPs); - // Mpi::Print(" P_C: {}\n", Pc); + // Mpi::Print(" P_S: {}", PS); + // Mpi::Print(" P\\P_S: {}\n", P_m_PS); + // Mpi::Print(" P_C: {}\n", PC); + // Mpi::Print("\n"); // For each candidate, compute the PROM solution and associated error metric. 
double err_max = 0.0; - for (auto omega : Pc) + for (auto omega : PC) { double err = ComputeError(omega); @@ -411,124 +434,4 @@ double RomOperator::ComputeMaxError(int Nc, double &omega_star) return err_max; } -double RomOperator::ComputeError(double omega) -{ - // Compute the error metric associated with the approximate PROM solution at the given - // frequency. The HDM residual R = [K + iω C - ω² M + A2(ω)] x - [iω RHS1 + RHS2(ω)] is - // computed using the most recently computed A2(ω) and RHS2(ω). - AssemblePROM(omega); - SolvePROM(*E0); - - // Residual error. - const auto step = std::lround((omega - omega_min) / delta_omega); - MFEM_VERIFY(step >= 0 && static_cast(step) < A2.size(), - "Invalid out-of-range frequency for PROM solution!"); - double num, den = 1.0; - R0->SetZero(); - if (RHS1) - { - R0->AXPY(-1i * omega, *RHS1); - } - if (hasRHS2) - { - MFEM_VERIFY(RHS2[step], "Unexpected uncached frequency for RHS2 vector in PROM!"); - R0->AXPY(-1.0, *RHS2[step]); - } - if (!kspKM) - { - den = R0->Norml2(); - } - - K->MultAdd(*E0, *R0); - M->Mult(*E0, *T0); - R0->AXPY(-omega * omega, *T0); - if (C) - { - C->Mult(*E0, *T0); - R0->AXPY(1i * omega, *T0); - } - if (hasA2) - { - MFEM_VERIFY(A2[step], "Unexpected uncached frequency for A2 matrix in PROM!"); - A2[step]->MultAdd(*E0, *R0); - } - if (!kspKM) - { - num = R0->Norml2(); - } - else - { - kspKM->Mult(*R0, *T0); - num = std::sqrt(std::real(R0->Dot(*T0))); - opKM->Mult(*E0, *T0); - den = std::sqrt(std::real(E0->Dot(*T0))); - } - MFEM_VERIFY(den > 0.0, "Unexpected zero denominator in HDM residual!"); - return num / den; -} - -void RomOperator::BVMatProjectInternal(petsc::PetscDenseMatrix &V, petsc::PetscParMatrix &A, - petsc::PetscDenseMatrix &Ar, - petsc::PetscParVector &r, int n0, int n) -{ - // Update Ar = Vᴴ A V for the new basis dimension n0 => n. We assume V is real and thus - // the result is complex symmetric if A is symmetric. Ar is replicated across all - // processes (sequential n x n matrix). 
- MFEM_VERIFY(n0 < n, "Unexpected dimensions in BVMatProjectInternal!"); - MFEM_VERIFY(A.GetSymmetric() && Ar.GetSymmetric(), - "BVMatProjectInternal is specialized for symmetric matrices!"); - mfem::Vector vr(V.Height()); - for (int j = n0; j < n; j++) - { - // Fill block of Vᴴ A V = [ | Vᴴ A vj ] . We optimize matrix-vector product since we - // know columns of V are real. - { - petsc::PetscParVector v = V.GetColumn(j); - v.GetToVector(vr); - A.Mult(vr, r); - // A.Mult(v, r); - V.RestoreColumn(j, v); - } - { - PetscScalar *pV = V.GetArray(), *pr = r.GetArray(), *pAr = Ar.GetArray(); - petsc::PetscDenseMatrix locV(V.Height(), n, pV); - petsc::PetscParVector locr(V.Height(), pr), arn(n, pAr + j * n); - locV.MultTranspose(locr, arn); // Vᴴ = Vᵀ - V.RestoreArray(pV); - r.RestoreArray(pr); - Ar.RestoreArray(pAr); - } - } - // Fill lower block of Vᴴ A V = [ ____________ | ] - // [ vjᴴ A V[1:n0] | ] . - { - PetscScalar *pAr = Ar.GetArray(); - Mpi::GlobalSum((n - n0) * n, pAr + n0 * n, V.GetComm()); - for (int j = 0; j < n0; j++) - { - for (int i = n0; i < n; i++) - { - pAr[i + j * n] = pAr[j + i * n]; - } - } - Ar.RestoreArray(pAr); - } -} - -void RomOperator::BVDotVecInternal(petsc::PetscDenseMatrix &V, petsc::PetscParVector &b, - petsc::PetscParVector &br, int n0, int n) -{ - // Update br = Vᴴ b for the new basis dimension n0 => n. br is replicated across all - // processes (sequential n-dimensional vector). 
- MFEM_VERIFY(n0 < n, "Unexpected dimensions in BVDotVecInternal!"); - PetscScalar *pV = V.GetArray(), *pb = b.GetArray(), *pbr = br.GetArray(); - petsc::PetscDenseMatrix locV(V.Height(), n - n0, pV + n0 * V.Height()); - petsc::PetscParVector locb(V.Height(), pb), brn(n - n0, pbr + n0); - locV.MultTranspose(locb, brn); // Vᴴ = Vᵀ - V.RestoreArray(pV); - b.RestoreArray(pb); - Mpi::GlobalSum(n - n0, pbr + n0, V.GetComm()); - br.RestoreArray(pbr); -} - } // namespace palace diff --git a/palace/models/romoperator.hpp b/palace/models/romoperator.hpp index 616d7ad4b..4a230ebc6 100644 --- a/palace/models/romoperator.hpp +++ b/palace/models/romoperator.hpp @@ -6,12 +6,18 @@ #include #include +#include #include -#include +#include #include "linalg/curlcurl.hpp" #include "linalg/ksp.hpp" -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" + +// XX TODO NOTES +// - Precompute A2, RHS2 for all frequencies? This seems very dumb and especially risky +// for fine resolution sweeps, so for now remove A2, RHS2 storage (and omega = omega_0 + +// delta_omega * step) namespace palace { @@ -20,7 +26,8 @@ class IoData; class SpaceOperator; // -// A class handling PROM construction and use for adaptive fast frequency sweeps. +// A class handling projection-based reduced order model (PROM) construction and use for +// adaptive fast frequency sweeps. // class RomOperator { @@ -29,78 +36,66 @@ class RomOperator SpaceOperator &spaceop; // HDM system matrices and excitation RHS. - std::unique_ptr K, M, C; - std::unique_ptr RHS1; - - // HDM storage for terms with non-polynomial frequency dependence. - std::vector> A2; - std::vector> RHS2; - bool init2, hasA2, hasRHS2; - - // HDM linear system solver and preconditioner. - std::unique_ptr ksp0; - std::unique_ptr pc0; + std::unique_ptr K, M, C, A2; + ComplexVector RHS1, RHS2; + bool has_A2, has_RHS2; // Working storage for HDM vectors. 
- std::unique_ptr E0, R0, T0; + ComplexVector r, w, z; - // PROM matrices, vectors, and linear solver. - std::unique_ptr Kr, Mr, Cr, Ar; - std::unique_ptr RHS1r, RHSr, Er; - std::unique_ptr ksp; + // HDM linear system solver and preconditioner. + std::unique_ptr ksp; // Linear solver for inner product solves for error metric. - std::unique_ptr kspKM; - std::unique_ptr opKM; - - // PROM reduced-order basis and parameter domain samplings. - int dim; - std::unique_ptr V; - std::vector Ps, PmPs; - double omega_min, delta_omega; - std::default_random_engine engine; + std::unique_ptr kspKM; - // Compute the error metric for the PROM solution (computed internally) at the specified - // frequency. - double ComputeError(double omega); + // PROM matrices and vectors. + Eigen::MatrixXcd Kr, Mr, Cr, Ar; + Eigen::VectorXcd RHS1r, RHSr; - // Helper functions for reduced-order matrix or vector construction/update. - void BVMatProjectInternal(petsc::PetscDenseMatrix &V, petsc::PetscParMatrix &A, - petsc::PetscDenseMatrix &Ar, petsc::PetscParVector &r, int n0, - int n); - void BVDotVecInternal(petsc::PetscDenseMatrix &V, petsc::PetscParVector &b, - petsc::PetscParVector &br, int n0, int n); + // PROM reduced-order basis (real-valued) and active dimension. + std::vector V; + int dim_V; + bool orthog_mgs; + + // Data structures for parameter domain sampling. + std::set PS, P_m_PS; + std::default_random_engine engine; public: - RomOperator(const IoData &iodata, SpaceOperator &sp, int nmax); + RomOperator(const IoData &iodata, SpaceOperator &sp); - // Return set of sampled parameter points for basis construction. - const std::vector &GetSampleFrequencies() const { return Ps; } + // Return the HDM linear solver. + const ComplexKspSolver &GetLinearSolver() const { return *ksp; } // Return PROM dimension. 
- int GetReducedDimension() const { return dim; } + int GetReducedDimension() const { return dim_V; } - // Return number of HDM linear solves and linear solver iterations performed during - // offline training. - int GetTotalKspMult() const { return ksp0->GetTotalNumMult(); } - int GetTotalKspIter() const { return ksp0->GetTotalNumIter(); } + // Return set of sampled parameter points for basis construction. + const std::set &GetSampleFrequencies() const { return PS; } - // Initialize the solution basis with HDM samples at the minimum and maximum frequencies. - void Initialize(int steps, double start, double delta); + // Initialize the parameter domain P = {ω_L, ω_L + δ, ..., ω_R}. Also sets the maximum + // number of sample points for the PROM construction. + void Initialize(double start, double delta, int num_steps, int max_dim); - // Assemble and solve the HDM at the specified frequency, adding the solution vector to - // the reduced-order basis. - void SolveHDM(double omega, petsc::PetscParVector &E, bool print = false); + // Assemble and solve the HDM at the specified frequency. + void SolveHDM(double omega, ComplexVector &e); + + // Add the solution vector to the reduced-order basis and update the PROM. + void AddHDMSample(double omega, ComplexVector &e); // Assemble and solve the PROM at the specified frequency, expanding the solution back // into the high-dimensional solution space. void AssemblePROM(double omega); - void SolvePROM(petsc::PetscParVector &E); + void SolvePROM(ComplexVector &e); + + // Compute the error metric for the PROM at the specified frequency. + double ComputeError(double omega); // Compute the maximum error over a randomly sampled set of candidate points. Returns the // maximum error and its correcponding frequency, as well as the number of candidate // points used (if fewer than those availble in the unsampled parameter domain). 
- double ComputeMaxError(int Nc, double &omega_star); + double ComputeMaxError(int num_cand, double &omega_star); }; } // namespace palace diff --git a/palace/models/spaceoperator.cpp b/palace/models/spaceoperator.cpp index c25404dbe..acd925d27 100644 --- a/palace/models/spaceoperator.cpp +++ b/palace/models/spaceoperator.cpp @@ -3,12 +3,10 @@ #include "spaceoperator.hpp" -#include -#include "fem/coefficient.hpp" +#include #include "fem/integrator.hpp" #include "fem/multigrid.hpp" -#include "fem/operator.hpp" -#include "linalg/petsc.hpp" +#include "linalg/rap.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -70,70 +68,39 @@ mfem::Array SetUpBoundaryProperties(const IoData &iodata, const mfem::ParMe return dbc_marker; } -template -auto AddIntegrators(mfem::ParBilinearForm &a, T1 &df, T2 &f, T3 &dfb, T4 &fb) -{ - if (!df.empty()) - { - a.AddDomainIntegrator(new mfem::CurlCurlIntegrator(df)); - } - if (!f.empty()) - { - a.AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(f)); - } - if (!dfb.empty()) - { - a.AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(dfb)); - } - if (!fb.empty()) - { - a.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(fb)); - } -} - -template -auto AddAuxIntegrators(mfem::ParBilinearForm &a, T1 &f, T2 &fb) -{ - if (!f.empty()) - { - a.AddDomainIntegrator(new mfem::MixedGradGradIntegrator(f)); - } - if (!fb.empty()) - { - a.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(fb)); - } -} - } // namespace SpaceOperator::SpaceOperator(const IoData &iodata, const std::vector> &mesh) - : dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), skip_zeros(0), - pc_gmg(iodata.solver.linear.mat_gmg), pc_lor(iodata.solver.linear.mat_lor), - pc_shifted(iodata.solver.linear.mat_shifted), print_hdr(true), + : assembly_level(iodata.solver.linear.mat_pa ? 
mfem::AssemblyLevel::PARTIAL + : mfem::AssemblyLevel::LEGACY), + skip_zeros(0), pc_mg(iodata.solver.linear.pc_mg), + pc_lor(iodata.solver.linear.pc_mat_lor), + pc_shifted(iodata.solver.linear.pc_mat_shifted), print_hdr(true), print_prec_hdr(true), + dbc_marker(SetUpBoundaryProperties(iodata, *mesh.back())), nd_fecs(utils::ConstructFECollections( - pc_gmg, pc_lor, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, pc_lor, iodata.solver.order, mesh.back()->Dimension())), h1_fecs(utils::ConstructFECollections( - pc_gmg, false, iodata.solver.order, mesh.back()->Dimension())), + pc_mg, false, iodata.solver.order, mesh.back()->Dimension())), rt_fec(iodata.solver.order - 1, mesh.back()->Dimension()), - nd_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, nd_fecs, dbc_marker) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *nd_fecs.back())), - h1_fespaces(pc_gmg ? utils::ConstructFiniteElementSpaceHierarchy( - mesh, h1_fecs, dbc_marker) - : utils::ConstructFiniteElementSpaceHierarchy( - *mesh.back(), *h1_fecs.back())), + nd_fespaces(pc_mg ? utils::ConstructFiniteElementSpaceHierarchy( + mesh, nd_fecs, &dbc_marker, &nd_dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *nd_fecs.back(), &dbc_marker, + &nd_dbc_tdof_lists.emplace_back())), + h1_fespaces(pc_mg ? 
utils::ConstructFiniteElementSpaceHierarchy( + mesh, h1_fecs, &dbc_marker, &h1_dbc_tdof_lists) + : utils::ConstructFiniteElementSpaceHierarchy( + *mesh.back(), *h1_fecs.back(), &dbc_marker, + &h1_dbc_tdof_lists.emplace_back())), rt_fespace(mesh.back().get(), &rt_fec), mat_op(iodata, *mesh.back()), farfield_op(iodata, mat_op, *mesh.back()), surf_sigma_op(iodata, *mesh.back()), - surf_z_op(iodata, *mesh.back()), lumped_port_op(iodata, h1_fespaces.GetFinestFESpace()), - wave_port_op(iodata, mat_op, nd_fespaces.GetFinestFESpace(), - h1_fespaces.GetFinestFESpace()), - surf_j_op(iodata, h1_fespaces.GetFinestFESpace()) + surf_z_op(iodata, *mesh.back()), lumped_port_op(iodata, GetH1Space()), + wave_port_op(iodata, mat_op, GetNDSpace(), GetH1Space()), + surf_j_op(iodata, GetH1Space()) { // Finalize setup. CheckBoundaryProperties(); - nd_fespaces.GetFinestFESpace().GetEssentialTrueDofs(dbc_marker, dbc_tdof_list); // Print essential BC information. if (dbc_marker.Max() > 0) @@ -165,6 +132,11 @@ void SpaceOperator::CheckBoundaryProperties() // aux_bdr_marker = 1; // Mark all boundaries (including material interfaces // // added during mesh preprocessing) // // As tested, this does not eliminate all DC modes! + for (int l = 0; l < h1_fespaces.GetNumLevels(); l++) + { + h1_fespaces.GetFESpaceAtLevel(l).GetEssentialTrueDofs( + aux_bdr_marker, aux_bdr_tdof_lists.emplace_back()); + } // A final check that no boundary attribute is assigned multiple boundary conditions. 
The // one exception is that a lumped port boundary attribute can be also be assigned some @@ -200,400 +172,734 @@ void SpaceOperator::CheckBoundaryProperties() } } -void SpaceOperator::PrintHeader() +namespace +{ + +void PrintHeader(mfem::ParFiniteElementSpace &h1_fespace, + mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &rt_fespace, bool &print_hdr) { if (print_hdr) { - Mpi::Print("\nConfiguring system matrices, number of global unknowns: {:d}\n", - nd_fespaces.GetFinestFESpace().GlobalTrueVSize()); + Mpi::Print("\nAssembling system matrices, number of global unknowns:\n" + " H1: {:d}, ND: {:d}, RT: {:d}\n", + h1_fespace.GlobalTrueVSize(), nd_fespace.GlobalTrueVSize(), + rt_fespace.GlobalTrueVSize()); print_hdr = false; } } -std::unique_ptr -SpaceOperator::GetSystemMatrixPetsc(SpaceOperator::OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, bool print) +template +auto BuildOperator(mfem::ParFiniteElementSpace &fespace, T1 *df, T2 *f, T3 *dfb, T4 *fb, + mfem::AssemblyLevel assembly_level, int skip_zeros, + bool no_assembly = false) { - // Construct the frequency-dependent complex linear system matrix: - // A = K + iω C - ω² (Mr + i Mi) + A2(ω) - // or any one of its terms. 
- const int sdim = nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient dfr(sdim), dfi(sdim), fr(sdim), fi(sdim), fbr(sdim), fbi(sdim); + auto a = std::make_unique(&fespace); + if (df && !df->empty()) + { + a->AddDomainIntegrator(new mfem::CurlCurlIntegrator(*df)); + } + if (f && !f->empty()) + { + a->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(*f)); + } + if (dfb && !dfb->empty()) + { + a->AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(*dfb)); + } + if (fb && !fb->empty()) + { + a->AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(*fb)); + } + if (!no_assembly) + { + a->SetAssemblyLevel(assembly_level); + a->Assemble(skip_zeros); + a->Finalize(skip_zeros); + } + return a; +} + +template +auto BuildAuxOperator(mfem::ParFiniteElementSpace &fespace, T1 *f, T2 *fb, + mfem::AssemblyLevel assembly_level, int skip_zeros, + bool no_assembly = false) +{ + auto a = std::make_unique(&fespace); + if (f && !f->empty()) + { + a->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(*f)); + } + if (fb && !fb->empty()) + { + a->AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(*fb)); + } + if (!no_assembly) + { + a->SetAssemblyLevel(assembly_level); + a->Assemble(skip_zeros); + a->Finalize(skip_zeros); + } + return a; +} + +} // namespace + +std::unique_ptr +SpaceOperator::GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); + AddStiffnessCoefficients(1.0, df, f); + AddStiffnessBdrCoefficients(1.0, fb); + if (df.empty() && f.empty() && fb.empty()) + { + return {}; + } + + auto K = std::make_unique(BuildOperator(GetNDSpace(), &df, &f, + (SumCoefficient *)nullptr, &fb, + assembly_level, skip_zeros), + GetNDSpace()); + K->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return K; +} + +std::unique_ptr 
+SpaceOperator::GetDampingMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient f(sdim), fb(sdim); + AddDampingCoefficients(1.0, f); + AddDampingBdrCoefficients(1.0, fb); + if (f.empty() && fb.empty()) + { + return {}; + } + + auto C = std::make_unique( + BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, + &fb, assembly_level, skip_zeros), + GetNDSpace()); + C->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return C; +} + +std::unique_ptr SpaceOperator::GetMassMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient f(sdim), fb(sdim); + AddRealMassCoefficients(1.0, f); + AddRealMassBdrCoefficients(1.0, fb); + if (f.empty() && fb.empty()) + { + return {}; + } + + auto M = std::make_unique( + BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, + &fb, assembly_level, skip_zeros), + GetNDSpace()); + M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return M; +} + +std::unique_ptr +SpaceOperator::GetComplexStiffnessMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); + AddStiffnessCoefficients(1.0, df, f); + AddStiffnessBdrCoefficients(1.0, fb); + if (df.empty() && f.empty() && fb.empty()) + { + return {}; + } + + auto K = std::make_unique( + BuildOperator(GetNDSpace(), &df, &f, (SumCoefficient *)nullptr, &fb, assembly_level, + skip_zeros), + nullptr, GetNDSpace()); + K->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return K; +} + +std::unique_ptr 
+SpaceOperator::GetComplexDampingMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient f(sdim), fb(sdim); + AddDampingCoefficients(1.0, f); + AddDampingBdrCoefficients(1.0, fb); + if (f.empty() && fb.empty()) + { + return {}; + } + + auto C = std::make_unique( + BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &f, (SumCoefficient *)nullptr, + &fb, assembly_level, skip_zeros), + nullptr, GetNDSpace()); + C->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return C; +} + +std::unique_ptr +SpaceOperator::GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient fr(sdim), fi(sdim), fbr(sdim); + AddRealMassCoefficients(1.0, fr); + AddRealMassBdrCoefficients(1.0, fbr); + AddImagMassCoefficients(1.0, fi); + if (fr.empty() && fbr.empty() && fi.empty()) + { + return {}; + } + + std::unique_ptr mr, mi; + if (!fr.empty() || !fbr.empty()) + { + mr = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &fr, + (SumCoefficient *)nullptr, &fbr, assembly_level, skip_zeros); + } + if (!fi.empty()) + { + mi = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, &fi, + (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, assembly_level, + skip_zeros); + } + auto M = std::make_unique(std::move(mr), std::move(mi), GetNDSpace()); + M->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return M; +} + +std::unique_ptr +SpaceOperator::GetComplexExtraSystemMatrix(double omega, + Operator::DiagonalPolicy diag_policy) +{ + PrintHeader(GetH1Space(), GetNDSpace(), GetRTSpace(), print_hdr); + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient fbr(sdim), fbi(sdim); SumCoefficient dfbr, dfbi; - std::string str; - switch (type) 
- { - case OperatorType::COMPLETE: - AddStiffnessCoefficients(1.0, dfr, fr, fbr); - AddDampingCoefficients(omega, fi, fbi); - AddRealMassCoefficients(-omega * omega, false, fr, fbr); - AddImagMassCoefficients(-omega * omega, fi, fbi); - AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); - str = "A"; - break; - case OperatorType::STIFFNESS: - MFEM_VERIFY(omega == 0.0, - "GetSystemMatrix for type OperatorType::STIFFNESS does not use omega " - "parameter!"); - AddStiffnessCoefficients(1.0, dfr, fr, fbr); - str = "K"; - break; - case OperatorType::MASS: - MFEM_VERIFY( - omega == 0.0, - "GetSystemMatrix for type OperatorType::MASS does not use omega parameter!"); - AddRealMassCoefficients(1.0, false, fr, fbr); - AddImagMassCoefficients(1.0, fi, fbi); - str = "M"; - break; - case OperatorType::DAMPING: - MFEM_VERIFY( - omega == 0.0, - "GetSystemMatrix for type OperatorType::DAMPING does not use omega parameter!"); - AddDampingCoefficients(1.0, fr, fbr); - str = "C"; - break; - case OperatorType::EXTRA: - AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); - str = "A2"; - break; - } - std::unique_ptr hAr, hAi; - bool has_real = false, has_imag = false; - if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty()) - { - has_real = true; - mfem::ParBilinearForm a(&nd_fespaces.GetFinestFESpace()); - AddIntegrators(a, dfr, fr, dfbr, fbr); - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(skip_zeros); - a.Finalize(skip_zeros); - hAr.reset(a.ParallelAssemble()); - hAr->EliminateBC(dbc_tdof_list, ess_diag); - } - if (!dfi.empty() || !fi.empty() || !dfbi.empty() || !fbi.empty()) - { - has_imag = true; - mfem::ParBilinearForm a(&nd_fespaces.GetFinestFESpace()); - AddIntegrators(a, dfi, fi, dfbi, fbi); - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(skip_zeros); - a.Finalize(skip_zeros); - hAi.reset(a.ParallelAssemble()); - hAi->EliminateBC(dbc_tdof_list, mfem::Operator::DiagonalPolicy::DIAG_ZERO); - } - if (!has_real && 
!has_imag) + AddExtraSystemBdrCoefficients(omega, dfbr, dfbi, fbr, fbi); + if (dfbr.empty() && fbr.empty() && dfbi.empty() && fbi.empty()) { return {}; } - auto A = std::make_unique( - nd_fespaces.GetFinestFESpace().GetComm(), std::move(hAr), std::move(hAi)); - if (!has_imag) + + std::unique_ptr ar, ai; + if (!dfbr.empty() || !fbr.empty()) + { + ar = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, + &dfbr, &fbr, assembly_level, skip_zeros); + } + if (!dfbi.empty() || !fbi.empty()) { - A->SetRealSymmetric(); + ai = BuildOperator(GetNDSpace(), (SumCoefficient *)nullptr, (SumCoefficient *)nullptr, + &dfbi, &fbi, assembly_level, skip_zeros); } - else + auto A = std::make_unique(std::move(ar), std::move(ai), GetNDSpace()); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), diag_policy); + return A; +} + +namespace +{ + +auto BuildParSumOperator(int h, int w, double a0, double a1, double a2, + const ParOperator *K, const ParOperator *C, const ParOperator *M, + const ParOperator *A2, const mfem::ParFiniteElementSpace &fespace) +{ + auto sum = std::make_unique(h, w); + if (K && a0 != 0.0) { - A->SetSymmetric(); + sum->AddOperator(K->LocalOperator(), a0); } + if (C && a1 != 0.0) + { + sum->AddOperator(C->LocalOperator(), a1); + } + if (M && a2 != 0.0) + { + sum->AddOperator(M->LocalOperator(), a2); + } + if (A2) + { + sum->AddOperator(A2->LocalOperator(), 1.0); + } + return std::make_unique(std::move(sum), fespace); +} - // Print some information. - PrintHeader(); - if (print) +auto BuildParSumOperator(int h, int w, std::complex a0, std::complex a1, + std::complex a2, const ComplexParOperator *K, + const ComplexParOperator *C, const ComplexParOperator *M, + const ComplexParOperator *A2, + const mfem::ParFiniteElementSpace &fespace) +{ + // Block 2 x 2 equivalent-real formulation for each term in the sum: + // [ sumr ] += [ ar -ai ] [ Ar ] + // [ sumi ] [ ai ar ] [ Ai ] . 
+ auto sumr = std::make_unique(h, w); + auto sumi = std::make_unique(h, w); + if (K) { - if (has_real && has_imag) + if (a0.real() != 0.0) { - Mpi::Print(" Re{{{}}}: NNZ = {:d}, norm = {:e}\n Im{{{}}}: NNZ = {:d}, norm = {:e}\n", - str, A->NNZReal(), A->NormFReal(), str, A->NNZImag(), A->NormFImag()); + if (K->LocalOperator().HasReal()) + { + sumr->AddOperator(*K->LocalOperator().Real(), a0.real()); + } + if (K->LocalOperator().HasImag()) + { + sumi->AddOperator(*K->LocalOperator().Imag(), a0.real()); + } } - else + if (a0.imag() != 0.0) { - Mpi::Print(" {}: NNZ = {:d}, norm = {:e}\n", str, - has_real ? A->NNZReal() : A->NNZImag(), - has_real ? A->NormFReal() : A->NormFImag()); + if (K->LocalOperator().HasImag()) + { + sumr->AddOperator(*K->LocalOperator().Imag(), -a0.imag()); + } + if (K->LocalOperator().HasReal()) + { + sumi->AddOperator(*K->LocalOperator().Real(), a0.imag()); + } } } + if (C && a1 != 0.0) + { + if (a1.real() != 0.0) + { + if (C->LocalOperator().HasReal()) + { + sumr->AddOperator(*C->LocalOperator().Real(), a1.real()); + } + if (C->LocalOperator().HasImag()) + { + sumi->AddOperator(*C->LocalOperator().Imag(), a1.real()); + } + } + if (a1.imag() != 0.0) + { + if (C->LocalOperator().HasImag()) + { + sumr->AddOperator(*C->LocalOperator().Imag(), -a1.imag()); + } + if (C->LocalOperator().HasReal()) + { + sumi->AddOperator(*C->LocalOperator().Real(), a1.imag()); + } + } + } + if (M && a2 != 0.0) + { + if (a2.real() != 0.0) + { + if (M->LocalOperator().HasReal()) + { + sumr->AddOperator(*M->LocalOperator().Real(), a2.real()); + } + if (M->LocalOperator().HasImag()) + { + sumi->AddOperator(*M->LocalOperator().Imag(), a2.real()); + } + } + if (a2.imag() != 0.0) + { + if (M->LocalOperator().HasImag()) + { + sumr->AddOperator(*M->LocalOperator().Imag(), -a2.imag()); + } + if (M->LocalOperator().HasReal()) + { + sumi->AddOperator(*M->LocalOperator().Real(), a2.imag()); + } + } + } + if (A2) + { + if (A2->LocalOperator().HasReal()) + { + 
sumr->AddOperator(*A2->LocalOperator().Real(), 1.0); + } + if (A2->LocalOperator().HasImag()) + { + sumi->AddOperator(*A2->LocalOperator().Imag(), 1.0); + } + } + return std::make_unique(std::move(sumr), std::move(sumi), fespace); +} + +} // namespace + +template +std::unique_ptr +SpaceOperator::GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, + const OperType *K, const OperType *C, const OperType *M, + const OperType *A2) +{ + using ParOperType = + typename std::conditional::value, + ComplexParOperator, ParOperator>::type; + + const auto *PtAP_K = (K) ? dynamic_cast(K) : nullptr; + const auto *PtAP_C = (C) ? dynamic_cast(C) : nullptr; + const auto *PtAP_M = (M) ? dynamic_cast(M) : nullptr; + const auto *PtAP_A2 = (A2) ? dynamic_cast(A2) : nullptr; + MFEM_VERIFY((!K || PtAP_K) && (!C || PtAP_C) && (!M || PtAP_M) && (!A2 || PtAP_A2), + "SpaceOperator requires ParOperator or ComplexParOperator for system matrix " + "construction!"); + + int height = -1, width = -1; + if (PtAP_K) + { + height = PtAP_K->LocalOperator().Height(); + width = PtAP_K->LocalOperator().Width(); + } + else if (PtAP_C) + { + height = PtAP_C->LocalOperator().Height(); + width = PtAP_C->LocalOperator().Width(); + } + else if (PtAP_M) + { + height = PtAP_M->LocalOperator().Height(); + width = PtAP_M->LocalOperator().Width(); + } + else if (PtAP_A2) + { + height = PtAP_A2->LocalOperator().Height(); + width = PtAP_A2->LocalOperator().Width(); + } + MFEM_VERIFY(height >= 0 && width >= 0, + "At least one argument to GetSystemMatrix must not be empty!"); + + auto A = BuildParSumOperator(height, width, a0, a1, a2, PtAP_K, PtAP_C, PtAP_M, PtAP_A2, + GetNDSpace()); + A->SetEssentialTrueDofs(nd_dbc_tdof_lists.back(), Operator::DiagonalPolicy::DIAG_ONE); return A; } -std::unique_ptr -SpaceOperator::GetSystemMatrix(SpaceOperator::OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, bool print) +std::unique_ptr SpaceOperator::GetInnerProductMatrix(double a0, double a2, + 
const ComplexOperator *K, + const ComplexOperator *M) { - // Construct the frequency-dependent complex linear system matrix: - // A = K + iω C - ω² (Mr + i Mi) + A2(ω) - // or any subset of its terms. For output as a HypreParMatrix, only some of - // the terms are available. - MFEM_VERIFY(omega == 0.0, - "GetSystemMatrix for HypreParMatrix does not use omega parameter!"); - const int sdim = nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); - SumCoefficient dfb; - std::string str; - switch (type) - { - case OperatorType::STIFFNESS: - AddStiffnessCoefficients(1.0, df, f, fb); - str = "K"; - break; - case OperatorType::MASS: - AddRealMassCoefficients(1.0, false, f, fb); - str = "M"; - break; - case OperatorType::DAMPING: - AddDampingCoefficients(1.0, f, fb); - str = "C"; - break; - case OperatorType::COMPLETE: - case OperatorType::EXTRA: - MFEM_ABORT("Invalid GetSystemMatrix matrix type for HypreParMatrix output!"); + const auto *PtAP_K = (K) ? dynamic_cast(K) : nullptr; + const auto *PtAP_M = (M) ? dynamic_cast(M) : nullptr; + MFEM_VERIFY( + (!K || PtAP_K) && (!M || PtAP_M), + "SpaceOperator requires ComplexParOperator for inner product matrix construction!"); + + int height = -1, width = -1; + if (PtAP_K) + { + height = PtAP_K->LocalOperator().Height(); + width = PtAP_K->LocalOperator().Width(); } - if (df.empty() && f.empty() && fb.empty()) + else if (PtAP_M) { - return {}; + height = PtAP_M->LocalOperator().Height(); + width = PtAP_M->LocalOperator().Width(); } - mfem::ParBilinearForm a(&nd_fespaces.GetFinestFESpace()); - AddIntegrators(a, df, f, dfb, fb); - // a.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - a.Assemble(skip_zeros); - a.Finalize(skip_zeros); - std::unique_ptr A(a.ParallelAssemble()); - A->EliminateBC(dbc_tdof_list, ess_diag); + MFEM_VERIFY(height >= 0 && width >= 0, + "At least one argument to GetInnerProductMatrix must not be empty!"); - // Print some information. 
- PrintHeader(); - if (print) + auto sum = std::make_unique(height, width); + if (PtAP_K && a0 != 0.0) { - Mpi::Print(" {}: NNZ = {:d}, norm = {:e}\n", str, A->NNZ(), - hypre_ParCSRMatrixFnorm(*A)); + MFEM_VERIFY( + PtAP_K->LocalOperator().HasReal(), + "Missing real part of stiffness matrix for inner product matrix construction!"); + sum->AddOperator(*PtAP_K->LocalOperator().Real(), a0); } - return A; + if (PtAP_M && a2 != 0.0) + { + MFEM_VERIFY(PtAP_M->LocalOperator().HasReal(), + "Missing real part of mass matrix for inner product matrix construction!"); + sum->AddOperator(*PtAP_M->LocalOperator().Real(), a2); + } + return std::make_unique(std::move(sum), GetNDSpace()); +} + +namespace +{ + +auto BuildLevelOperator(const MultigridOperator &B, std::unique_ptr &&br, + std::unique_ptr &&bi, + const mfem::ParFiniteElementSpace &fespace) +{ + return std::make_unique(std::move(br), fespace); } -void SpaceOperator::GetPreconditionerInternal( - const std::function &AddCoefficients, - std::vector> &B, - std::vector> &AuxB, bool print) +auto BuildLevelOperator(const ComplexMultigridOperator &B, std::unique_ptr &&br, + std::unique_ptr &&bi, + const mfem::ParFiniteElementSpace &fespace) +{ + return std::make_unique(std::move(br), std::move(bi), fespace); +} + +} // namespace + +template +std::unique_ptr SpaceOperator::GetPreconditionerMatrix(double a0, double a1, + double a2, double a3) { - // Construct the real, optionally SPD matrix for frequency or time domain preconditioning - // (Mr > 0, Mi < 0): - // B = K + ω C + ω² (-/+ Mr - Mi) , or - // B = a0 K + a1 C + Mr . + if (print_prec_hdr) + { + Mpi::Print("\nAssembling multigrid hierarchy:\n"); + } MFEM_VERIFY(h1_fespaces.GetNumLevels() == nd_fespaces.GetNumLevels(), - "Multigrid heirarchy mismatch for auxiliary space preconditioning!"); + "Multigrid hierarchy mismatch for auxiliary space preconditioning!"); + auto B = std::make_unique>(nd_fespaces.GetNumLevels()); for (int s = 0; s < 2; s++) { - auto &B_ = (s == 0) ? 
B : AuxB; - B_.clear(); - B_.reserve(nd_fespaces.GetNumLevels()); - for (int l = 0; l < nd_fespaces.GetNumLevels(); l++) + auto &fespaces = (s == 0) ? nd_fespaces : h1_fespaces; + auto &dbc_tdof_lists = (s == 0) ? nd_dbc_tdof_lists : h1_dbc_tdof_lists; + for (int l = 0; l < fespaces.GetNumLevels(); l++) { - auto &fespace_l = - (s == 0) ? nd_fespaces.GetFESpaceAtLevel(l) : h1_fespaces.GetFESpaceAtLevel(l); - mfem::Array dbc_tdof_list_l; - fespace_l.GetEssentialTrueDofs(dbc_marker, dbc_tdof_list_l); - - const int sdim = nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension(); - SumMatrixCoefficient df(sdim), f(sdim), fb(sdim); - SumCoefficient dfb; - AddCoefficients(df, f, dfb, fb); - mfem::ParBilinearForm b(&fespace_l); - if (s == 1) + auto &fespace_l = fespaces.GetFESpaceAtLevel(l); + if (print_prec_hdr) { - // H1 auxiliary space matrix Gᵀ B G. - AddAuxIntegrators(b, f, fb); + Mpi::Print(" Level {:d}{}: {:d} unknowns", l, (s == 0) ? "" : " (auxiliary)", + fespace_l.GlobalTrueVSize()); } - else + const int sdim = GetNDSpace().GetParMesh()->SpaceDimension(); + SumMatrixCoefficient dfr(sdim), fr(sdim), fi(sdim), fbr(sdim), fbi(sdim); + SumCoefficient dfbr, dfbi; + // if (s > 0) + // { + + // // XX TODO: Test complex PC matrix assembly for s > 0 + // // (or s == 0 if coarse solve supports it) + // // XX TODO: Handle complex coeff a0/a1/a2 (like SumOperator) + + // AddStiffnessCoefficients(a0, dfr, fr); + // AddStiffnessBdrCoefficients(a0, fbr); + // AddDampingCoefficients(a1, fi); + // AddDampingBdrCoefficients(a1, fbi); + // AddRealMassCoefficients(pc_shifted ? std::abs(a2) : a2, fr); + // AddRealMassBdrCoefficients(pc_shifted ? 
std::abs(a2) : a2, fbr); + // AddImagMassCoefficients(a2, fi); + // AddExtraSystemBdrCoefficients(a3, dfbr, dfbi, fbr, fbi); + // } + // else { - AddIntegrators(b, df, f, dfb, fb); + AddStiffnessCoefficients(a0, dfr, fr); + AddStiffnessBdrCoefficients(a0, fbr); + AddDampingCoefficients(a1, fr); + AddDampingBdrCoefficients(a1, fbr); + AddAbsMassCoefficients(pc_shifted ? std::abs(a2) : a2, fr); + AddRealMassBdrCoefficients(pc_shifted ? std::abs(a2) : a2, fbr); + AddExtraSystemBdrCoefficients(a3, dfbr, dfbr, fbr, fbr); } - // b.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - b.Assemble(skip_zeros); - b.Finalize(skip_zeros); - std::unique_ptr hB; - if (pc_lor) + + std::unique_ptr br, bi; + std::unique_ptr br_loc, bi_loc; + if (!dfr.empty() || !fr.empty() || !dfbr.empty() || !fbr.empty()) { - // After we construct the LOR discretization we can extract the LOR matrix and the - // original bilinear form and LOR discretization are no longer needed. - mfem::ParLORDiscretization lor(b, dbc_tdof_list_l); - hB = std::make_unique(lor.GetAssembledMatrix()); + br = (s == 0) ? BuildOperator(fespace_l, &dfr, &fr, &dfbr, &fbr, assembly_level, + skip_zeros, pc_lor) + : BuildAuxOperator(fespace_l, &fr, &fbr, assembly_level, skip_zeros, + pc_lor); } - else + if (!fi.empty() || !dfbi.empty() || !fbi.empty()) { - hB.reset(b.ParallelAssemble()); + bi = (s == 0) ? BuildOperator(fespace_l, (SumCoefficient *)nullptr, &fi, &dfbi, + &fbi, assembly_level, skip_zeros, pc_lor) + : BuildAuxOperator(fespace_l, &fi, &fbi, assembly_level, skip_zeros, + pc_lor); } - hB->EliminateBC(dbc_tdof_list_l, mfem::Operator::DiagonalPolicy::DIAG_ONE); - - // Print some information. - PrintHeader(); - if (s == 0 && print) + if (pc_lor) { - std::string str = ""; - if (pc_gmg && pc_lor) + // After we construct the LOR discretization we deep copy the LOR matrix and the + // original bilinear form and LOR discretization are no longer needed. 
+ mfem::Array dummy_dbc_tdof_list; + mfem::LORDiscretization lor(*br, dummy_dbc_tdof_list); + auto br_lor = std::make_unique(lor.GetAssembledMatrix()); + if (print_prec_hdr) { - str = fmt::format(" (Level {:d}, {:d} unknowns, LOR)", l, - fespace_l.GlobalTrueVSize()); + HYPRE_BigInt nnz = br_lor->NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ (LOR)\n", nnz); } - else if (pc_gmg) + br_loc = std::move(br_lor); + br.reset(); + if (bi) { - str = fmt::format(" (Level {:d}, {:d} unknowns)", l, fespace_l.GlobalTrueVSize()); + mfem::LORDiscretization lori(*bi, dummy_dbc_tdof_list); + auto bi_lor = std::make_unique(lori.GetAssembledMatrix()); + bi_loc = std::move(bi_lor); + bi.reset(); } - else if (pc_lor) + } + else + { + if (print_prec_hdr) { - str = " (LOR)"; + if (assembly_level == mfem::AssemblyLevel::LEGACY) + { + HYPRE_BigInt nnz = br->SpMat().NumNonZeroElems(); + Mpi::GlobalSum(1, &nnz, fespace_l.GetComm()); + Mpi::Print(", {:d} NNZ\n", nnz); + } + else + { + Mpi::Print("\n"); + } } - Mpi::Print(" B{}: NNZ = {:d}, norm = {:e}\n", str, hB->NNZ(), - hypre_ParCSRMatrixFnorm(*hB)); + br_loc = std::move(br); + bi_loc = std::move(bi); + } + auto B_l = BuildLevelOperator(*B, std::move(br_loc), std::move(bi_loc), fespace_l); + B_l->SetEssentialTrueDofs(dbc_tdof_lists[l], Operator::DiagonalPolicy::DIAG_ONE); + if (s == 0) + { + B->AddOperator(std::move(B_l)); + } + else + { + B->AddAuxiliaryOperator(std::move(B_l)); } - B_.push_back(std::move(hB)); } } + print_prec_hdr = false; + return B; } -void SpaceOperator::GetPreconditionerMatrix( - double omega, std::vector> &B, - std::vector> &AuxB, bool print) +namespace +{ + +auto BuildCurl(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &rt_fespace, mfem::AssemblyLevel assembly_level, + int skip_zeros = 1) { - // Frequency domain preconditioner matrix. 
- auto AddCoefficients = [this, omega](SumMatrixCoefficient &df, SumMatrixCoefficient &f, - SumCoefficient &dfb, SumMatrixCoefficient &fb) - { - this->AddStiffnessCoefficients(1.0, df, f, fb); - this->AddDampingCoefficients(omega, f, fb); - this->AddRealMassCoefficients(pc_shifted ? omega * omega : -omega * omega, true, f, fb); - this->AddExtraSystemBdrCoefficients(omega, dfb, dfb, fb, fb); - }; - GetPreconditionerInternal(AddCoefficients, B, AuxB, print); + auto curl = std::make_unique(&nd_fespace, &rt_fespace); + curl->AddDomainInterpolator(new mfem::CurlInterpolator); + curl->SetAssemblyLevel(assembly_level); + curl->Assemble(skip_zeros); + curl->Finalize(skip_zeros); + return curl; } -void SpaceOperator::GetPreconditionerMatrix( - double a0, double a1, std::vector> &B, - std::vector> &AuxB, bool print) +auto BuildGrad(mfem::ParFiniteElementSpace &h1_fespace, + mfem::ParFiniteElementSpace &nd_fespace, mfem::AssemblyLevel assembly_level, + int skip_zeros = 1) { - // Time domain preconditioner matrix. 
- auto AddCoefficients = [this, a0, a1](SumMatrixCoefficient &df, SumMatrixCoefficient &f, - SumCoefficient &dfb, SumMatrixCoefficient &fb) - { - this->AddStiffnessCoefficients(a0, df, f, fb); - this->AddDampingCoefficients(a1, f, fb); - this->AddRealMassCoefficients(1.0, false, f, fb); - }; - GetPreconditionerInternal(AddCoefficients, B, AuxB, print); + auto grad = std::make_unique(&h1_fespace, &nd_fespace); + grad->AddDomainInterpolator(new mfem::GradientInterpolator); + grad->SetAssemblyLevel(assembly_level); + grad->Assemble(skip_zeros); + grad->Finalize(skip_zeros); + return grad; } -std::unique_ptr SpaceOperator::GetNegCurlMatrix() +} // namespace + +std::unique_ptr SpaceOperator::GetCurlMatrix() { - mfem::ParDiscreteLinearOperator curl(&nd_fespaces.GetFinestFESpace(), &rt_fespace); - curl.AddDomainInterpolator(new mfem::CurlInterpolator); - // curl.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - curl.Assemble(); - curl.Finalize(); - std::unique_ptr NegCurl(curl.ParallelAssemble()); - *NegCurl *= -1.0; - return NegCurl; + return std::make_unique( + BuildCurl(GetNDSpace(), GetRTSpace(), assembly_level), GetNDSpace(), GetRTSpace(), + true); } -std::unique_ptr SpaceOperator::GetNegCurlMatrixPetsc() +std::unique_ptr SpaceOperator::GetComplexCurlMatrix() { - return std::make_unique(nd_fespaces.GetFinestFESpace().GetComm(), - GetNegCurlMatrix()); + return std::make_unique( + BuildCurl(GetNDSpace(), GetRTSpace(), assembly_level), nullptr, GetNDSpace(), + GetRTSpace(), true); } -std::unique_ptr SpaceOperator::GetGradMatrix() +std::unique_ptr SpaceOperator::GetGradMatrix() { - mfem::ParDiscreteLinearOperator grad(&h1_fespaces.GetFinestFESpace(), - &nd_fespaces.GetFinestFESpace()); - grad.AddDomainInterpolator(new mfem::GradientInterpolator); - // grad.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - grad.Assemble(); - grad.Finalize(); - return std::unique_ptr(grad.ParallelAssemble()); + return std::make_unique( + BuildGrad(GetH1Space(), GetNDSpace(), assembly_level), 
GetH1Space(), GetNDSpace(), + true); } -std::unique_ptr SpaceOperator::GetGradMatrixPetsc() +std::unique_ptr SpaceOperator::GetComplexGradMatrix() { - return std::make_unique(nd_fespaces.GetFinestFESpace().GetComm(), - GetGradMatrix()); + return std::make_unique( + BuildGrad(GetH1Space(), GetNDSpace(), assembly_level), nullptr, GetH1Space(), + GetNDSpace(), true); } void SpaceOperator::AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df, - SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) + SumMatrixCoefficient &f) { - // Contribution for curl-curl term. - df.AddCoefficient( - std::make_unique>( - mat_op, coef)); + constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY; + df.AddCoefficient(std::make_unique>(mat_op, coef)); // Contribution for London superconductors. if (mat_op.HasLondonDepth()) { - f.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op, - coef), - mat_op.GetLondonDepthMarker()); + constexpr auto MatTypeL = MaterialPropertyType::INV_LONDON_DEPTH; + f.AddCoefficient(std::make_unique>(mat_op, coef), + mat_op.GetLondonDepthMarker()); } +} +void SpaceOperator::AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb) +{ // Robin BC contributions due to surface impedance and lumped ports (inductance). surf_z_op.AddStiffnessBdrCoefficients(coef, fb); lumped_port_op.AddStiffnessBdrCoefficients(coef, fb); } -void SpaceOperator::AddRealMassCoefficients(double coef, bool abs_coef, - SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f) { - if (abs_coef) - { - f.AddCoefficient(std::make_unique< - MaterialPropertyCoefficient>( - mat_op, coef)); - } - else + // Contribution for domain conductivity. 
+ if (mat_op.HasConductivity()) { - f.AddCoefficient(std::make_unique< - MaterialPropertyCoefficient>( - mat_op, coef)); + constexpr auto MatType = MaterialPropertyType::CONDUCTIVITY; + f.AddCoefficient(std::make_unique>(mat_op, coef), + mat_op.GetConductivityMarker()); } +} + +void SpaceOperator::AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb) +{ + // Robin BC contributions due to surface impedance, lumped ports, and absorbing + // boundaries (resistance). + farfield_op.AddDampingBdrCoefficients(coef, fb); + surf_z_op.AddDampingBdrCoefficients(coef, fb); + lumped_port_op.AddDampingBdrCoefficients(coef, fb); +} +void SpaceOperator::AddRealMassCoefficients(double coef, SumMatrixCoefficient &f) +{ + constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_REAL; + f.AddCoefficient(std::make_unique>(mat_op, coef)); +} + +void SpaceOperator::AddRealMassBdrCoefficients(double coef, SumMatrixCoefficient &fb) +{ // Robin BC contributions due to surface impedance and lumped ports (capacitance). surf_z_op.AddMassBdrCoefficients(coef, fb); lumped_port_op.AddMassBdrCoefficients(coef, fb); } -void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddImagMassCoefficients(double coef, SumMatrixCoefficient &f) { - // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). + // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)). if (mat_op.HasLossTangent()) { - f.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>(mat_op, - coef), - mat_op.GetLossTangentMarker()); + constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_IMAG; + f.AddCoefficient(std::make_unique>(mat_op, coef), + mat_op.GetLossTangentMarker()); } } -void SpaceOperator::AddDampingCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb) +void SpaceOperator::AddAbsMassCoefficients(double coef, SumMatrixCoefficient &f) { - // Contribution for domain conductivity. 
- if (mat_op.HasConductivity()) - { - f.AddCoefficient( - std::make_unique>( - mat_op, coef), - mat_op.GetConductivityMarker()); - } - - // Robin BC contributions due to surface impedance, lumped ports, and absorbing - // boundaries (resistance). - farfield_op.AddDampingBdrCoefficients(coef, fb); - surf_z_op.AddDampingBdrCoefficients(coef, fb); - lumped_port_op.AddDampingBdrCoefficients(coef, fb); + constexpr auto MatType = MaterialPropertyType::PERMITTIVITY_ABS; + f.AddCoefficient(std::make_unique>(mat_op, coef)); } void SpaceOperator::AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr, @@ -609,105 +915,122 @@ void SpaceOperator::AddExtraSystemBdrCoefficients(double omega, SumCoefficient & wave_port_op.AddExtraSystemBdrCoefficients(omega, fbr, fbi); } -bool SpaceOperator::GetTimeDomainExcitationVector(mfem::Vector &RHS) +bool SpaceOperator::GetExcitationVector(Vector &RHS) { - return GetExcitationVector1Internal(RHS); + // Time domain excitation vector. + RHS.SetSize(GetNDSpace().GetTrueVSize()); + RHS = 0.0; + bool nnz = AddExcitationVector1Internal(RHS); + linalg::SetSubVector(RHS, nd_dbc_tdof_lists.back(), 0.0); + return nnz; } -bool SpaceOperator::GetFreqDomainExcitationVector(double omega, petsc::PetscParVector &RHS) +bool SpaceOperator::GetExcitationVector(double omega, ComplexVector &RHS) { - mfem::Vector hRHSr, hRHSi; - bool nnz1 = GetExcitationVector1Internal(hRHSr); - if (nnz1) - { - RHS.SetFromVector(hRHSr); // Sets into real part - RHS.Scale(1i * omega); - } - else - { - RHS.SetZero(); - } - bool nnz2 = GetExcitationVector2Internal(omega, hRHSr, hRHSi); - if (nnz2) - { - petsc::PetscParVector RHS2(RHS.GetComm(), hRHSr, hRHSi); - RHS.AXPY(1.0, RHS2); - } + // Frequency domain excitation vector: RHS = iω RHS1 + RHS2(ω). 
+ RHS.SetSize(GetNDSpace().GetTrueVSize()); + RHS = 0.0; + bool nnz1 = AddExcitationVector1Internal(RHS.Real()); + RHS *= 1i * omega; + bool nnz2 = AddExcitationVector2Internal(omega, RHS); + linalg::SetSubVector(RHS, nd_dbc_tdof_lists.back(), 0.0); return nnz1 || nnz2; } -bool SpaceOperator::GetFreqDomainExcitationVector1(petsc::PetscParVector &RHS1) +bool SpaceOperator::GetExcitationVector1(ComplexVector &RHS1) { - // Assemble the frequency domain excitation term, including only the contributions from - // lumped ports and surface currents, which is purely imaginary with linear frequency - // dependence (coefficient iω, it is accounted for later). - mfem::Vector hRHS1; - bool nnz = GetExcitationVector1Internal(hRHS1); - RHS1.SetFromVector(hRHS1); // Sets into real part - return nnz; + // Assemble the frequency domain excitation term with linear frequency dependence + // (coefficient iω, see GetExcitationVector above, is accounted for later). + RHS1.SetSize(GetNDSpace().GetTrueVSize()); + RHS1 = 0.0; + bool nnz1 = AddExcitationVector1Internal(RHS1.Real()); + linalg::SetSubVector(RHS1.Real(), nd_dbc_tdof_lists.back(), 0.0); + return nnz1; } -bool SpaceOperator::GetFreqDomainExcitationVector2(double omega, - petsc::PetscParVector &RHS2) +bool SpaceOperator::GetExcitationVector2(double omega, ComplexVector &RHS2) { - mfem::Vector hRHS2r, hRHS2i; - bool nnz = GetExcitationVector2Internal(omega, hRHS2r, hRHS2i); - RHS2.SetFromVectors(hRHS2r, hRHS2i); - return nnz; + RHS2.SetSize(GetNDSpace().GetTrueVSize()); + RHS2 = 0.0; + bool nnz2 = AddExcitationVector2Internal(omega, RHS2); + linalg::SetSubVector(RHS2, nd_dbc_tdof_lists.back(), 0.0); + return nnz2; } -bool SpaceOperator::GetExcitationVector1Internal(mfem::Vector &RHS) +bool SpaceOperator::AddExcitationVector1Internal(Vector &RHS1) { - // Assemble the time domain excitation -g'(t) J or -iω J. The g'(t) factor is not - // accounted for here, it is accounted for in the time integration later. 
Likewise, the - // coefficient iω, is accounted for later). - SumVectorCoefficient fb(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()); + // Assemble the time domain excitation -g'(t) J or frequency domain excitation -iω J. + // The g'(t) or iω factors are not accounted for here, they are accounted for in the time + // integration or frequency sweep later. + MFEM_VERIFY(RHS1.Size() == GetNDSpace().GetTrueVSize(), + "Invalid T-vector size for AddExcitationVector1Internal!"); + SumVectorCoefficient fb(GetNDSpace().GetParMesh()->SpaceDimension()); lumped_port_op.AddExcitationBdrCoefficients(fb); surf_j_op.AddExcitationBdrCoefficients(fb); - RHS.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); - RHS = 0.0; if (fb.empty()) { return false; } - mfem::ParLinearForm rhs(&nd_fespaces.GetFinestFESpace()); - rhs.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); - rhs.UseFastAssembly(true); - rhs.Assemble(); - rhs.ParallelAssemble(RHS); - RHS.SetSubVector(dbc_tdof_list, 0.0); + mfem::LinearForm rhs1(&GetNDSpace()); + rhs1.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fb)); + rhs1.UseFastAssembly(false); + rhs1.Assemble(); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs1, RHS1); return true; } -bool SpaceOperator::GetExcitationVector2Internal(double omega, mfem::Vector &RHSr, - mfem::Vector &RHSi) +bool SpaceOperator::AddExcitationVector2Internal(double omega, ComplexVector &RHS2) { // Assemble the contribution of wave ports to the frequency domain excitation term at the // specified frequency.
- SumVectorCoefficient fbr(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()), - fbi(nd_fespaces.GetFinestFESpace().GetParMesh()->SpaceDimension()); + MFEM_VERIFY(RHS2.Size() == GetNDSpace().GetTrueVSize(), + "Invalid T-vector size for AddExcitationVector2Internal!"); + SumVectorCoefficient fbr(GetNDSpace().GetParMesh()->SpaceDimension()), + fbi(GetNDSpace().GetParMesh()->SpaceDimension()); wave_port_op.AddExcitationBdrCoefficients(omega, fbr, fbi); - RHSr.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); - RHSi.SetSize(nd_fespaces.GetFinestFESpace().GetTrueVSize()); - RHSr = 0.0; - RHSi = 0.0; if (fbr.empty() && fbi.empty()) { return false; } - mfem::ParLinearForm rhsr(&nd_fespaces.GetFinestFESpace()); - mfem::ParLinearForm rhsi(&nd_fespaces.GetFinestFESpace()); - rhsr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr)); - rhsi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi)); - rhsr.UseFastAssembly(true); - rhsi.UseFastAssembly(true); - rhsr.Assemble(); - rhsi.Assemble(); - rhsr.ParallelAssemble(RHSr); - rhsi.ParallelAssemble(RHSi); - RHSr.SetSubVector(dbc_tdof_list, 0.0); - RHSi.SetSubVector(dbc_tdof_list, 0.0); + mfem::LinearForm rhs2r(&GetNDSpace()), rhs2i(&GetNDSpace()); + rhs2r.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbr)); + rhs2i.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(fbi)); + rhs2r.UseFastAssembly(false); + rhs2i.UseFastAssembly(false); + rhs2r.Assemble(); + rhs2i.Assemble(); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs2r, RHS2.Real()); + GetNDSpace().GetProlongationMatrix()->AddMultTranspose(rhs2i, RHS2.Imag()); return true; } +void SpaceOperator::GetConstantInitialVector(ComplexVector &v) +{ + v.SetSize(GetNDSpace().GetTrueVSize()); + v = 1.0; + linalg::SetSubVector(v.Real(), nd_dbc_tdof_lists.back(), 0.0); +} + +void SpaceOperator::GetRandomInitialVector(ComplexVector &v) +{ + v.SetSize(GetNDSpace().GetTrueVSize()); + linalg::SetRandom(GetNDSpace().GetComm(), v); + 
linalg::SetSubVector(v, nd_dbc_tdof_lists.back(), 0.0); +} + +template std::unique_ptr +SpaceOperator::GetSystemMatrix(double, double, double, const Operator *, + const Operator *, const Operator *, + const Operator *); +template std::unique_ptr +SpaceOperator::GetSystemMatrix>( + std::complex, std::complex, std::complex, + const ComplexOperator *, const ComplexOperator *, const ComplexOperator *, + const ComplexOperator *); + +template std::unique_ptr +SpaceOperator::GetPreconditionerMatrix(double, double, double, double); +template std::unique_ptr +SpaceOperator::GetPreconditionerMatrix(double, double, double, double); + } // namespace palace diff --git a/palace/models/spaceoperator.hpp b/palace/models/spaceoperator.hpp index ce526ae7d..f5a8b455b 100644 --- a/palace/models/spaceoperator.hpp +++ b/palace/models/spaceoperator.hpp @@ -4,10 +4,13 @@ #ifndef PALACE_MODELS_SPACE_OPERATOR_HPP #define PALACE_MODELS_SPACE_OPERATOR_HPP -#include +#include #include #include #include +#include "fem/coefficient.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" #include "models/farfieldboundaryoperator.hpp" #include "models/lumpedportoperator.hpp" #include "models/materialoperator.hpp" @@ -20,16 +23,6 @@ namespace palace { class IoData; -class SumCoefficient; -class SumMatrixCoefficient; - -namespace petsc -{ - -class PetscParMatrix; -class PetscParVector; - -} // namespace petsc // // A class handling spatial discretization of the governing equations. @@ -37,23 +30,23 @@ class PetscParVector; class SpaceOperator { private: - // Perfect electrical conductor essential boundary condition markers. 
- mfem::Array dbc_marker, dbc_tdof_list, aux_bdr_marker; - void CheckBoundaryProperties(); + const mfem::AssemblyLevel assembly_level; // Use full or partial assembly for operators + const int skip_zeros; // Skip zeros during full assembly of operators + const bool pc_mg; // Use geometric multigrid in preconditioning + const bool pc_lor; // Use low-order refined (LOR) space for the preconditioner + const bool pc_shifted; // Use shifted mass matrix for the preconditioner - // Options for system matrix assembly. - const int skip_zeros; // Whether to skip the zeros during assembly of operators - const bool pc_gmg; // Whether to use geometric multigrid in preconditioning - const bool pc_lor; // Whether to use low-order refined (LOR) preconditioner - const bool pc_shifted; // Whether the preconditioner uses the shifted mass matrix + // Helper variables for log file printing. + bool print_hdr, print_prec_hdr; - // Helper variable and function for log file printing. - bool print_hdr; - void PrintHeader(); + // Perfect electrical conductor essential boundary condition markers. + mfem::Array dbc_marker, aux_bdr_marker; + std::vector> nd_dbc_tdof_lists, h1_dbc_tdof_lists, aux_bdr_tdof_lists; + void CheckBoundaryProperties(); - // Objects defining the finite element spaces for the electric field(Nedelec) and magnetic - // flux density (Raviart-Thomas) on the given mesh. The H1 spaces are used for various - // purposes throughout the code including postprocessing. + // Objects defining the finite element spaces for the electric field (Nedelec) and + // magnetic flux density (Raviart-Thomas) on the given mesh. The H1 spaces are used for + // various purposes throughout the code including postprocessing. std::vector> nd_fecs; std::vector> h1_fecs; mfem::RT_FECollection rt_fec; @@ -71,43 +64,46 @@ class SpaceOperator WavePortOperator wave_port_op; SurfaceCurrentOperator surf_j_op; - // Helper function to assemble preconditioner matrix data structures. 
- void GetPreconditionerInternal( - const std::function &AddCoefficients, - std::vector> &B, - std::vector> &AuxB, bool print); - // Helper functions for building the bilinear forms corresponding to the discretized // operators in Maxwell's equations. void AddStiffnessCoefficients(double coef, SumMatrixCoefficient &df, - SumMatrixCoefficient &f, SumMatrixCoefficient &fb); - void AddRealMassCoefficients(double coef, bool abs_coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); - void AddImagMassCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); - void AddDampingCoefficients(double coef, SumMatrixCoefficient &f, - SumMatrixCoefficient &fb); + SumMatrixCoefficient &f); + void AddStiffnessBdrCoefficients(double coef, SumMatrixCoefficient &fb); + void AddDampingCoefficients(double coef, SumMatrixCoefficient &f); + void AddDampingBdrCoefficients(double coef, SumMatrixCoefficient &fb); + void AddRealMassCoefficients(double coef, SumMatrixCoefficient &f); + void AddRealMassBdrCoefficients(double coef, SumMatrixCoefficient &fb); + void AddImagMassCoefficients(double coef, SumMatrixCoefficient &f); + void AddAbsMassCoefficients(double coef, SumMatrixCoefficient &f); void AddExtraSystemBdrCoefficients(double omega, SumCoefficient &dfbr, SumCoefficient &dfbi, SumMatrixCoefficient &fbr, SumMatrixCoefficient &fbi); // Helper functions for excitation vector assembly. - bool GetExcitationVector1Internal(mfem::Vector &RHS); - bool GetExcitationVector2Internal(double omega, mfem::Vector &RHSr, mfem::Vector &RHSi); + bool AddExcitationVector1Internal(Vector &RHS); + bool AddExcitationVector2Internal(double omega, ComplexVector &RHS); public: SpaceOperator(const IoData &iodata, const std::vector> &mesh); - // Returns array marking Dirichlet BC (PEC) attributes and list of local true dofs. 
- const mfem::Array &GetDbcMarker() const { return dbc_marker; } - const mfem::Array &GetDbcTDofList() const { return dbc_tdof_list; } + // Return list of all PEC boundary true dofs for all finite element space levels. + const std::vector> &GetNDDbcTDofLists() const + { + return nd_dbc_tdof_lists; + } + const std::vector> &GetH1DbcTDofLists() const + { + return h1_dbc_tdof_lists; + } - // Returns array marking all boundary condition attributes, PEC included. These are all - // boundaries which affect the stiffness and damping (K and C) matrices, used for - // nullspace corrections. - const mfem::Array &GetAuxBdrMarker() const { return aux_bdr_marker; } + // Returns lists of all boundary condition true dofs, PEC included, for the auxiliary + // H1 space hierarchy. These are all boundaries which affect the stiffness and damping + // (K and C) matrices, used for nullspace corrections. + const std::vector> &GetAuxBdrTDofLists() const + { + return aux_bdr_tdof_lists; + } // Return material operator for postprocessing. const MaterialOperator &GetMaterialOp() const { return mat_op; } @@ -123,69 +119,82 @@ class SpaceOperator // Return the parallel finite element space objects. auto &GetNDSpaces() { return nd_fespaces; } auto &GetNDSpace() { return nd_fespaces.GetFinestFESpace(); } + const auto &GetNDSpace() const { return nd_fespaces.GetFinestFESpace(); } auto &GetH1Spaces() { return h1_fespaces; } auto &GetH1Space() { return h1_fespaces.GetFinestFESpace(); } + const auto &GetH1Space() const { return h1_fespaces.GetFinestFESpace(); } auto &GetRTSpace() { return rt_fespace; } - - // Construct the frequency-dependent complex linear system matrix: - // A = K + iω C - ω² (Mr + i Mi) + A2(ω) - // or any one of its terms. The type parameter controls which terms of the above - // formulation to include in the resulting matrix. The argument ω is only used for - // the "complete" or "extra" system matrix options, all others come unscaled. 
- enum class OperatorType - { - COMPLETE, - STIFFNESS, - MASS, - DAMPING, - EXTRA - }; - std::unique_ptr - GetSystemMatrixPetsc(OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, bool print = true); - std::unique_ptr - GetSystemMatrixPetsc(OperatorType type, mfem::Operator::DiagonalPolicy ess_diag, - bool print = true) - { - return GetSystemMatrixPetsc(type, 0.0, ess_diag, print); - } - std::unique_ptr GetSystemMatrix(OperatorType type, double omega, - mfem::Operator::DiagonalPolicy ess_diag, - bool print = true); - std::unique_ptr GetSystemMatrix(OperatorType type, - mfem::Operator::DiagonalPolicy ess_diag, - bool print = true) - { - return GetSystemMatrix(type, 0.0, ess_diag, print); - } - - // Construct the real, optionally SPD matrix for frequency or time domain preconditioning - // (Mr > 0, Mi < 0): - // B = K + ω C + ω² (-/+ Mr - Mi) , or - // B = a0 K + a1 C + Mr . - void GetPreconditionerMatrix(double omega, - std::vector> &B, - std::vector> &AuxB, - bool print = true); - void GetPreconditionerMatrix(double a0, double a1, - std::vector> &B, - std::vector> &AuxB, - bool print = true); - - // Construct and return the discrete negative curl or gradient matrices. - std::unique_ptr GetNegCurlMatrix(); - std::unique_ptr GetNegCurlMatrixPetsc(); - std::unique_ptr GetGradMatrix(); - std::unique_ptr GetGradMatrixPetsc(); + const auto &GetRTSpace() const { return rt_fespace; } + + // Construct any part of the frequency-dependent complex linear system matrix: + // A = K + iω C - ω² (Mr + i Mi) + A2(ω) . + // For time domain problems, any one of K, C, or M = Mr can be constructed. The argument + // ω is required only for constructing the "extra" matrix A2(ω). 
+ std::unique_ptr GetStiffnessMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr GetDampingMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr GetMassMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexStiffnessMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexDampingMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexMassMatrix(Operator::DiagonalPolicy diag_policy); + std::unique_ptr + GetComplexExtraSystemMatrix(double omega, Operator::DiagonalPolicy diag_policy); + + // Construct the complete frequency or time domain system matrix using the provided + // stiffness, damping, mass, and extra matrices: + // A = a0 K + a1 C + a2 (Mr + i Mi) + A2 . + // It is assumed that the inputs have been constructed using previous calls to + // GetSystemMatrix() and the returned operator does not inherit ownership of any of them. + template + std::unique_ptr + GetSystemMatrix(ScalarType a0, ScalarType a1, ScalarType a2, const OperType *K, + const OperType *C, const OperType *M, const OperType *A2 = nullptr); + + // Construct the real, SPD matrix for weighted L2 or H(curl) inner products: + // B = a0 Kr + a2 Mr . + // It is assumed that the inputs have been constructed using previous calls to + // GetSystemMatrix() and the returned operator does not inherit ownership of any of them. + // If K or M have eliminated boundary conditions, they are not eliminated from the + // returned operator. + std::unique_ptr GetInnerProductMatrix(double a0, double a2, + const ComplexOperator *K, + const ComplexOperator *M); + + // Construct the real, optionally SPD matrix for frequency or time domain linear system + // preconditioning (Mr > 0, Mi < 0, |Mr + i Mi| is done on the material property + // coefficient, not the matrix entries themselves): + // B = a0 K + a1 C -/+ a2 |Mr + i Mi| + A2r(a3) + A2i(a3) . 
+ template + std::unique_ptr GetPreconditionerMatrix(double a0, double a1, double a2, + double a3); + + // Construct and return the discrete curl or gradient matrices. The complex variants + // return a matrix suitable for applying to complex-valued vectors. + std::unique_ptr GetCurlMatrix(); + std::unique_ptr GetComplexCurlMatrix(); + std::unique_ptr GetGradMatrix(); + std::unique_ptr GetComplexGradMatrix(); // Assemble the right-hand side source term vector for an incident field or current source - // applied on specified excited boundaries. - bool GetTimeDomainExcitationVector(mfem::Vector &RHS); - bool GetFreqDomainExcitationVector(double omega, petsc::PetscParVector &RHS); - - // Separate out RHS vector as RHS = iω RHS1 + RHS2(ω). - bool GetFreqDomainExcitationVector1(petsc::PetscParVector &RHS1); - bool GetFreqDomainExcitationVector2(double omega, petsc::PetscParVector &RHS2); + // applied on specified excited boundaries. The return value indicates whether or not the + // excitation is nonzero (and thus is true most of the time). + bool GetExcitationVector(Vector &RHS); + bool GetExcitationVector(double omega, ComplexVector &RHS); + + // Separate out RHS vector as RHS = iω RHS1 + RHS2(ω). The return value indicates whether + // or not the excitation is nonzero (and thus is true most of the time). + bool GetExcitationVector1(ComplexVector &RHS1); + bool GetExcitationVector2(double omega, ComplexVector &RHS2); + + // Construct a constant or randomly initialized vector which satisfies the PEC essential + // boundary conditions. + void GetRandomInitialVector(ComplexVector &v); + void GetConstantInitialVector(ComplexVector &v); + + // Get the associated MPI communicator. 
+ MPI_Comm GetComm() const { return GetNDSpace().GetComm(); } }; } // namespace palace diff --git a/palace/models/surfacepostoperator.cpp b/palace/models/surfacepostoperator.cpp index 5ea49b447..1b1510091 100644 --- a/palace/models/surfacepostoperator.cpp +++ b/palace/models/surfacepostoperator.cpp @@ -3,9 +3,11 @@ #include "surfacepostoperator.hpp" +#include #include #include "fem/integrator.hpp" #include "models/materialoperator.hpp" +#include "utils/communication.hpp" #include "utils/geodata.hpp" #include "utils/iodata.hpp" @@ -65,8 +67,7 @@ SurfacePostOperator::InterfaceDielectricData::InterfaceDielectricData( // Store information about the surface side to consider. int component; - sides.emplace_back(); - mfem::Vector &side = sides.back(); + mfem::Vector &side = sides.emplace_back(); if (node.side.length() == 0) { // This is OK if surface is single sided, just push back an empty Vector. @@ -95,31 +96,30 @@ SurfacePostOperator::InterfaceDielectricData::InterfaceDielectricData( } // Store markers for this element of the postprocessing boundary. 
- attr_markers.emplace_back(); - mesh::AttrToMarker(mesh.bdr_attributes.Max(), node.attributes, attr_markers.back()); + mesh::AttrToMarker(mesh.bdr_attributes.Max(), node.attributes, + attr_markers.emplace_back()); } } std::unique_ptr SurfacePostOperator::InterfaceDielectricData::GetCoefficient( - int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const + int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const { switch (type) { case DielectricInterfaceType::MA: return std::make_unique>( - U, mat_op, ts, epsilon, sides[i], local_to_shared); + U, mat_op, ts, epsilon, sides[i]); case DielectricInterfaceType::MS: return std::make_unique>( - U, mat_op, ts, epsilon, sides[i], local_to_shared); + U, mat_op, ts, epsilon, sides[i]); case DielectricInterfaceType::SA: return std::make_unique>( - U, mat_op, ts, epsilon, sides[i], local_to_shared); + U, mat_op, ts, epsilon, sides[i]); case DielectricInterfaceType::DEFAULT: return std::make_unique< DielectricInterfaceCoefficient>( - U, mat_op, ts, epsilon, sides[i], local_to_shared); + U, mat_op, ts, epsilon, sides[i]); } return {}; // For compiler warning } @@ -127,15 +127,14 @@ SurfacePostOperator::InterfaceDielectricData::GetCoefficient( SurfacePostOperator::SurfaceChargeData::SurfaceChargeData( const config::CapacitanceData &data, mfem::ParMesh &mesh) { - attr_markers.emplace_back(); - mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, attr_markers.back()); + mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, + attr_markers.emplace_back()); } std::unique_ptr SurfacePostOperator::SurfaceChargeData::GetCoefficient( - int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const + int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op) const { - return std::make_unique(U, mat_op, local_to_shared); + return std::make_unique(U, mat_op); } 
SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceData &data, @@ -173,25 +172,26 @@ SurfacePostOperator::SurfaceFluxData::SurfaceFluxData(const config::InductanceDa // Construct the coefficient for this postprocessing boundary (copies the direction // vector). - attr_markers.emplace_back(); - mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, attr_markers.back()); + mesh::AttrToMarker(mesh.bdr_attributes.Max(), data.attributes, + attr_markers.emplace_back()); } -std::unique_ptr SurfacePostOperator::SurfaceFluxData::GetCoefficient( - int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const +std::unique_ptr +SurfacePostOperator::SurfaceFluxData::GetCoefficient(int i, const mfem::ParGridFunction &U, + const MaterialOperator &mat_op) const { - return std::make_unique(U, direction, local_to_shared); + return std::make_unique(U, direction, + mat_op.GetLocalToSharedFaceMap()); } -SurfacePostOperator::SurfacePostOperator(const IoData &iodata, const MaterialOperator &mat, - const std::map &l2s, +SurfacePostOperator::SurfacePostOperator(const IoData &iodata, + const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &h1_fespace) - : mat_op(mat), local_to_shared(l2s), ones(&h1_fespace) + : mat_op(mat_op), ones(&h1_fespace) { // Define a constant 1 function on the scalar finite element space for computing surface // integrals. - ones.mfem::Vector::operator=(1.0); + ones = 1.0; // Surface dielectric loss postprocessing. 
for (const auto &[idx, data] : iodata.boundaries.postpro.dielectric) @@ -212,22 +212,49 @@ SurfacePostOperator::SurfacePostOperator(const IoData &iodata, const MaterialOpe } } -double -SurfacePostOperator::GetInterfaceElectricFieldEnergy(int idx, - const mfem::ParGridFunction &E) const +double SurfacePostOperator::GetInterfaceLossTangent(int idx) const { auto it = eps_surfs.find(idx); MFEM_VERIFY(it != eps_surfs.end(), "Unknown dielectric loss postprocessing surface index requested!"); - return GetSurfaceIntegral(it->second, E); + return it->second.tandelta; } -double SurfacePostOperator::GetInterfaceLossTangent(int idx) const +double SurfacePostOperator::GetInterfaceElectricFieldEnergy( + int idx, const mfem::ParComplexGridFunction &E) const { auto it = eps_surfs.find(idx); MFEM_VERIFY(it != eps_surfs.end(), "Unknown dielectric loss postprocessing surface index requested!"); - return it->second.tandelta; + double dot = GetLocalSurfaceIntegral(it->second, E.real()) + + GetLocalSurfaceIntegral(it->second, E.imag()); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; +} + +double +SurfacePostOperator::GetInterfaceElectricFieldEnergy(int idx, + const mfem::ParGridFunction &E) const +{ + auto it = eps_surfs.find(idx); + MFEM_VERIFY(it != eps_surfs.end(), + "Unknown dielectric loss postprocessing surface index requested!"); + double dot = GetLocalSurfaceIntegral(it->second, E); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; +} + +double +SurfacePostOperator::GetSurfaceElectricCharge(int idx, + const mfem::ParComplexGridFunction &E) const +{ + auto it = charge_surfs.find(idx); + MFEM_VERIFY(it != charge_surfs.end(), + "Unknown capacitance postprocessing surface index requested!"); + std::complex dot(GetLocalSurfaceIntegral(it->second, E.real()), + GetLocalSurfaceIntegral(it->second, E.imag())); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return std::copysign(std::abs(dot), dot.real()); } double 
SurfacePostOperator::GetSurfaceElectricCharge(int idx, @@ -236,7 +263,22 @@ double SurfacePostOperator::GetSurfaceElectricCharge(int idx, auto it = charge_surfs.find(idx); MFEM_VERIFY(it != charge_surfs.end(), "Unknown capacitance postprocessing surface index requested!"); - return GetSurfaceIntegral(it->second, E); + double dot = GetLocalSurfaceIntegral(it->second, E); + Mpi::GlobalSum(1, &dot, E.ParFESpace()->GetComm()); + return dot; +} + +double +SurfacePostOperator::GetSurfaceMagneticFlux(int idx, + const mfem::ParComplexGridFunction &B) const +{ + auto it = flux_surfs.find(idx); + MFEM_VERIFY(it != flux_surfs.end(), + "Unknown inductance postprocessing surface index requested!"); + std::complex dot(GetLocalSurfaceIntegral(it->second, B.real()), + GetLocalSurfaceIntegral(it->second, B.imag())); + Mpi::GlobalSum(1, &dot, B.ParFESpace()->GetComm()); + return std::copysign(std::abs(dot), dot.real()); } double SurfacePostOperator::GetSurfaceMagneticFlux(int idx, @@ -245,23 +287,25 @@ double SurfacePostOperator::GetSurfaceMagneticFlux(int idx, auto it = flux_surfs.find(idx); MFEM_VERIFY(it != flux_surfs.end(), "Unknown inductance postprocessing surface index requested!"); - return GetSurfaceIntegral(it->second, B); + double dot = GetLocalSurfaceIntegral(it->second, B); + Mpi::GlobalSum(1, &dot, B.ParFESpace()->GetComm()); + return dot; } -double SurfacePostOperator::GetSurfaceIntegral(const SurfaceData &data, - const mfem::ParGridFunction &U) const +double SurfacePostOperator::GetLocalSurfaceIntegral(const SurfaceData &data, + const mfem::ParGridFunction &U) const { // Integrate the coefficient over the boundary attributes making up this surface index. 
std::vector> fb; - mfem::ParLinearForm s(ones.ParFESpace()); + mfem::LinearForm s(const_cast(ones.FESpace())); for (int i = 0; i < static_cast(data.attr_markers.size()); i++) { - fb.emplace_back(data.GetCoefficient(i, U, mat_op, local_to_shared)); + fb.emplace_back(data.GetCoefficient(i, U, mat_op)); s.AddBoundaryIntegrator(new BoundaryLFIntegrator(*fb.back()), data.attr_markers[i]); } - s.UseFastAssembly(true); + s.UseFastAssembly(false); s.Assemble(); - return s(ones); + return s * ones; } } // namespace palace diff --git a/palace/models/surfacepostoperator.hpp b/palace/models/surfacepostoperator.hpp index 697aca505..0e0c8e8bf 100644 --- a/palace/models/surfacepostoperator.hpp +++ b/palace/models/surfacepostoperator.hpp @@ -40,8 +40,8 @@ class SurfacePostOperator virtual ~SurfaceData() = default; virtual std::unique_ptr - GetCoefficient(int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const = 0; + GetCoefficient(int i, const mfem::ParGridFunction &U, + const MaterialOperator &mat_op) const = 0; }; struct InterfaceDielectricData : public SurfaceData { @@ -53,16 +53,16 @@ class SurfacePostOperator mfem::ParMesh &mesh); std::unique_ptr - GetCoefficient(int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const override; + GetCoefficient(int i, const mfem::ParGridFunction &U, + const MaterialOperator &mat_op) const override; }; struct SurfaceChargeData : public SurfaceData { SurfaceChargeData(const config::CapacitanceData &data, mfem::ParMesh &mesh); std::unique_ptr - GetCoefficient(int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const override; + GetCoefficient(int i, const mfem::ParGridFunction &U, + const MaterialOperator &mat_op) const override; }; struct SurfaceFluxData : public SurfaceData { @@ -71,8 +71,8 @@ class SurfacePostOperator SurfaceFluxData(const config::InductanceData &data, 
mfem::ParMesh &mesh); std::unique_ptr - GetCoefficient(int i, const mfem::ParGridFunction &U, const MaterialOperator &mat_op, - const std::map &local_to_shared) const override; + GetCoefficient(int i, const mfem::ParGridFunction &U, + const MaterialOperator &mat_op) const override; }; std::map eps_surfs; std::map charge_surfs; @@ -81,17 +81,14 @@ class SurfacePostOperator // Reference to material property operator (not owned). const MaterialOperator &mat_op; - // Shared face mapping for boundary coefficients (not owned). - const std::map &local_to_shared; - // Unit function used for computing surface integrals. - mfem::ParGridFunction ones; + mfem::GridFunction ones; - double GetSurfaceIntegral(const SurfaceData &data, const mfem::ParGridFunction &U) const; + double GetLocalSurfaceIntegral(const SurfaceData &data, + const mfem::ParGridFunction &U) const; public: - SurfacePostOperator(const IoData &iodata, const MaterialOperator &mat, - const std::map &l2s, + SurfacePostOperator(const IoData &iodata, const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &h1_fespace); // Access data structures for the postprocessing surface with the given type. @@ -104,9 +101,13 @@ class SurfacePostOperator // Get surface integrals computing dielectric interface energy, surface charge, or // surface magnetic flux. 
- double GetInterfaceElectricFieldEnergy(int idx, const mfem::ParGridFunction &E) const; double GetInterfaceLossTangent(int idx) const; + double GetInterfaceElectricFieldEnergy(int idx, + const mfem::ParComplexGridFunction &E) const; + double GetInterfaceElectricFieldEnergy(int idx, const mfem::ParGridFunction &E) const; + double GetSurfaceElectricCharge(int idx, const mfem::ParComplexGridFunction &E) const; double GetSurfaceElectricCharge(int idx, const mfem::ParGridFunction &E) const; + double GetSurfaceMagneticFlux(int idx, const mfem::ParComplexGridFunction &B) const; double GetSurfaceMagneticFlux(int idx, const mfem::ParGridFunction &B) const; }; diff --git a/palace/models/timeoperator.cpp b/palace/models/timeoperator.cpp index 6835846db..354645ed8 100644 --- a/palace/models/timeoperator.cpp +++ b/palace/models/timeoperator.cpp @@ -4,9 +4,9 @@ #include "timeoperator.hpp" #include -#include "linalg/gmg.hpp" -#include "linalg/pc.hpp" -#include "linalg/petsc.hpp" +#include "linalg/iterative.hpp" +#include "linalg/jacobi.hpp" +#include "linalg/solver.hpp" #include "models/spaceoperator.hpp" #include "utils/communication.hpp" #include "utils/iodata.hpp" @@ -17,236 +17,124 @@ namespace palace namespace { -class CurlCurlSystemOperator : public mfem::SecondOrderTimeDependentOperator +class TimeDependentCurlCurlOperator : public mfem::SecondOrderTimeDependentOperator { -private: - // MPI communicator for the parallel operators. +public: + // MPI communicator. MPI_Comm comm; // System matrices and excitation RHS. - std::unique_ptr K, M, C; - mfem::Vector NegJ; - - // Reference to essential boundary true degrees of freedom from SpaceOperator (not owned). - const mfem::Array dbc_tdof_list; + std::unique_ptr K, M, C; + Vector NegJ; // Time dependence of current pulse for excitation: -J'(t) = -g'(t) J. This function // returns g'(t). std::function &dJcoef; // Internal objects for solution of linear systems during time stepping. 
- mutable double a0_, a1_; - mutable mfem::Vector RHS; - mutable std::vector> P, AuxP; - std::function> &, - std::vector> &)> - GetPreconditionerMatrix; + double a0_, a1_; + std::unique_ptr kspM, kspA; + std::unique_ptr A, B; + mutable Vector RHS; - // Linear system solvers and settings for implicit time integration. - std::unique_ptr kspM, kspA; - std::unique_ptr pcM, pcA; - mutable int kspM_mult, kspA_mult, kspM_it, kspA_it; - - void FormRHS(const mfem::Vector &u, const mfem::Vector &du, mfem::Vector &rhs) const - { - // Multiply: rhs = -(K u + C du) - g'(t) J. - rhs = 0.0; - K->AddMult(u, rhs, -1.0); - if (C) - { - C->AddMult(du, rhs, -1.0); - } - rhs.Add(dJcoef(t), NegJ); - } + // Bindings to SpaceOperator functions to get the system matrix and preconditioner, and + // construct the linear solver. + std::function ConfigureLinearSolver; public: - CurlCurlSystemOperator(const IoData &iodata, SpaceOperator &spaceop, - std::function &djcoef, double t0, - mfem::TimeDependentOperator::Type type) + TimeDependentCurlCurlOperator(const IoData &iodata, SpaceOperator &spaceop, + std::function &djcoef, double t0, + mfem::TimeDependentOperator::Type type) : mfem::SecondOrderTimeDependentOperator(spaceop.GetNDSpace().GetTrueVSize(), t0, type), - comm(spaceop.GetNDSpace().GetComm()), dbc_tdof_list(spaceop.GetDbcTDofList()), - dJcoef(djcoef) + comm(spaceop.GetComm()), dJcoef(djcoef) { // Construct the system matrices defining the linear operator. PEC boundaries are // handled simply by setting diagonal entries of the mass matrix for the corresponding // dofs. Because the Dirichlet BC is always homogenous, no special elimination is // required on the RHS. Diagonal entries are set in M (so M is non-singular). 
- K = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::STIFFNESS, - mfem::Operator::DIAG_ZERO); - M = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::MASS, - mfem::Operator::DIAG_ONE); - C = spaceop.GetSystemMatrix(SpaceOperator::OperatorType::DAMPING, - mfem::Operator::DIAG_ZERO); + K = spaceop.GetStiffnessMatrix(Operator::DIAG_ZERO); + C = spaceop.GetDampingMatrix(Operator::DIAG_ZERO); + M = spaceop.GetMassMatrix(Operator::DIAG_ONE); // Set up RHS vector for the current source term: -g'(t) J, where g(t) handles the time // dependence. - spaceop.GetTimeDomainExcitationVector(NegJ); + spaceop.GetExcitationVector(NegJ); RHS.SetSize(NegJ.Size()); - // Set up linear solvers (SetOperator will be called later on at first time step). - mfem::IterativeSolver::PrintLevel print = - mfem::IterativeSolver::PrintLevel().Warnings().Errors(); - if (iodata.problem.verbose > 0) + // Set up linear solvers. { - print.Summary(); - if (iodata.problem.verbose > 1) - { - print.Iterations(); - if (iodata.problem.verbose > 2) - { - print.All(); - } - } - } - { - // PCG with a simple smoother preconditioner for mass matrix systems. - mfem::Vector diag(M->Height()); - M->AssembleDiagonal(diag); - pcM = std::make_unique(diag, spaceop.GetDbcTDofList()); - - auto pcg = std::make_unique(comm); - pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; + auto pcg = std::make_unique>(comm, 0); + pcg->SetInitialGuess(iodata.solver.linear.initial_guess); pcg->SetRelTol(iodata.solver.linear.tol); pcg->SetMaxIter(iodata.solver.linear.max_it); - pcg->SetPrintLevel(0); - pcg->SetOperator(*M); - pcg->SetPreconditioner(*pcM); - kspM = std::move(pcg); + auto jac = + std::make_unique>(std::make_unique()); + kspM = std::make_unique(std::move(pcg), std::move(jac)); + kspM->SetOperators(*M, *M); } { // For explicit schemes, recommended to just use cheaper preconditioners. Otherwise, // use AMS or a direct solver. 
The system matrix is formed as a sequence of matrix // vector products, and is only assembled for preconditioning. - pcA = ConfigurePreconditioner(iodata, spaceop.GetDbcMarker(), spaceop.GetNDSpaces(), - &spaceop.GetH1Spaces()); - - auto pcg = std::make_unique(comm); - pcg->iterative_mode = iodata.solver.linear.ksp_initial_guess; - pcg->SetRelTol(iodata.solver.linear.tol); - pcg->SetMaxIter(iodata.solver.linear.max_it); - pcg->SetPrintLevel(print); - pcg->SetOperator(*this); - pcg->SetPreconditioner(*pcA); - kspA = std::move(pcg); - if (iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::DEFAULT && - iodata.solver.linear.ksp_type != config::LinearSolverData::KspType::CG) + ConfigureLinearSolver = [this, &iodata, &spaceop](double a0, double a1) { - Mpi::Warning("Transient problem type always uses CG as the Krylov solver!\n"); - } + // Configure the system matrix and also the matrix (matrices) from which the + // preconditioner will be constructed. + A = spaceop.GetSystemMatrix(a0, a1, 1.0, K.get(), C.get(), M.get()); + B = spaceop.GetPreconditionerMatrix(a0, a1, 1.0, 0.0); - // The assembled matrix for preconditioning is constructed as a function of the - // coefficients defined by the time integrator. - GetPreconditionerMatrix = [&](double a0, double a1, - std::vector> &B, - std::vector> &AuxB) - { spaceop.GetPreconditionerMatrix(a0, a1, B, AuxB, true); }; + // Configure the solver. 
+ if (!kspA) + { + kspA = std::make_unique(iodata, spaceop.GetNDSpaces(), + &spaceop.GetH1Spaces()); + } + kspA->SetOperators(*A, *B); + }; } - kspM_mult = kspA_mult = kspM_it = kspA_it = 0; } - MPI_Comm GetComm() const { return comm; } - const mfem::Operator &GetK() const { return *K; } - const mfem::Operator &GetM() const { return *M; } - const mfem::Operator &GetC() const { return *C; } - const mfem::Array &GetDbcTDofList() const { return dbc_tdof_list; } - - int GetNumMult() const { return kspM_mult; } - int GetNumMultIter() const { return kspM_it; } - int GetNumImplicitSolve() const { return kspA_mult; } - int GetNumImplicitSolveIter() const { return kspA_it; } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override + void FormRHS(const Vector &u, const Vector &du, Vector &rhs) const { - // Multiply: y = (a0 K + a1 C + M) x. - M->Mult(x, y); - K->AddMult(x, y, a0_); + // Multiply: rhs = -(K u + C du) - g'(t) J. + K->Mult(u, rhs); if (C) { - C->AddMult(x, y, a1_); + C->AddMult(du, rhs, 1.0); } + linalg::AXPBYPCZ(-1.0, rhs, dJcoef(t), NegJ, 0.0, rhs); } - void Mult(const mfem::Vector &u, const mfem::Vector &du, mfem::Vector &ddu) const override + void Mult(const Vector &u, const Vector &du, Vector &ddu) const override { // Solve: M ddu = -(K u + C du) - g'(t) J. - Mpi::Print("\n"); - if (kspM_mult == 0) + if (kspM->NumTotalMult() == 0) { // Operators have already been set in constructor. 
ddu = 0.0; } FormRHS(u, du, RHS); kspM->Mult(RHS, ddu); - if (!kspM->GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - kspM->GetNumIterations()); - } - kspM_mult++; - kspM_it += kspM->GetNumIterations(); } - void ImplicitSolve(const double a0, const double a1, const mfem::Vector &u, - const mfem::Vector &du, mfem::Vector &k) override + void ImplicitSolve(const double a0, const double a1, const Vector &u, const Vector &du, + Vector &k) override { // Solve: (a0 K + a1 C + M) k = -(K u + C du) - g'(t) J, where a0 may be 0 in the // explicit case. At first iteration, construct the solver. Also don't print a newline // if already done by the mass matrix solve at the first iteration. - if (kspA_mult > 0) - { - Mpi::Print("\n"); - } - if (kspA_mult == 0 || a0 != a0_ || a1 != a1_) + if (!kspA || a0 != a0_ || a1 != a1_) { - // Configure the matrix (matrices) from which the preconditioner will be constructed. - GetPreconditionerMatrix(a0, a1, P, AuxP); - auto *gmg = dynamic_cast(pcA.get()); - if (gmg) - { - gmg->SetOperator(P, &AuxP); - } - else - { - pcA->SetOperator(*P.back()); - } + // Configure the linear solver, including the system matrix and also the matrix + // (matrices) from which the preconditioner will be constructed. 
+ ConfigureLinearSolver(a0, a1); a0_ = a0; a1_ = a1; k = 0.0; } + Mpi::Print("\n"); FormRHS(u, du, RHS); kspA->Mult(RHS, k); - if (!kspA->GetConverged()) - { - Mpi::Warning("Linear solver did not converge in {:d} iterations!\n", - kspA->GetNumIterations()); - } - kspA_mult++; - kspA_it += kspA->GetNumIterations(); - } -}; - -class SymmetricProductOperator : public mfem::Operator -{ -private: - const mfem::Operator &A, &B; - mutable mfem::Vector z; - -public: - SymmetricProductOperator(const mfem::Operator &opA, const mfem::Operator &opB) - : mfem::Operator(opA.Height(), opB.Width()), A(opA), B(opB), z(opB.Height()) - { - } - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override - { - B.Mult(x, z); - A.Mult(z, y); - } - - void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override - { - A.Mult(x, z); - B.Mult(z, y); } }; @@ -256,16 +144,16 @@ TimeOperator::TimeOperator(const IoData &iodata, SpaceOperator &spaceop, std::function &djcoef) { // Construct discrete curl matrix for B-field time integration. - NegCurl = spaceop.GetNegCurlMatrix(); + Curl = spaceop.GetCurlMatrix(); // Allocate space for solution vectors. - E.SetSize(NegCurl->Width()); - dE.SetSize(NegCurl->Width()); - En.SetSize(NegCurl->Width()); - B.SetSize(NegCurl->Height()); + E.SetSize(Curl->Width()); + dE.SetSize(Curl->Width()); + En.SetSize(Curl->Width()); + B.SetSize(Curl->Height()); // Create ODE solver for 2nd-order IVP. 
- mfem::TimeDependentOperator::Type type; + mfem::TimeDependentOperator::Type type = mfem::TimeDependentOperator::EXPLICIT; switch (iodata.solver.transient.type) { case config::TransientSolverData::Type::GEN_ALPHA: @@ -291,50 +179,44 @@ TimeOperator::TimeOperator(const IoData &iodata, SpaceOperator &spaceop, break; case config::TransientSolverData::Type::INVALID: MFEM_ABORT("Invalid transient solver type!"); - type = mfem::TimeDependentOperator::EXPLICIT; // For compiler warning break; } // Set up time-dependent operator for 2nd-order curl-curl equation for E. - op = std::make_unique(iodata, spaceop, djcoef, 0.0, type); -} - -int TimeOperator::GetTotalKspMult() const -{ - const auto &curlcurl = dynamic_cast(*op); - return curlcurl.GetNumMult() + curlcurl.GetNumImplicitSolve(); + op = std::make_unique(iodata, spaceop, djcoef, 0.0, type); } -int TimeOperator::GetTotalKspIter() const +const KspSolver &TimeOperator::GetLinearSolver() const { - const auto &curlcurl = dynamic_cast(*op); - return curlcurl.GetNumMultIter() + curlcurl.GetNumImplicitSolveIter(); + const auto &curlcurl = dynamic_cast(*op); + MFEM_VERIFY(curlcurl.kspA, + "No linear solver for time-depdendent operator has been constructed!\n"); + return *curlcurl.kspA; } double TimeOperator::GetMaxTimeStep() const { - const auto &curlcurl = dynamic_cast(*op); - const mfem::Operator &M = curlcurl.GetM(); - const mfem::Operator &K = curlcurl.GetK(); + const auto &curlcurl = dynamic_cast(*op); + MPI_Comm comm = curlcurl.comm; + const Operator &M = *curlcurl.M; + const Operator &K = *curlcurl.K; // Solver for M⁻¹. 
constexpr double lin_tol = 1.0e-9; constexpr int max_lin_it = 500; - mfem::CGSolver pcg(curlcurl.GetComm()); + mfem::CGSolver pcg(comm); pcg.SetRelTol(lin_tol); pcg.SetMaxIter(max_lin_it); pcg.SetPrintLevel(0); pcg.SetOperator(M); - mfem::Vector diag(M.Height()); - M.AssembleDiagonal(diag); - mfem::OperatorJacobiSmoother prec(diag, curlcurl.GetDbcTDofList()); - pcg.SetPreconditioner(prec); + JacobiSmoother jac; + jac.SetOperator(M); + pcg.SetPreconditioner(jac); // Power iteration to estimate largest eigenvalue of undamped system matrix M⁻¹ K. - petsc::PetscShellMatrix MinvK(curlcurl.GetComm(), - std::make_unique(pcg, K)); - double lam = MinvK.Norm2(); + ProductOperator op(pcg, K); + double lam = linalg::SpectralNorm(comm, op, false); MFEM_VERIFY(lam > 0.0, "Error during power iteration, λ = " << lam << "!"); return 2.0 / std::sqrt(lam); } @@ -355,8 +237,8 @@ void TimeOperator::Step(double &t, double &dt) ode->Step(E, dE, t, dt); // Trapezoidal integration for B-field: dB/dt = -∇ x E. - En.Add(1.0, E); - NegCurl->AddMult(En, B, 0.5 * dt); + En += E; + Curl->AddMult(En, B, -0.5 * dt); } } // namespace palace diff --git a/palace/models/timeoperator.hpp b/palace/models/timeoperator.hpp index c96e99190..5373b3f2a 100644 --- a/palace/models/timeoperator.hpp +++ b/palace/models/timeoperator.hpp @@ -7,6 +7,9 @@ #include #include #include +#include "linalg/ksp.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -21,7 +24,7 @@ class TimeOperator { private: // Solution vector storage. - mfem::Vector E, dE, En, B; + Vector E, dE, En, B; // Time integrator for the curl-curl E-field formulation. std::unique_ptr ode; @@ -30,24 +33,22 @@ class TimeOperator std::unique_ptr op; // Discrete curl for B-field time integration. - std::unique_ptr NegCurl; + std::unique_ptr Curl; public: TimeOperator(const IoData &iodata, SpaceOperator &spaceop, std::function &djcoef); // Access solution vectors for E- and B-fields. 
- const mfem::Vector &GetE() const { return E; } - const mfem::Vector &GetEdot() const { return dE; } - const mfem::Vector &GetB() const { return B; } + const Vector &GetE() const { return E; } + const Vector &GetEdot() const { return dE; } + const Vector &GetB() const { return B; } - // Is time integration scheme explicit or implicit. - bool isExplicit() const { return op->isExplicit(); } + // Return the linear solver associated with the implicit or explicit time integrator. + const KspSolver &GetLinearSolver() const; - // Return number of linear solves and linear solver iterations performed during time - // integration. - int GetTotalKspMult() const; - int GetTotalKspIter() const; + // Return whether the time integration scheme is explicit or implicit. + bool isExplicit() const { return op->isExplicit(); } // Estimate the maximum stable time step based on the maximum eigenvalue of the // undamped system matrix M⁻¹ K. @@ -56,7 +57,7 @@ class TimeOperator // Initialize time integrators and set 0 initial conditions. void Init(); - // Perform time step from t => t + dt. + // Perform time step from t -> t + dt. 
void Step(double &t, double &dt); }; diff --git a/palace/models/waveportoperator.cpp b/palace/models/waveportoperator.cpp index ed5dc6963..9b128da6a 100644 --- a/palace/models/waveportoperator.cpp +++ b/palace/models/waveportoperator.cpp @@ -3,12 +3,18 @@ #include "waveportoperator.hpp" -#include +#include +#include #include "fem/coefficient.hpp" #include "fem/integrator.hpp" #include "linalg/arpack.hpp" -#include "linalg/hypre.hpp" +#include "linalg/iterative.hpp" +#include "linalg/mumps.hpp" +#include "linalg/rap.hpp" #include "linalg/slepc.hpp" +#include "linalg/solver.hpp" +#include "linalg/strumpack.hpp" +#include "linalg/superlu.hpp" #include "models/materialoperator.hpp" #include "utils/communication.hpp" #include "utils/geodata.hpp" @@ -17,226 +23,326 @@ namespace palace { +using namespace std::complex_literals; + namespace { +void GetEssentialTrueDofs(mfem::ParGridFunction &E0t, mfem::ParGridFunction &E0n, + mfem::ParGridFunction &port_E0t, mfem::ParGridFunction &port_E0n, + mfem::ParTransferMap &port_nd_transfer, + mfem::ParTransferMap &port_h1_transfer, + const mfem::Array &dbc_marker, + mfem::Array &port_nd_dbc_tdof_list, + mfem::Array &port_h1_dbc_tdof_list) +{ + mfem::ParFiniteElementSpace &nd_fespace = *E0t.ParFESpace(); + mfem::ParFiniteElementSpace &h1_fespace = *E0n.ParFESpace(); + mfem::ParFiniteElementSpace &port_nd_fespace = *port_E0t.ParFESpace(); + mfem::ParFiniteElementSpace &port_h1_fespace = *port_E0n.ParFESpace(); + + mfem::Array nd_dbc_tdof_list, h1_dbc_tdof_list; + nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); + h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); + + Vector tE0t(nd_fespace.GetTrueVSize()), tE0n(h1_fespace.GetTrueVSize()); + tE0t = 0.0; + tE0n = 0.0; + linalg::SetSubVector(tE0t, nd_dbc_tdof_list, 1.0); + linalg::SetSubVector(tE0n, h1_dbc_tdof_list, 1.0); + E0t.SetFromTrueDofs(tE0t); + E0n.SetFromTrueDofs(tE0n); + port_nd_transfer.Transfer(E0t, port_E0t); + port_h1_transfer.Transfer(E0n, 
port_E0n); + + Vector port_tE0t(port_nd_fespace.GetTrueVSize()), + port_tE0n(port_h1_fespace.GetTrueVSize()); + port_E0t.ParallelProject(port_tE0t); + port_E0n.ParallelProject(port_tE0n); + for (int i = 0; i < port_tE0t.Size(); i++) + { + if (port_tE0t[i] != 0.0) + { + port_nd_dbc_tdof_list.Append(i); + } + } + for (int i = 0; i < port_tE0n.Size(); i++) + { + if (port_tE0n[i] != 0.0) + { + port_h1_dbc_tdof_list.Append(i); + } + } +} + constexpr int skip_zeros = 0; -inline mfem::HypreParMatrix GetBtt(const MaterialOperator &mat_op, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::Array &attr_marker) +std::unique_ptr GetBtt(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &nd_fespace) { // Mass matrix: Bₜₜ = (μ⁻¹ u, v). - MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParBilinearForm btt(&nd_fespace); - btt.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func), attr_marker); - // btt.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - btt.Assemble(skip_zeros); - btt.Finalize(skip_zeros); - return *btt.ParallelAssemble(); + constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY; + constexpr auto ElemType = MeshElementType::BDR_SUBMESH; + MaterialPropertyCoefficient muinv_func(mat_op); + auto btt = std::make_unique(&nd_fespace); + btt->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(muinv_func)); + btt->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + btt->Assemble(skip_zeros); + btt->Finalize(skip_zeros); + return std::make_unique(std::move(btt), nd_fespace); } -inline mfem::HypreParMatrix GetBtn(const MaterialOperator &mat_op, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - mfem::Array &attr_marker) +std::unique_ptr GetBtn(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace) { // Mass matrix: Bₜₙ = (μ⁻¹ ∇ₜ u, v). 
- mfem::ParMixedBilinearForm btn(&h1_fespace, &nd_fespace); - MaterialPropertyCoefficient muinv_func(mat_op); - btn.AddBoundaryIntegrator(new mfem::MixedVectorGradientIntegrator(muinv_func), - attr_marker); - // btn.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - btn.Assemble(skip_zeros); - btn.Finalize(skip_zeros); - return *btn.ParallelAssemble(); + constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY; + constexpr auto ElemType = MeshElementType::BDR_SUBMESH; + MaterialPropertyCoefficient muinv_func(mat_op); + auto btn = std::make_unique(&h1_fespace, &nd_fespace); + btn->AddDomainIntegrator(new mfem::MixedVectorGradientIntegrator(muinv_func)); + btn->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + btn->Assemble(skip_zeros); + btn->Finalize(skip_zeros); + return std::make_unique(std::move(btn), h1_fespace, nd_fespace, false); } -struct Bnn -{ - mfem::HypreParMatrix Bnn1; - mfem::HypreParMatrix Bnn2r; - std::optional Bnn2i; -}; - -inline Bnn GetBnn(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &h1_fespace, - mfem::Array &attr_marker) +std::array, 3> GetBnn(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &h1_fespace) { // Mass matrix: Bₙₙ = (μ⁻¹ ∇ₜ u, ∇ₜ v) - ω² (ε u, v) = Bₙₙ₁ - ω² Bₙₙ₂. 
- MaterialPropertyCoefficient muinv_func(mat_op); - mfem::ParBilinearForm bnn1(&h1_fespace); - bnn1.AddBoundaryIntegrator(new mfem::MixedGradGradIntegrator(muinv_func), attr_marker); - // bnn1.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn1.Assemble(skip_zeros); - bnn1.Finalize(skip_zeros); - + constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + constexpr auto ElemType = MeshElementType::BDR_SUBMESH; + MaterialPropertyCoefficient muinv_func(mat_op); + auto bnn1 = std::make_unique(&h1_fespace); + bnn1->AddDomainIntegrator(new mfem::MixedGradGradIntegrator(muinv_func)); + bnn1->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + bnn1->Assemble(skip_zeros); + bnn1->Finalize(skip_zeros); + + constexpr auto MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; NormalProjectedCoefficient epsilon_func( - std::make_unique< - MaterialPropertyCoefficient>(mat_op)); - mfem::ParBilinearForm bnn2r(&h1_fespace); - bnn2r.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(epsilon_func), - attr_marker); - // bnn2r.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn2r.Assemble(skip_zeros); - bnn2r.Finalize(skip_zeros); - - // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). 
- if (mat_op.HasLossTangent()) - { - NormalProjectedCoefficient negepstandelta_func( - std::make_unique< - MaterialPropertyCoefficient>(mat_op)); - mfem::ParBilinearForm bnn2i(&h1_fespace); - bnn2i.AddBoundaryIntegrator(new mfem::MixedScalarMassIntegrator(negepstandelta_func), - attr_marker); - // bnn2i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - bnn2i.Assemble(skip_zeros); - bnn2i.Finalize(skip_zeros); - return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble(), *bnn2i.ParallelAssemble()}; - } - else + std::make_unique>(mat_op)); + auto bnn2r = std::make_unique(&h1_fespace); + bnn2r->AddDomainIntegrator(new mfem::MixedScalarMassIntegrator(epsilon_func)); + bnn2r->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + bnn2r->Assemble(skip_zeros); + bnn2r->Finalize(skip_zeros); + + // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)). + if (!mat_op.HasLossTangent()) { - return {*bnn1.ParallelAssemble(), *bnn2r.ParallelAssemble()}; + return {std::make_unique(std::move(bnn1), h1_fespace), + std::make_unique(std::move(bnn2r), h1_fespace), nullptr}; } + constexpr auto MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; + NormalProjectedCoefficient negepstandelta_func( + std::make_unique>(mat_op)); + auto bnn2i = std::make_unique(&h1_fespace); + bnn2i->AddDomainIntegrator(new mfem::MixedScalarMassIntegrator(negepstandelta_func)); + bnn2i->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + bnn2i->Assemble(skip_zeros); + bnn2i->Finalize(skip_zeros); + return {std::make_unique(std::move(bnn1), h1_fespace), + std::make_unique(std::move(bnn2r), h1_fespace), + std::make_unique(std::move(bnn2i), h1_fespace)}; } -struct Att -{ - mfem::HypreParMatrix Att1; - mfem::HypreParMatrix Att2r; - std::optional Att2i; -}; - -inline Att GetAtt(const MaterialOperator &mat_op, mfem::ParFiniteElementSpace &nd_fespace, - mfem::Array &attr_marker) +std::array, 3> GetAtt(const MaterialOperator &mat_op, + mfem::ParFiniteElementSpace &nd_fespace) { // Stiffness matrix: Aₜₜ = (μ⁻¹ ∇ₜ x u, ∇ₜ 
x v) - ω² (ε u, v) = Aₜₜ₁ - ω² Aₜₜ₂. + constexpr auto MatTypeMuInv = MaterialPropertyType::INV_PERMEABILITY; + constexpr auto ElemType = MeshElementType::BDR_SUBMESH; NormalProjectedCoefficient muinv_func( - std::make_unique>( - mat_op)); - mfem::ParBilinearForm att1(&nd_fespace); - att1.AddBoundaryIntegrator(new mfem::CurlCurlIntegrator(muinv_func), attr_marker); - // att1.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - att1.Assemble(skip_zeros); - att1.Finalize(skip_zeros); - - MaterialPropertyCoefficient epsilon_func(mat_op); - mfem::ParBilinearForm att2r(&nd_fespace); - att2r.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func), - attr_marker); - // att2r.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - att2r.Assemble(skip_zeros); - att2r.Finalize(skip_zeros); - - // Contribution for loss tangent: ε => ε * (1 - i tan(δ)). + std::make_unique>(mat_op)); + auto att1 = std::make_unique(&nd_fespace); + att1->AddDomainIntegrator(new mfem::CurlCurlIntegrator(muinv_func)); + att1->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + att1->Assemble(skip_zeros); + att1->Finalize(skip_zeros); + + constexpr auto MatTypeEpsReal = MaterialPropertyType::PERMITTIVITY_REAL; + MaterialPropertyCoefficient epsilon_func(mat_op); + auto att2r = std::make_unique(&nd_fespace); + att2r->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(epsilon_func)); + att2r->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + att2r->Assemble(skip_zeros); + att2r->Finalize(skip_zeros); + + // Contribution for loss tangent: ε -> ε * (1 - i tan(δ)). 
if (!mat_op.HasLossTangent()) { - return {*att1.ParallelAssemble(), *att2r.ParallelAssemble()}; - } - MaterialPropertyCoefficient negepstandelta_func( - mat_op); - mfem::ParBilinearForm att2i(&nd_fespace); - att2i.AddBoundaryIntegrator(new mfem::MixedVectorMassIntegrator(negepstandelta_func), - attr_marker); - // att2i.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - att2i.Assemble(skip_zeros); - att2i.Finalize(skip_zeros); - return {*att1.ParallelAssemble(), *att2r.ParallelAssemble(), *att2i.ParallelAssemble()}; + return {std::make_unique(std::move(att1), nd_fespace), + std::make_unique(std::move(att2r), nd_fespace), nullptr}; + } + constexpr auto MatTypeEpsImag = MaterialPropertyType::PERMITTIVITY_IMAG; + MaterialPropertyCoefficient negepstandelta_func(mat_op); + auto att2i = std::make_unique(&nd_fespace); + att2i->AddDomainIntegrator(new mfem::MixedVectorMassIntegrator(negepstandelta_func)); + att2i->SetAssemblyLevel(mfem::AssemblyLevel::LEGACY); + att2i->Assemble(skip_zeros); + att2i->Finalize(skip_zeros); + return {std::make_unique(std::move(att1), nd_fespace), + std::make_unique(std::move(att2r), nd_fespace), + std::make_unique(std::move(att2i), nd_fespace)}; } -inline mfem::HypreParMatrix GetZ(mfem::ParFiniteElementSpace &fespace) +std::array, 6> +GetSystemMatrices(std::unique_ptr Btt, std::unique_ptr Btn, + std::unique_ptr Bnn1, std::unique_ptr Bnn2r, + std::unique_ptr Bnn2i, std::unique_ptr Att1, + std::unique_ptr Att2r, std::unique_ptr Att2i, + const mfem::Array &nd_dbc_tdof_list, + const mfem::Array &h1_dbc_tdof_list) { - // Zero matrix on ND or H1 space dofs. - mfem::ParBilinearForm z(&fespace); - // z.SetAssemblyLevel(mfem::AssemblyLevel::FULL); - z.Assemble(skip_zeros); - z.Finalize(skip_zeros); - return *z.ParallelAssemble(); -} + // Construct the 2x2 block matrices for the eigenvalue problem A e = λ B e. We pre-compute + // the matrices such that: + // A = A₁ - ω² A₂, B = A₁ - ω² A₂ + 1/Θ² B₃ - ω²/Θ² B₄. 
+ std::unique_ptr BtnT(Btn->ParallelAssemble().Transpose()); -struct SystemMatrices -{ - petsc::PetscParMatrix A1; - petsc::PetscParMatrix A2; - petsc::PetscParMatrix B3; - petsc::PetscParMatrix B4; -}; + mfem::Array2D blocks(2, 2); + blocks(0, 0) = &Btt->ParallelAssemble(); + blocks(0, 1) = &Btn->ParallelAssemble(); + blocks(1, 0) = BtnT.get(); + blocks(1, 1) = &Bnn1->ParallelAssemble(); + std::unique_ptr A1(mfem::HypreParMatrixFromBlocks(blocks)); -SystemMatrices -GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix &Att2r, - const std::optional &Att2i, - const mfem::HypreParMatrix &Btt, const mfem::HypreParMatrix &Btn, - const mfem::HypreParMatrix &Bnn1, const mfem::HypreParMatrix &Bnn2r, - const std::optional &Bnn2i, - const mfem::HypreParMatrix &Ztt, const mfem::HypreParMatrix &Znn, - const mfem::Array &nd_tdof_list, - const mfem::Array &h1_tdof_list, int nd_tdof_offset) -{ - // Construct the 2x2 block matrices for the eigenvalue problem. We pre- compute the - // eigenvalue problem matrices such that: - // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. 
- mfem::Array2D blocks(2, 2); - blocks(0, 0) = &Btt; - blocks(0, 1) = &Btn; - blocks(1, 0) = Btn.Transpose(); - blocks(1, 1) = &Bnn1; - std::unique_ptr hA1s(mfem::HypreParMatrixFromBlocks(blocks)); - auto A1s = petsc::PetscAijMatrix(*hA1s); + auto &Ztt = Btt->ParallelAssemble(); + Ztt *= 0.0; blocks = nullptr; blocks(0, 0) = &Ztt; - blocks(1, 1) = &Bnn2r; - std::unique_ptr hA2r(mfem::HypreParMatrixFromBlocks(blocks)); - auto A2s = [&]() + blocks(1, 1) = &Bnn2r->ParallelAssemble(); + std::unique_ptr A2r(mfem::HypreParMatrixFromBlocks(blocks)); + + std::unique_ptr A2i; + if (Bnn2i) { - if (!Bnn2i) - { - return petsc::PetscAijMatrix(*hA2r); - } - blocks(1, 1) = &*Bnn2i; - std::unique_ptr hA2i(mfem::HypreParMatrixFromBlocks(blocks)); - return petsc::PetscAijMatrix(*hA2r, *hA2i); - }(); + blocks(1, 1) = &Bnn2i->ParallelAssemble(); + A2i.reset(mfem::HypreParMatrixFromBlocks(blocks)); + } + + auto &Znn = Bnn1->ParallelAssemble(); + Znn *= 0.0; blocks = nullptr; - blocks(0, 0) = &Att1; + blocks(0, 0) = &Att1->ParallelAssemble(); blocks(1, 1) = &Znn; - std::unique_ptr hB3s(mfem::HypreParMatrixFromBlocks(blocks)); - auto B3s = petsc::PetscAijMatrix(*hB3s); + std::unique_ptr B3(mfem::HypreParMatrixFromBlocks(blocks)); - blocks = nullptr; - blocks(0, 0) = &Att2r; + blocks(0, 0) = &Att2r->ParallelAssemble(); blocks(1, 1) = &Znn; - std::unique_ptr hB4r(mfem::HypreParMatrixFromBlocks(blocks)); - auto B4s = [&]() + std::unique_ptr B4r(mfem::HypreParMatrixFromBlocks(blocks)); + + std::unique_ptr B4i; + if (Att2i) { - if (!Att2i) - { - return petsc::PetscAijMatrix(*hB4r); - } - blocks(0, 0) = &*Att2i; - std::unique_ptr hB4i(mfem::HypreParMatrixFromBlocks(blocks)); - return petsc::PetscAijMatrix(*hB4r, *hB4i); - }(); + blocks(0, 0) = &Att2i->ParallelAssemble(); + B4i.reset(mfem::HypreParMatrixFromBlocks(blocks)); + } - // Consolidate list of local ND and H1 tdofs before extracting the respective submatrices. 
- // The matrix is still distributed over the same number of processors, though some are - // empty (PETSc handles this). - mfem::Array tdof_list; - tdof_list.Reserve(nd_tdof_list.Size() + h1_tdof_list.Size()); - for (auto tdof : nd_tdof_list) + // Eliminate boundary true dofs not associated with this wave port or constrained by + // Dirichlet BCs. It is not guaranteed that any HypreParMatrix has a full diagonal in its + // sparsity pattern, so we add a zero diagonal before elimination to guarantee this for A1 + // and B3. + mfem::Array dbc_tdof_list; + int nd_tdof_offset = Btt->Height(); + dbc_tdof_list.Reserve(nd_dbc_tdof_list.Size() + h1_dbc_tdof_list.Size()); + for (auto tdof : nd_dbc_tdof_list) + { + dbc_tdof_list.Append(tdof); + } + for (auto tdof : h1_dbc_tdof_list) { - tdof_list.Append(tdof); + dbc_tdof_list.Append(tdof + nd_tdof_offset); } - for (auto tdof : h1_tdof_list) + + mfem::Vector d(B3->Height()); + d = 0.0; + mfem::SparseMatrix diag(d); + mfem::HypreParMatrix Diag(B3->GetComm(), B3->GetGlobalNumRows(), B3->GetRowStarts(), + &diag); + A1.reset(mfem::Add(1.0, *A1, 1.0, Diag)); + B3.reset(mfem::Add(1.0, *B3, 1.0, Diag)); + + A1->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + A2r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + if (A2i) { - tdof_list.Append(tdof + nd_tdof_offset); + A2i->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); } - return {*A1s.GetSubMatrix(tdof_list, tdof_list), *A2s.GetSubMatrix(tdof_list, tdof_list), - *B3s.GetSubMatrix(tdof_list, tdof_list), *B4s.GetSubMatrix(tdof_list, tdof_list)}; + B3->EliminateBC(dbc_tdof_list, Operator::DIAG_ONE); + B4r->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + if (B4i) + { + B4i->EliminateBC(dbc_tdof_list, Operator::DIAG_ZERO); + } + + return {std::move(A1), std::move(A2r), std::move(A2i), + std::move(B3), std::move(B4r), std::move(B4i)}; } -} // namespace +void GetInitialSpace(mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array 
&nd_dbc_tdof_list, + const mfem::Array &h1_dbc_tdof_list, ComplexVector &v) +{ + // Initial space chosen such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A) + // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference. + // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner + // product (since we use a general non-Hermitian solver due to complex symmetric B), then + // we just use v0 = y0 directly. + v.SetSize(nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize()); + // linalg::SetRandomReal(nd_fespace.GetComm(), v); + v = std::complex(1.0, 0.0); + linalg::SetSubVector(v, nd_dbc_tdof_list, 0.0); + for (int i = nd_fespace.GetTrueVSize(); + i < nd_fespace.GetTrueVSize() + h1_fespace.GetTrueVSize(); i++) + { + v.Real()[i] = v.Imag()[i] = 0.0; + } +} + +void NormalizeWithSign(const mfem::ParGridFunction &S0t, mfem::ParComplexGridFunction &E0t, + mfem::ParComplexGridFunction &E0n, mfem::LinearForm &sr, + mfem::LinearForm &si) +{ + // Normalize grid functions to a chosen polarization direction and unit power, |E x H⋆| ⋅ + // n, integrated over the port surface (+n is the direction of propagation). The n x H + // coefficients are updated implicitly as they only store references to the Et, En grid + // functions as well as kₙ, ω. We choose a (rather arbitrary) sign constraint to at least + // make results for the same port consistent between frequencies/meshes. + sr = 0.0; + si = 0.0; + sr.Assemble(); + si.Assemble(); + + // |E x H⋆| ⋅ n = |E ⋅ (-n x H⋆)| + double sign = sr * S0t; + std::complex dot(-(sr * E0t.real()) - (si * E0t.imag()), + -(sr * E0t.imag()) + (si * E0t.real())); + std::array data = {sign, dot.real(), dot.imag()}; + Mpi::GlobalSum(3, data.data(), S0t.ParFESpace()->GetComm()); + sign = (data[0] < 0.0) ? 
-1.0 : 1.0; + dot = {data[1], data[2]}; + + double scale = sign / std::sqrt(std::abs(dot)); + E0t.real() *= scale; // Updates the n x H coefficients depending on Et, En too + E0t.imag() *= scale; + E0n.real() *= scale; + E0n.imag() *= scale; + sr *= scale; // Update linear forms for postprocessing + si *= scale; + + // This parallel communication is not required since wave port boundaries are true + // one-sided boundaries. + // port_E0t->real().ExchangeFaceNbrData(); // Ready for parallel comm on shared faces + // port_E0t->imag().ExchangeFaceNbrData(); // for n x H coefficients evaluation + // port_E0n->real().ExchangeFaceNbrData(); + // port_E0n->imag().ExchangeFaceNbrData(); +} // Computes boundary modal n x H, where +n is the direction of wave propagation: n x H = // -1/(iωμ) (ikₙ Eₜ + ∇ₜ Eₙ), using the tangential and normal electric field component grid @@ -244,74 +350,121 @@ GetSystemMatrices(const mfem::HypreParMatrix &Att1, const mfem::HypreParMatrix & // grid function is to be dotted with a function E which is only in the tangential // component, so the fact that we use the full ∇ Eₙ in the element is fine. We use only the // real part of kn. 
-class BdrHVectorCoefficient : public mfem::VectorCoefficient +template +class BdrSubmeshHVectorCoefficient : public mfem::VectorCoefficient { private: - const mfem::ParComplexGridFunction &gridfunc_t, &gridfunc_n; + const mfem::ParComplexGridFunction &Et, &En; const MaterialOperator &mat_op; - const bool imaginary; + + mfem::ParSubMesh &submesh; + const mfem::ParMesh &parent; + std::unordered_map submesh_elem_ids; + std::complex kn; double omega; + mfem::ParSubMesh &GetSubMesh(mfem::ParMesh &mesh) + { + MFEM_ASSERT( + mfem::ParSubMesh::IsParSubMesh(&mesh), + "BdrSubmeshHVectorCoefficient requires the input grid function coefficients " + "to be defined on a SubMesh!"); + mfem::ParSubMesh &submesh = *static_cast(&mesh); + MFEM_ASSERT(submesh.GetFrom() == mfem::SubMesh::From::Boundary, + "BdrSubmeshHVectorCoefficient requires a SubMesh created using " + "CreateFromBoundary!"); + return submesh; + } + public: - BdrHVectorCoefficient(const mfem::ParComplexGridFunction &Et, - const mfem::ParComplexGridFunction &En, const MaterialOperator &op, - bool imag) - : mfem::VectorCoefficient(Et.ParFESpace()->GetParMesh()->SpaceDimension()), - gridfunc_t(Et), gridfunc_n(En), mat_op(op), imaginary(imag), kn(0.0), omega(0.0) + BdrSubmeshHVectorCoefficient(const mfem::ParComplexGridFunction &Et, + const mfem::ParComplexGridFunction &En, + const MaterialOperator &mat_op) + : mfem::VectorCoefficient(Et.ParFESpace()->GetParMesh()->SpaceDimension()), Et(Et), + En(En), mat_op(mat_op), submesh(GetSubMesh(*Et.ParFESpace()->GetParMesh())), + parent(*submesh.GetParent()), kn(0.0), omega(0.0) { + // Construct mapping from parent (boundary) element indices to submesh (domain) + // elements. 
+ const mfem::Array &parent_element_ids = submesh.GetParentElementIDMap(); + for (int i = 0; i < parent_element_ids.Size(); i++) + { + submesh_elem_ids[parent_element_ids[i]] = i; + } } void Eval(mfem::Vector &V, mfem::ElementTransformation &T, const mfem::IntegrationPoint &ip) override { - MFEM_VERIFY(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT, - "Unexpected element type in BdrHVectorCoefficient!"); - MFEM_VERIFY(gridfunc_t.ParFESpace()->GetParMesh() == T.mesh && - gridfunc_n.ParFESpace()->GetParMesh() == T.mesh, - "Invalid mesh for BdrHVectorCoefficient!"); - - // This coefficient is only to be used on true exterior boundaries. - int i, o; - int iel1, iel2, info1, info2; - const mfem::Mesh &mesh = *T.mesh; - mesh.GetBdrElementFace(T.ElementNo, &i, &o); - mesh.GetFaceElements(i, &iel1, &iel2); - mesh.GetFaceInfos(i, &info1, &info2); - if (info2 >= 0) + mfem::ElementTransformation *submesh_T = nullptr; + int attr = 0; + if (T.mesh == &parent) + { + MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::BDR_ELEMENT, + "BdrSubmeshHVectorCoefficient requires ElementType::BDR_ELEMENT when not " + "used on a SubMesh!"); + auto it = submesh_elem_ids.find(T.ElementNo); + if (it == submesh_elem_ids.end()) + { + // Just return zero for a boundary face not in the submesh. 
+ V.SetSize(vdim); + V = 0.0; + return; + } + else + { + submesh_T = submesh.GetElementTransformation(it->second); + } + + int i, o, iel1, iel2; + parent.GetBdrElementFace(T.ElementNo, &i, &o); + parent.GetFaceElements(i, &iel1, &iel2); + attr = parent.GetAttribute(iel1); + } + else if (T.mesh == &submesh) + { + MFEM_ASSERT(T.ElementType == mfem::ElementTransformation::ELEMENT, + "BdrSubmeshHVectorCoefficient requires ElementType::ELEMENT when used on " + "a SubMesh!"); + submesh_T = &T; + + int i, o, iel1, iel2; + parent.GetBdrElementFace(submesh.GetParentElementIDMap()[T.ElementNo], &i, &o); + parent.GetFaceElements(i, &iel1, &iel2); + attr = parent.GetAttribute(iel1); + } + else { - // Just return for an non-true boundary face. - V.SetSize(vdim); - V = 0.0; - return; + MFEM_ABORT("Invalid use of BdrSubmeshHVectorCoefficient on an unrecognized mesh!"); } // Compute Re/Im{-1/i (ikₙ Eₜ + ∇ₜ Eₙ)}. - T.SetIntPoint(&ip); - if (imaginary) + mfem::Vector U; + submesh_T->SetIntPoint(&ip); + if constexpr (RealPart) { - gridfunc_t.imag().GetVectorValue(T, ip, V); - V *= -kn.real(); + Et.real().GetVectorValue(*submesh_T, ip, U); + U *= -kn.real(); - mfem::Vector Vn; - gridfunc_n.real().GetGradient(T, Vn); - V += Vn; + mfem::Vector dU; + En.imag().GetGradient(*submesh_T, dU); + U -= dU; } else { - gridfunc_t.real().GetVectorValue(T, ip, V); - V *= -kn.real(); + Et.imag().GetVectorValue(*submesh_T, ip, U); + U *= -kn.real(); - mfem::Vector Vn; - gridfunc_n.imag().GetGradient(T, Vn); - V -= Vn; + mfem::Vector dU; + En.real().GetGradient(*submesh_T, dU); + U += dU; } // Scale by 1/(ωμ) with μ evaluated in the neighboring element. 
- mfem::Vector t(V.Size()); + V.SetSize(U.Size()); + mat_op.GetInvPermeability(attr).Mult(U, V); V *= (1.0 / omega); - mat_op.GetInvPermeability(mesh.GetAttribute(iel1)).Mult(V, t); - V = std::move(t); } void SetFrequency(double w, std::complex k) @@ -321,17 +474,52 @@ class BdrHVectorCoefficient : public mfem::VectorCoefficient } }; +} // namespace + WavePortData::WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace) + mfem::ParFiniteElementSpace &h1_fespace, + const mfem::Array &dbc_marker) { excitation = data.excitation; mode_idx = data.mode_idx; d_offset = data.d_offset; + + // Construct the SubMesh. MFEM_VERIFY(!data.attributes.empty(), "Wave port boundary found with no attributes!"); - mesh::AttrToMarker(nd_fespace.GetParMesh()->bdr_attributes.Max(), data.attributes, - attr_marker); + mfem::ParMesh &mesh = *nd_fespace.GetParMesh(); + attr_list.Reserve(data.attributes.size()); + for (auto attr : data.attributes) + { + attr_list.Append(attr); + } + mesh::AttrToMarker(nd_fespace.GetParMesh()->bdr_attributes.Max(), attr_list, attr_marker); + port_mesh = std::make_unique( + mfem::ParSubMesh::CreateFromBoundary(mesh, attr_list)); + + int p_nd = nd_fespace.GetMaxElementOrder(); + int p_h1 = h1_fespace.GetMaxElementOrder(); + port_nd_fec = std::make_unique(p_nd, mesh.Dimension() - 1); + port_h1_fec = std::make_unique(p_h1, mesh.Dimension() - 1); + port_nd_fespace = + std::make_unique(port_mesh.get(), port_nd_fec.get()); + port_h1_fespace = + std::make_unique(port_mesh.get(), port_h1_fec.get()); + + mfem::ParGridFunction E0t(&nd_fespace), E0n(&h1_fespace); + port_E0t = std::make_unique(port_nd_fespace.get()); + port_E0n = std::make_unique(port_h1_fespace.get()); + + port_nd_transfer = std::make_unique( + mfem::ParSubMesh::CreateTransferMap(E0t, port_E0t->real())); + port_h1_transfer = std::make_unique( + 
mfem::ParSubMesh::CreateTransferMap(E0n, port_E0n->real())); + + // Extract Dirichlet BC true dofs for the port FE spaces. + mfem::Array port_nd_dbc_tdof_list, port_h1_dbc_tdof_list; + GetEssentialTrueDofs(E0t, E0n, port_E0t->real(), port_E0n->real(), *port_nd_transfer, + *port_h1_transfer, dbc_marker, port_nd_dbc_tdof_list, + port_h1_dbc_tdof_list); // Construct operators for the generalized eigenvalue problem: // [Aₜₜ 0] [eₜ] = -kₙ² [Bₜₜ Bₜₙ] [eₜ] @@ -340,271 +528,263 @@ WavePortData::WavePortData(const config::WavePortData &data, const MaterialOpera // field by Eₜ = eₜ/kₙ and Eₙ = ieₙ. This is solved on the global mesh so the result is a // grid function over the entire space, not just the port boundary (so that it can be // queried from functions which use the global mesh). - GetTrueDofs(dbc_marker, nd_fespace, h1_fespace, nd_attr_tdof_list, h1_attr_tdof_list); - - // Construct the system matrices. We will actually solve the shifted problem: - // [Bₜₜ Bₜₙ] [eₜ] = λ [Bₜₜ + 1/Θ² Aₜₜ Bₜₙ] [eₜ] - // [Bₜₙᵀ Bₙₙ] [eₙ] [Bₜₙᵀ Bₙₙ] [eₙ] - // (see Lee, Sun, and Cendes, 1991). Here we have λ = Θ²/(Θ²-kₙ²), where Θ² bounds the - // maximum kₙ² and is taken as ω² μₘₐₓ εₘₐₓ over the entire simulation domain. - double cmin = mfem::infinity(); - for (auto attr : nd_fespace.GetParMesh()->attributes) - { - double s = mat_op.GetLightSpeedMin(attr); - if (s < cmin) - { - cmin = s; - } + // + // We will actually solve the shifted problem A e = λ B e, where: + // [Bₜₜ Bₜₙ] [eₜ] = λ [Bₜₜ + 1/Θ² Aₜₜ Bₜₙ] [eₜ] + // [Bₜₙᵀ Bₙₙ] [eₙ] [Bₜₙᵀ Bₙₙ] [eₙ] . + // Here we have λ = Θ²/(Θ²-kₙ²), where Θ² bounds the maximum kₙ² and is taken as Θ² = + // ω² μₘₐₓ εₘₐₓ over the entire simulation domain. + // Reference: Lee, Sun, and Cendes, Full-wave analysis of dielectric waveguides using + // tangential vector finite elements, IEEE Trans. Microwave Theory Tech. + // (1991). 
+ double c_min = mfem::infinity(); + for (auto attr : mesh.attributes) + { + c_min = std::min(c_min, mat_op.GetLightSpeedMin(attr)); } - MFEM_VERIFY(cmin > 0.0, "Invalid material speed of light detected in WavePortOperator!"); - muepsmax = 1.0 / (cmin * cmin); + MFEM_VERIFY(c_min > 0.0 && c_min < mfem::infinity(), + "Invalid material speed of light detected in WavePortOperator!"); + mu_eps_max = 1.0 / (c_min * c_min); // Pre-compute problem matrices such that: - // A = A₁ - ω² A₂, B = A + 1/Θ² B₃ - ω²/Θ² B₄. - // First, create parallel objects and then gather to matrices and vectors to root. - { - const auto &Btt = GetBtt(mat_op, nd_fespace, attr_marker); - const auto &Btn = GetBtn(mat_op, nd_fespace, h1_fespace, attr_marker); - const auto &[Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, h1_fespace, attr_marker); - const auto &[Att1, Att2r, Att2i] = GetAtt(mat_op, nd_fespace, attr_marker); - const auto &Ztt = GetZ(nd_fespace); - const auto &Znn = GetZ(h1_fespace); - auto system_mat = - GetSystemMatrices(Att1, Att2r, Att2i, Btt, Btn, Bnn1, Bnn2r, Bnn2i, Ztt, Znn, - nd_attr_tdof_list, h1_attr_tdof_list, nd_fespace.GetTrueVSize()); - A1 = std::make_unique(std::move(system_mat.A1)); - A2 = std::make_unique(std::move(system_mat.A2)); - B3 = std::make_unique(std::move(system_mat.B3)); - B4 = std::make_unique(std::move(system_mat.B4)); - } - - // Configure sequential vector and scatter from parallel. The original vector is created - // to be compatible with the parallel matrix, and the scatter creates a sequential vector - // compatible with the sequential matrix. Then, gather matrices so eigenvalue problem can - // be solved sequentially without communication. A1/A2/B3/B4 = nullptr if !root. 
- { - bool root = Mpi::Root(A1->GetComm()); - e = std::make_unique(*A1); - scatter = - std::make_unique(petsc::PetscScatter::Type::TO_ZERO, *e, e0); - A1 = A1->GetSequentialMatrix(root); - A2 = A2->GetSequentialMatrix(root); - B3 = B3->GetSequentialMatrix(root); - B4 = B4->GetSequentialMatrix(root); - } - if (A1) - { - // sparsity(A2) ⊆ sparsity(A1), sparsity(B4) ⊆ sparsity(B3) ⊆ sparsity(A) - A = std::make_unique(*A1); - B = std::make_unique(*A1); - A->SetSymmetric(); - B->SetSymmetric(); - A1->SetSymmetric(); - A2->SetSymmetric(); - B3->SetSymmetric(); - B4->SetSymmetric(); - } - - // Create vector for initial space (initially parallel, then scattered to root). - { - petsc::PetscParVector y(*e); - GetInitialSpace(nd_attr_tdof_list.Size(), h1_attr_tdof_list.Size(), y); - y0 = std::make_unique(*e0); - scatter->Forward(y, *y0); + // A = A₁ - ω² A₂, B = A₁ - 1 / (μₘ εₘ) B₄ - ω² A₂ + 1/Θ² B₃ . + { + std::unique_ptr A1, B4r, B4i; + { + auto Btt = GetBtt(mat_op, *port_nd_fespace); + auto Btn = GetBtn(mat_op, *port_nd_fespace, *port_h1_fespace); + auto [Bnn1, Bnn2r, Bnn2i] = GetBnn(mat_op, *port_h1_fespace); + auto [Att1, Att2r, Att2i] = GetAtt(mat_op, *port_nd_fespace); + + auto system_mats = GetSystemMatrices( + std::move(Btt), std::move(Btn), std::move(Bnn1), std::move(Bnn2r), + std::move(Bnn2i), std::move(Att1), std::move(Att2r), std::move(Att2i), + port_nd_dbc_tdof_list, port_h1_dbc_tdof_list); + A1 = std::move(system_mats[0]); + A2r = std::move(system_mats[1]); + A2i = std::move(system_mats[2]); + B3 = std::move(system_mats[3]); + B4r = std::move(system_mats[4]); + B4i = std::move(system_mats[5]); + } + + // Allocate storage for the eigenvalue problem operators. We have sparsity(A2) = + // sparsity(B3) = sparsity(B4) ⊆ sparsity(A1). Precompute the frequency independent + // contributions to A and B. 
+ P = std::make_unique( + std::make_unique(*A1), nullptr); + if (A2i) + { + A = std::make_unique( + std::make_unique(*A1), + std::make_unique(*A2i)); + B = std::make_unique( + std::make_unique(*A1), + std::make_unique(*A2i)); + + auto &Br = *static_cast(B->Real()); + Br.Add(-1.0 / mu_eps_max, *B4r); + + auto &Ai = *static_cast(A->Imag()); + auto &Bi = *static_cast(B->Imag()); + Ai *= 0.0; + Bi *= 0.0; + Bi.Add(-1.0 / mu_eps_max, *B4i); + } + else + { + A = std::make_unique( + std::make_unique(*A1), nullptr); + B = std::make_unique( + std::make_unique(*A1), nullptr); + + auto &Br = *static_cast(B->Real()); + Br.Add(-1.0 / mu_eps_max, *B4r); + } } - // Coefficients store references to kₙ, ω so they are updated implicitly at each new - // solve. Also, μ⁻¹ is persistent, so no copy is OK. - kn0 = 0.0; - omega0 = 0.0; - E0t = std::make_unique(&nd_fespace); - E0n = std::make_unique(&h1_fespace); - nxH0r_func = std::make_unique(*E0t, *E0n, mat_op, false); - nxH0i_func = std::make_unique(*E0t, *E0n, mat_op, true); + // Create vector for initial space for eigenvalue solves (for nullspace of [Aₜₜ 0] + // [0 0] ). + GetInitialSpace(*port_nd_fespace, *port_h1_fespace, port_nd_dbc_tdof_list, + port_h1_dbc_tdof_list, v0); + e0.SetSize(v0.Size()); + e0t.SetSize(port_nd_fespace->GetTrueVSize()); + e0n.SetSize(port_h1_fespace->GetTrueVSize()); + + // Configure a communicator for the processes which have elements for this port. + MPI_Comm comm = nd_fespace.GetComm(); + int color = (port_nd_fespace->GetVSize() > 0 || port_h1_fespace->GetVSize() > 0) + ? 0 + : MPI_UNDEFINED; + MPI_Comm_split(comm, color, Mpi::Rank(comm), &port_comm); + MFEM_VERIFY((color == 0 && port_comm != MPI_COMM_NULL) || + (color == MPI_UNDEFINED && port_comm == MPI_COMM_NULL), + "Unexpected error splitting communicator for wave port boundaries!"); + port_root = (color == MPI_UNDEFINED) ? 
Mpi::Size(comm) : Mpi::Rank(comm); + Mpi::GlobalMin(1, &port_root, comm); + MFEM_VERIFY(port_root < Mpi::Size(comm), "No root process found for port!"); // Configure the eigenvalue problem solver. As for the full 3D case, the system matrices // are in general complex and symmetric. We supply the operators to the solver in - // shift-inverted form and handle the back- transformation externally. - if (A) + // shift-inverted form and handle the back-transformation externally. + if (port_comm != MPI_COMM_NULL) { // Define the linear solver to be used for solving systems associated with the - // generalized eigenvalue problem. We use PETSc's sequential sparse solvers. - int print = 0; - ksp = std::make_unique(A->GetComm(), print, "port_"); - ksp->SetType(KspSolver::Type::CHOLESKY); // Symmetric indefinite factorization - ksp->SetOperator(*B); - - // Define the eigenvalue solver. - config::EigenSolverData::Type type = config::EigenSolverData::Type::DEFAULT; -#if defined(PALACE_WITH_ARPACK) && defined(PALACE_WITH_SLEPC) - if (type == config::EigenSolverData::Type::DEFAULT) + // generalized eigenvalue problem. + constexpr int ksp_print = 0; + constexpr double ksp_tol = 1.0e-8; + constexpr double ksp_max_it = 30; + auto gmres = std::make_unique>(port_comm, ksp_print); + gmres->SetInitialGuess(false); + gmres->SetRelTol(ksp_tol); + gmres->SetMaxIter(ksp_max_it); + gmres->SetRestartDim(ksp_max_it); + // gmres->SetPrecSide(GmresSolver::PrecSide::RIGHT); + + config::LinearSolverData::Type pc_type; +#if defined(MFEM_USE_SUPERLU) + pc_type = config::LinearSolverData::Type::SUPERLU; +#elif defined(MFEM_USE_STRUMPACK) + pc_type = config::LinearSolverData::Type::STRUMPACK; +#elif defined(MFEM_USE_MUMPS) + pc_type = config::LinearSolverData::Type::MUMPS; +#else +#error "Wave port solver requires building with SuperLU_DIST, STRUMPACK, or MUMPS!" 
+#endif + std::unique_ptr> pc; + if (pc_type == config::LinearSolverData::Type::SUPERLU) { - type = config::EigenSolverData::Type::SLEPC; +#if defined(MFEM_USE_SUPERLU) + auto slu = std::make_unique( + port_comm, config::LinearSolverData::SymFactType::DEFAULT, false, ksp_print - 1); + // slu->GetSolver().SetColumnPermutation(mfem::superlu::NATURAL); + pc = std::make_unique>(std::move(slu)); +#endif } -#elif defined(PALACE_WITH_ARPACK) - if (type == config::EigenSolverData::Type::SLEPC) + else if (pc_type == config::LinearSolverData::Type::STRUMPACK) { - Mpi::Warning("SLEPc eigensolver not available, using ARPACK!\n"); +#if defined(MFEM_USE_STRUMPACK) + auto strumpack = std::make_unique( + port_comm, config::LinearSolverData::SymFactType::DEFAULT, + config::LinearSolverData::CompressionType::NONE, 0.0, 0, 0, ksp_print - 1); + // strumpack->SetReorderingStrategy(strumpack::ReorderingStrategy::NATURAL); + pc = std::make_unique>(std::move(strumpack)); +#endif } - type = config::EigenSolverData::Type::ARPACK; -#elif defined(PALACE_WITH_SLEPC) - if (type == config::EigenSolverData::Type::ARPACK) + else // config::LinearSolverData::Type::MUMPS { - Mpi::Warning("ARPACK eigensolver not available, using SLEPc!\n"); +#if defined(MFEM_USE_MUMPS) + auto mumps = std::make_unique( + port_comm, mfem::MUMPSSolver::SYMMETRIC_INDEFINITE, + config::LinearSolverData::SymFactType::DEFAULT, 0.0, ksp_print - 1); + // mumps->SetReorderingStrategy(mfem::MUMPSSolver::AMD); + pc = std::make_unique>(std::move(mumps)); +#endif } + ksp = std::make_unique(std::move(gmres), std::move(pc)); + + // Define the eigenvalue solver. + constexpr int print = 0; + config::EigenSolverData::Type type; +#if defined(PALACE_WITH_SLEPC) type = config::EigenSolverData::Type::SLEPC; +#elif defined(PALACE_WITH_ARPACK) + type = config::EigenSolverData::Type::ARPACK; #else #error "Wave port solver requires building with ARPACK or SLEPc!" 
#endif if (type == config::EigenSolverData::Type::ARPACK) { #if defined(PALACE_WITH_ARPACK) - eigen = std::unique_ptr(new arpack::ArpackEPSSolver(print)); + eigen = std::make_unique(port_comm, print); #endif } else // config::EigenSolverData::Type::SLEPC { #if defined(PALACE_WITH_SLEPC) - eigen = - std::unique_ptr(new slepc::SlepcEPSSolver(A->GetComm(), print)); - auto *slepc = dynamic_cast(eigen.get()); - slepc->SetProblemType(slepc::SlepcEigenSolver::ProblemType::GEN_NON_HERMITIAN); - slepc->SetType(slepc::SlepcEigenSolver::Type::KRYLOVSCHUR); + auto slepc = std::make_unique(port_comm, print); + slepc->SetType(slepc::SlepcEigenvalueSolver::Type::KRYLOVSCHUR); + slepc->SetProblemType(slepc::SlepcEigenvalueSolver::ProblemType::GEN_NON_HERMITIAN); + eigen = std::move(slepc); #endif } constexpr double tol = 1.0e-6; - eigen->SetLinearSolver(*ksp); - eigen->SetWhichEigenpairs(EigenSolverBase::WhichType::LARGEST_MAGNITUDE); eigen->SetNumModes(mode_idx, std::max(2 * mode_idx + 1, 5)); eigen->SetTol(tol); + eigen->SetWhichEigenpairs(EigenvalueSolver::WhichType::LARGEST_MAGNITUDE); + eigen->SetLinearSolver(*ksp); } -} -void WavePortData::GetTrueDofs(const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, - mfem::Array &nd_tdof_list, - mfem::Array &h1_tdof_list) -{ - // Ensures no duplicates in the attribute list for this port index (this would imply a - // mistake in the configuration file). We can, however, have multiple unique ports with - // shared boundary attributes. - nd_fespace.GetEssentialTrueDofs(attr_marker, nd_tdof_list); - h1_fespace.GetEssentialTrueDofs(attr_marker, h1_tdof_list); - int nd_tdofs = nd_tdof_list.Size(); - int h1_tdofs = h1_tdof_list.Size(); - - // Mark all ND and H1 dofs on the port, then unmark PEC boundaries. 
- mfem::Array nd_tdof_marker(nd_fespace.GetTrueVSize()), - h1_tdof_marker(h1_fespace.GetTrueVSize()), nd_dbc_tdof_list, h1_dbc_tdof_list; - nd_tdof_marker = 0; - h1_tdof_marker = 0; - nd_fespace.GetEssentialTrueDofs(dbc_marker, nd_dbc_tdof_list); - h1_fespace.GetEssentialTrueDofs(dbc_marker, h1_dbc_tdof_list); - for (auto tdof : nd_tdof_list) - { - nd_tdof_marker[tdof] = 1; - } - for (auto tdof : nd_dbc_tdof_list) - { - nd_tdof_marker[tdof] = 0; - } - for (auto tdof : h1_tdof_list) - { - h1_tdof_marker[tdof] = 1; - } - for (auto tdof : h1_dbc_tdof_list) + // Coefficients store references to kₙ, ω so they are updated implicitly at each new + // solve. Also, μ⁻¹ is persistent, so no copy is OK. + kn0 = 0.0; + omega0 = 0.0; + port_nxH0r_func = + std::make_unique>(*port_E0t, *port_E0n, mat_op); + port_nxH0i_func = + std::make_unique>(*port_E0t, *port_E0n, mat_op); + port_sr = std::make_unique(port_nd_fespace.get()); + port_si = std::make_unique(port_nd_fespace.get()); + port_sr->AddDomainIntegrator(new VectorFEDomainLFIntegrator(*port_nxH0r_func)); + port_si->AddDomainIntegrator(new VectorFEDomainLFIntegrator(*port_nxH0i_func)); + port_sr->UseFastAssembly(false); + port_si->UseFastAssembly(false); + + // Configure port mode sign convention: 1ᵀ Re{-n x H} >= 0 on the "upper-right quadrant" + // of the wave port boundary, in order to deal with symmetry effectively. { - h1_tdof_marker[tdof] = 0; - } + Vector bbmin, bbmax; + port_mesh->GetBoundingBox(bbmin, bbmax); + const int dim = port_mesh->SpaceDimension(); - // Convert back to a list. 
- nd_tdof_list.DeleteAll(); - nd_tdof_list.Reserve(nd_tdofs); - for (int i = 0; i < nd_tdof_marker.Size(); i++) - { - if (nd_tdof_marker[i]) + double la = 0.0, lb = 0.0; + int da = -1, db = -1; + for (int d = 0; d < dim; d++) { - nd_tdof_list.Append(i); - } - } - h1_tdof_list.DeleteAll(); - h1_tdof_list.Reserve(h1_tdofs); - for (int i = 0; i < h1_tdof_marker.Size(); i++) - { - if (h1_tdof_marker[i]) - { - h1_tdof_list.Append(i); + double diff = bbmax(d) - bbmin(d); + if (diff > la) + { + lb = la; + la = diff; + db = da; + da = d; + } + else if (diff > lb) + { + lb = diff; + db = d; + } } - } -} + MFEM_VERIFY(da >= 0 && db >= 0 && da != db, + "Unexpected wave port geometry for normalization!"); + double ca = 0.5 * (bbmax[da] + bbmin[da]), cb = 0.5 * (bbmax[db] + bbmin[db]); -void WavePortData::GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0) -{ - // Initial space chosen as such that B v₀ = y₀, with y₀ = [y₀ₜ, 0, ... 0]ᵀ ⟂ null(A) - // (with Aₜₜ nonsingular). See Lee, Sun, and Cendes, 1991 for reference. - // Note: When the eigenvalue solver uses a standard ℓ²-inner product instead of B-inner - // product(since we use a general non-Hermitian solver due to complex symmetric B), then - // we just use v0 = y0 directly. 
- MFEM_VERIFY(y0.GetSize() == nt + nn, "Invalid vector size!"); - y0.SetRandomReal(); - PetscScalar *py0 = y0.GetArray(); - // for (int i = 0; i < nt; i++) { py0[i] = 1.0; } - for (int i = nt; i < nt + nn; i++) - { - py0[i] = 0.0; + auto TDirection = [da, db, ca, cb, dim](const Vector &x, Vector &f) + { + MFEM_ASSERT(x.Size() == dim, + "Invalid dimension mismatch for wave port mode normalization!"); + f.SetSize(dim); + if (x[da] >= ca && x[db] >= cb) + { + f = 1.0; + } + else + { + f = 0.0; + } + }; + mfem::VectorFunctionCoefficient tfunc(dim, TDirection); + port_S0t = std::make_unique(port_nd_fespace.get()); + port_S0t->ProjectCoefficient(tfunc); } - y0.RestoreArray(py0); } -std::complex WavePortData::Solve(petsc::PetscParVector &y0, - petsc::PetscParVector &e0, - petsc::PetscParVector &e, - petsc::PetscScatter &scatter) +WavePortData::~WavePortData() { - double eig[2]; - if (A) // Only on root + if (port_comm != MPI_COMM_NULL) { - // The y0 and e0 vectors are still parallel vectors, but with all data on root. We want - // true sequential vectors. - PetscScalar *pe0 = e0.GetArray(); - petsc::PetscParVector e0s(e0.GetSize(), pe0); - - // Set starting vector. - { - PetscScalar *py0 = y0.GetArray(); - petsc::PetscParVector y0s(y0.GetSize(), py0); - eigen->SetInitialSpace(y0s); - y0.RestoreArray(py0); - } - -#if 0 - // Alternatively, use B-orthogonal initial space. Probably want to call SetBMat for - // the eigensolver in this case. - { - PetscScalar *py0 = y0.GetArray(); - petsc::PetscParVector y0s(y0.GetSize(), py0); - petsc::PetscParVector v0s(y0s); - ksp->Mult(y0s, v0s); - eigen->SetInitialSpace(v0s); - y0.RestoreArray(py0); - } -#endif - - // Solve (operators have been set in constructor). 
- int num_conv = 0; - eigen->SetOperators(*A, *B, EigenSolverBase::ScaleType::NONE); - num_conv = eigen->Solve(); - MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); - eigen->GetEigenvalue(mode_idx - 1, eig[0], eig[1]); - eigen->GetEigenvector(mode_idx - 1, e0s); - e0.RestoreArray(pe0); + MPI_Comm_free(&port_comm); } - - // Scatter the result to all processors. - scatter.Reverse(e0, e); - Mpi::Broadcast(2, eig, 0, e.GetComm()); - return {eig[0], eig[1]}; } void WavePortData::Initialize(double omega) @@ -616,121 +796,87 @@ void WavePortData::Initialize(double omega) // Use pre-computed matrices to construct and solve the generalized eigenvalue problem for // the desired wave port mode. - double theta2 = muepsmax * omega * omega; - if (A) + double theta2 = mu_eps_max * omega * omega; { - MFEM_VERIFY(A1 && A2 && B3 && B4 && A && B, - "Boundary mode eigenvalue problem operators uninitialized for solve!"); - A->Scale(0.0); - A->AXPY(1.0, *A1, petsc::PetscParMatrix::NNZStructure::SAME); - A->AXPY(-omega * omega, *A2, petsc::PetscParMatrix::NNZStructure::SUBSET); - B->Scale(0.0); - B->AXPY(1.0, *A, petsc::PetscParMatrix::NNZStructure::SAME); - B->AXPY(1.0 / theta2, *B3, petsc::PetscParMatrix::NNZStructure::SUBSET); - B->AXPY(-omega * omega / theta2, *B4, petsc::PetscParMatrix::NNZStructure::SUBSET); + auto &Pr = *static_cast(P->Real()); + Pr *= 0.0; + + auto &Ar = *static_cast(A->Real()); + auto &Br = *static_cast(B->Real()); + Ar.Add(-omega * omega + omega0 * omega0, *A2r); + Br.Add(-omega * omega + omega0 * omega0, *A2r); + Br.Add(1.0 / theta2 - ((omega0 == 0.0) ? 0.0 : 1.0 / (mu_eps_max * omega0 * omega0)), + *B3); + Pr.Add(1.0, Br); + + if (A2i) + { + auto &Ai = *static_cast(A->Imag()); + auto &Bi = *static_cast(B->Imag()); + Ai.Add(-omega * omega + omega0 * omega0, *A2i); + Bi.Add(-omega * omega + omega0 * omega0, *A2i); + Pr.Add(1.0, Bi); + } } // Configure and solve the eigenvalue problem for the desired boundary mode. 
- std::complex lambda = Solve(*y0, *e0, *e, *scatter); + std::complex lambda; + if (port_comm != MPI_COMM_NULL) + { + ksp->SetOperators(*B, *P); + eigen->SetOperators(*A, *B, EigenvalueSolver::ScaleType::NONE); + eigen->SetInitialSpace(v0); + int num_conv = eigen->Solve(); + MFEM_VERIFY(num_conv >= mode_idx, "Wave port eigensolver did not converge!"); + lambda = eigen->GetEigenvalue(mode_idx - 1); + // Mpi::Print(port_comm, " ... Wave port eigensolver error = {} (bkwd), {} (abs)\n", + // eigen->GetError(mode_idx - 1, EigenvalueSolver::ErrorType::BACKWARD), + // eigen->GetError(mode_idx - 1, EigenvalueSolver::ErrorType::ABSOLUTE)); + } + Mpi::Broadcast(1, &lambda, port_root, B3->GetComm()); // Extract the eigenmode solution and postprocess. The extracted eigenvalue is λ = - // Θ²/(Θ²-kₙ²). + // Θ² / (Θ² - kₙ²). MFEM_VERIFY(lambda.real() > 1.0 / (1.0 - 1.0e-2), "Computed wave port mode is or is very close to being evanescent " << "(λ = " << lambda << ")!"); kn0 = std::sqrt(theta2 - theta2 / lambda); omega0 = omega; - dynamic_cast(*nxH0r_func).SetFrequency(omega0, kn0); - dynamic_cast(*nxH0i_func).SetFrequency(omega0, kn0); - - mfem::Vector etr(nd_attr_tdof_list.Size()), eti(nd_attr_tdof_list.Size()), - enr(h1_attr_tdof_list.Size()), eni(h1_attr_tdof_list.Size()); - MFEM_VERIFY(e->GetSize() == etr.Size() + enr.Size(), - "Unexpected vector size in wave port eigenmode solver!"); - e->GetToVectors(etr, eti, 0, nd_attr_tdof_list.Size()); - e->GetToVectors(enr, eni, nd_attr_tdof_list.Size(), - nd_attr_tdof_list.Size() + h1_attr_tdof_list.Size()); - - // Re-expand from restricted boundary dofs to true dofs and transform back to true - // electric field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ. 
- auto &nd_fespace = *E0t->ParFESpace(); - auto &h1_fespace = *E0n->ParFESpace(); - mfem::Vector E0tr(nd_fespace.GetTrueVSize()), E0ti(nd_fespace.GetTrueVSize()), - E0nr(h1_fespace.GetTrueVSize()), E0ni(h1_fespace.GetTrueVSize()); - E0tr = 0.0; - E0ti = 0.0; - E0nr = 0.0; - E0ni = 0.0; - std::complex ookn = 1.0 / kn0; - for (int i = 0; i < nd_attr_tdof_list.Size(); i++) - { - E0tr(nd_attr_tdof_list[i]) = ookn.real() * etr(i) - ookn.imag() * eti(i); - E0ti(nd_attr_tdof_list[i]) = ookn.imag() * etr(i) + ookn.real() * eti(i); - } - for (int i = 0; i < h1_attr_tdof_list.Size(); i++) - { - E0nr(h1_attr_tdof_list[i]) = -eni(i); - E0ni(h1_attr_tdof_list[i]) = enr(i); - } - E0t->real().SetFromTrueDofs(E0tr); // Parallel distribute - E0t->imag().SetFromTrueDofs(E0ti); - E0n->real().SetFromTrueDofs(E0nr); - E0n->imag().SetFromTrueDofs(E0ni); + static_cast *>(port_nxH0r_func.get()) + ->SetFrequency(omega0, kn0); + static_cast *>(port_nxH0i_func.get()) + ->SetFrequency(omega0, kn0); - // Normalize grid functions to a chosen polarization direction and unit power, |E x H⋆| ⋅ - // n, integrated over the port surface (+n is the direction of propagation). The n x H - // coefficients are updated implicitly as the only store references to the Et, En grid - // functions as well as kₙ, ω. - { - // Choose a (rather arbitrary) sign constraint: @ t = 0, 1ᵀ E > 0 when integrated over - // the port surface. This at least makes results for the same port consistent between - // frequencies/meshes. 
- mfem::Vector ones(nd_fespace.GetParMesh()->SpaceDimension()); - ones = 1.0; - mfem::VectorConstantCoefficient tdir(ones); - mfem::ConstantCoefficient ndir(1.0); - mfem::ParLinearForm sut(&nd_fespace), sun(&h1_fespace); - sut.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(tdir), attr_marker); - sun.AddBoundaryIntegrator(new BoundaryLFIntegrator(ndir), attr_marker); - sut.UseFastAssembly(true); - sun.UseFastAssembly(true); - sut.Assemble(); - sun.Assemble(); - if (sut(E0t->real()) + sun(E0n->real()) < 0.0) - { - E0t->real().Neg(); // This updates the n x H coefficients depending on Et, En - E0t->imag().Neg(); - E0n->real().Neg(); - E0n->imag().Neg(); - } + // Separate the computed field out into eₜ and eₙ and transform back to true + // electric field variables: Eₜ = eₜ/kₙ and Eₙ = ieₙ. + if (port_comm != MPI_COMM_NULL) + { + Vector e0tr, e0ti, e0nr, e0ni; + eigen->GetEigenvector(mode_idx - 1, e0); + e0tr.MakeRef(e0.Real(), 0, e0t.Size()); + e0nr.MakeRef(e0.Real(), e0t.Size(), e0n.Size()); + e0ti.MakeRef(e0.Imag(), 0, e0t.Size()); + e0ni.MakeRef(e0.Imag(), e0t.Size(), e0n.Size()); + e0t.Real() = e0tr; + e0t.Imag() = e0ti; + e0n.Real() = e0nr; + e0n.Imag() = e0ni; + e0t *= 1.0 / kn0; + e0n *= 1i; } + else { - // |E x H⋆| ⋅ n = |E ⋅ (-n x H⋆)| - sr = std::make_unique(&nd_fespace); - si = std::make_unique(&nd_fespace); - sr->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0r_func), attr_marker); - si->AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(*nxH0i_func), attr_marker); - sr->UseFastAssembly(true); - si->UseFastAssembly(true); - sr->Assemble(); - si->Assemble(); - std::complex s0(-(*sr)(E0t->real()) - (*si)(E0t->imag()), - -(*sr)(E0t->imag()) + (*si)(E0t->real())); - double scale = 1.0 / std::sqrt(std::abs(s0)); - E0t->real() *= scale; // This updates the n x H coefficients depending on Et, En too - E0t->imag() *= scale; - E0n->real() *= scale; - E0n->imag() *= scale; - *sr *= scale; // Update linear forms for postprocessing - *si
*= scale; + MFEM_ASSERT(e0.Size() == 0 && e0t.Size() == 0 && e0n.Size() == 0, + "Unexpected non-empty port FE space in wave port boundary mode solve!"); } - - // This parallel communication is not required since wave port boundaries are true - // one-sided boundaries. - // E0t->real().ExchangeFaceNbrData(); // Ready for parallel comm on shared faces - // E0t->imag().ExchangeFaceNbrData(); // for n x H coefficients evaluation - // E0n->real().ExchangeFaceNbrData(); - // E0n->imag().ExchangeFaceNbrData(); + port_E0t->real().SetFromTrueDofs(e0t.Real()); // Parallel distribute + port_E0t->imag().SetFromTrueDofs(e0t.Imag()); + port_E0n->real().SetFromTrueDofs(e0n.Real()); + port_E0n->imag().SetFromTrueDofs(e0n.Imag()); + + // Normalize the mode for a chosen polarization direction and unit power, |E x H⋆| ⋅ n, + // integrated over the port surface (+n is the direction of propagation). + NormalizeWithSign(*port_S0t, *port_E0t, *port_E0n, *port_sr, *port_si); } double WavePortData::GetExcitationPower() const @@ -744,29 +890,37 @@ std::complex WavePortData::GetSParameter(mfem::ParComplexGridFunction &E { // Compute port S-parameter, or the projection of the field onto the port mode: // (E x H_inc⋆) ⋅ n = E ⋅ (-n x H_inc⋆), integrated over the port surface. 
- return {-(*sr)(E.real()) - (*si)(E.imag()), -(*sr)(E.imag()) + (*si)(E.real())}; + mfem::ParComplexGridFunction port_E(port_nd_fespace.get()); + port_nd_transfer->Transfer(E.real(), port_E.real()); + port_nd_transfer->Transfer(E.imag(), port_E.imag()); + std::complex dot(-((*port_sr) * port_E.real()) - ((*port_si) * port_E.imag()), + -((*port_sr) * port_E.imag()) + ((*port_si) * port_E.real())); + Mpi::GlobalSum(1, &dot, port_nd_fespace->GetComm()); + return dot; } std::complex WavePortData::GetPower(mfem::ParComplexGridFunction &E, mfem::ParComplexGridFunction &B, - const MaterialOperator &mat_op, - const std::map &local_to_shared) const + const MaterialOperator &mat_op) const { // Compute port power, (E x H) ⋅ n = E ⋅ (-n x H), integrated over the port surface // using the computed E and H = μ⁻¹ B fields. The linear form is reconstructed from // scratch each time due to changing H. The BdrCurrentVectorCoefficient computes -n x H, // where n is an outward normal. auto &nd_fespace = *E.ParFESpace(); - BdrCurrentVectorCoefficient nxHr_func(B.real(), mat_op, local_to_shared); - BdrCurrentVectorCoefficient nxHi_func(B.imag(), mat_op, local_to_shared); - mfem::ParLinearForm pr(&nd_fespace), pi(&nd_fespace); + BdrCurrentVectorCoefficient nxHr_func(B.real(), mat_op); + BdrCurrentVectorCoefficient nxHi_func(B.imag(), mat_op); + mfem::LinearForm pr(&nd_fespace), pi(&nd_fespace); pr.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(nxHr_func), attr_marker); pi.AddBoundaryIntegrator(new VectorFEBoundaryLFIntegrator(nxHi_func), attr_marker); - pr.UseFastAssembly(true); - pi.UseFastAssembly(true); + pr.UseFastAssembly(false); + pi.UseFastAssembly(false); pr.Assemble(); pi.Assemble(); - return {pr(E.real()) + pi(E.imag()), pr(E.imag()) - pi(E.real())}; + std::complex dot(-(pr * E.real()) - (pi * E.imag()), + -(pr * E.imag()) + (pi * E.real())); + Mpi::GlobalSum(1, &dot, nd_fespace.GetComm()); + return dot; } WavePortOperator::WavePortOperator(const IoData &iod, const 
MaterialOperator &mat, @@ -841,7 +995,7 @@ void WavePortOperator::SetUpBoundaryProperties(const IoData &iodata, // Set up wave port data structures. for (const auto &[idx, data] : iodata.boundaries.waveport) { - ports.try_emplace(idx, data, mat_op, dbc_marker, nd_fespace, h1_fespace); + ports.try_emplace(idx, data, mat_op, nd_fespace, h1_fespace, dbc_marker); } MFEM_VERIFY( ports.empty() || iodata.problem.type == config::ProblemData::Type::DRIVEN, @@ -951,16 +1105,9 @@ void WavePortOperator::Initialize(double omega) { if (first) { - // Print header at first solve. - if (data.GetA() && data.GetB()) - { - Mpi::Print(" Number of global unknowns for port {:d}: {}\n", idx, - data.GetA()->GetGlobalNumRows()); - Mpi::Print(" A: NNZ = {:d}, norm = {:e}\n", data.GetA()->NNZ(), - data.GetA()->NormF()); - Mpi::Print(" B: NNZ = {:d}, norm = {:e}\n", data.GetB()->NNZ(), - data.GetB()->NormF()); - } + Mpi::Print(" Number of global unknowns for port {:d}:\n" + " H1: {:d}, ND: {:d}\n", + idx, data.GlobalTrueH1Size(), data.GlobalTrueNDSize()); } double k0 = 1.0 / iodata.DimensionalizeValue(IoData::ValueType::LENGTH, 1.0); Mpi::Print(" Port {:d}, mode {:d}: kₙ = {:.3e}{:+.3e}i m⁻¹\n", idx, @@ -981,11 +1128,11 @@ void WavePortOperator::AddExtraSystemBdrCoefficients(double omega, Initialize(omega); for (auto &[idx, data] : ports) { - fbi.AddCoefficient( - std::make_unique< - MaterialPropertyCoefficient>( - mat_op, data.GetPropagationConstant().real()), - data.GetMarker()); + constexpr auto MatType = MaterialPropertyType::INV_PERMEABILITY; + constexpr auto ElemType = MeshElementType::BDR_ELEMENT; + fbi.AddCoefficient(std::make_unique>( + mat_op, data.GetPropagationConstant().real()), + data.GetMarker()); } } @@ -1003,10 +1150,10 @@ void WavePortOperator::AddExcitationBdrCoefficients(double omega, SumVectorCoeff continue; } fbr.AddCoefficient(std::make_unique( - 2.0 * omega, *data.GetModeCoefficientImag()), + 2.0 * omega, data.GetModeCoefficientImag()), data.GetMarker()); 
fbi.AddCoefficient(std::make_unique( - -2.0 * omega, *data.GetModeCoefficientReal()), + -2.0 * omega, data.GetModeCoefficientReal()), data.GetMarker()); } } diff --git a/palace/models/waveportoperator.hpp b/palace/models/waveportoperator.hpp index 1769ffb74..f33ec844e 100644 --- a/palace/models/waveportoperator.hpp +++ b/palace/models/waveportoperator.hpp @@ -8,9 +8,10 @@ #include #include #include -#include "linalg/eigen.hpp" +#include "linalg/eps.hpp" #include "linalg/ksp.hpp" -#include "linalg/petsc.hpp" +#include "linalg/operator.hpp" +#include "linalg/vector.hpp" namespace palace { @@ -37,57 +38,53 @@ class WavePortData int mode_idx; double d_offset; - // Marker for all boundary attributes making up this port boundary. Mutable because - // some MFEM API calls are not const correct. + // Attribute list and marker for all boundary attributes making up this port boundary. + // Mutable because some MFEM API calls are not const correct. + mfem::Array attr_list; mutable mfem::Array attr_marker; - // Lists of non-essential true degrees of freedom associated with the port boundary. - mfem::Array nd_attr_tdof_list, h1_attr_tdof_list; + // SubMesh data structures to define finite element spaces and grid functions on the + // SubMesh corresponding to this port boundary. + std::unique_ptr port_mesh; + std::unique_ptr port_nd_fec, port_h1_fec; + std::unique_ptr port_nd_fespace, port_h1_fespace; + std::unique_ptr port_nd_transfer, port_h1_transfer; // Operator storage for repeated boundary mode eigenvalue problem solves. - std::unique_ptr A, B, A1, A2, B3, B4; - std::unique_ptr e, e0, y0; - std::unique_ptr scatter; - double muepsmax; + double mu_eps_max; + std::unique_ptr A2r, A2i, B3; + std::unique_ptr A, B, P; + ComplexVector v0, e0, e0t, e0n; + + // Eigenvalue solver for boundary modes. 
+ MPI_Comm port_comm; + int port_root; + std::unique_ptr eigen; + std::unique_ptr ksp; // Grid functions storing the last computed electric field mode on the port and the - // associated propagation constant. - std::unique_ptr E0t, E0n; + // associated propagation constant. Also the coefficient for the incident port mode + // (n x H_inc) computed from the electric field mode. + std::unique_ptr port_E0t, port_E0n; + std::unique_ptr port_nxH0r_func, port_nxH0i_func; + std::unique_ptr port_sr, port_si; + std::unique_ptr port_S0t; std::complex kn0; double omega0; - // Coefficients storing the incident port mode (n x H_inc) and linear forms for - // postprocessing integrated quantities on the port. - std::unique_ptr nxH0r_func, nxH0i_func; - std::unique_ptr sr, si; - - // Eigenvalue solver for boundary modes. - std::unique_ptr eigen; - std::unique_ptr ksp; - - // Helper function to get true degrees of freedom on the port. - void GetTrueDofs(const mfem::Array &dbc_marker, - mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace, mfem::Array &nd_tdof_list, - mfem::Array &h1_tdof_list); - - // Configure and solve the linear eigenvalue problem for the boundary mode. 
- void GetInitialSpace(int nt, int nn, petsc::PetscParVector &y0); - std::complex Solve(petsc::PetscParVector &y0, petsc::PetscParVector &e0, - petsc::PetscParVector &e, petsc::PetscScatter &scatter); - public: WavePortData(const config::WavePortData &data, const MaterialOperator &mat_op, - const mfem::Array &dbc_marker, mfem::ParFiniteElementSpace &nd_fespace, - mfem::ParFiniteElementSpace &h1_fespace); + mfem::ParFiniteElementSpace &nd_fespace, + mfem::ParFiniteElementSpace &h1_fespace, const mfem::Array &dbc_marker); + ~WavePortData(); const mfem::Array &GetMarker() const { return attr_marker; } mfem::Array &GetMarker() { return attr_marker; } void Initialize(double omega); - const petsc::PetscParMatrix *GetA() const { return A.get(); } - const petsc::PetscParMatrix *GetB() const { return B.get(); } + HYPRE_BigInt GlobalTrueNDSize() const { return port_nd_fespace->GlobalTrueVSize(); } + HYPRE_BigInt GlobalTrueH1Size() const { return port_h1_fespace->GlobalTrueVSize(); } std::complex GetPropagationConstant() const { return kn0; } double GetOperatingFrequency() const { return omega0; } @@ -96,14 +93,10 @@ class WavePortData int GetModeIndex() const { return mode_idx; } double GetOffsetDistance() const { return d_offset; } - const std::unique_ptr &GetModeCoefficientReal() const - { - return nxH0r_func; - } - const std::unique_ptr &GetModeCoefficientImag() const - { - return nxH0i_func; - } + const mfem::VectorCoefficient &GetModeCoefficientReal() const { return *port_nxH0r_func; } + mfem::VectorCoefficient &GetModeCoefficientReal() { return *port_nxH0r_func; } + const mfem::VectorCoefficient &GetModeCoefficientImag() const { return *port_nxH0i_func; } + mfem::VectorCoefficient &GetModeCoefficientImag() { return *port_nxH0i_func; } std::complex GetCharacteristicImpedance() const { @@ -121,8 +114,7 @@ class WavePortData std::complex GetSParameter(mfem::ParComplexGridFunction &E) const; std::complex GetPower(mfem::ParComplexGridFunction &E, 
mfem::ParComplexGridFunction &B, - const MaterialOperator &mat_op, - const std::map &local_to_shared) const; + const MaterialOperator &mat_op) const; std::complex GetVoltage(mfem::ParComplexGridFunction &E) const { MFEM_ABORT("GetVoltage is not yet implemented for wave port boundaries!"); @@ -185,4 +177,4 @@ class WavePortOperator } // namespace palace -#endif // PALACE_MODELS_WAVE_PORT_OPERATOR_HPP \ No newline at end of file +#endif // PALACE_MODELS_WAVE_PORT_OPERATOR_HPP diff --git a/palace/utils/configfile.cpp b/palace/utils/configfile.cpp index 2fbf74c6b..638788cd9 100644 --- a/palace/utils/configfile.cpp +++ b/palace/utils/configfile.cpp @@ -198,8 +198,7 @@ void RefinementData::SetUp(json &model) "configuration file!"); MFEM_VERIFY(it->find("Levels") != it->end(), "Missing \"Boxes\" refinement region \"Levels\" in configuration file!"); - boxlist.emplace_back(); - BoxRefinementData &data = boxlist.back(); + BoxRefinementData &data = boxlist.emplace_back(); data.ref_levels = it->at("Levels"); // Required std::vector bx = xlim->get>(); // Required @@ -267,8 +266,7 @@ void RefinementData::SetUp(json &model) MFEM_VERIFY( it->find("Levels") != it->end(), "Missing \"Spheres\" refinement region \"Levels\" in configuration file!"); - spherelist.emplace_back(); - SphereRefinementData &data = spherelist.back(); + SphereRefinementData &data = spherelist.emplace_back(); data.ref_levels = it->at("Levels"); // Required data.r = it->at("Radius"); // Required data.center = ctr->get>(); // Required @@ -345,8 +343,7 @@ void MaterialDomainData::SetUp(json &domains) MFEM_VERIFY( it->find("Attributes") != it->end(), "Missing \"Attributes\" list for \"Materials\" domain in configuration file!"); - vecdata.emplace_back(); - MaterialData &data = vecdata.back(); + MaterialData &data = vecdata.emplace_back(); data.attributes = it->at("Attributes").get>(); // Required data.mu_r = ParseSymmetricMatrixData(*it, "Permeability", data.mu_r); data.epsilon_r = ParseSymmetricMatrixData(*it, 
"Permittivity", data.epsilon_r); @@ -663,8 +660,7 @@ void ConductivityBoundaryData::SetUp(json &boundaries) MFEM_VERIFY( it->find("Conductivity") != it->end(), "Missing \"Conductivity\" boundary \"Conductivity\" in configuration file!"); - vecdata.emplace_back(); - ConductivityData &data = vecdata.back(); + ConductivityData &data = vecdata.emplace_back(); data.attributes = it->at("Attributes").get>(); // Required data.sigma = it->at("Conductivity"); // Required data.mu_r = it->value("Permeability", data.mu_r); @@ -704,8 +700,7 @@ void ImpedanceBoundaryData::SetUp(json &boundaries) MFEM_VERIFY( it->find("Attributes") != it->end(), "Missing \"Attributes\" list for \"Impedance\" boundary in configuration file!"); - vecdata.emplace_back(); - ImpedanceData &data = vecdata.back(); + ImpedanceData &data = vecdata.emplace_back(); data.attributes = it->at("Attributes").get>(); // Required data.Rs = it->value("Rs", data.Rs); data.Ls = it->value("Ls", data.Ls); @@ -793,8 +788,7 @@ void LumpedPortBoundaryData::SetUp(json &boundaries) MFEM_VERIFY(elem_it->find("Attributes") != elem_it->end(), "Missing \"Attributes\" list for \"LumpedPort\" or \"Terminal\" " "boundary element in configuration file!"); - data.nodes.emplace_back(); - LumpedPortData::Node &node = data.nodes.back(); + LumpedPortData::Node &node = data.nodes.emplace_back(); node.attributes = elem_it->at("Attributes").get>(); // Required node.direction = elem_it->value("Direction", node.direction); if (terminal == boundaries.end()) @@ -933,8 +927,7 @@ void SurfaceCurrentBoundaryData::SetUp(json &boundaries) elem_it->find("Attributes") != elem_it->end(), "Missing \"Attributes\" list for \"SurfaceCurrent\" boundary element in " "configuration file!"); - data.nodes.emplace_back(); - SurfaceCurrentData::Node &node = data.nodes.back(); + SurfaceCurrentData::Node &node = data.nodes.emplace_back(); node.attributes = it->at("Attributes").get>(); // Required node.direction = it->value("Direction", node.direction); 
CheckDirection(node.direction, true); @@ -1102,8 +1095,7 @@ void InterfaceDielectricPostData::SetUp(json &postpro) MFEM_VERIFY(elem_it->find("Attributes") != elem_it->end(), "Missing \"Attributes\" list for \"Dielectric\" boundary element in " "configuration file!"); - data.nodes.emplace_back(); - InterfaceDielectricData::Node &node = data.nodes.back(); + InterfaceDielectricData::Node &node = data.nodes.emplace_back(); node.attributes = elem_it->at("Attributes").get>(); // Required node.side = it->value("Side", node.side); if (!node.side.empty()) @@ -1513,16 +1505,10 @@ NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::Type, NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::KspType, {{LinearSolverData::KspType::INVALID, nullptr}, {LinearSolverData::KspType::CG, "CG"}, - {LinearSolverData::KspType::CGSYM, "CGSYM"}, - {LinearSolverData::KspType::FCG, "FCG"}, {LinearSolverData::KspType::MINRES, "MINRES"}, {LinearSolverData::KspType::GMRES, "GMRES"}, {LinearSolverData::KspType::FGMRES, "FGMRES"}, - {LinearSolverData::KspType::BCGS, "BCGS"}, - {LinearSolverData::KspType::BCGSL, "BCGSL"}, - {LinearSolverData::KspType::FBCGS, "FBCGS"}, - {LinearSolverData::KspType::QMRCGS, "QMRCGS"}, - {LinearSolverData::KspType::TFQMR, "TFQMR"}, + {LinearSolverData::KspType::BICGSTAB, "BiCGSTAB"}, {LinearSolverData::KspType::DEFAULT, "Default"}}) NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::SideType, {{LinearSolverData::SideType::INVALID, nullptr}, @@ -1546,6 +1532,11 @@ NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::CompressionType, {LinearSolverData::CompressionType::BLR_HODLR, "BLR-HODLR"}, {LinearSolverData::CompressionType::ZFP_BLR_HODLR, "ZFP-BLR-HODLR"}}) +NLOHMANN_JSON_SERIALIZE_ENUM(LinearSolverData::OrthogType, + {{LinearSolverData::OrthogType::INVALID, nullptr}, + {LinearSolverData::OrthogType::MGS, "MGS"}, + {LinearSolverData::OrthogType::CGS, "CGS"}, + {LinearSolverData::OrthogType::CGS2, "CGS2"}}) void LinearSolverData::SetUp(json &solver) { @@ -1563,22 +1554,22 @@ void 
LinearSolverData::SetUp(json &solver) tol = linear->value("Tol", tol); max_it = linear->value("MaxIts", max_it); max_size = linear->value("MaxSize", max_size); - orthog_mgs = linear->value("UseMGS", orthog_mgs); - orthog_cgs2 = linear->value("UseCGS2", orthog_cgs2); - ksp_initial_guess = linear->value("UseInitialGuess", ksp_initial_guess); - ksp_piped = linear->value("UseKSPPiped", ksp_piped); + initial_guess = linear->value("UseInitialGuess", initial_guess); // Preconditioner-specific options - mat_gmg = linear->value("UseGMG", mat_gmg); - mat_lor = linear->value("UseLOR", mat_lor); - mat_shifted = linear->value("UsePCShifted", mat_shifted); + mat_pa = linear->value("UsePartialAssembly", mat_pa); + pc_mat_lor = linear->value("UseLowOrderRefined", pc_mat_lor); + pc_mat_shifted = linear->value("UsePCMatShifted", pc_mat_shifted); + pc_side_type = linear->value("PCSide", pc_side_type); + MFEM_VERIFY(pc_side_type != LinearSolverData::SideType::INVALID, + "Invalid value for config[\"Linear\"][\"PCSide\"] in configuration file!"); + + pc_mg = linear->value("UseMultigrid", pc_mg); + mg_smooth_aux = linear->value("MGAuxiliarySmoother", mg_smooth_aux); mg_cycle_it = linear->value("MGCycleIts", mg_cycle_it); mg_smooth_it = linear->value("MGSmoothIts", mg_smooth_it); mg_smooth_order = linear->value("MGSmoothOrder", mg_smooth_order); - pc_side_type = linear->value("PrecondSide", pc_side_type); - MFEM_VERIFY( - pc_side_type != LinearSolverData::SideType::INVALID, - "Invalid value for config[\"Linear\"][\"PrecondSide\"] in configuration file!"); + sym_fact_type = linear->value("Reordering", sym_fact_type); MFEM_VERIFY( sym_fact_type != LinearSolverData::SymFactType::INVALID, @@ -1593,27 +1584,33 @@ void LinearSolverData::SetUp(json &solver) linear->value("STRUMPACKLossyPrecision", strumpack_lossy_precision); strumpack_butterfly_l = linear->value("STRUMPACKButterflyLevels", strumpack_butterfly_l); superlu_3d = linear->value("SuperLU3D", superlu_3d); + ams_vector = 
linear->value("AMSVector", ams_vector); + divfree_tol = linear->value("DivFreeTol", divfree_tol); divfree_max_it = linear->value("DivFreeMaxIts", divfree_max_it); + gs_orthog_type = linear->value("GSOrthogonalization", gs_orthog_type); + MFEM_VERIFY(gs_orthog_type != LinearSolverData::OrthogType::INVALID, + "Invalid value for config[\"Linear\"][\"GSOrthogonalization\"] in " + "configuration file!"); + // Cleanup linear->erase("Type"); linear->erase("KSPType"); linear->erase("Tol"); linear->erase("MaxIts"); linear->erase("MaxSize"); - linear->erase("UseMGS"); - linear->erase("UseCGS2"); linear->erase("UseInitialGuess"); - linear->erase("UseKSPPiped"); - linear->erase("UseGMG"); - linear->erase("UseLOR"); - linear->erase("UsePCShifted"); + linear->erase("UsePartialAssembly"); + linear->erase("UseLowOrderRefined"); + linear->erase("UsePCMatShifted"); + linear->erase("PCSide"); + linear->erase("UseMultigrid"); + linear->erase("MGAuxiliarySmoother"); linear->erase("MGCycleIts"); linear->erase("MGSmoothIts"); linear->erase("MGSmoothOrder"); - linear->erase("PrecondSide"); linear->erase("Reordering"); linear->erase("STRUMPACKCompressionType"); linear->erase("STRUMPACKCompressionTol"); @@ -1623,6 +1620,7 @@ void LinearSolverData::SetUp(json &solver) linear->erase("AMSVector"); linear->erase("DivFreeTol"); linear->erase("DivFreeMaxIts"); + linear->erase("GSOrthogonalization"); MFEM_VERIFY(linear->empty(), "Found an unsupported configuration file keyword under \"Linear\"!\n" << linear->dump(2)); @@ -1633,17 +1631,16 @@ void LinearSolverData::SetUp(json &solver) // std::cout << "Tol: " << tol << '\n'; // std::cout << "MaxIts: " << max_it << '\n'; // std::cout << "MaxSize: " << max_size << '\n'; - // std::cout << "UseMGS: " << orthog_mgs << '\n'; - // std::cout << "UseCGS2: " << orthog_cgs2 << '\n'; - // std::cout << "UseInitialGuess: " << ksp_initial_guess << '\n'; - // std::cout << "UseKSPPiped: " << ksp_piped << '\n'; - // std::cout << "UseGMG: " << mat_gmg << '\n'; - // 
std::cout << "UseLOR: " << mat_lor << '\n'; - // std::cout << "UsePCShifted: " << mat_shifted << '\n'; + // std::cout << "UseInitialGuess: " << initial_guess << '\n'; + // std::cout << "UsePartialAssembly: " << mat_pa << '\n'; + // std::cout << "UseLowOrderRefined: " << pc_mat_lor << '\n'; + // std::cout << "UsePCMatShifted: " << pc_mat_shifted << '\n'; + // std::cout << "PCSide: " << pc_side_type << '\n'; + // std::cout << "UseMultigrid: " << pc_mg << '\n'; + // std::cout << "MGAuxiliarySmoother: " << mg_smooth_aux << '\n'; // std::cout << "MGCycleIts: " << mg_cycle_it << '\n'; // std::cout << "MGSmoothIts: " << mg_smooth_it << '\n'; // std::cout << "MGSmoothOrder: " << mg_smooth_order << '\n'; - // std::cout << "PrecondSide: " << pc_side_type << '\n'; // std::cout << "Reordering: " << sym_fact_type << '\n'; // std::cout << "STRUMPACKCompressionType: " << strumpack_compression_type << '\n'; // std::cout << "STRUMPACKCompressionTol: " << strumpack_lr_tol << '\n'; @@ -1653,6 +1650,7 @@ void LinearSolverData::SetUp(json &solver) // std::cout << "AMSVector: " << ams_vector << '\n'; // std::cout << "DivFreeTol: " << divfree_tol << '\n'; // std::cout << "DivFreeMaxIts: " << divfree_max_it << '\n'; + // std::cout << "GSOrthogonalization: " << gs_orthog_type << '\n'; } void SolverData::SetUp(json &config) diff --git a/palace/utils/configfile.hpp b/palace/utils/configfile.hpp index b3e785565..345ebc4d6 100644 --- a/palace/utils/configfile.hpp +++ b/palace/utils/configfile.hpp @@ -716,16 +716,10 @@ struct LinearSolverData enum class KspType { CG, - CGSYM, - FCG, MINRES, GMRES, FGMRES, - BCGS, - BCGSL, - FBCGS, - QMRCGS, - TFQMR, + BICGSTAB, DEFAULT, INVALID = -1 }; @@ -740,28 +734,36 @@ struct LinearSolverData // Maximum Krylov space dimension for GMRES/FGMRES iterative solvers. int max_size = -1; - // Enable modified Gram-Schmidt orthogonalization instead of classical for GMRES/FGMRES - // Krylov solvers and SLEPc eigenvalue solver. 
- bool orthog_mgs = false; - bool orthog_cgs2 = false; - // Reuse previous solution as initial guess for Krylov solvers. - int ksp_initial_guess = -1; + int initial_guess = -1; - // Enable pipelined Krylov solver variants to reduce blocking communications. - bool ksp_piped = false; - - // Enable hp-geometric multigrid coarsening, using the solver specified by the type member - // at the coarsest level. - bool mat_gmg = true; + // Enable partial assembly for operators. + bool mat_pa = false; // Enable low-order refined (LOR) preconditioner construction. Only available for meshes - // based on tensor elements. - bool mat_lor = false; + // based on tensor-product elements. + bool pc_mat_lor = false; // For frequency domain applications, precondition linear systems with a shifted matrix // (makes the preconditoner matrix SPD). - int mat_shifted = -1; + int pc_mat_shifted = -1; + + // Choose left or right preconditioning. + enum class SideType + { + RIGHT, + LEFT, + DEFAULT, + INVALID = -1 + }; + SideType pc_side_type = SideType::DEFAULT; + + // Enable hp-geometric multigrid coarsening, using the solver specified by the type member + // at the coarsest level. + bool pc_mg = true; + + // Use auxiliary space smoothers on geometric multigrid levels + int mg_smooth_aux = -1; // Number of iterations for preconditioners which support it. For multigrid, this is the // number of V-cycles per Krylov solver iteration. @@ -774,17 +776,7 @@ struct LinearSolverData // Order of polynomial smoothing for geometric multigrid. int mg_smooth_order = 4; - // Choose left or right preconditioning. - enum class SideType - { - RIGHT, - LEFT, - DEFAULT, - INVALID = -1 - }; - SideType pc_side_type = SideType::DEFAULT; - - // Choose left or right preconditioning. + // Specify details for symbolic factorization used by sparse direct solvers. 
enum class SymFactType { METIS, @@ -797,7 +789,7 @@ struct LinearSolverData SymFactType sym_fact_type = SymFactType::DEFAULT; // Low-rank and butterfly compression parameters for sparse direct solvers which support - // it. + // it (mainly STRUMPACK). enum class CompressionType { NONE, @@ -826,6 +818,17 @@ struct LinearSolverData // Maximum number of iterations for solving linear systems in divergence-free projector. int divfree_max_it = 100; + // Enable different variants of Gram-Schmidt orthogonalization for GMRES/FGMRES iterative + // solvers and SLEPc eigenvalue solver. + enum class OrthogType + { + MGS, + CGS, + CGS2, + INVALID = -1 + }; + OrthogType gs_orthog_type = OrthogType::MGS; + void SetUp(json &solver); }; diff --git a/palace/utils/geodata.cpp b/palace/utils/geodata.cpp index b3b571ed8..0ea43766b 100644 --- a/palace/utils/geodata.cpp +++ b/palace/utils/geodata.cpp @@ -8,7 +8,6 @@ #include #include #include -#include #include "utils/communication.hpp" #include "utils/filesystem.hpp" #include "utils/iodata.hpp" @@ -153,7 +152,7 @@ void RefineMesh(const IoData &iodata, std::vector max_region_ref_levels = sphere.ref_levels; } } - if (iodata.solver.linear.mat_gmg) + if (iodata.solver.linear.pc_mg) { mesh.reserve(1 + uniform_ref_levels + max_region_ref_levels); } @@ -191,7 +190,7 @@ void RefineMesh(const IoData &iodata, std::vector mesh[0]->MeshGenerator() & 8)) { // XX TODO: Region-based refinement won't work if the ParMesh has been constructed from - // a conforming mesh, but non-conforming refinement is needed. Unclear if the + // a conforming mesh, but nonconforming refinement is needed. Unclear if the // current mesh distribution scheme will work even for a conforming serial mesh // which is a NCMesh after Mesh::EnsureNCMesh is called. 
MFEM_ABORT("Region-based refinement is currently only supported for simplex meshes!"); @@ -1068,16 +1067,17 @@ std::map> CheckMesh(std::unique_ptr &orig_me if (orig_mesh->GetNodes()) { const mfem::GridFunction *nodes = orig_mesh->GetNodes(); - const mfem::FiniteElementSpace *fes = nodes->FESpace(); + const mfem::FiniteElementSpace *fespace = nodes->FESpace(); - mfem::Ordering::Type ordering = fes->GetOrdering(); - int order = fes->GetMaxElementOrder(); + mfem::Ordering::Type ordering = fespace->GetOrdering(); + int order = fespace->GetMaxElementOrder(); int sdim = orig_mesh->SpaceDimension(); - bool discont = dynamic_cast(fes->FEColl()) != nullptr; + bool discont = + dynamic_cast(fespace->FEColl()) != nullptr; new_mesh->SetCurvature(order, discont, sdim, ordering); mfem::GridFunction *new_nodes = new_mesh->GetNodes(); - const mfem::FiniteElementSpace *new_fes = new_nodes->FESpace(); + const mfem::FiniteElementSpace *new_fespace = new_nodes->FESpace(); // The element loop works because we know the mapping from old_mesh to new_mesh element // indices from the insertion order. 
@@ -1088,9 +1088,9 @@ std::map> CheckMesh(std::unique_ptr &orig_me { if (!elem_delete[e]) { - fes->GetElementVDofs(e, vdofs); + fespace->GetElementVDofs(e, vdofs); nodes->GetSubVector(vdofs, loc_vec); - new_fes->GetElementVDofs(te, new_vdofs); + new_fespace->GetElementVDofs(te, new_vdofs); new_nodes->SetSubVector(new_vdofs, loc_vec); te++; } diff --git a/palace/utils/iodata.cpp b/palace/utils/iodata.cpp index d766df879..74a84face 100644 --- a/palace/utils/iodata.cpp +++ b/palace/utils/iodata.cpp @@ -332,24 +332,26 @@ void IoData::CheckConfiguration() { solver.linear.max_size = solver.linear.max_it; } - if (solver.linear.ksp_initial_guess < 0) + if (solver.linear.initial_guess < 0) { if ((problem.type == config::ProblemData::Type::DRIVEN && solver.driven.adaptive_tol <= 0.0) || - problem.type == config::ProblemData::Type::TRANSIENT) + problem.type == config::ProblemData::Type::TRANSIENT || + problem.type == config::ProblemData::Type::ELECTROSTATIC || + problem.type == config::ProblemData::Type::MAGNETOSTATIC) { - // Default true only driven simulations without adaptive frequency sweep, or transient - // simulations. - solver.linear.ksp_initial_guess = 1; + // Default true only driven simulations without adaptive frequency sweep, transient + // simulations, or electrostatic or magnetostatics. + solver.linear.initial_guess = 1; } else { - solver.linear.ksp_initial_guess = 0; + solver.linear.initial_guess = 0; } } - if (solver.linear.mat_shifted < 0) + if (solver.linear.pc_mat_shifted < 0) { - solver.linear.mat_shifted = 0; // Default false for most cases + solver.linear.pc_mat_shifted = 0; // Default false for most cases if (problem.type == config::ProblemData::Type::DRIVEN) { #if defined(MFEM_USE_SUPERLU) || defined(MFEM_USE_STRUMPACK) || defined(MFEM_USE_MUMPS) @@ -360,10 +362,24 @@ void IoData::CheckConfiguration() #endif { // Default true only driven simulations using AMS. 
- solver.linear.mat_shifted = 1; + solver.linear.pc_mat_shifted = 1; } } } + if (solver.linear.mg_smooth_aux < 0) + { + if (problem.type == config::ProblemData::Type::ELECTROSTATIC || + problem.type == config::ProblemData::Type::MAGNETOSTATIC) + { + // Disable auxiliary space smoothing using distributive relaxation by default for + // problems which don't need it. + solver.linear.mg_smooth_aux = 0; + } + else + { + solver.linear.mg_smooth_aux = 1; + } + } } namespace diff --git a/palace/utils/prettyprint.hpp b/palace/utils/prettyprint.hpp index 21335cabb..7c547cd59 100644 --- a/palace/utils/prettyprint.hpp +++ b/palace/utils/prettyprint.hpp @@ -5,6 +5,7 @@ #define PALACE_UTILS_PRETTY_PRINT_HPP #include +#include #include #include #include "utils/communication.hpp" @@ -22,15 +23,15 @@ namespace internal constexpr std::size_t max_width = 60; template -inline std::size_t GetSize(const mfem::Array &v) +inline std::size_t GetSize(const T &v) { - return v.Size(); + return v.size(); } template -inline std::size_t GetSize(const std::vector &v) +inline std::size_t GetSize(const mfem::Array &v) { - return v.size(); + return v.Size(); } inline std::size_t PrePrint(MPI_Comm comm, std::size_t w, std::size_t wv, std::size_t lead) @@ -57,10 +58,13 @@ inline std::size_t PrePrint(MPI_Comm comm, std::size_t w, std::size_t wv, std::s // Fixed column width wrapped printing with range notation for the contents of a marker // array. -template -inline void PrettyPrintMarker(const T &data, const std::string &prefix = "", +template