From 77a2be903a6d9a33ae671bf9f069b36217415995 Mon Sep 17 00:00:00 2001 From: Sebastian Grimberg Date: Tue, 27 Jun 2023 11:26:38 -0700 Subject: [PATCH] Update MFEM commit and patches --- palace/deps/CMakeLists.txt | 6 +- .../patch/mfem/patch_bilinearform_marker.diff | 720 - .../deps/patch/mfem/patch_direct_solvers.diff | 4600 +- .../deps/patch/mfem/patch_hypre_blocks.diff | 41 - palace/deps/patch/mfem/patch_mesh_part.diff | 70 +- palace/deps/patch/mfem/patch_pa_libceed.diff | 28397 ----------- palace/deps/patch/mfem/patch_pa_prereq.diff | 41184 ---------------- palace/deps/patch/mfem/patch_submesh.diff | 197 +- 8 files changed, 1017 insertions(+), 74198 deletions(-) delete mode 100644 palace/deps/patch/mfem/patch_bilinearform_marker.diff delete mode 100644 palace/deps/patch/mfem/patch_hypre_blocks.diff delete mode 100644 palace/deps/patch/mfem/patch_pa_libceed.diff delete mode 100644 palace/deps/patch/mfem/patch_pa_prereq.diff diff --git a/palace/deps/CMakeLists.txt b/palace/deps/CMakeLists.txt index 90c0f4aa6..a9eece82a 100644 --- a/palace/deps/CMakeLists.txt +++ b/palace/deps/CMakeLists.txt @@ -19,7 +19,7 @@ set(PALACE_INTERNAL_EIGEN_URL "URL for internal Eigen build" ) set(PALACE_INTERNAL_MFEM_GIT_TAG - "0f5d34b2b490819789d6c85546e17724ff37f021" CACHE STRING # master @ 05/13/2023 + "6470d3a7b2edf868aace2b9454d95d124ff98173" CACHE STRING # master @ 06/26/2023 "Git tag for internal MFEM build" ) @@ -108,14 +108,10 @@ if(PALACE_WITH_INTERNAL_MFEM) # A number of patches to MFEM for our uses set(PALACE_MFEM_PATCH_FILES - # "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_bilinearform_marker.diff" "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_mesh_part.diff" "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_mesh_vis.diff" "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_submesh.diff" - "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_hypre_blocks.diff" "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_direct_solvers.diff" - "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_pa_prereq.diff" - "${CMAKE_CURRENT_SOURCE_DIR}/patch/mfem/patch_pa_libceed.diff" ) include(FetchContent) diff --git a/palace/deps/patch/mfem/patch_bilinearform_marker.diff b/palace/deps/patch/mfem/patch_bilinearform_marker.diff deleted file mode 100644 index 7263fe75d..000000000 --- a/palace/deps/patch/mfem/patch_bilinearform_marker.diff +++ /dev/null @@ -1,720 +0,0 @@ -diff --git a/fem/bilinearform.cpp b/fem/bilinearform.cpp -index fad9717aa..f2fdd5c60 100644 ---- a/fem/bilinearform.cpp -+++ b/fem/bilinearform.cpp -@@ -100,6 +100,7 @@ BilinearForm::BilinearForm (FiniteElementSpace * f, BilinearForm * bf, int ps) - - // Copy the pointers to the integrators - domain_integs = bf->domain_integs; -+ domain_integs_marker = bf->domain_integs_marker; - - boundary_integs = bf->boundary_integs; - boundary_integs_marker = bf->boundary_integs_marker; -@@ -425,7 +426,7 @@ void BilinearForm::Assemble(int skip_zeros) - - for (int i = 0; i < fes -> GetNE(); i++) - { -- int elem_attr = fes->GetMesh()->GetAttribute(i); -+ int elem_attr = mesh->GetAttribute(i); - doftrans = fes->GetElementVDofs(i, vdofs); - if (element_matrices) - { -@@ -436,8 +437,8 @@ void BilinearForm::Assemble(int skip_zeros) - elmat.SetSize(0); - for (int k = 0; k < domain_integs.Size(); k++) - { -- if ( domain_integs_marker[k] == NULL || -- (*(domain_integs_marker[k]))[elem_attr-1] == 1) -+ if (domain_integs_marker[k] == NULL || -+ (*(domain_integs_marker[k]))[elem_attr-1] == 1) - { - const FiniteElement &fe = *fes->GetFE(i); - eltrans = fes->GetElementTransformation(i); -@@ -1176,11 +1177,14 @@ MixedBilinearForm::MixedBilinearForm (FiniteElementSpace *tr_fes, - - // Copy the pointers to the integrators - domain_integs = mbf->domain_integs; -+ domain_integs_marker = mbf->domain_integs_marker; -+ - boundary_integs = mbf->boundary_integs; -+ boundary_integs_marker = mbf->boundary_integs_marker; -+ - trace_face_integs = mbf->trace_face_integs; -- boundary_trace_face_integs = mbf->boundary_trace_face_integs; - -- boundary_integs_marker = mbf->boundary_integs_marker; -+ boundary_trace_face_integs = mbf->boundary_trace_face_integs; - boundary_trace_face_integs_marker = mbf->boundary_trace_face_integs_marker; - - assembly = AssemblyLevel::LEGACY; -@@ -1303,6 +1307,14 @@ void MixedBilinearForm::GetBlocks(Array2D &blocks) const - void MixedBilinearForm::AddDomainIntegrator (BilinearFormIntegrator * bfi) - { - domain_integs.Append (bfi); -+ domain_integs_marker.Append(NULL); // NULL marker means apply everywhere -+} -+ -+void MixedBilinearForm::AddDomainIntegrator (BilinearFormIntegrator * bfi, -+ Array &elem_marker) -+{ -+ domain_integs.Append (bfi); -+ domain_integs_marker.Append(&elem_marker); - } - - void MixedBilinearForm::AddBoundaryIntegrator (BilinearFormIntegrator * bfi) -@@ -1337,7 +1349,7 @@ void MixedBilinearForm::AddBdrTraceFaceIntegrator(BilinearFormIntegrator *bfi, - boundary_trace_face_integs_marker.Append(&bdr_marker); - } - --void MixedBilinearForm::Assemble (int skip_zeros) -+void MixedBilinearForm::Assemble(int skip_zeros) - { - if (ext) - { -@@ -1359,8 +1371,20 @@ void MixedBilinearForm::Assemble (int skip_zeros) - - if (domain_integs.Size()) - { -+ for (int k = 0; k < domain_integs.Size(); k++) -+ { -+ if (domain_integs_marker[k] != NULL) -+ { -+ MFEM_VERIFY(domain_integs_marker[k]->Size() == -+ (mesh->attributes.Size() ? mesh->attributes.Max() : 0), -+ "invalid element marker for domain integrator #" -+ << k << ", counting from zero"); -+ } -+ } -+ - for (int i = 0; i < test_fes -> GetNE(); i++) - { -+ int elem_attr = mesh->GetAttribute(i); - dom_dof_trans = trial_fes -> GetElementVDofs (i, trial_vdofs); - ran_dof_trans = test_fes -> GetElementVDofs (i, test_vdofs); - eltrans = test_fes -> GetElementTransformation (i); -@@ -1369,10 +1393,14 @@ void MixedBilinearForm::Assemble (int skip_zeros) - elmat = 0.0; - for (int k = 0; k < domain_integs.Size(); k++) - { -- domain_integs[k] -> AssembleElementMatrix2 (*trial_fes -> GetFE(i), -- *test_fes -> GetFE(i), -- *eltrans, elemmat); -- elmat += elemmat; -+ if (domain_integs_marker[k] == NULL || -+ (*(domain_integs_marker[k]))[elem_attr-1] == 1) -+ { -+ domain_integs[k] -> AssembleElementMatrix2 (*trial_fes -> GetFE(i), -+ *test_fes -> GetFE(i), -+ *eltrans, elemmat); -+ elmat += elemmat; -+ } - } - if (ran_dof_trans || dom_dof_trans) - { -@@ -1895,41 +1923,56 @@ void DiscreteLinearOperator::Assemble(int skip_zeros) - return; - } - -- Array dom_vdofs, ran_vdofs; -- ElementTransformation *T; -+ ElementTransformation *eltrans; - DofTransformation * dom_dof_trans; - DofTransformation * ran_dof_trans; -- const FiniteElement *dom_fe, *ran_fe; -- DenseMatrix totelmat, elmat; -+ DenseMatrix elmat; -+ -+ Mesh *mesh = test_fes->GetMesh(); - - if (mat == NULL) - { - mat = new SparseMatrix(height, width); - } - -- if (domain_integs.Size() > 0) -+ if (domain_integs.Size()) - { -+ for (int k = 0; k < domain_integs.Size(); k++) -+ { -+ if (domain_integs_marker[k] != NULL) -+ { -+ MFEM_VERIFY(domain_integs_marker[k]->Size() == -+ (mesh->attributes.Size() ? mesh->attributes.Max() : 0), -+ "invalid element marker for domain integrator #" -+ << k << ", counting from zero"); -+ } -+ } -+ - for (int i = 0; i < test_fes->GetNE(); i++) - { -- dom_dof_trans = trial_fes->GetElementVDofs(i, dom_vdofs); -- ran_dof_trans = test_fes->GetElementVDofs(i, ran_vdofs); -- T = test_fes->GetElementTransformation(i); -- dom_fe = trial_fes->GetFE(i); -- ran_fe = test_fes->GetFE(i); -- -- domain_integs[0]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- totelmat); -- for (int j = 1; j < domain_integs.Size(); j++) -+ int elem_attr = mesh->GetAttribute(i); -+ dom_dof_trans = trial_fes->GetElementVDofs(i, trial_vdofs); -+ ran_dof_trans = test_fes->GetElementVDofs(i, test_vdofs); -+ eltrans = test_fes->GetElementTransformation(i); -+ -+ elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); -+ elmat = 0.0; -+ for (int k = 0; k < domain_integs.Size(); k++) - { -- domain_integs[j]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- elmat); -- totelmat += elmat; -+ if (domain_integs_marker[k] == NULL || -+ (*(domain_integs_marker[k]))[elem_attr-1] == 1) -+ { -+ domain_integs[k]->AssembleElementMatrix2(*trial_fes->GetFE(i), -+ *test_fes->GetFE(i), -+ *eltrans, elemmat); -+ elmat += elemmat; -+ } - } - if (ran_dof_trans || dom_dof_trans) - { -- TransformPrimal(ran_dof_trans, dom_dof_trans, totelmat); -+ TransformPrimal(ran_dof_trans, dom_dof_trans, elemmat); - } -- mat->SetSubMatrix(ran_vdofs, dom_vdofs, totelmat, skip_zeros); -+ mat->SetSubMatrix(test_vdofs, trial_vdofs, elemmat, skip_zeros); - } - } - -@@ -1938,21 +1981,20 @@ void DiscreteLinearOperator::Assemble(int skip_zeros) - const int nfaces = test_fes->GetMesh()->GetNumFaces(); - for (int i = 0; i < nfaces; i++) - { -- trial_fes->GetFaceVDofs(i, dom_vdofs); -- test_fes->GetFaceVDofs(i, ran_vdofs); -- T = test_fes->GetMesh()->GetFaceTransformation(i); -- dom_fe = trial_fes->GetFaceElement(i); -- ran_fe = test_fes->GetFaceElement(i); -- -- trace_face_integs[0]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- totelmat); -- for (int j = 1; j < trace_face_integs.Size(); j++) -+ trial_fes->GetFaceVDofs(i, trial_vdofs); -+ test_fes->GetFaceVDofs(i, test_vdofs); -+ eltrans = test_fes->GetMesh()->GetFaceTransformation(i); -+ -+ elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); -+ elmat = 0.0; -+ for (int k = 0; k < trace_face_integs.Size(); k++) - { -- trace_face_integs[j]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- elmat); -- totelmat += elmat; -+ trace_face_integs[k]->AssembleElementMatrix2(*trial_fes->GetFaceElement(i), -+ *test_fes->GetFaceElement(i), -+ *eltrans, elemmat); -+ elmat += elemmat; - } -- mat->SetSubMatrix(ran_vdofs, dom_vdofs, totelmat, skip_zeros); -+ mat->SetSubMatrix(test_vdofs, trial_vdofs, elmat, skip_zeros); - } - } - } -diff --git a/fem/bilinearform.hpp b/fem/bilinearform.hpp -index b23df9280..876bc1b17 100644 ---- a/fem/bilinearform.hpp -+++ b/fem/bilinearform.hpp -@@ -100,7 +100,7 @@ protected: - /// Includes all by default. - /// 0 - ignore attribute - /// 1 - include attribute -- Array*> domain_integs_marker; -+ Array*> domain_integs_marker; ///< Entries are not owned. - - /// Set of Boundary Integrators to be applied. - Array boundary_integs; -@@ -716,10 +716,13 @@ protected: - - /// Domain integrators. - Array domain_integs; -+ /// Entries are not owned. -+ Array*> domain_integs_marker; - - /// Boundary integrators. - Array boundary_integs; -- Array*> boundary_integs_marker; ///< Entries are not owned. -+ /// Entries are not owned. -+ Array*> boundary_integs_marker; - - /// Trace face (skeleton) integrators. - Array trace_face_integs; -@@ -799,12 +802,16 @@ public: - /// Adds a domain integrator. Assumes ownership of @a bfi. - void AddDomainIntegrator(BilinearFormIntegrator *bfi); - -+ /// Adds a domain integrator. Assumes ownership of @a bfi. -+ void AddDomainIntegrator(BilinearFormIntegrator *bfi, -+ Array &elem_marker); -+ - /// Adds a boundary integrator. Assumes ownership of @a bfi. - void AddBoundaryIntegrator(BilinearFormIntegrator *bfi); - - /// Adds a boundary integrator. Assumes ownership of @a bfi. -- void AddBoundaryIntegrator (BilinearFormIntegrator * bfi, -- Array &bdr_marker); -+ void AddBoundaryIntegrator(BilinearFormIntegrator * bfi, -+ Array &bdr_marker); - - /** @brief Add a trace face integrator. Assumes ownership of @a bfi. - -@@ -814,14 +821,18 @@ public: - void AddTraceFaceIntegrator(BilinearFormIntegrator *bfi); - - /// Adds a boundary trace face integrator. Assumes ownership of @a bfi. -- void AddBdrTraceFaceIntegrator (BilinearFormIntegrator * bfi); -+ void AddBdrTraceFaceIntegrator(BilinearFormIntegrator * bfi); - - /// Adds a boundary trace face integrator. Assumes ownership of @a bfi. -- void AddBdrTraceFaceIntegrator (BilinearFormIntegrator * bfi, -- Array &bdr_marker); -+ void AddBdrTraceFaceIntegrator(BilinearFormIntegrator * bfi, -+ Array &bdr_marker); - - /// Access all integrators added with AddDomainIntegrator(). - Array *GetDBFI() { return &domain_integs; } -+ /** @brief Access all domain markers added with AddDomainIntegrator(). -+ If no marker was specified when the integrator was added, the -+ corresponding pointer (to Array) will be NULL. */ -+ Array*> *GetDBFI_Marker() { return &domain_integs_marker; } - - /// Access all integrators added with AddBoundaryIntegrator(). - Array *GetBBFI() { return &boundary_integs; } -@@ -1059,6 +1070,9 @@ public: - /// Adds a domain interpolator. Assumes ownership of @a di. - void AddDomainInterpolator(DiscreteInterpolator *di) - { AddDomainIntegrator(di); } -+ void AddDomainInterpolator(DiscreteInterpolator *di, -+ Array &elem_marker) -+ { AddDomainIntegrator(di, elem_marker); } - - /// Adds a trace face interpolator. Assumes ownership of @a di. - void AddTraceFaceInterpolator(DiscreteInterpolator *di) -@@ -1066,6 +1080,7 @@ public: - - /// Access all interpolators added with AddDomainInterpolator(). - Array *GetDI() { return &domain_integs; } -+ Array*> *GetDI_Marker() { return &domain_integs_marker; } - - /// Set the desired assembly level. The default is AssemblyLevel::FULL. - /** This method must be called before assembly. */ -diff --git a/fem/nonlinearform.cpp b/fem/nonlinearform.cpp -index a01e83ebe..88271e234 100644 ---- a/fem/nonlinearform.cpp -+++ b/fem/nonlinearform.cpp -@@ -97,12 +97,37 @@ double NonlinearForm::GetGridFunctionEnergy(const Vector &x) const - const FiniteElement *fe; - ElementTransformation *T; - DofTransformation *doftrans; -+ Mesh *mesh = fes->GetMesh(); - double energy = 0.0; - - if (dnfi.Size()) - { -+ // Which attributes need to be processed? -+ Array attr_marker(mesh->attributes.Size() ? -+ mesh->attributes.Max() : 0); -+ attr_marker = 0; -+ for (int k = 0; k < dnfi.Size(); k++) -+ { -+ if (dnfi_marker[k] == NULL) -+ { -+ attr_marker = 1; -+ break; -+ } -+ Array &marker = *dnfi_marker[k]; -+ MFEM_ASSERT(marker.Size() == attr_marker.Size(), -+ "invalid marker for domain integrator #" -+ << k << ", counting from zero"); -+ for (int i = 0; i < attr_marker.Size(); i++) -+ { -+ attr_marker[i] |= marker[i]; -+ } -+ } -+ - for (int i = 0; i < fes->GetNE(); i++) - { -+ const int attr = mesh->GetAttribute(i); -+ if (attr_marker[attr-1] == 0) { continue; } -+ - fe = fes->GetFE(i); - doftrans = fes->GetElementVDofs(i, vdofs); - T = fes->GetElementTransformation(i); -@@ -110,6 +135,9 @@ double NonlinearForm::GetGridFunctionEnergy(const Vector &x) const - if (doftrans) {doftrans->InvTransformPrimal(el_x); } - for (int k = 0; k < dnfi.Size(); k++) - { -+ if (dnfi_marker[k] && -+ (*dnfi_marker[k])[attr-1] == 0) { continue; } -+ - energy += dnfi[k]->GetElementEnergy(*fe, *T, el_x); - } - } -@@ -175,8 +203,32 @@ void NonlinearForm::Mult(const Vector &x, Vector &y) const - - if (dnfi.Size()) - { -+ // Which attributes need to be processed? -+ Array attr_marker(mesh->attributes.Size() ? -+ mesh->attributes.Max() : 0); -+ attr_marker = 0; -+ for (int k = 0; k < dnfi.Size(); k++) -+ { -+ if (dnfi_marker[k] == NULL) -+ { -+ attr_marker = 1; -+ break; -+ } -+ Array &marker = *dnfi_marker[k]; -+ MFEM_ASSERT(marker.Size() == attr_marker.Size(), -+ "invalid marker for domain integrator #" -+ << k << ", counting from zero"); -+ for (int i = 0; i < attr_marker.Size(); i++) -+ { -+ attr_marker[i] |= marker[i]; -+ } -+ } -+ - for (int i = 0; i < fes->GetNE(); i++) - { -+ const int attr = mesh->GetAttribute(i); -+ if (attr_marker[attr-1] == 0) { continue; } -+ - fe = fes->GetFE(i); - doftrans = fes->GetElementVDofs(i, vdofs); - T = fes->GetElementTransformation(i); -@@ -184,6 +236,9 @@ void NonlinearForm::Mult(const Vector &x, Vector &y) const - if (doftrans) {doftrans->InvTransformPrimal(el_x); } - for (int k = 0; k < dnfi.Size(); k++) - { -+ if (dnfi_marker[k] && -+ (*dnfi_marker[k])[attr-1] == 0) { continue; } -+ - dnfi[k]->AssembleElementVector(*fe, *T, el_x, el_y); - if (doftrans) {doftrans->TransformDual(el_y); } - py.AddElementVector(vdofs, el_y); -@@ -322,8 +377,32 @@ Operator &NonlinearForm::GetGradient(const Vector &x) const - - if (dnfi.Size()) - { -+ // Which attributes need to be processed? -+ Array attr_marker(mesh->attributes.Size() ? -+ mesh->attributes.Max() : 0); -+ attr_marker = 0; -+ for (int k = 0; k < dnfi.Size(); k++) -+ { -+ if (dnfi_marker[k] == NULL) -+ { -+ attr_marker = 1; -+ break; -+ } -+ Array &marker = *dnfi_marker[k]; -+ MFEM_ASSERT(marker.Size() == attr_marker.Size(), -+ "invalid marker for domain integrator #" -+ << k << ", counting from zero"); -+ for (int i = 0; i < attr_marker.Size(); i++) -+ { -+ attr_marker[i] |= marker[i]; -+ } -+ } -+ - for (int i = 0; i < fes->GetNE(); i++) - { -+ const int attr = mesh->GetAttribute(i); -+ if (attr_marker[attr-1] == 0) { continue; } -+ - fe = fes->GetFE(i); - doftrans = fes->GetElementVDofs(i, vdofs); - T = fes->GetElementTransformation(i); -@@ -331,6 +410,9 @@ Operator &NonlinearForm::GetGradient(const Vector &x) const - if (doftrans) {doftrans->InvTransformPrimal(el_x); } - for (int k = 0; k < dnfi.Size(); k++) - { -+ if (dnfi_marker[k] && -+ (*dnfi_marker[k])[attr-1] == 0) { continue; } -+ - dnfi[k]->AssembleElementGrad(*fe, *T, el_x, elmat); - if (doftrans) { doftrans->TransformDual(elmat); } - Grad->AddSubMatrix(vdofs, vdofs, elmat, skip_zeros); -@@ -561,13 +643,6 @@ BlockNonlinearForm::BlockNonlinearForm(Array &f) : - SetSpaces(f); - } - --void BlockNonlinearForm::AddBdrFaceIntegrator(BlockNonlinearFormIntegrator *nfi, -- Array &bdr_attr_marker) --{ -- bfnfi.Append(nfi); -- bfnfi_marker.Append(&bdr_attr_marker); --} -- - void BlockNonlinearForm::SetEssentialBC( - const Array *> &bdr_attr_is_ess, Array &rhs) - { -@@ -592,6 +667,7 @@ double BlockNonlinearForm::GetEnergyBlocked(const BlockVector &bx) const - Array fe(fes.Size()); - ElementTransformation *T; - DofTransformation *doftrans; -+ Mesh *mesh = fes[0]->GetMesh(); - double energy = 0.0; - - for (int i=0; i attr_marker(mesh->attributes.Size() ? -+ mesh->attributes.Max() : 0); -+ attr_marker = 0; -+ for (int k = 0; k < dnfi.Size(); k++) -+ { -+ if (dnfi_marker[k] == NULL) -+ { -+ attr_marker = 1; -+ break; -+ } -+ Array &marker = *dnfi_marker[k]; -+ MFEM_ASSERT(marker.Size() == attr_marker.Size(), -+ "invalid marker for domain integrator #" -+ << k << ", counting from zero"); -+ for (int i = 0; i < attr_marker.Size(); i++) -+ { -+ attr_marker[i] |= marker[i]; -+ } -+ } -+ - for (int i = 0; i < fes[0]->GetNE(); ++i) - { -+ const int attr = mesh->GetAttribute(i); -+ if (attr_marker[attr-1] == 0) { continue; } -+ - T = fes[0]->GetElementTransformation(i); - for (int s=0; sGetElementEnergy(fe, *T, el_x_const); - } - } -+ } - - // free the allocated memory - for (int i = 0; i < fes.Size(); ++i) -@@ -656,6 +761,7 @@ void BlockNonlinearForm::MultBlocked(const BlockVector &bx, - Array fe2(fes.Size()); - ElementTransformation *T; - Array doftrans(fes.Size()); doftrans = nullptr; -+ Mesh *mesh = fes[0]->GetMesh(); - - by.UseDevice(true); - by = 0.0; -@@ -670,8 +776,32 @@ void BlockNonlinearForm::MultBlocked(const BlockVector &bx, - - if (dnfi.Size()) - { -+ // Which attributes need to be processed? -+ Array attr_marker(mesh->attributes.Size() ? -+ mesh->attributes.Max() : 0); -+ attr_marker = 0; -+ for (int k = 0; k < dnfi.Size(); k++) -+ { -+ if (dnfi_marker[k] == NULL) -+ { -+ attr_marker = 1; -+ break; -+ } -+ Array &marker = *dnfi_marker[k]; -+ MFEM_ASSERT(marker.Size() == attr_marker.Size(), -+ "invalid marker for domain integrator #" -+ << k << ", counting from zero"); -+ for (int i = 0; i < attr_marker.Size(); i++) -+ { -+ attr_marker[i] |= marker[i]; -+ } -+ } -+ - for (int i = 0; i < fes[0]->GetNE(); ++i) - { -+ const int attr = mesh->GetAttribute(i); -+ if (attr_marker[attr-1] == 0) { continue; } -+ - T = fes[0]->GetElementTransformation(i); - for (int s = 0; s < fes.Size(); ++s) - { -@@ -683,6 +813,9 @@ void BlockNonlinearForm::MultBlocked(const BlockVector &bx, - - for (int k = 0; k < dnfi.Size(); ++k) - { -+ if (dnfi_marker[k] && -+ (*dnfi_marker[k])[attr-1] == 0) { continue; } -+ - dnfi[k]->AssembleElementVector(fe, *T, - el_x_const, el_y); - -@@ -698,7 +831,6 @@ void BlockNonlinearForm::MultBlocked(const BlockVector &bx, - - if (fnfi.Size()) - { -- Mesh *mesh = fes[0]->GetMesh(); - FaceElementTransformations *tr; - - for (int i = 0; i < mesh->GetNumFaces(); ++i) -@@ -736,8 +868,8 @@ void BlockNonlinearForm::MultBlocked(const BlockVector &bx, - - if (bfnfi.Size()) - { -- Mesh *mesh = fes[0]->GetMesh(); - FaceElementTransformations *tr; -+ - // Which boundary attributes need to be processed? - Array bdr_attr_marker(mesh->bdr_attributes.Size() ? - mesh->bdr_attributes.Max() : 0); -@@ -858,6 +990,7 @@ void BlockNonlinearForm::ComputeGradientBlocked(const BlockVector &bx) const - Arrayfe2(fes.Size()); - ElementTransformation * T; - Array doftrans(fes.Size()); doftrans = nullptr; -+ Mesh *mesh = fes[0]->GetMesh(); - - for (int i=0; i attr_marker(mesh->attributes.Size() ? -+ mesh->attributes.Max() : 0); -+ attr_marker = 0; -+ for (int k = 0; k < dnfi.Size(); k++) -+ { -+ if (dnfi_marker[k] == NULL) -+ { -+ attr_marker = 1; -+ break; -+ } -+ Array &marker = *dnfi_marker[k]; -+ MFEM_ASSERT(marker.Size() == attr_marker.Size(), -+ "invalid marker for domain integrator #" -+ << k << ", counting from zero"); -+ for (int i = 0; i < attr_marker.Size(); i++) -+ { -+ attr_marker[i] |= marker[i]; -+ } -+ } -+ - for (int i = 0; i < fes[0]->GetNE(); ++i) - { -+ const int attr = mesh->GetAttribute(i); -+ if (attr_marker[attr-1] == 0) { continue; } -+ - T = fes[0]->GetElementTransformation(i); - for (int s = 0; s < fes.Size(); ++s) - { -@@ -901,6 +1058,9 @@ void BlockNonlinearForm::ComputeGradientBlocked(const BlockVector &bx) const - - for (int k = 0; k < dnfi.Size(); ++k) - { -+ if (dnfi_marker[k] && -+ (*dnfi_marker[k])[attr-1] == 0) { continue; } -+ - dnfi[k]->AssembleElementGrad(fe, *T, el_x_const, elmats); - - for (int j=0; jGetMesh(); - - for (int i = 0; i < mesh->GetNumFaces(); ++i) - { -@@ -960,7 +1119,6 @@ void BlockNonlinearForm::ComputeGradientBlocked(const BlockVector &bx) const - if (bfnfi.Size()) - { - FaceElementTransformations *tr; -- Mesh *mesh = fes[0]->GetMesh(); - - // Which boundary attributes need to be processed? - Array bdr_attr_marker(mesh->bdr_attributes.Size() ? -diff --git a/fem/nonlinearform.hpp b/fem/nonlinearform.hpp -index d15d09e04..77da539f7 100644 ---- a/fem/nonlinearform.hpp -+++ b/fem/nonlinearform.hpp -@@ -37,6 +37,7 @@ protected: - - /// Set of Domain Integrators to be assembled (added). - Array dnfi; // owned -+ Array*> dnfi_marker; // not owned - - /// Set of interior face Integrators to be assembled (added). - Array fnfi; // owned -@@ -108,7 +109,12 @@ public: - - /// Adds new Domain Integrator. - void AddDomainIntegrator(NonlinearFormIntegrator *nlfi) -- { dnfi.Append(nlfi); } -+ { dnfi.Append(nlfi); dnfi_marker.Append(NULL); } -+ -+ /// Adds new Domain Integrator, restricted to specific attributes. -+ void AddDomainIntegrator(NonlinearFormIntegrator *nlfi, -+ Array &elem_marker) -+ { dnfi.Append(nlfi); dnfi_marker.Append(&elem_marker); } - - /// Access all integrators added with AddDomainIntegrator(). - Array *GetDNFI() { return &dnfi; } -@@ -227,13 +233,14 @@ protected: - - /// Set of Domain Integrators to be assembled (added). - Array dnfi; -+ Array*> dnfi_marker; - - /// Set of interior face Integrators to be assembled (added). - Array fnfi; - - /// Set of Boundary Face Integrators to be assembled (added). - Array bfnfi; -- Array*> bfnfi_marker; -+ Array*> bfnfi_marker; - - /** Auxiliary block-vectors for wrapping input and output vectors or holding - GridFunction-like block-vector data (e.g. in parallel). */ -@@ -298,7 +305,12 @@ public: - - /// Adds new Domain Integrator. - void AddDomainIntegrator(BlockNonlinearFormIntegrator *nlfi) -- { dnfi.Append(nlfi); } -+ { dnfi.Append(nlfi); dnfi_marker.Append(NULL); } -+ -+ /// Adds new Domain Integrator, restricted to specific attributes. -+ void AddDomainIntegrator(BlockNonlinearFormIntegrator *nlfi, -+ Array &elem_marker) -+ { dnfi.Append(nlfi); dnfi_marker.Append(&elem_marker); } - - /// Adds new Interior Face Integrator. - void AddInteriorFaceIntegrator(BlockNonlinearFormIntegrator *nlfi) -@@ -311,7 +323,8 @@ public: - /** @brief Adds new Boundary Face Integrator, restricted to specific boundary - attributes. */ - void AddBdrFaceIntegrator(BlockNonlinearFormIntegrator *nlfi, -- Array &bdr_marker); -+ Array &bdr_marker) -+ { bfnfi.Append(nlfi); bfnfi_marker.Append(&bdr_marker); } - - virtual void SetEssentialBC(const Array *>&bdr_attr_is_ess, - Array &rhs); diff --git a/palace/deps/patch/mfem/patch_direct_solvers.diff b/palace/deps/patch/mfem/patch_direct_solvers.diff index 98c0b7c53..cbe9eba3e 100644 --- a/palace/deps/patch/mfem/patch_direct_solvers.diff +++ b/palace/deps/patch/mfem/patch_direct_solvers.diff @@ -1,24 +1,3 @@ -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 32112b549..e9e6ae0b7 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -82,7 +82,7 @@ if (MFEM_USE_CONDUIT OR - # * find_package(PETSc REQUIRED) - set(XSDK_ENABLE_C ON) - endif() --if (MFEM_USE_STRUMPACK) -+if (MFEM_USE_STRUMPACK OR MFEM_USE_MUMPS) - # Just needed to find the MPI_Fortran libraries to link with - set(XSDK_ENABLE_Fortran ON) - endif() -@@ -333,6 +333,7 @@ endif() - if (MFEM_USE_MUMPS) - if (MFEM_USE_MPI) - find_package(MUMPS REQUIRED mumps_common pord) -+ set(MFEM_MUMPS_VERSION ${MUMPS_VERSION}) - else() - message(FATAL_ERROR " *** MUMPS requires that MPI be enabled.") - endif() diff --git a/INSTALL b/INSTALL index cb092cc1b..9a7deaa43 100644 --- a/INSTALL @@ -33,332 +12,11 @@ index cb092cc1b..9a7deaa43 100644 URL: http://portal.nersc.gov/project/sparse/strumpack Options: STRUMPACK_OPT, STRUMPACK_LIB. Versions: STRUMPACK >= 3.0.0. -diff --git a/config/cmake/config.hpp.in b/config/cmake/config.hpp.in -index 7e820088a..adde511fb 100644 ---- a/config/cmake/config.hpp.in -+++ b/config/cmake/config.hpp.in -@@ -80,96 +80,101 @@ - // Internal MFEM option: enable group/batch allocation for some small objects. - #cmakedefine MFEM_USE_MEMALLOC - -+// Which library functions to use in class StopWatch for measuring time. -+// For a list of the available options, see INSTALL. -+// If not defined, an option is selected automatically. -+#cmakedefine MFEM_TIMER_TYPE @MFEM_TIMER_TYPE@ -+ -+// Enable MFEM functionality based on the SUNDIALS libraries. -+#cmakedefine MFEM_USE_SUNDIALS -+ - // Enable MFEM functionality based on the SuiteSparse library. - #cmakedefine MFEM_USE_SUITESPARSE - - // Enable MFEM functionality based on the SuperLU_DIST library. - #cmakedefine MFEM_USE_SUPERLU -+#cmakedefine MFEM_USE_SUPERLU5 - - // Enable MFEM functionality based on the MUMPS library. - #cmakedefine MFEM_USE_MUMPS -+#cmakedefine MFEM_MUMPS_VERSION @MFEM_MUMPS_VERSION@ - - // Enable MFEM functionality based on the STRUMPACK library. - #cmakedefine MFEM_USE_STRUMPACK - --// Enable functionality based on the Ginkgo library -+// Enable functionality based on the Ginkgo library. - #cmakedefine MFEM_USE_GINKGO - --// Enable MFEM functionality based on the AmgX library -+// Enable MFEM functionality based on the AmgX library. - #cmakedefine MFEM_USE_AMGX - --// Enable MFEM functionality based on the GnuTLS library -+// Enable secure socket streams based on the GNUTLS library. - #cmakedefine MFEM_USE_GNUTLS - --// Enable MFEM functionality based on the GSLIB library --#cmakedefine MFEM_USE_GSLIB -- --// Enable MFEM functionality based on the NetCDF library --#cmakedefine MFEM_USE_NETCDF -- --// Enable MFEM functionality based on the PETSc library --#cmakedefine MFEM_USE_PETSC -- --// Enable MFEM functionality based on the SLEPc library --#cmakedefine MFEM_USE_SLEPC -- --// Enable MFEM functionality based on the Sidre library -+// Enable Sidre support. - #cmakedefine MFEM_USE_SIDRE - --// Enable the use of SIMD in the high performance templated classes -+// Enable the use of SIMD in the high performance templated classes. - #cmakedefine MFEM_USE_SIMD - --// Enable MFEM functionality based on the FMS library -+// Enable FMS support. - #cmakedefine MFEM_USE_FMS - --// Enable MFEM functionality based on Conduit -+// Enable Conduit support. - #cmakedefine MFEM_USE_CONDUIT - --// Enable MFEM functionality based on the PUMI library -+// Enable functionality based on the NetCDF library (reading CUBIT files). -+#cmakedefine MFEM_USE_NETCDF -+ -+// Enable functionality based on the PETSc library. -+#cmakedefine MFEM_USE_PETSC -+ -+// Enable functionality based on the SLEPc library. -+#cmakedefine MFEM_USE_SLEPC -+ -+// Enable functionality based on the MPFR library. -+#cmakedefine MFEM_USE_MPFR -+ -+// Enable MFEM functionality based on the PUMI library. - #cmakedefine MFEM_USE_PUMI - --// Enable MFEM functionality based on the Moonolith library -+// Enable Moonolith-based general interpolation between finite element spaces. - #cmakedefine MFEM_USE_MOONOLITH - --// Enable MFEM functionality based on the HiOp library -+// Enable MFEM functionality based on the HIOP library. - #cmakedefine MFEM_USE_HIOP - --// Build the GPU/CUDA-enabled version of the MFEM library. -+// Enable MFEM functionality based on the GSLIB library. -+#cmakedefine MFEM_USE_GSLIB -+ -+// Build the NVIDIA GPU/CUDA-enabled version of the MFEM library. - // Requires a CUDA compiler (nvcc). - #cmakedefine MFEM_USE_CUDA - --// Build the HIP-enabled version of the MFEM library. -+// Build the AMD GPU/HIP-enabled version of the MFEM library. - // Requires a HIP compiler (hipcc). - #cmakedefine MFEM_USE_HIP - --// Enable MFEM functionality based on the RAJA library -+// Enable functionality based on the RAJA library. - #cmakedefine MFEM_USE_RAJA - --// Enable MFEM functionality based on the OCCA library -+// Enable functionality based on the OCCA library. - #cmakedefine MFEM_USE_OCCA - --// Enable MFEM functionality based on the libCEED library -+// Enable functionality based on the libCEED library. - #cmakedefine MFEM_USE_CEED - --// Enable MFEM functionality based on the Umpire library --#cmakedefine MFEM_USE_UMPIRE -- --// Enable MFEM functionality based on the ADIOS2 library --#cmakedefine MFEM_USE_ADIOS2 -- --// Enable MFEM functionality based on the Caliper library -+// Enable functionality based on the Caliper library. - #cmakedefine MFEM_USE_CALIPER - --// Enable MFEM functionality based on the Algoim library -+// Enable functionality based on the Algoim library. - #cmakedefine MFEM_USE_ALGOIM - --// Which library functions to use in class StopWatch for measuring time. --// For a list of the available options, see INSTALL. --// If not defined, an option is selected automatically. --#define MFEM_TIMER_TYPE @MFEM_TIMER_TYPE@ -+// Enable functionality based on the Umpire library. -+#cmakedefine MFEM_USE_UMPIRE - --// Enable MFEM functionality based on the SUNDIALS libraries. --#cmakedefine MFEM_USE_SUNDIALS -+// Enable IO functionality based on the ADIOS2 library. -+#cmakedefine MFEM_USE_ADIOS2 - - // Version of HYPRE used for building MFEM. - #cmakedefine MFEM_HYPRE_VERSION @MFEM_HYPRE_VERSION@ -@@ -181,13 +186,13 @@ - // Enable interface to the MKL CPardiso library. - #cmakedefine MFEM_USE_MKL_CPARDISO - --// Use forward mode for automatic differentiation -+// Use forward mode for automatic differentiation. - #cmakedefine MFEM_USE_ADFORWARD - --// Enable the use of the CoDiPack library for AD -+// Enable the use of the CoDiPack library for AD. - #cmakedefine MFEM_USE_CODIPACK - --// Enable MFEM functionality based on the Google Benchmark library. -+// Enable functionality based on the Google Benchmark library. - #cmakedefine MFEM_USE_BENCHMARK - - // Enable Enzyme for AD -diff --git a/config/cmake/modules/FindMUMPS.cmake b/config/cmake/modules/FindMUMPS.cmake -index acdfd55a6..2b034d216 100644 ---- a/config/cmake/modules/FindMUMPS.cmake -+++ b/config/cmake/modules/FindMUMPS.cmake -@@ -11,8 +11,9 @@ - - # Sets the following variables: - # - MUMPS_FOUND --# - MUMPS_INCLUDE_DIRS - # - MUMPS_LIBRARIES -+# - MUMPS_INCLUDE_DIRS -+# - MUMPS_VERSION - - include(MfemCmakeUtilities) - mfem_find_package(MUMPS MUMPS MUMPS_DIR -@@ -21,3 +22,18 @@ mfem_find_package(MUMPS MUMPS MUMPS_DIR - "Libraries required by MUMPS." - ADD_COMPONENT mumps_common "include" dmumps_c.h "lib" mumps_common - ADD_COMPONENT pord "include" dmumps_c.h "lib" pord) -+ -+if (MUMPS_FOUND AND (NOT MUMPS_VERSION)) -+ try_run(MUMPS_VERSION_RUN_RESULT MUMPS_VERSION_COMPILE_RESULT -+ ${CMAKE_CURRENT_BINARY_DIR}/config -+ ${CMAKE_CURRENT_SOURCE_DIR}/config/get_mumps_version.cpp -+ CMAKE_FLAGS -DINCLUDE_DIRECTORIES:STRING=${MUMPS_INCLUDE_DIRS} -+ RUN_OUTPUT_VARIABLE MUMPS_VERSION_OUTPUT) -+ if ((MUMPS_VERSION_RUN_RESULT EQUAL 0) AND MUMPS_VERSION_OUTPUT) -+ string(STRIP "${MUMPS_VERSION_OUTPUT}" MUMPS_VERSION) -+ set(MUMPS_VERSION ${MUMPS_VERSION} CACHE STRING "MUMPS version." FORCE) -+ message(STATUS "Found MUMPS version ${MUMPS_VERSION}") -+ else() -+ message(FATAL_ERROR "Unable to determine MUMPS version.") -+ endif() -+endif() -diff --git a/config/config.hpp.in b/config/config.hpp.in -index 76145927b..7a737720e 100644 ---- a/config/config.hpp.in -+++ b/config/config.hpp.in -@@ -30,10 +30,10 @@ - #define MFEM_VERSION_MINOR (((MFEM_VERSION)/100)%100) - #define MFEM_VERSION_PATCH ((MFEM_VERSION)%100) - --// The absolute path of the MFEM source prefix -+// The absolute path of the MFEM source prefix. - // #define MFEM_SOURCE_DIR "@MFEM_SOURCE_DIR@" - --// The absolute path of the MFEM installation prefix -+// The absolute path of the MFEM installation prefix. - // #define MFEM_INSTALL_DIR "@MFEM_INSTALL_DIR@" - - // Description of the git commit used to build MFEM. -@@ -91,7 +91,7 @@ - // Enable MFEM functionality based on the SuiteSparse library. - // #define MFEM_USE_SUITESPARSE - --// Enable MFEM functionality based on the SuperLU library. -+// Enable MFEM functionality based on the SuperLU_DIST library. - // #define MFEM_USE_SUPERLU - // #define MFEM_USE_SUPERLU5 - -@@ -102,40 +102,40 @@ - // Enable MFEM functionality based on the STRUMPACK library. - // #define MFEM_USE_STRUMPACK - --// Enable MFEM features based on the Ginkgo library -+// Enable MFEM features based on the Ginkgo library. - // #define MFEM_USE_GINKGO - - // Enable MFEM functionality based on the AmgX library. - // #define MFEM_USE_AMGX - --// Enable secure socket streams based on the GNUTLS library -+// Enable secure socket streams based on the GNUTLS library. - // #define MFEM_USE_GNUTLS - --// Enable Sidre support -+// Enable Sidre support. - // #define MFEM_USE_SIDRE - --// Enable the use of SIMD in the high performance templated classes -+// Enable the use of SIMD in the high performance templated classes. - // #define MFEM_USE_SIMD - --// Enable FMS support -+// Enable FMS support. - // #define MFEM_USE_FMS - --// Enable Conduit support -+// Enable Conduit support. - // #define MFEM_USE_CONDUIT - --// Enable functionality based on the NetCDF library (reading CUBIT files) -+// Enable functionality based on the NetCDF library (reading CUBIT files). - // #define MFEM_USE_NETCDF - --// Enable functionality based on the PETSc library -+// Enable functionality based on the PETSc library. - // #define MFEM_USE_PETSC - --// Enable functionality based on the SLEPc library -+// Enable functionality based on the SLEPc library. - // #define MFEM_USE_SLEPC - - // Enable functionality based on the MPFR library. - // #define MFEM_USE_MPFR - --// Enable MFEM functionality based on the PUMI library -+// Enable MFEM functionality based on the PUMI library. - // #define MFEM_USE_PUMI - - // Enable Moonolith-based general interpolation between finite element spaces. -@@ -144,7 +144,7 @@ - // Enable MFEM functionality based on the HIOP library. - // #define MFEM_USE_HIOP - --// Enable MFEM functionality based on the GSLIB library -+// Enable MFEM functionality based on the GSLIB library. - // #define MFEM_USE_GSLIB - - // Build the NVIDIA GPU/CUDA-enabled version of the MFEM library. -@@ -186,10 +186,10 @@ - // Enable interface to the MKL CPardiso library. - // #define MFEM_USE_MKL_CPARDISO - --// Use forward mode for automatic differentiation -+// Use forward mode for automatic differentiation. - // #define MFEM_USE_ADFORWARD - --// Enable the use of the CoDiPack library for AD -+// Enable the use of the CoDiPack library for AD. - // #define MFEM_USE_CODIPACK - - // Enable functionality based on the Google Benchmark library. diff --git a/config/defaults.cmake b/config/defaults.cmake -index d5104092b..4386ce53a 100644 +index 390026414..4386ce53a 100644 --- a/config/defaults.cmake +++ b/config/defaults.cmake -@@ -134,16 +134,18 @@ set(ParMETIS_DIR "${MFEM_DIR}/../parmetis-4.0.3" CACHE PATH - set(ParMETIS_REQUIRED_PACKAGES "METIS" CACHE STRING - "Additional packages required by ParMETIS.") - --set(SuperLUDist_DIR "${MFEM_DIR}/../SuperLU_DIST_6.3.1" CACHE PATH -+set(SuperLUDist_DIR "${MFEM_DIR}/../SuperLU_DIST_8.1.2" CACHE PATH - "Path to the SuperLU_DIST library.") - # SuperLU_DIST may also depend on "OpenMP", depending on how it was compiled. --set(SuperLUDist_REQUIRED_PACKAGES "MPI" "BLAS" "ParMETIS" CACHE STRING -+set(SuperLUDist_REQUIRED_PACKAGES "MPI" "ParMETIS" "METIS" -+ "LAPACK" "BLAS" CACHE STRING - "Additional packages required by SuperLU_DIST.") - --set(MUMPS_DIR "${MFEM_DIR}/../MUMPS_5.2.0" CACHE PATH -+set(MUMPS_DIR "${MFEM_DIR}/../MUMPS_5.5.0" CACHE PATH - "Path to the MUMPS library.") --# Packages required by MUMPS, depending on how it was compiled. --set(MUMPS_REQUIRED_PACKAGES "MPI" "BLAS" "METIS" "ScaLAPACK" CACHE STRING -+# MUMPS may also depend on "OpenMP", depending on how it was compiled. -+set(MUMPS_REQUIRED_PACKAGES "MPI" "MPI_Fortran" "ParMETIS" "METIS" -+ "ScaLAPACK" "LAPACK" "BLAS" CACHE STRING - "Additional packages required by MUMPS.") - # If the MPI package does not find all required Fortran libraries: - # set(MUMPS_REQUIRED_LIBRARIES "gfortran" "mpi_mpifh" CACHE STRING -@@ -154,7 +156,8 @@ set(STRUMPACK_DIR "${MFEM_DIR}/../STRUMPACK-build" CACHE PATH +@@ -156,7 +156,8 @@ set(STRUMPACK_DIR "${MFEM_DIR}/../STRUMPACK-build" CACHE PATH # STRUMPACK may also depend on "OpenMP", depending on how it was compiled. # Starting with v2.2.0 of STRUMPACK, ParMETIS and Scotch are optional. set(STRUMPACK_REQUIRED_PACKAGES "MPI" "MPI_Fortran" "ParMETIS" "METIS" @@ -368,52 +26,6 @@ index d5104092b..4386ce53a 100644 "Additional packages required by STRUMPACK.") # If the MPI package does not find all required Fortran libraries: # set(STRUMPACK_REQUIRED_LIBRARIES "gfortran" "mpi_mpifh" CACHE STRING -diff --git a/config/defaults.mk b/config/defaults.mk -index ca5dc3c45..e149ae452 100644 ---- a/config/defaults.mk -+++ b/config/defaults.mk -@@ -284,10 +284,10 @@ ifeq ($(MFEM_USE_SUPERLU5),YES) - SUPERLU_LIB = $(XLINKER)-rpath,$(SUPERLU_DIR)/lib -L$(SUPERLU_DIR)/lib\ - -lsuperlu_dist_5.1.0 - else -- SUPERLU_DIR = @MFEM_DIR@/../SuperLU_DIST_6.3.1 -+ SUPERLU_DIR = @MFEM_DIR@/../SuperLU_DIST_8.1.2 - SUPERLU_OPT = -I$(SUPERLU_DIR)/include - SUPERLU_LIB = $(XLINKER)-rpath,$(SUPERLU_DIR)/lib64 -L$(SUPERLU_DIR)/lib64\ -- -lsuperlu_dist -lblas -+ -lsuperlu_dist $(LAPACK_LIB) - endif - - # SCOTCH library configuration (required by STRUMPACK <= v2.1.0, optional in -@@ -311,7 +311,7 @@ MPI_FORTRAN_LIB = -lmpifort - # MPI_FORTRAN_LIB += -lgfortran - - # MUMPS library configuration --MUMPS_DIR = @MFEM_DIR@/../MUMPS_5.2.0 -+MUMPS_DIR = @MFEM_DIR@/../MUMPS_5.5.0 - MUMPS_OPT = -I$(MUMPS_DIR)/include - MUMPS_LIB = $(XLINKER)-rpath,$(MUMPS_DIR)/lib -L$(MUMPS_DIR)/lib -ldmumps\ - -lmumps_common -lpord $(SCALAPACK_LIB) $(LAPACK_LIB) $(MPI_FORTRAN_LIB) -diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt -index afa98324c..7d9c835c9 100644 ---- a/examples/CMakeLists.txt -+++ b/examples/CMakeLists.txt -@@ -161,6 +161,15 @@ if (MFEM_ENABLE_TESTING) - $ "-no-vis" "--superlu" - ${MPIEXEC_POSTFLAGS}) - endif() -+ -+ # If MUMPS is enabled, add a test run that uses it. -+ if (MFEM_USE_MUMPS) -+ add_test(NAME ex25p_mumps_np=${MFEM_MPI_NP} -+ COMMAND ${MPIEXEC} ${MPIEXEC_NUMPROC_FLAG} ${MFEM_MPI_NP} -+ ${MPIEXEC_PREFLAGS} -+ $ "-no-vis" "--mumps-solver" -+ ${MPIEXEC_POSTFLAGS}) -+ endif() - endif() - - # Include the examples/amgx directory if AmgX is enabled diff --git a/examples/ex11p.cpp b/examples/ex11p.cpp index 216a6f443..eca3ce929 100644 --- a/examples/ex11p.cpp @@ -435,7 +47,7 @@ index 216a6f443..eca3ce929 100644 strumpack->SetFromCommandLine(); precond = strumpack; diff --git a/examples/ex25p.cpp b/examples/ex25p.cpp -index 160145719..2e36471bd 100644 +index e3848b848..cf5daf412 100644 --- a/examples/ex25p.cpp +++ b/examples/ex25p.cpp @@ -170,6 +170,7 @@ int main(int argc, char *argv[]) @@ -476,7 +88,7 @@ index 160145719..2e36471bd 100644 } if (iprob > 4) { iprob = 4; } -@@ -474,15 +481,33 @@ int main(int argc, char *argv[]) +@@ -474,6 +481,24 @@ int main(int argc, char *argv[]) delete A; } #endif @@ -501,17 +113,6 @@ index 160145719..2e36471bd 100644 #ifdef MFEM_USE_MUMPS if (!pa && mumps_solver) { - HypreParMatrix *A = Ah.As()->GetSystemMatrix(); -- MUMPSSolver mumps; -+ MUMPSSolver mumps(A->GetComm()); - mumps.SetPrintLevel(0); - mumps.SetMatrixSymType(MUMPSSolver::MatType::UNSYMMETRIC); - mumps.SetOperator(*A); -- mumps.Mult(B,X); -+ mumps.Mult(B, X); - delete A; - } - #endif @@ -493,7 +518,7 @@ int main(int argc, char *argv[]) // // In PML: 1/mu (abs(1/det(J) J^T J) Curl E, Curl F) @@ -541,3291 +142,957 @@ index 51238c4d7..e6f4730fe 100644 strumpack->SetOperator(*Arow); strumpack->SetFromCommandLine(); precond = strumpack; -diff --git a/examples/superlu/ex1p.cpp b/examples/superlu/ex1p.cpp -index 2bd220b07..a00f00af8 100644 ---- a/examples/superlu/ex1p.cpp -+++ b/examples/superlu/ex1p.cpp -@@ -67,6 +67,7 @@ int main(int argc, char *argv[]) - int slu_colperm = 4; - int slu_rowperm = 1; - int slu_iterref = 2; -+ int slu_npdep = 1; - - OptionsParser args(argc, argv); - args.AddOption(&mesh_file, "-m", "--mesh", -@@ -85,9 +86,11 @@ int main(int argc, char *argv[]) - "6-ZOLTAN"); - args.AddOption(&slu_rowperm, "-rp", "--rowperm", - "SuperLU Row Permutation Method: 0-NOROWPERM, 1-LargeDiag"); -- args.AddOption(&slu_iterref, "-rp", "--rowperm", -+ args.AddOption(&slu_iterref, "-ir", "--iterref", - "SuperLU Iterative Refinement: 0-NOREFINE, 1-Single, " - "2-Double, 3-Extra"); -+ args.AddOption(&slu_npdep, "-npdep", "--npdepth", -+ "Depth of 3D parition for SuperLU (>= 7.2.0)"); - - args.Parse(); - if (!args.Good()) -@@ -214,7 +217,7 @@ int main(int argc, char *argv[]) - a.FormLinearSystem(ess_tdof_list, x, b, A, X, B); - - // 13. Solve the linear system A X = B utilizing SuperLU. -- SuperLUSolver *superlu = new SuperLUSolver(MPI_COMM_WORLD); -+ SuperLUSolver *superlu = new SuperLUSolver(MPI_COMM_WORLD, slu_npdep); - Operator *SLU_A = new SuperLURowLocMatrix(*A.As()); - superlu->SetPrintStatistics(true); - superlu->SetSymmetricPattern(false); -@@ -281,10 +284,9 @@ int main(int argc, char *argv[]) - superlu->SetOperator(*SLU_A); - superlu->SetPrintStatistics(true); - superlu->Mult(B, X); -- superlu->DismantleGrid(); - -- delete SLU_A; - delete superlu; -+ delete SLU_A; - - // 14. Recover the parallel grid function corresponding to X. This is the - // local finite element solution on each processor. -diff --git a/linalg/mumps.cpp b/linalg/mumps.cpp -index fb6c7c15a..6efb98e3e 100644 ---- a/linalg/mumps.cpp -+++ b/linalg/mumps.cpp -@@ -16,58 +16,123 @@ - - #include "mumps.hpp" - --#ifdef HYPRE_BIGINT --#error "MUMPSSolver requires HYPRE_Int == int, for now." -+#include -+ -+#if MFEM_MUMPS_VERSION >= 530 -+#ifdef MUMPS_INTSIZE64 -+#error "Full 64-bit MUMPS is not yet supported" +diff --git a/general/communication.hpp b/general/communication.hpp +index 474486f1b..be8145689 100644 +--- a/general/communication.hpp ++++ b/general/communication.hpp +@@ -76,6 +76,14 @@ private: + static void Init_(int *argc, char ***argv) + { + MFEM_VERIFY(!IsInitialized(), "MPI already initialized!") ++#if defined(MFEM_USE_STRUMPACK) ++#if defined(STRUMPACK_USE_PTSCOTCH) || defined(STRUMPACK_USE_SLATE_SCALAPACK) ++ if (Root()) ++ { ++ MFEM_WARNING("STRUMPACK built with SLATE or PT-Scotch may require MPI_Init_thread with MPI_THREAD_MULTIPLE!"); ++ } +#endif -+#else -+#ifdef INTSIZE64 -+#error "Full 64-bit MUMPS is not yet supported" +#endif - #endif + MPI_Init(argc, argv); + // The "mpi" object below needs to be created after MPI_Init() for some + // MPI implementations +diff --git a/linalg/strumpack.cpp b/linalg/strumpack.cpp +index f0ff11ab4..5b54994aa 100644 +--- a/linalg/strumpack.cpp ++++ b/linalg/strumpack.cpp +@@ -16,238 +16,470 @@ --// macro s.t. indices match MUMPS documentation -+// Macro s.t. indices match MUMPS documentation - #define MUMPS_ICNTL(I) icntl[(I) -1] -+#define MUMPS_CNTL(I) cntl[(I) -1] - #define MUMPS_INFO(I) info[(I) -1] -+#define MUMPS_INFOG(I) infog[(I) -1] + #include "strumpack.hpp" +-using namespace std; +-using namespace strumpack; +- namespace mfem { --void MUMPSSolver::SetOperator(const Operator &op) -+MUMPSSolver::MUMPSSolver(MPI_Comm comm_) + STRUMPACKRowLocMatrix::STRUMPACKRowLocMatrix(MPI_Comm comm, +- int num_loc_rows, int first_loc_row, +- int glob_nrows, int glob_ncols, +- int *I, int *J, double *data) +- : comm_(comm), A_(NULL) ++ int num_loc_rows, ++ HYPRE_BigInt first_loc_row, ++ HYPRE_BigInt glob_nrows, ++ HYPRE_BigInt glob_ncols, ++ int *I, HYPRE_BigInt *J, ++ double *data, bool sym_sparse) { -- auto APtr = dynamic_cast(&op); -- -- MFEM_VERIFY(APtr, "Not compatible matrix type"); -+ Init(comm_); -+} - -- height = op.Height(); -- width = op.Width(); -+MUMPSSolver::MUMPSSolver(const Operator &op) -+{ -+ auto APtr = dynamic_cast(&op); -+ MFEM_VERIFY(APtr, "Not a compatible matrix type"); -+ Init(APtr->GetComm()); -+ SetOperator(op); -+} - -- comm = APtr->GetComm(); -+void MUMPSSolver::Init(MPI_Comm comm_) -+{ -+ id = nullptr; -+ comm = comm_; - MPI_Comm_size(comm, &numProcs); - MPI_Comm_rank(comm, &myid); + // Set mfem::Operator member data + height = num_loc_rows; + width = num_loc_rows; -- auto parcsr_op = (hypre_ParCSRMatrix *) const_cast(*APtr); -+ mat_type = MatType::UNSYMMETRIC; -+ print_level = 0; -+ reorder_method = ReorderingStrategy::AUTOMATIC; -+ reorder_reuse = false; -+ blr_tol = 0.0; -+ -+#if MFEM_MUMPS_VERSION >= 530 -+ irhs_loc = nullptr; -+ rhs_loc = nullptr; -+ isol_loc = nullptr; -+ sol_loc = nullptr; -+#else -+ recv_counts = nullptr; -+ displs = nullptr; -+ rhs_glob = nullptr; -+#endif -+} +- // Allocate STRUMPACK's CSRMatrixMPI +- int nprocs, rank; +- MPI_Comm_rank(comm_, &rank); +- MPI_Comm_size(comm_, &nprocs); +- int * dist = new int[nprocs + 1]; +- dist[rank + 1] = first_loc_row + num_loc_rows; ++ // Allocate STRUMPACK's CSRMatrixMPI (copies all inputs) ++ int rank, nprocs; ++ MPI_Comm_rank(comm, &rank); ++ MPI_Comm_size(comm, &nprocs); ++ Array dist(nprocs + 1); + dist[0] = 0; +- MPI_Allgather(MPI_IN_PLACE, 0, MPI_INT, dist + 1, 1, MPI_INT, comm_); +- A_ = new CSRMatrixMPI(num_loc_rows, I, J, data, dist, comm_, false); +- delete[] dist; ++ dist[rank + 1] = first_loc_row + (HYPRE_BigInt)num_loc_rows; ++ MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, ++ dist.GetData() + 1, 1, HYPRE_MPI_BIG_INT, comm); + -+MUMPSSolver::~MUMPSSolver() -+{ -+#if MFEM_MUMPS_VERSION >= 530 -+ delete [] irhs_loc; -+ delete [] rhs_loc; -+ delete [] isol_loc; -+ delete [] sol_loc; ++#if !(defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT)) ++ A_ = new strumpack::CSRMatrixMPI( ++ (HYPRE_BigInt)num_loc_rows, I, J, data, dist.GetData(), ++ comm, sym_sparse); +#else -+ delete [] recv_counts; -+ delete [] displs; -+ delete [] rhs_glob; ++ Array II(num_loc_rows+1); ++ for (int i = 0; i <= num_loc_rows; i++) { II[i] = (HYPRE_BigInt)I[i]; } ++ A_ = new strumpack::CSRMatrixMPI( ++ (HYPRE_BigInt)num_loc_rows, II.GetData(), J, data, dist.GetData(), ++ comm, sym_sparse); +#endif -+ if (id) -+ { -+ id->job = -2; -+ dmumps_c(id); -+ delete id; -+ } -+} -+ -+void MUMPSSolver::SetOperator(const Operator &op) -+{ -+ auto APtr = dynamic_cast(&op); + } + +-STRUMPACKRowLocMatrix::STRUMPACKRowLocMatrix(const HypreParMatrix & hypParMat) +- : comm_(hypParMat.GetComm()), +- A_(NULL) ++STRUMPACKRowLocMatrix::STRUMPACKRowLocMatrix(const Operator &op, ++ bool sym_sparse) + { +- // First cast the parameter to a hypre_ParCSRMatrix +- hypre_ParCSRMatrix * parcsr_op = +- (hypre_ParCSRMatrix *)const_cast(hypParMat); ++ const HypreParMatrix *APtr = dynamic_cast(&op); + MFEM_VERIFY(APtr, "Not a compatible matrix type"); -+ ++ MPI_Comm comm = APtr->GetComm(); + +- MFEM_ASSERT(parcsr_op != NULL,"STRUMPACK: const_cast failed in SetOperator"); ++ // Set mfem::Operator member data + height = op.Height(); -+ width = op.Width(); ++ width = op.Width(); -+ auto parcsr_op = (hypre_ParCSRMatrix *)const_cast(*APtr); - APtr->HostRead(); - hypre_CSRMatrix *csr_op = hypre_MergeDiagAndOffd(parcsr_op); - APtr->HypreRead(); +- // Create the CSRMatrixMPI A_ by borrowing the internal data from a +- // hypre_CSRMatrix. +- hypParMat.HostRead(); +- hypre_CSRMatrix * csr_op = hypre_MergeDiagAndOffd(parcsr_op); +- hypParMat.HypreRead(); +- hypre_CSRMatrixSetDataOwner(csr_op,0); ++ // First cast the parameter to a hypre_ParCSRMatrix ++ hypre_ParCSRMatrix *parcsr_op = ++ (hypre_ParCSRMatrix *)const_cast(*APtr); ++ ++ // Create the CSRMatrixMPI A by taking the internal data from a ++ // hypre_CSRMatrix ++ APtr->HostRead(); ++ hypre_CSRMatrix *csr_op = hypre_MergeDiagAndOffd(parcsr_op); ++ APtr->HypreRead(); + HYPRE_Int *Iptr = csr_op->i; #if MFEM_HYPRE_VERSION >= 21600 +- // For now, this method assumes that HYPRE_Int is int. Also, csr_op->num_cols +- // is of type HYPRE_Int, so if we want to check for big indices in +- // csr_op->big_j, we'll have to check all entries and that check will only be +- // necessary in HYPRE_MIXEDINT mode which is not supported at the moment. - hypre_CSRMatrixBigJtoJ(csr_op); + HYPRE_BigInt *Jptr = csr_op->big_j; +#else + HYPRE_Int *Jptr = csr_op->j; #endif ++ double *data = csr_op->data; -- int *Iptr = csr_op->i; -- int *Jptr = csr_op->j; -- int n_loc = csr_op->num_rows; -- -- row_start = parcsr_op->first_row_index; -+ int n_loc = internal::to_int(csr_op->num_rows); -+ row_start = internal::to_int(parcsr_op->first_row_index); +- height = csr_op->num_rows; +- width = csr_op->num_rows; ++ HYPRE_BigInt fst_row = parcsr_op->first_row_index; ++ HYPRE_Int m_loc = csr_op->num_rows; -- MUMPS_INT8 nnz = 0; -+ MUMPS_INT8 nnz = 0, k = 0; - if (mat_type) - { -- // count nnz in case of symmetric mode -- int k = 0; -+ // Count nnz in case of symmetric mode - for (int i = 0; i < n_loc; i++) - { -- for (int j = Iptr[i]; j < Iptr[i + 1]; j++) -+ for (HYPRE_Int j = Iptr[i]; j < Iptr[i + 1]; j++) - { - int ii = row_start + i + 1; -- int jj = Jptr[k] + 1; -+#if MFEM_HYPRE_VERSION >= 21600 -+ HYPRE_BigInt jj = Jptr[k] + 1; +- int nprocs, rank; +- MPI_Comm_rank(comm_, &rank); +- MPI_Comm_size(comm_, &nprocs); +- int * dist = new int[nprocs + 1]; +- dist[rank + 1] = parcsr_op->first_row_index + csr_op->num_rows; ++ // Allocate STRUMPACK's CSRMatrixMPI ++ int rank, nprocs; ++ MPI_Comm_rank(comm, &rank); ++ MPI_Comm_size(comm, &nprocs); ++ Array dist(nprocs + 1); + dist[0] = 0; +- MPI_Allgather(MPI_IN_PLACE, 0, MPI_INT, dist + 1, 1, MPI_INT, comm_); +- A_ = new CSRMatrixMPI(csr_op->num_rows, csr_op->i, csr_op->j, +- csr_op->data, dist, comm_, false); +- delete[] dist; ++ dist[rank + 1] = fst_row + (HYPRE_BigInt)m_loc; ++ MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, ++ dist.GetData() + 1, 1, HYPRE_MPI_BIG_INT, comm); ++ ++#if !defined(HYPRE_MIXEDINT) ++ A_ = new strumpack::CSRMatrixMPI( ++ (HYPRE_BigInt)m_loc, Iptr, Jptr, data, dist.GetData(), ++ comm, sym_sparse); +#else -+ HYPRE_Int jj = Jptr[k] + 1; ++ Array II(m_loc+1); ++ for (int i = 0; i <= m_loc; i++) { II[i] = (HYPRE_BigInt)Iptr[i]; } ++ A_ = new strumpack::CSRMatrixMPI( ++ (HYPRE_BigInt)m_loc, II.GetData(), Jptr, data, dist.GetData(), ++ comm, sym_sparse); +#endif -+ if (ii >= jj) { nnz++; } - k++; -- if (ii>=jj) { nnz++; } - } - } - } -@@ -75,28 +140,31 @@ void MUMPSSolver::SetOperator(const Operator &op) - { - nnz = csr_op->num_nonzeros; - } -- -- int * I = new int[nnz]; -- int * J = new int[nnz]; -+ int *I = new int[nnz]; -+ int *J = new int[nnz]; - // Fill in I and J arrays for - // COO format in 1-based indexing -- int k = 0; -- double * data; -+ k = 0; -+ double *data; - if (mat_type) - { -- int l = 0; -+ MUMPS_INT8 l = 0; - data = new double[nnz]; - for (int i = 0; i < n_loc; i++) - { -- for (int j = Iptr[i]; j < Iptr[i + 1]; j++) -+ for (HYPRE_Int j = Iptr[i]; j < Iptr[i + 1]; j++) - { - int ii = row_start + i + 1; -- int jj = Jptr[k] + 1; -+#if MFEM_HYPRE_VERSION >= 21600 -+ HYPRE_BigInt jj = Jptr[k] + 1; -+#else -+ HYPRE_Int jj = Jptr[k] + 1; -+#endif - if (ii >= jj) - { - I[l] = ii; -- J[l] = jj; -+ J[l] = internal::to_int(jj); - data[l++] = csr_op->data[k]; - } - k++; -@@ -107,84 +175,122 @@ void MUMPSSolver::SetOperator(const Operator &op) - { - for (int i = 0; i < n_loc; i++) - { -- for (int j = Iptr[i]; j < Iptr[i + 1]; j++) -+ for (HYPRE_Int j = Iptr[i]; j < Iptr[i + 1]; j++) - { - I[k] = row_start + i + 1; -- J[k] = Jptr[k] + 1; -+ J[k] = internal::to_int(Jptr[k] + 1); - k++; - } - } - data = csr_op->data; - } +- // Everything has been copied or abducted so delete the structure ++ // Everything has been copied so delete the structure + hypre_CSRMatrixDestroy(csr_op); + } -- // new MUMPS object -- if (id) -+ // New MUMPS object or reuse the one from a previous matrix -+ if (!id || !reorder_reuse) - { -- id->job = -2; -- dmumps_c(id); -- delete id; -- } -- id = new DMUMPS_STRUC_C; -- // C to Fortran communicator -- id->comm_fortran = (MUMPS_INT) MPI_Comm_c2f(comm); -- -- // Host is involved in computation -- id->par = 1; -- -- id->sym = mat_type; -- -- // MUMPS init -- id->job = -1; -- dmumps_c(id); -- -- // Set MUMPS default parameters -- SetParameters(); -+ if (id) -+ { -+ id->job = -2; -+ dmumps_c(id); -+ delete id; -+ } -+ id = new DMUMPS_STRUC_C; -+ id->sym = mat_type; - -- id->n = parcsr_op->global_num_rows; -+ // C to Fortran communicator -+ id->comm_fortran = (MUMPS_INT)MPI_Comm_c2f(comm); - -- id->nnz_loc = nnz; -+ // Host is involved in computation -+ id->par = 1; - -- id->irn_loc = I; -+ // MUMPS init -+ id->job = -1; -+ dmumps_c(id); - -- id->jcn_loc = J; -+ // Set MUMPS default parameters -+ SetParameters(); - -- id->a_loc = data; -+ id->n = internal::to_int(parcsr_op->global_num_rows); -+ id->nnz_loc = nnz; -+ id->irn_loc = I; -+ id->jcn_loc = J; -+ id->a_loc = data; + STRUMPACKRowLocMatrix::~STRUMPACKRowLocMatrix() + { +- // Delete the struct +- if ( A_ != NULL ) { delete A_; } ++ delete A_; + } -- // MUMPS Analysis -- id->job = 1; -- dmumps_c(id); -+ // MUMPS analysis -+ id->job = 1; -+ dmumps_c(id); -+ } -+ else -+ { -+ id->irn_loc = I; -+ id->jcn_loc = J; -+ id->a_loc = data; -+ } +-STRUMPACKSolver::STRUMPACKSolver( int argc, char* argv[], MPI_Comm comm ) +- : comm_(comm), +- APtr_(NULL), +- solver_(NULL) ++template ++STRUMPACKSolverBase:: ++STRUMPACKSolverBase(MPI_Comm comm, int argc, char *argv[]) ++ : APtr_(NULL), ++ factor_verbose_(false), ++ solve_verbose_(false), ++ reorder_reuse_(false), ++ nrhs_(-1) + { +- this->Init(argc, argv); ++ solver_ = new STRUMPACKSolverType(comm, argc, argv, false); + } -- // MUMPS Factorization -+ // MUMPS factorization - id->job = 2; -- dmumps_c(id); -+ { -+ const int mem_relax_lim = 200; -+ while (true) -+ { -+ dmumps_c(id); -+ if (id->MUMPS_INFOG(1) < 0) -+ { -+ if (id->MUMPS_INFOG(1) == -8 || id->MUMPS_INFOG(1) == -9) -+ { -+ id->MUMPS_ICNTL(14) += 20; -+ MFEM_VERIFY(id->MUMPS_ICNTL(14) <= mem_relax_lim, -+ "Memory relaxation limit reached for MUMPS factorization"); -+ if (myid == 0 && print_level > 0) -+ { -+ mfem::out << "Re-running MUMPS factorization with memory relaxation " -+ << id->MUMPS_ICNTL(14) << '\n'; -+ } -+ } -+ else -+ { -+ MFEM_ABORT("Error during MUMPS numerical factorization"); -+ } -+ } -+ else { break; } -+ } -+ } +-STRUMPACKSolver::STRUMPACKSolver( STRUMPACKRowLocMatrix & A ) +- : comm_(A.GetComm()), +- APtr_(&A), +- solver_(NULL) ++template ++STRUMPACKSolverBase:: ++STRUMPACKSolverBase(STRUMPACKRowLocMatrix &A, int argc, char *argv[]) ++ : APtr_(&A), ++ factor_verbose_(false), ++ solve_verbose_(false), ++ reorder_reuse_(false), ++ nrhs_(-1) + { +- height = A.Height(); +- width = A.Width(); ++ solver_ = new STRUMPACKSolverType(A.GetComm(), argc, argv, false); ++ SetOperator(A); ++} - hypre_CSRMatrixDestroy(csr_op); - delete [] I; - delete [] J; - if (mat_type) { delete [] data; } +- this->Init(0, NULL); ++template ++STRUMPACKSolverBase:: ++~STRUMPACKSolverBase() ++{ ++ delete solver_; + } -+ id->nrhs = -1; // Set up solution storage on first call to Mult - #if MFEM_MUMPS_VERSION >= 530 - delete [] irhs_loc; -- irhs_loc = new int[n_loc]; -+ delete [] isol_loc; -+ id->nloc_rhs = n_loc; -+ id->lrhs_loc = n_loc; -+ id->lsol_loc = id->MUMPS_INFO(23); -+ irhs_loc = new int[id->lrhs_loc]; -+ isol_loc = new int[id->lsol_loc]; - for (int i = 0; i < n_loc; i++) - { - irhs_loc[i] = row_start + i + 1; - } -+ id->irhs_loc = irhs_loc; -+ id->isol_loc = isol_loc; -+ - row_starts.SetSize(numProcs); - MPI_Allgather(&row_start, 1, MPI_INT, row_starts, 1, MPI_INT, comm); - #else -+ id->lrhs = id->n; - if (myid == 0) - { -- delete [] rhs_glob; - delete [] recv_counts; -- rhs_glob = new double[parcsr_op->global_num_rows]; -+ delete [] displs; - recv_counts = new int[numProcs]; -+ displs = new int[numProcs]; - } - MPI_Gather(&n_loc, 1, MPI_INT, recv_counts, 1, MPI_INT, 0, comm); - if (myid == 0) - { -- delete [] displs; -- displs = new int[numProcs]; - displs[0] = 0; - int s = 0; - for (int k = 0; k < numProcs-1; k++) -@@ -196,54 +302,109 @@ void MUMPSSolver::SetOperator(const Operator &op) - #endif +-STRUMPACKSolver::~STRUMPACKSolver() ++template ++void STRUMPACKSolverBase:: ++SetFromCommandLine() + { +- if ( solver_ != NULL ) { delete solver_; } ++ solver_->options().set_from_command_line(); } --void MUMPSSolver::Mult(const Vector &x, Vector &y) const -+void MUMPSSolver::InitRhsSol(int nrhs) const +-void STRUMPACKSolver::Init( int argc, char* argv[] ) ++template ++void STRUMPACKSolverBase:: ++SetPrintFactorStatistics(bool print_stat) { -- x.HostRead(); -- y.HostReadWrite(); -+ if (id->nrhs != nrhs) -+ { - #if MFEM_MUMPS_VERSION >= 530 -+ delete [] rhs_loc; -+ delete [] sol_loc; -+ rhs_loc = (nrhs > 1) ? new double[nrhs * id->lrhs_loc] : nullptr; -+ sol_loc = new double[nrhs * id->lsol_loc]; -+ id->rhs_loc = rhs_loc; -+ id->sol_loc = sol_loc; -+#else -+ if (myid == 0) -+ { -+ delete rhs_glob; -+ rhs_glob = new double[nrhs * id->lrhs]; -+ id->rhs = rhs_glob; -+ } -+#endif -+ } -+ id->nrhs = nrhs; +- MPI_Comm_size(comm_, &numProcs_); +- MPI_Comm_rank(comm_, &myid_); ++ factor_verbose_ = print_stat; +} -- id->nloc_rhs = x.Size(); -- id->lrhs_loc = x.Size(); -- id->rhs_loc = x.GetData(); -- id->irhs_loc = irhs_loc; -+void MUMPSSolver::Mult(const Vector &x, Vector &y) const +- factor_verbose_ = false; +- solve_verbose_ = false; ++template ++void STRUMPACKSolverBase:: ++SetPrintSolveStatistics(bool print_stat) +{ -+ Array X(1); -+ Array Y(1); -+ X[0] = &x; -+ Y[0] = &y; -+ ArrayMult(X, Y); ++ solve_verbose_ = print_stat; +} -- id->lsol_loc = id->MUMPS_INFO(23); -- id->isol_loc = new int[id->MUMPS_INFO(23)]; -- id->sol_loc = new double[id->MUMPS_INFO(23)]; -+void MUMPSSolver::ArrayMult(const Array &X, -+ Array &Y) const +- solver_ = new StrumpackSparseSolverMPIDist(comm_, argc, argv, +- false); ++template ++void STRUMPACKSolverBase ++::SetRelTol(double rtol) +{ -+ MFEM_ASSERT(X.Size() == Y.Size(), -+ "Number of columns mismatch in MUMPSSolver::Mult!"); -+ InitRhsSol(X.Size()); -+#if MFEM_MUMPS_VERSION >= 530 -+ if (id->nrhs == 1) -+ { -+ MFEM_ASSERT(X.Size() == 1 && X[0], "Missing Vector in MUMPSSolver::Mult!"); -+ X[0]->HostRead(); -+ id->rhs_loc = X[0]->GetData(); -+ } -+ else -+ { -+ for (int i = 0; i < id->nrhs; i++) -+ { -+ MFEM_ASSERT(X[i], "Missing Vector in MUMPSSolver::Mult!"); -+ X[i]->HostRead(); -+ std::copy(X[i]->GetData(), X[i]->GetData() + X[i]->Size(), -+ id->rhs_loc + i * id->lrhs_loc); -+ } -+ } - - // MUMPS solve - id->job = 3; - dmumps_c(id); - -- RedistributeSol(id->isol_loc, id->sol_loc, y.GetData()); -- -- delete [] id->sol_loc; -- delete [] id->isol_loc; -+ RedistributeSol(id->isol_loc, id->sol_loc, id->lsol_loc, Y); - #else -- MPI_Gatherv(x.GetData(), x.Size(), MPI_DOUBLE, -- rhs_glob, recv_counts, -- displs, MPI_DOUBLE, 0, comm); -- -- if (myid == 0) { id->rhs = rhs_glob; } -+ for (int i = 0; i < id->nrhs; i++) -+ { -+ MFEM_ASSERT(X[i], "Missing Vector in MUMPSSolver::Mult!"); -+ X[i]->HostRead(); -+ MPI_Gatherv(X[i]->GetData(), X[i]->Size(), MPI_DOUBLE, -+ id->rhs + i * id->lrhs, recv_counts, displs, MPI_DOUBLE, 0, comm); -+ } ++ solver_->options().set_rel_tol(rtol); + } - // MUMPS solve - id->job = 3; - dmumps_c(id); +-void STRUMPACKSolver::SetFromCommandLine( ) ++template ++void STRUMPACKSolverBase ++::SetAbsTol(double atol) + { +- solver_->options().set_from_command_line( ); ++ solver_->options().set_abs_tol(atol); + } -- MPI_Scatterv(rhs_glob, recv_counts, displs, -- MPI_DOUBLE, y.GetData(), y.Size(), -- MPI_DOUBLE, 0, comm); -+ for (int i = 0; i < id->nrhs; i++) -+ { -+ MFEM_ASSERT(Y[i], "Missing Vector in MUMPSSolver::Mult!"); -+ Y[i]->HostWrite(); -+ MPI_Scatterv(id->rhs + i * id->lrhs, recv_counts, displs, MPI_DOUBLE, -+ Y[i]->GetData(), Y[i]->Size(), MPI_DOUBLE, 0, comm); -+ } - #endif +-void STRUMPACKSolver::SetPrintFactorStatistics( bool print_stat ) ++template ++void STRUMPACKSolverBase ++::SetMaxIter(int max_it) + { +- factor_verbose_ = print_stat; ++ solver_->options().set_maxit(max_it); } - void MUMPSSolver::MultTranspose(const Vector &x, Vector &y) const +-void STRUMPACKSolver::SetPrintSolveStatistics( bool print_stat ) ++template ++void STRUMPACKSolverBase ++::SetReorderingReuse(bool reuse) { -- // Set flag for Transpose Solve -+ // Set flag for transpose solve - id->MUMPS_ICNTL(9) = 0; -- Mult(x,y); -+ Mult(x, y); -+ - // Reset the flag - id->MUMPS_ICNTL(9) = 1; +- solve_verbose_ = print_stat; ++ reorder_reuse_ = reuse; +} - -+void MUMPSSolver::ArrayMultTranspose(const Array &X, -+ Array &Y) const -+{ -+ // Set flag for transpose solve -+ id->MUMPS_ICNTL(9) = 0; -+ ArrayMult(X, Y); + -+ // Reset the flag -+ id->MUMPS_ICNTL(9) = 1; ++template ++void STRUMPACKSolverBase ++::EnableGPU() ++{ ++ solver_->options().enable_gpu(); + } + +-void STRUMPACKSolver::SetKrylovSolver( strumpack::KrylovSolver method ) ++template ++void STRUMPACKSolverBase ++::DisableGPU() + { +- solver_->options().set_Krylov_solver( method ); ++ solver_->options().disable_gpu(); } - void MUMPSSolver::SetPrintLevel(int print_lvl) -@@ -256,34 +417,34 @@ void MUMPSSolver::SetMatrixSymType(MatType mtype) - mat_type = mtype; +-void STRUMPACKSolver::SetReorderingStrategy( strumpack::ReorderingStrategy +- method ) ++template ++void STRUMPACKSolverBase:: ++SetKrylovSolver(strumpack::KrylovSolver method) + { +- solver_->options().set_reordering_method( method ); ++ solver_->options().set_Krylov_solver(method); } --MUMPSSolver::~MUMPSSolver() -+void MUMPSSolver::SetReorderingStrategy(ReorderingStrategy method) +-void STRUMPACKSolver::DisableMatching( ) ++template ++void STRUMPACKSolverBase:: ++SetReorderingStrategy(strumpack::ReorderingStrategy method) { -- if (id) -- { --#if MFEM_MUMPS_VERSION >= 530 -- delete [] irhs_loc; --#else -- delete [] recv_counts; -- delete [] displs; -- delete [] rhs_glob; --#endif -- id->job = -2; -- dmumps_c(id); -- delete id; -- } -+ reorder_method = method; +-#if STRUMPACK_VERSION_MAJOR >= 3 +- solver_->options().set_matching( strumpack::MatchingJob::NONE ); ++ solver_->options().set_reordering_method(method); +} + -+void MUMPSSolver::SetReorderingReuse(bool reuse) -+{ -+ reorder_reuse = reuse; - } - -+#if MFEM_MUMPS_VERSION >= 510 -+void MUMPSSolver::SetBLRTol(double tol) ++template ++void STRUMPACKSolverBase:: ++SetMatching(strumpack::MatchingJob job) +{ -+ blr_tol = tol; ++ solver_->options().set_matching(job); +} -+#endif + - void MUMPSSolver::SetParameters() - { -- // output stream for error messages -+ // Output stream for error messages - id->MUMPS_ICNTL(1) = 6; -- // output stream for diagnosting printing local to each proc -- id->MUMPS_ICNTL(2) = 6; -- // output stream for global info -+ // Output stream for diagnosting printing local to each proc -+ id->MUMPS_ICNTL(2) = 0; -+ // Output stream for global info - id->MUMPS_ICNTL(3) = 6; - // Level of error printing - id->MUMPS_ICNTL(4) = print_level; -- //input matrix format (assembled) -+ // Input matrix format (assembled) - id->MUMPS_ICNTL(5) = 0; - // Use A or A^T - id->MUMPS_ICNTL(9) = 1; -@@ -301,7 +462,6 @@ void MUMPSSolver::SetParameters() - id->MUMPS_ICNTL(18) = 3; - // Schur complement (no Schur complement matrix returned) - id->MUMPS_ICNTL(19) = 0; -- - #if MFEM_MUMPS_VERSION >= 530 - // Distributed RHS - id->MUMPS_ICNTL(20) = 10; -@@ -317,6 +477,53 @@ void MUMPSSolver::SetParameters() - id->MUMPS_ICNTL(22) = 0; - // Max size of working memory (default = based on estimates) - id->MUMPS_ICNTL(23) = 0; -+ // Configure reordering -+ switch (reorder_method) ++template ++void STRUMPACKSolverBase:: ++SetCompression(strumpack::CompressionType type) ++{ ++#if STRUMPACK_VERSION_MAJOR >= 5 ++ solver_->options().set_compression(type); + #else +- solver_->options().set_mc64job( strumpack::MC64Job::NONE ); ++ switch (type) + { -+ case ReorderingStrategy::AUTOMATIC: -+ id->MUMPS_ICNTL(28) = 0; -+ id->MUMPS_ICNTL(7) = 7; -+ id->MUMPS_ICNTL(29) = 0; -+ break; -+ case ReorderingStrategy::AMD: -+ id->MUMPS_ICNTL(28) = 1; -+ id->MUMPS_ICNTL(7) = 0; -+ break; -+ case ReorderingStrategy::AMF: -+ id->MUMPS_ICNTL(28) = 1; -+ id->MUMPS_ICNTL(7) = 2; -+ break; -+ case ReorderingStrategy::PORD: -+ id->MUMPS_ICNTL(28) = 1; -+ id->MUMPS_ICNTL(7) = 4; -+ break; -+ case ReorderingStrategy::METIS: -+ id->MUMPS_ICNTL(28) = 1; -+ id->MUMPS_ICNTL(7) = 5; -+ break; -+ case ReorderingStrategy::PARMETIS: -+ id->MUMPS_ICNTL(28) = 2; -+ id->MUMPS_ICNTL(29) = 2; ++ case strumpack::NONE: ++ solver_->options().disable_BLR(); ++ solver_->options().disable_HSS(); + break; -+ case ReorderingStrategy::SCOTCH: -+ id->MUMPS_ICNTL(28) = 1; -+ id->MUMPS_ICNTL(7) = 3; ++ case strumpack::BLR: ++ solver_->options().enable_BLR(); + break; -+ case ReorderingStrategy::PTSCOTCH: -+ id->MUMPS_ICNTL(28) = 2; -+ id->MUMPS_ICNTL(29) = 1; ++ case strumpack::HSS: ++ solver_->options().enable_HSS(); + break; + default: -+ break; // This should be unreachable -+ } -+ // Option to activate BLR factorization -+#if MFEM_MUMPS_VERSION >= 510 -+ if (blr_tol > 0.0) -+ { -+ id->MUMPS_ICNTL(35) = 1; -+ id->MUMPS_CNTL(7) = blr_tol; ++ MFEM_ABORT("Invalid compression type for STRUMPACK version " << ++ STRUMPACK_VERSION_MAJOR << "!"); ++ break; + } -+#endif + #endif } - #if MFEM_MUMPS_VERSION >= 530 -@@ -330,24 +537,23 @@ int MUMPSSolver::GetRowRank(int i, const Array &row_starts_) const - return std::distance(row_starts_.begin(), up) - 1; +-void STRUMPACKSolver::EnableMatching( ) ++template ++void STRUMPACKSolverBase:: ++SetCompressionRelTol(double rtol) + { +-#if STRUMPACK_VERSION_MAJOR >= 3 +- solver_->options().set_matching +- ( strumpack::MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING ); ++#if STRUMPACK_VERSION_MAJOR >= 5 ++ solver_->options().set_compression_rel_tol(rtol); + #else +- solver_->options().set_mc64job +- ( strumpack::MC64Job::MAX_DIAGONAL_PRODUCT_SCALING ); ++ solver_->options().BLR_options().set_rel_tol(rtol); ++ solver_->options().HSS_options().set_rel_tol(rtol); + #endif } --void MUMPSSolver::RedistributeSol(const int * row_map, -- const double * x, double * y) const -+void MUMPSSolver::RedistributeSol(const int *rmap, const double *x, -+ const int lx_loc, Array &Y) const +-#if STRUMPACK_VERSION_MAJOR >= 3 +-void STRUMPACKSolver::EnableParallelMatching( ) ++template ++void STRUMPACKSolverBase:: ++SetCompressionAbsTol(double atol) { -- int size = id->MUMPS_INFO(23); -- int * send_count = new int[numProcs](); -- for (int i = 0; i < size; i++) -+ int *send_count = new int[numProcs](); -+ for (int i = 0; i < lx_loc; i++) - { -- int j = row_map[i] - 1; -+ int j = rmap[i] - 1; - int row_rank = GetRowRank(j, row_starts); - if (myid == row_rank) { continue; } - send_count[row_rank]++; - } - -- int * recv_count = new int[numProcs]; -+ int *recv_count = new int[numProcs]; - MPI_Alltoall(send_count, 1, MPI_INT, recv_count, 1, MPI_INT, comm); - -- int * send_displ = new int [numProcs]; send_displ[0] = 0; -- int * recv_displ = new int [numProcs]; recv_displ[0] = 0; -+ int *send_displ = new int[numProcs]; send_displ[0] = 0; -+ int *recv_displ = new int[numProcs]; recv_displ[0] = 0; - int sbuff_size = send_count[numProcs-1]; - int rbuff_size = recv_count[numProcs-1]; - for (int k = 0; k < numProcs - 1; k++) -@@ -358,54 +564,59 @@ void MUMPSSolver::RedistributeSol(const int * row_map, - rbuff_size += recv_count[k]; - } - -- int * sendbuf_index = new int[sbuff_size]; -- double * sendbuf_values = new double[sbuff_size]; -- int * soffs = new int[numProcs](); -+ int *sendbuf_index = new int[sbuff_size]; -+ double *sendbuf_values = new double[sbuff_size]; -+ int *recvbuf_index = new int[rbuff_size]; -+ double *recvbuf_values = new double[rbuff_size]; -+ int *soffs = new int[numProcs](); +- solver_->options().set_matching +- ( strumpack::MatchingJob::COMBBLAS ); +-} ++#if STRUMPACK_VERSION_MAJOR >= 5 ++ solver_->options().set_compression_abs_tol(atol); ++#else ++ solver_->options().BLR_options().set_abs_tol(atol); ++ solver_->options().HSS_options().set_abs_tol(atol); + #endif ++} -- for (int i = 0; i < size; i++) -+ for (int i = 0; i < lx_loc; i++) - { -- int j = row_map[i] - 1; -+ int j = rmap[i] - 1; - int row_rank = GetRowRank(j, row_starts); -- if (myid == row_rank) -- { -- int local_index = j - row_start; -- y[local_index] = x[i]; -- } -- else -+ if (myid != row_rank) - { - int k = send_displ[row_rank] + soffs[row_rank]; - sendbuf_index[k] = j; -- sendbuf_values[k] = x[i]; - soffs[row_rank]++; - } - } - -- int * recvbuf_index = new int[rbuff_size]; -- double * recvbuf_values = new double[rbuff_size]; -- MPI_Alltoallv(sendbuf_index, -- send_count, -- send_displ, -- MPI_INT, -- recvbuf_index, -- recv_count, -- recv_displ, -- MPI_INT, -- comm); -- MPI_Alltoallv(sendbuf_values, -- send_count, -- send_displ, -- MPI_DOUBLE, -- recvbuf_values, -- recv_count, -- recv_displ, -- MPI_DOUBLE, -- comm); -- -- // Unpack recv buffer -- for (int i = 0; i < rbuff_size; i++) -+ MPI_Alltoallv(sendbuf_index, send_count, send_displ, MPI_INT, -+ recvbuf_index, recv_count, recv_displ, MPI_INT, comm); -+ -+ for (int rhs = 0; rhs < Y.Size(); rhs++) - { -- int local_index = recvbuf_index[i] - row_start; -- y[local_index] = recvbuf_values[i]; -+ MFEM_ASSERT(Y[rhs], "Missing Vector in MUMPSSolver::Mult!"); -+ Y[rhs]->HostWrite(); -+ -+ std::fill(soffs, soffs + numProcs, 0); -+ for (int i = 0; i < lx_loc; i++) -+ { -+ int j = rmap[i] - 1; -+ int row_rank = GetRowRank(j, row_starts); -+ if (myid == row_rank) -+ { -+ int local_index = j - row_start; -+ (*Y[rhs])(local_index) = x[rhs * lx_loc + i]; -+ } -+ else -+ { -+ int k = send_displ[row_rank] + soffs[row_rank]; -+ sendbuf_values[k] = x[rhs * lx_loc + i]; -+ soffs[row_rank]++; -+ } -+ } -+ -+ MPI_Alltoallv(sendbuf_values, send_count, send_displ, MPI_DOUBLE, -+ recvbuf_values, recv_count, recv_displ, MPI_DOUBLE, comm); -+ -+ // Unpack recv buffer -+ for (int i = 0; i < rbuff_size; i++) -+ { -+ int local_index = recvbuf_index[i] - row_start; -+ (*Y[rhs])(local_index) = recvbuf_values[i]; -+ } - } - - delete [] recvbuf_values; -diff --git a/linalg/mumps.hpp b/linalg/mumps.hpp -index 43604f4cc..9fef9a292 100644 ---- a/linalg/mumps.hpp -+++ b/linalg/mumps.hpp -@@ -16,12 +16,12 @@ - - #ifdef MFEM_USE_MUMPS - #ifdef MFEM_USE_MPI -+ - #include "operator.hpp" - #include "hypre.hpp" -- - #include -+ - #include "dmumps_c.h" --#include - - namespace mfem - { -@@ -31,20 +31,37 @@ namespace mfem - * - * Interface for the distributed MUMPS solver - */ --class MUMPSSolver : public mfem::Solver -+class MUMPSSolver : public Solver - { - public: - enum MatType - { - UNSYMMETRIC = 0, -- SYMMETRIC_INDEFINITE = 1, -- SYMMETRIC_POSITIVE_DEFINITE = 2 -+ SYMMETRIC_POSITIVE_DEFINITE = 1, -+ SYMMETRIC_INDEFINITE = 2 -+ }; -+ -+ enum ReorderingStrategy -+ { -+ AUTOMATIC = 0, -+ AMD, -+ AMF, -+ PORD, -+ METIS, -+ PARMETIS, -+ SCOTCH, -+ PTSCOTCH - }; - - /** -- * @brief Default Constructor -+ * @brief Constructor with MPI_Comm parameter. - */ -- MUMPSSolver() {} -+ MUMPSSolver(MPI_Comm comm_); -+ -+ /** -+ * @brief Constructor with a HypreParMatrix Operator. -+ */ -+ MUMPSSolver(const Operator &op); - - /** - * @brief Set the Operator and perform factorization -@@ -62,6 +79,7 @@ public: - * @param y Solution vector - */ - void Mult(const Vector &x, Vector &y) const; -+ void ArrayMult(const Array &X, Array &Y) const; - - /** - * @brief Transpose Solve y = Op^{-T} x. -@@ -70,13 +88,15 @@ public: - * @param y Solution vector - */ - void MultTranspose(const Vector &x, Vector &y) const; -+ void ArrayMultTranspose(const Array &X, -+ Array &Y) const; - - /** - * @brief Set the error print level for MUMPS - * - * @param print_lvl Print level - * -- * @note This method has to be called before SetOperator. -+ * @note This method has to be called before SetOperator - */ - void SetPrintLevel(int print_lvl); - -@@ -88,65 +108,109 @@ public: - * - * @param mtype Matrix type - * -- * @note This method has to be called before SetOperator. -+ * @note This method has to be called before SetOperator - */ - void SetMatrixSymType(MatType mtype); - -+ /** -+ * @brief Set the reordering strategy -+ * -+ * Supported reorderings are: AUTOMATIC, AMD, AMF, PORD, METIS, PARMETIS, -+ * SCOTCH, and PTSCOTCH -+ * -+ * @param method Reordering method -+ * -+ * @note This method has to be called before SetOperator -+ */ -+ void SetReorderingStrategy(ReorderingStrategy method); -+ -+ /** -+ * @brief Set the flag controlling reuse of the symbolic factorization -+ * for multiple operators -+ * -+ * @param reuse Flag to reuse symbolic factorization -+ * -+ * @note This method has to be called before repeated calls to SetOperator -+ */ -+ void SetReorderingReuse(bool reuse); -+ -+ /** -+ * @brief Set the tolerance for activating block low-rank (BLR) approximate -+ * factorization -+ * -+ * @param tol Tolerance -+ * -+ * @note This method has to be called before SetOperator -+ */ -+#if MFEM_MUMPS_VERSION >= 510 -+ void SetBLRTol(double tol); -+#endif -+ - // Destructor - ~MUMPSSolver(); - - private: -- - // MPI communicator - MPI_Comm comm; - - // Number of procs - int numProcs; - -- // local mpi id -+ // MPI rank - int myid; - -- // parameter controlling the matrix type -- MatType mat_type = MatType::UNSYMMETRIC; -+ // Parameter controlling the matrix type -+ MatType mat_type; -+ -+ // Parameter controlling the printing level -+ int print_level; -+ -+ // Parameter controlling the reordering strategy -+ ReorderingStrategy reorder_method; -+ -+ // Parameter controlling whether or not to reuse the symbolic factorization -+ // for multiple calls to SetOperator -+ bool reorder_reuse; - -- // parameter controlling the printing level -- int print_level = 0; -+#if MFEM_MUMPS_VERSION >= 510 -+ // Parameter controlling the Block Low-Rank (BLR) feature in MUMPS -+ double blr_tol; -+#endif - -- // local row offsets -+ // Local row offsets - int row_start; - - // MUMPS object -- DMUMPS_STRUC_C *id=nullptr; -+ DMUMPS_STRUC_C *id; -+ -+ // Method for initialization -+ void Init(MPI_Comm comm_); - - // Method for setting MUMPS internal parameters - void SetParameters(); - --#if MFEM_MUMPS_VERSION >= 530 -+ // Method for configuring storage for distributed/centralized RHS and -+ // solution -+ void InitRhsSol(int nrhs) const; - -- // row offsets array on all procs -+#if MFEM_MUMPS_VERSION >= 530 -+ // Row offests array on all procs - Array row_starts; - -- // row map -- int * irhs_loc = nullptr; -+ // Row maps and storage for distributed RHS and solution -+ int *irhs_loc, *isol_loc; -+ mutable double *rhs_loc, *sol_loc; - - // These two methods are needed to distribute the local solution - // vectors returned by MUMPS to the original MFEM parallel partition - int GetRowRank(int i, const Array &row_starts_) const; -- -- void RedistributeSol(const int * row_map, -- const double * x, -- double * y) const; -+ void RedistributeSol(const int *rmap, const double *x, const int lx_loc, -+ Array &Y) const; - #else -- -- // Arrays needed for MPI_Gather and MPI_Scatter -- int * recv_counts = nullptr; -- -- int * displs = nullptr; -- -- double * rhs_glob = nullptr; -- -+ // Arrays needed for MPI_Gatherv and MPI_Scatterv -+ int *recv_counts, *displs; -+ mutable double *rhs_glob; - #endif -- - }; // mfem::MUMPSSolver class - - } // namespace mfem -diff --git a/linalg/strumpack.cpp b/linalg/strumpack.cpp -index f0ff11ab4..5b54994aa 100644 ---- a/linalg/strumpack.cpp -+++ b/linalg/strumpack.cpp -@@ -16,238 +16,470 @@ - - #include "strumpack.hpp" - --using namespace std; --using namespace strumpack; -- - namespace mfem - { - - STRUMPACKRowLocMatrix::STRUMPACKRowLocMatrix(MPI_Comm comm, -- int num_loc_rows, int first_loc_row, -- int glob_nrows, int glob_ncols, -- int *I, int *J, double *data) -- : comm_(comm), A_(NULL) -+ int num_loc_rows, -+ HYPRE_BigInt first_loc_row, -+ HYPRE_BigInt glob_nrows, -+ HYPRE_BigInt glob_ncols, -+ int *I, HYPRE_BigInt *J, -+ double *data, bool sym_sparse) - { - // Set mfem::Operator member data - height = num_loc_rows; - width = num_loc_rows; - -- // Allocate STRUMPACK's CSRMatrixMPI -- int nprocs, rank; -- MPI_Comm_rank(comm_, &rank); -- MPI_Comm_size(comm_, &nprocs); -- int * dist = new int[nprocs + 1]; -- dist[rank + 1] = first_loc_row + num_loc_rows; -+ // Allocate STRUMPACK's CSRMatrixMPI (copies all inputs) -+ int rank, nprocs; -+ MPI_Comm_rank(comm, &rank); -+ MPI_Comm_size(comm, &nprocs); -+ Array dist(nprocs + 1); - dist[0] = 0; -- MPI_Allgather(MPI_IN_PLACE, 0, MPI_INT, dist + 1, 1, MPI_INT, comm_); -- A_ = new CSRMatrixMPI(num_loc_rows, I, J, data, dist, comm_, false); -- delete[] dist; -+ dist[rank + 1] = first_loc_row + (HYPRE_BigInt)num_loc_rows; -+ MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, -+ dist.GetData() + 1, 1, HYPRE_MPI_BIG_INT, comm); -+ -+#if !(defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT)) -+ A_ = new strumpack::CSRMatrixMPI( -+ (HYPRE_BigInt)num_loc_rows, I, J, data, dist.GetData(), -+ comm, sym_sparse); -+#else -+ Array II(num_loc_rows+1); -+ for (int i = 0; i <= num_loc_rows; i++) { II[i] = (HYPRE_BigInt)I[i]; } -+ A_ = new strumpack::CSRMatrixMPI( -+ (HYPRE_BigInt)num_loc_rows, II.GetData(), J, data, dist.GetData(), -+ comm, sym_sparse); -+#endif - } - --STRUMPACKRowLocMatrix::STRUMPACKRowLocMatrix(const HypreParMatrix & hypParMat) -- : comm_(hypParMat.GetComm()), -- A_(NULL) -+STRUMPACKRowLocMatrix::STRUMPACKRowLocMatrix(const Operator &op, -+ bool sym_sparse) - { -- // First cast the parameter to a hypre_ParCSRMatrix -- hypre_ParCSRMatrix * parcsr_op = -- (hypre_ParCSRMatrix *)const_cast(hypParMat); -+ const HypreParMatrix *APtr = dynamic_cast(&op); -+ MFEM_VERIFY(APtr, "Not a compatible matrix type"); -+ MPI_Comm comm = APtr->GetComm(); - -- MFEM_ASSERT(parcsr_op != NULL,"STRUMPACK: const_cast failed in SetOperator"); -+ // Set mfem::Operator member data -+ height = op.Height(); -+ width = op.Width(); - -- // Create the CSRMatrixMPI A_ by borrowing the internal data from a -- // hypre_CSRMatrix. -- hypParMat.HostRead(); -- hypre_CSRMatrix * csr_op = hypre_MergeDiagAndOffd(parcsr_op); -- hypParMat.HypreRead(); -- hypre_CSRMatrixSetDataOwner(csr_op,0); -+ // First cast the parameter to a hypre_ParCSRMatrix -+ hypre_ParCSRMatrix *parcsr_op = -+ (hypre_ParCSRMatrix *)const_cast(*APtr); -+ -+ // Create the CSRMatrixMPI A by taking the internal data from a -+ // hypre_CSRMatrix -+ APtr->HostRead(); -+ hypre_CSRMatrix *csr_op = hypre_MergeDiagAndOffd(parcsr_op); -+ APtr->HypreRead(); -+ HYPRE_Int *Iptr = csr_op->i; - #if MFEM_HYPRE_VERSION >= 21600 -- // For now, this method assumes that HYPRE_Int is int. Also, csr_op->num_cols -- // is of type HYPRE_Int, so if we want to check for big indices in -- // csr_op->big_j, we'll have to check all entries and that check will only be -- // necessary in HYPRE_MIXEDINT mode which is not supported at the moment. -- hypre_CSRMatrixBigJtoJ(csr_op); -+ HYPRE_BigInt *Jptr = csr_op->big_j; -+#else -+ HYPRE_Int *Jptr = csr_op->j; - #endif -+ double *data = csr_op->data; - -- height = csr_op->num_rows; -- width = csr_op->num_rows; -+ HYPRE_BigInt fst_row = parcsr_op->first_row_index; -+ HYPRE_Int m_loc = csr_op->num_rows; - -- int nprocs, rank; -- MPI_Comm_rank(comm_, &rank); -- MPI_Comm_size(comm_, &nprocs); -- int * dist = new int[nprocs + 1]; -- dist[rank + 1] = parcsr_op->first_row_index + csr_op->num_rows; -+ // Allocate STRUMPACK's CSRMatrixMPI -+ int rank, nprocs; -+ MPI_Comm_rank(comm, &rank); -+ MPI_Comm_size(comm, &nprocs); -+ Array dist(nprocs + 1); - dist[0] = 0; -- MPI_Allgather(MPI_IN_PLACE, 0, MPI_INT, dist + 1, 1, MPI_INT, comm_); -- A_ = new CSRMatrixMPI(csr_op->num_rows, csr_op->i, csr_op->j, -- csr_op->data, dist, comm_, false); -- delete[] dist; -+ dist[rank + 1] = fst_row + (HYPRE_BigInt)m_loc; -+ MPI_Allgather(MPI_IN_PLACE, 0, MPI_DATATYPE_NULL, -+ dist.GetData() + 1, 1, HYPRE_MPI_BIG_INT, comm); -+ -+#if !defined(HYPRE_MIXEDINT) -+ A_ = new strumpack::CSRMatrixMPI( -+ (HYPRE_BigInt)m_loc, Iptr, Jptr, data, dist.GetData(), -+ comm, sym_sparse); -+#else -+ Array II(m_loc+1); -+ for (int i = 0; i <= m_loc; i++) { II[i] = (HYPRE_BigInt)Iptr[i]; } -+ A_ = new strumpack::CSRMatrixMPI( -+ (HYPRE_BigInt)m_loc, II.GetData(), Jptr, data, dist.GetData(), -+ comm, sym_sparse); -+#endif - -- // Everything has been copied or abducted so delete the structure -+ // Everything has been copied so delete the structure - hypre_CSRMatrixDestroy(csr_op); - } - - STRUMPACKRowLocMatrix::~STRUMPACKRowLocMatrix() - { -- // Delete the struct -- if ( A_ != NULL ) { delete A_; } -+ delete A_; - } - --STRUMPACKSolver::STRUMPACKSolver( int argc, char* argv[], MPI_Comm comm ) -- : comm_(comm), -- APtr_(NULL), -- solver_(NULL) -+template -+STRUMPACKSolverBase:: -+STRUMPACKSolverBase(MPI_Comm comm, int argc, char *argv[]) -+ : APtr_(NULL), -+ factor_verbose_(false), -+ solve_verbose_(false), -+ reorder_reuse_(false), -+ nrhs_(-1) - { -- this->Init(argc, argv); -+ solver_ = new STRUMPACKSolverType(comm, argc, argv, false); - } - --STRUMPACKSolver::STRUMPACKSolver( STRUMPACKRowLocMatrix & A ) -- : comm_(A.GetComm()), -- APtr_(&A), -- solver_(NULL) -+template -+STRUMPACKSolverBase:: -+STRUMPACKSolverBase(STRUMPACKRowLocMatrix &A, int argc, char *argv[]) -+ : APtr_(&A), -+ factor_verbose_(false), -+ solve_verbose_(false), -+ reorder_reuse_(false), -+ nrhs_(-1) - { -- height = A.Height(); -- width = A.Width(); -+ solver_ = new STRUMPACKSolverType(A.GetComm(), argc, argv, false); -+ SetOperator(A); -+} - -- this->Init(0, NULL); -+template -+STRUMPACKSolverBase:: -+~STRUMPACKSolverBase() -+{ -+ delete solver_; - } - --STRUMPACKSolver::~STRUMPACKSolver() -+template -+void STRUMPACKSolverBase:: -+SetFromCommandLine() - { -- if ( solver_ != NULL ) { delete solver_; } -+ solver_->options().set_from_command_line(); - } - --void STRUMPACKSolver::Init( int argc, char* argv[] ) -+template -+void STRUMPACKSolverBase:: -+SetPrintFactorStatistics(bool print_stat) - { -- MPI_Comm_size(comm_, &numProcs_); -- MPI_Comm_rank(comm_, &myid_); -+ factor_verbose_ = print_stat; -+} - -- factor_verbose_ = false; -- solve_verbose_ = false; -+template -+void STRUMPACKSolverBase:: -+SetPrintSolveStatistics(bool print_stat) -+{ -+ solve_verbose_ = print_stat; -+} - -- solver_ = new StrumpackSparseSolverMPIDist(comm_, argc, argv, -- false); -+template -+void STRUMPACKSolverBase -+::SetRelTol(double rtol) -+{ -+ solver_->options().set_rel_tol(rtol); - } - --void STRUMPACKSolver::SetFromCommandLine( ) -+template -+void STRUMPACKSolverBase -+::SetAbsTol(double atol) - { -- solver_->options().set_from_command_line( ); -+ solver_->options().set_abs_tol(atol); - } - --void STRUMPACKSolver::SetPrintFactorStatistics( bool print_stat ) -+template -+void STRUMPACKSolverBase -+::SetMaxIter(int max_it) - { -- factor_verbose_ = print_stat; -+ solver_->options().set_maxit(max_it); - } - --void STRUMPACKSolver::SetPrintSolveStatistics( bool print_stat ) -+template -+void STRUMPACKSolverBase -+::SetReorderingReuse(bool reuse) - { -- solve_verbose_ = print_stat; -+ reorder_reuse_ = reuse; -+} -+ -+template -+void STRUMPACKSolverBase -+::EnableGPU() -+{ -+ solver_->options().enable_gpu(); - } - --void STRUMPACKSolver::SetKrylovSolver( strumpack::KrylovSolver method ) -+template -+void STRUMPACKSolverBase -+::DisableGPU() - { -- solver_->options().set_Krylov_solver( method ); -+ solver_->options().disable_gpu(); - } - --void STRUMPACKSolver::SetReorderingStrategy( strumpack::ReorderingStrategy -- method ) -+template -+void STRUMPACKSolverBase:: -+SetKrylovSolver(strumpack::KrylovSolver method) - { -- solver_->options().set_reordering_method( method ); -+ solver_->options().set_Krylov_solver(method); - } - --void STRUMPACKSolver::DisableMatching( ) -+template -+void STRUMPACKSolverBase:: -+SetReorderingStrategy(strumpack::ReorderingStrategy method) - { --#if STRUMPACK_VERSION_MAJOR >= 3 -- solver_->options().set_matching( strumpack::MatchingJob::NONE ); -+ solver_->options().set_reordering_method(method); -+} -+ -+template -+void STRUMPACKSolverBase:: -+SetMatching(strumpack::MatchingJob job) -+{ -+ solver_->options().set_matching(job); -+} -+ -+template -+void STRUMPACKSolverBase:: -+SetCompression(strumpack::CompressionType type) -+{ -+#if STRUMPACK_VERSION_MAJOR >= 5 -+ solver_->options().set_compression(type); - #else -- solver_->options().set_mc64job( strumpack::MC64Job::NONE ); -+ switch (type) -+ { -+ case strumpack::NONE: -+ solver_->options().disable_BLR(); -+ solver_->options().disable_HSS(); -+ break; -+ case strumpack::BLR: -+ solver_->options().enable_BLR(); -+ break; -+ case strumpack::HSS: -+ solver_->options().enable_HSS(); -+ break; -+ default: -+ MFEM_ABORT("Invalid compression type for STRUMPACK version " << -+ STRUMPACK_VERSION_MAJOR << "!"); -+ break; -+ } - #endif - } - --void STRUMPACKSolver::EnableMatching( ) -+template -+void STRUMPACKSolverBase:: -+SetCompressionRelTol(double rtol) - { --#if STRUMPACK_VERSION_MAJOR >= 3 -- solver_->options().set_matching -- ( strumpack::MatchingJob::MAX_DIAGONAL_PRODUCT_SCALING ); -+#if STRUMPACK_VERSION_MAJOR >= 5 -+ solver_->options().set_compression_rel_tol(rtol); - #else -- solver_->options().set_mc64job -- ( strumpack::MC64Job::MAX_DIAGONAL_PRODUCT_SCALING ); -+ solver_->options().BLR_options().set_rel_tol(rtol); -+ solver_->options().HSS_options().set_rel_tol(rtol); - #endif - } - --#if STRUMPACK_VERSION_MAJOR >= 3 --void STRUMPACKSolver::EnableParallelMatching( ) -+template -+void STRUMPACKSolverBase:: -+SetCompressionAbsTol(double atol) - { -- solver_->options().set_matching -- ( strumpack::MatchingJob::COMBBLAS ); --} -+#if STRUMPACK_VERSION_MAJOR >= 5 -+ solver_->options().set_compression_abs_tol(atol); -+#else -+ solver_->options().BLR_options().set_abs_tol(atol); -+ solver_->options().HSS_options().set_abs_tol(atol); - #endif -+} - --void STRUMPACKSolver::SetRelTol( double rtol ) -+#if STRUMPACK_VERSION_MAJOR >= 5 -+template -+void STRUMPACKSolverBase:: -+SetCompressionLossyPrecision(int precision) - { -- solver_->options().set_rel_tol( rtol ); -+ solver_->options().set_lossy_precision(precision); - } - --void STRUMPACKSolver::SetAbsTol( double atol ) -+template -+void STRUMPACKSolverBase:: -+SetCompressionButterflyLevels(int levels) - { -- solver_->options().set_abs_tol( atol ); -+ solver_->options().HODLR_options().set_butterfly_levels(levels); - } -+#endif - -- --void STRUMPACKSolver::Mult( const Vector & x, Vector & y ) const -+template -+void STRUMPACKSolverBase:: -+SetOperator(const Operator &op) - { -- MFEM_ASSERT(APtr_ != NULL, -- "STRUMPACK Error: The operator must be set before" -- " the system can be solved."); -- MFEM_ASSERT(x.Size() == Width(), "invalid x.Size() = " << x.Size() -- << ", expected size = " << Width()); -- MFEM_ASSERT(y.Size() == Height(), "invalid y.Size() = " << y.Size() -- << ", expected size = " << Height()); -+ // Verify that we have a compatible operator -+ bool first_mat = !APtr_; -+ APtr_ = dynamic_cast(&op); -+ MFEM_VERIFY(APtr_, -+ "STRUMPACK: Operator is not a STRUMPACKRowLocMatrix!"); - -- double* yPtr = y.HostWrite(); -- const double* xPtr = x.HostRead(); -+ // Set mfem::Operator member data -+ height = op.Height(); -+ width = op.Width(); - -- solver_->options().set_verbose( factor_verbose_ ); -- ReturnCode ret = solver_->factor(); -- switch (ret) -+ if (first_mat || !reorder_reuse_) - { -- case ReturnCode::SUCCESS: break; -- case ReturnCode::MATRIX_NOT_SET: -- { -- MFEM_ABORT("STRUMPACK: Matrix was not set!"); -- } -- break; -- case ReturnCode::REORDERING_ERROR: -- { -- MFEM_ABORT("STRUMPACK: Matrix reordering failed!"); -- } -- break; -- default: -- { -- MFEM_ABORT("STRUMPACK: 'factor()' error code = " << ret); -- } -+ solver_->set_matrix(*(APtr_->GetA())); -+ } -+ else -+ { -+ solver_->update_matrix_values(*(APtr_->GetA())); - } -- solver_->options().set_verbose( solve_verbose_ ); -- solver_->solve(xPtr, yPtr); -+} - -+template -+void STRUMPACKSolverBase:: -+FactorInternal() const -+{ -+ MFEM_ASSERT(APtr_, -+ "STRUMPACK: Operator must be set before the system can be " -+ "solved!"); -+ solver_->options().set_verbose(factor_verbose_); -+ strumpack::ReturnCode ret = solver_->factor(); -+ if (ret != strumpack::ReturnCode::SUCCESS) -+ { -+#if STRUMPACK_VERSION_MAJOR >= 7 -+ MFEM_ABORT("STRUMPACK: Factor failed with return code " << ret << "!"); -+#else -+ MFEM_ABORT("STRUMPACK: Factor failed!"); -+#endif -+ } - } - --void STRUMPACKSolver::SetOperator( const Operator & op ) -+template -+void STRUMPACKSolverBase:: -+Mult(const Vector &x, Vector &y) const - { -- // Verify that we have a compatible operator -- APtr_ = dynamic_cast(&op); -- if ( APtr_ == NULL ) -+ MFEM_ASSERT(x.Size() == Width(), -+ "STRUMPACK: Invalid x.Size() = " << x.Size() << -+ ", expected size = " << Width() << "!"); -+ MFEM_ASSERT(y.Size() == Height(), -+ "STRUMPACK: Invalid y.Size() = " << y.Size() << -+ ", expected size = " << Height() << "!"); -+ -+ const double *xPtr = x.HostRead(); -+ double *yPtr = y.HostReadWrite(); -+ -+ FactorInternal(); -+ solver_->options().set_verbose(solve_verbose_); -+ strumpack::ReturnCode ret = solver_->solve(xPtr, yPtr, false); -+ if (ret != strumpack::ReturnCode::SUCCESS) - { -- mfem_error("STRUMPACKSolver::SetOperator : not STRUMPACKRowLocMatrix!"); -+#if STRUMPACK_VERSION_MAJOR >= 7 -+ MFEM_ABORT("STRUMPACK: Solve failed with return code " << ret << "!"); -+#else -+ MFEM_ABORT("STRUMPACK: Solve failed!"); -+#endif - } -+} - -- solver_->set_matrix( *(APtr_->getA()) ); -+template -+void STRUMPACKSolverBase:: -+ArrayMult(const Array &X, Array &Y) const -+{ -+ MFEM_ASSERT(X.Size() == Y.Size(), -+ "Number of columns mismatch in STRUMPACK solve!"); -+ if (X.Size() == 1) -+ { -+ nrhs_ = 1; -+ MFEM_ASSERT(X[0] && Y[0], "Missing Vector in STRUMPACK solve!"); -+ Mult(*X[0], *Y[0]); -+ return; -+ } - -- // Set mfem::Operator member data -- height = op.Height(); -- width = op.Width(); -+ // Multiple RHS case -+ int ldx = Height(); -+ if (nrhs_ != X.Size()) -+ { -+ rhs_.SetSize(X.Size() * ldx); -+ sol_.SetSize(X.Size() * ldx); -+ nrhs_ = X.Size(); -+ } -+ for (int i = 0; i < nrhs_; i++) -+ { -+ MFEM_ASSERT(X[i] && X[i]->Size() == Width(), -+ "STRUMPACK: Missing or invalid sized RHS Vector in solve!"); -+ Vector s(rhs_, i * ldx, ldx); -+ s = *X[i]; -+ } -+ const double *xPtr = rhs_.HostRead(); -+ double *yPtr = sol_.HostReadWrite(); -+ -+ FactorInternal(); -+ solver_->options().set_verbose(solve_verbose_); -+ strumpack::ReturnCode ret = solver_->solve(nrhs_, xPtr, ldx, yPtr, ldx, -+ false); -+ if (ret != strumpack::ReturnCode::SUCCESS) -+ { -+#if STRUMPACK_VERSION_MAJOR >= 7 -+ MFEM_ABORT("STRUMPACK: Solve failed with return code " << ret << "!"); -+#else -+ MFEM_ABORT("STRUMPACK: Solve failed!"); -+#endif -+ } - -+ for (int i = 0; i < nrhs_; i++) -+ { -+ MFEM_ASSERT(Y[i] && Y[i]->Size() == Width(), -+ "STRUMPACK: Missing or invalid sized solution Vector in solve!"); -+ Vector s(sol_, i * ldx, ldx); -+ *Y[i] = s; -+ } - } - -+STRUMPACKSolver:: -+STRUMPACKSolver(MPI_Comm comm) -+ : STRUMPACKSolverBase> -+ (comm, 0, NULL) {} -+ -+STRUMPACKSolver:: -+STRUMPACKSolver(STRUMPACKRowLocMatrix &A) -+ : STRUMPACKSolverBase> -+ (A, 0, NULL) {} -+ -+STRUMPACKSolver:: -+STRUMPACKSolver(MPI_Comm comm, int argc, char *argv[]) -+ : STRUMPACKSolverBase> -+ (comm, argc, argv) {} -+ -+STRUMPACKSolver:: -+STRUMPACKSolver(STRUMPACKRowLocMatrix &A, int argc, char *argv[]) -+ : STRUMPACKSolverBase> -+ (A, argc, argv) {} -+ -+#if STRUMPACK_VERSION_MAJOR >= 7 -+STRUMPACKMixedPrecisionSolver:: -+STRUMPACKMixedPrecisionSolver(MPI_Comm comm) -+ : STRUMPACKSolverBase> -+ (comm, 0, NULL) {} -+ -+STRUMPACKMixedPrecisionSolver:: -+STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A) -+ : STRUMPACKSolverBase> -+ (A, 0, NULL) {} -+ -+STRUMPACKMixedPrecisionSolver:: -+STRUMPACKMixedPrecisionSolver(MPI_Comm comm, int argc, char *argv[]) -+ : STRUMPACKSolverBase> -+ (comm, argc, argv) {} -+ -+STRUMPACKMixedPrecisionSolver:: -+STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A, int argc, char *argv[]) -+ : STRUMPACKSolverBase> -+ (A, argc, argv) {} -+#endif -+ -+template class STRUMPACKSolverBase>; -+#if STRUMPACK_VERSION_MAJOR >= 7 -+template class STRUMPACKSolverBase>; -+#endif -+ - } // mfem namespace - - #endif // MFEM_USE_MPI -diff --git a/linalg/strumpack.hpp b/linalg/strumpack.hpp -index 300b8415e..6a8ac4c30 100644 ---- a/linalg/strumpack.hpp -+++ b/linalg/strumpack.hpp -@@ -16,12 +16,14 @@ - - #ifdef MFEM_USE_STRUMPACK - #ifdef MFEM_USE_MPI -+ - #include "operator.hpp" - #include "hypre.hpp" -- - #include - -+// STRUMPACK headers - #include "StrumpackSparseSolverMPIDist.hpp" -+#include "StrumpackSparseSolverMixedPrecisionMPIDist.hpp" - - namespace mfem - { -@@ -34,63 +36,80 @@ public: - be of size (local) nrows by (global) glob_ncols. The new parallel matrix - contains copies of all input arrays (so they can be deleted). */ - STRUMPACKRowLocMatrix(MPI_Comm comm, -- int num_loc_rows, int first_loc_row, -- int glob_nrows, int glob_ncols, -- int *I, int *J, double *data); -+ int num_loc_rows, HYPRE_BigInt first_loc_row, -+ HYPRE_BigInt glob_nrows, HYPRE_BigInt glob_ncols, -+ int *I, HYPRE_BigInt *J, double *data, -+ bool sym_sparse = false); - - /** Creates a copy of the parallel matrix hypParMat in STRUMPACK's RowLoc - format. All data is copied so the original matrix may be deleted. */ -- STRUMPACKRowLocMatrix(const HypreParMatrix & hypParMat); -+ STRUMPACKRowLocMatrix(const Operator &op, bool sym_sparse = false); - - ~STRUMPACKRowLocMatrix(); - - void Mult(const Vector &x, Vector &y) const - { -- mfem_error("STRUMPACKRowLocMatrix::Mult(...)\n" -- " matrix vector products are not supported."); -+ MFEM_ABORT("STRUMPACKRowLocMatrix::Mult: Matrix vector products are not " -+ "supported!"); - } - -- MPI_Comm GetComm() const { return comm_; } -+ MPI_Comm GetComm() const { return A_->comm(); } - -- strumpack::CSRMatrixMPI* getA() const { return A_; } -+ strumpack::CSRMatrixMPI *GetA() const { return A_; } - - private: -- MPI_Comm comm_; -- strumpack::CSRMatrixMPI* A_; -- --}; // mfem::STRUMPACKRowLocMatrix -+ strumpack::CSRMatrixMPI *A_; -+}; - - /** The MFEM STRUMPACK Direct Solver class. - - The mfem::STRUMPACKSolver class uses the STRUMPACK library to perform LU - factorization of a parallel sparse matrix. The solver is capable of handling -- double precision types. See http://portal.nersc.gov/project/sparse/strumpack -+ double precision types. See -+ http://portal.nersc.gov/project/sparse/strumpack/. - */ --class STRUMPACKSolver : public mfem::Solver -+template -+class STRUMPACKSolverBase : public Solver - { --public: -- // Constructor with MPI_Comm parameter. -- STRUMPACKSolver( int argc, char* argv[], MPI_Comm comm ); -+protected: -+ // Constructor with MPI_Comm parameter and command line arguments. -+ STRUMPACKSolverBase(MPI_Comm comm, int argc, char *argv[]); - -- // Constructor with STRUMPACK Matrix Object. -- STRUMPACKSolver( STRUMPACKRowLocMatrix & A); -+ // Constructor with STRUMPACK matrix object and command line arguments. -+ STRUMPACKSolverBase(STRUMPACKRowLocMatrix &A, int argc, char *argv[]); - -+public: - // Default destructor. -- ~STRUMPACKSolver( void ); -+ virtual ~STRUMPACKSolverBase(); - - // Factor and solve the linear system y = Op^{-1} x. -- void Mult( const Vector & x, Vector & y ) const; -+ void Mult(const Vector &x, Vector &y) const; -+ void ArrayMult(const Array &X, Array &Y) const; - - // Set the operator. -- void SetOperator( const Operator & op ); -+ void SetOperator(const Operator &op); - - // Set various solver options. Refer to STRUMPACK documentation for - // details. -- void SetFromCommandLine( ); -- void SetPrintFactorStatistics( bool print_stat ); -- void SetPrintSolveStatistics( bool print_stat ); -- void SetRelTol( double rtol ); -- void SetAbsTol( double atol ); -+ void SetFromCommandLine(); -+ void SetPrintFactorStatistics(bool print_stat); -+ void SetPrintSolveStatistics(bool print_stat); -+ -+ // Set tolerances and iterations for iterative solvers. Compression -+ // tolerance is handled below. -+ void SetRelTol(double rtol); -+ void SetAbsTol(double atol); -+ void SetMaxIter(int max_it); -+ -+ // Set the flag controlling reuse of the symbolic factorization for multiple -+ // operators. This method has to be called before repeated calls to -+ // SetOperator. -+ void SetReorderingReuse(bool reuse); -+ -+ // Enable or not GPU off-loading available if STRUMPACK was compiled with CUDA. Note -+ // that input/output from MFEM to STRUMPACK is all still through host memory. -+ void EnableGPU(); -+ void DisableGPU(); - - /** - * STRUMPACK is an (approximate) direct solver. It can be used as a direct -@@ -100,70 +119,151 @@ public: - * used without preconditioner. - * - * Supported values are: -- * AUTO: Use iterative refinement if no HSS compression is used, -- * otherwise use GMRes. -- * DIRECT: No outer iterative solver, just a single application of -- * the multifrontal solver. -- * REFINE: Iterative refinement. -- * PREC_GMRES: Preconditioned GMRes. -- * The preconditioner is the (approx) multifrontal solver. -- * GMRES: UN-preconditioned GMRes. (for testing mainly) -- * PREC_BICGSTAB: Preconditioned BiCGStab. -- * The preconditioner is the (approx) multifrontal solver. -+ * AUTO: Use iterative refinement if no HSS compression is -+ * used, otherwise use GMRes -+ * DIRECT: No outer iterative solver, just a single application -+ * of the multifrontal solver -+ * REFINE: Iterative refinement -+ * PREC_GMRES: Preconditioned GMRes -+ * The preconditioner is the (approx) multifrontal solver -+ * GMRES: UN-preconditioned GMRes (for testing mainly) -+ * PREC_BICGSTAB: Preconditioned BiCGStab -+ * The preconditioner is the (approx) multifrontal solver - * BICGSTAB: UN-preconditioned BiCGStab. (for testing mainly) - */ -- void SetKrylovSolver( strumpack::KrylovSolver method ); -+ void SetKrylovSolver(strumpack::KrylovSolver method); - - /** - * Supported reorderings are: -- * METIS, PARMETIS, SCOTCH, PTSCOTCH, RCM -+ * NATURAL: Do not reorder the system -+ * METIS: Use Metis nested-dissection reordering (default) -+ * PARMETIS: Use ParMetis nested-dissection reordering -+ * SCOTCH: Use Scotch nested-dissection reordering -+ * PTSCOTCH: Use PT-Scotch nested-dissection reordering -+ * RCM: Use RCM reordering -+ * GEOMETRIC: A simple geometric nested dissection code that -+ * only works for regular meshes -+ * AMD: Approximate minimum degree -+ * MMD: Multiple minimum degree -+ * AND: Nested dissection -+ * MLF: Minimum local fill -+ * SPECTRAL: Spectral nested dissection - */ -- void SetReorderingStrategy( strumpack::ReorderingStrategy method ); -+ void SetReorderingStrategy(strumpack::ReorderingStrategy method); - - /** -- * Disable static pivoting for stability. The static pivoting in strumpack -+ * Configure static pivoting for stability. The static pivoting in STRUMPACK - * permutes the sparse input matrix in order to get large (nonzero) elements - * on the diagonal. If the input matrix is already diagonally dominant, this - * reordering can be disabled. -+ * -+ * Supported matching algorithms are: -+ * NONE: Don't do anything -+ * MAX_CARDINALITY: Maximum cardinality -+ * MAX_SMALLEST_DIAGONAL: Maximum smallest diagonal value -+ * MAX_SMALLEST_DIAGONAL_2: Same as MAX_SMALLEST_DIAGONAL -+ * but different algorithm -+ * MAX_DIAGONAL_SUM: Maximum sum of diagonal values -+ * MAX_DIAGONAL_PRODUCT_SCALING: Maximum product of diagonal values -+ * and row and column scaling (default) -+ * COMBBLAS: Use AWPM from CombBLAS (only with -+ * version >= 3) - */ -- void DisableMatching(); -- -- /** -- * Enable static pivoting for stability using the MC64 algorithm with -- * job=5. Using a matching algorithm, this will permute the sparse input -- * matrix in order to get nonzero elements (as large as possible) on the -- * diagonal. And will also scale the rows and columns of the matrix. -- */ -- void EnableMatching(); -+ void SetMatching(strumpack::MatchingJob job); - --#if STRUMPACK_VERSION_MAJOR >= 3 - /** -- * Use the AWPM (approximate weight perfect matching) algorithm from the -- * Combinatorial BLAS library for static pivoting, i.e. getting large -- * nonzeros on the diagonal. This requires that strumpack was compiled with -- * support for Combinatorial BLAS. -+ * Enable support for rank-structured data formats, which can be used -+ * for compression within the sparse solver. -+ * -+ * Supported compression types are: -+ * NONE: No compression, purely direct solver (default) -+ * HSS: HSS compression of frontal matrices -+ * BLR: Block low-rank compression of fronts -+ * HODLR: Hierarchically Off-diagonal Low-Rank -+ * compression of frontal matrices -+ * BLR_HODLR: Block low-rank compression of medium -+ * fronts and Hierarchically Off-diagonal -+ * Low-Rank compression of large fronts -+ * ZFP_BLR_HODLR: ZFP compression for small fronts, -+ * Block low-rank compression of medium -+ * fronts and Hierarchically Off-diagonal -+ * Low-Rank compression of large fronts -+ * LOSSLESS: Lossless compression -+ * LOSSY: Lossy compression -+ * -+ * For versions of STRUMPACK < 5, we support only NONE, HSS, and BLR. -+ * BLR_HODLR and ZPR_BLR_HODLR are supported in STRUMPACK >= 6. - */ -- void EnableParallelMatching(); -+ void SetCompression(strumpack::CompressionType type); -+ void SetCompressionRelTol(double rtol); -+ void SetCompressionAbsTol(double atol); -+#if STRUMPACK_VERSION_MAJOR >= 5 -+ void SetCompressionLossyPrecision(int precision); -+ void SetCompressionButterflyLevels(int levels); - #endif - - private: -- void Init( int argc, char* argv[] ); -+ // Helper method for calling the STRUMPACK factoriation routine. -+ void FactorInternal() const; - - protected: -- -- MPI_Comm comm_; -- int numProcs_; -- int myid_; -+ const STRUMPACKRowLocMatrix *APtr_; -+ STRUMPACKSolverType *solver_; - - bool factor_verbose_; - bool solve_verbose_; -+ bool reorder_reuse_; -+ -+ mutable Vector rhs_, sol_; -+ mutable int nrhs_; -+}; - -- const STRUMPACKRowLocMatrix * APtr_; -- strumpack::StrumpackSparseSolverMPIDist * solver_; -+class STRUMPACKSolver : -+ public STRUMPACKSolverBase> -+{ -+public: -+ // Constructor with MPI_Comm parameter. -+ STRUMPACKSolver(MPI_Comm comm); -+ -+ // Constructor with STRUMPACK matrix object. -+ STRUMPACKSolver(STRUMPACKRowLocMatrix &A); - --}; // mfem::STRUMPACKSolver class -+ // Constructor with MPI_Comm parameter and command line arguments. -+ STRUMPACKSolver(MPI_Comm comm, int argc, char *argv[]); -+ -+ // Constructor with STRUMPACK matrix object and command line arguments. -+ STRUMPACKSolver(STRUMPACKRowLocMatrix &A, int argc, char *argv[]); -+ -+ // Destructor. -+ ~STRUMPACKSolver() {} -+}; -+ -+#if STRUMPACK_VERSION_MAJOR >= 7 -+class STRUMPACKMixedPrecisionSolver : -+ public STRUMPACKSolverBase> -+{ -+public: -+ // Constructor with MPI_Comm parameter. -+ STRUMPACKMixedPrecisionSolver(MPI_Comm comm); -+ -+ // Constructor with STRUMPACK matrix object. -+ STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A); -+ -+ // Constructor with MPI_Comm parameter and command line arguments. -+ STRUMPACKMixedPrecisionSolver(MPI_Comm comm, int argc, char *argv[]); -+ -+ // Constructor with STRUMPACK matrix object and command line arguments. -+ STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A, -+ int argc, char *argv[]); -+ -+ // Destructor. -+ ~STRUMPACKMixedPrecisionSolver() {} -+}; -+#endif - --} // mfem namespace -+} // namespace mfem - - #endif // MFEM_USE_MPI - #endif // MFEM_USE_STRUMPACK -diff --git a/linalg/superlu.cpp b/linalg/superlu.cpp -index bec377739..948415d32 100644 ---- a/linalg/superlu.cpp -+++ b/linalg/superlu.cpp -@@ -16,48 +16,50 @@ - - #include "superlu.hpp" - --// SuperLU headers --#include "superlu_defs.h" -+// SuperLU header - #include "superlu_ddefs.h" - --#if XSDK_INDEX_SIZE == 64 --#error "SuperLUDist has been built with 64bit integers. This is not supported" -+#if XSDK_INDEX_SIZE == 64 && !(defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT)) -+#error "Mismatch between HYPRE (32bit) and SuperLU (64bit) integer types" - #endif -- --// For now, it is assumed that HYPRE_BigInt is int. --#if defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT) --#error "SuperLUDist support requires HYPRE_BigInt == int, for now." -+#if XSDK_INDEX_SIZE == 32 && (defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT)) -+#error "Mismatch between HYPRE (64bit) and SuperLU (32bit) integer types" - #endif - --#if SUPERLU_DIST_MAJOR_VERSION > 6 || \ -- (SUPERLU_DIST_MAJOR_VERSION == 6 && SUPERLU_DIST_MINOR_VERSION > 2) -+#if SUPERLU_DIST_MAJOR_VERSION > 6 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 6 && SUPERLU_DIST_MINOR_VERSION >= 3) - #define ScalePermstruct_t dScalePermstruct_t - #define LUstruct_t dLUstruct_t - #define SOLVEstruct_t dSOLVEstruct_t --#define ScalePermstructFree dScalePermstructFree -+#define ZeroLblocks dZeroLblocks -+#define ZeroUblocks dZeroUblocks - #define Destroy_LU dDestroy_LU -+#define SolveFinalize dSolveFinalize -+#define ScalePermstructInit dScalePermstructInit -+#define ScalePermstructFree dScalePermstructFree - #define LUstructFree dLUstructFree - #define LUstructInit dLUstructInit - #endif - -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+#define DeAllocLlu_3d dDeAllocLlu_3d -+#define DeAllocGlu_3d dDeAllocGlu_3d -+#define Destroy_A3d_gathered_on_2d dDestroy_A3d_gathered_on_2d -+#endif - --using namespace std; -- --namespace mfem --{ --unsigned int superlu_internal::sqrti( const unsigned int & a ) -+unsigned int sqrti(unsigned int a) - { -- unsigned int a_ = a; -- unsigned int rem = 0; -- unsigned int root = 0; -+ unsigned int rem = 0; -+ unsigned int root = 0; - unsigned short len = sizeof(int); len <<= 2; -- unsigned short shift = (unsigned short)((len<<1) - 2); -+ unsigned short shift = (unsigned short)((len << 1) - 2); - -- for (int i=0; i> shift)); -- a_ <<= 2; -+ rem = ((rem << 2) + (a >> shift)); -+ a <<= 2; - root ++; - if (root <= rem) - { -@@ -72,546 +74,692 @@ unsigned int superlu_internal::sqrti( const unsigned int & a ) - return (root >> 1); - } - -+int GetGridRows(MPI_Comm comm, int npdep) -+{ -+ int np; -+ MPI_Comm_size(comm, &np); -+ MFEM_VERIFY(npdep > 0 && np % npdep == 0 && !(npdep & (npdep - 1)), -+ "SuperLUSolver: 3D partition depth must be a power of two " -+ "and evenly divide the number of processors!"); -+ int nr = (int)sqrti((unsigned int)(np / npdep)); -+ while (np % nr != 0 && nr > 0) -+ { -+ nr--; -+ } -+ MFEM_VERIFY(nr > 0, -+ "SuperLUSolver: Unable to determine processor grid for np = " << np); -+ return nr; -+} -+ -+int GetGridCols(MPI_Comm comm, int npdep, int nr) -+{ -+ int np; -+ MPI_Comm_size(comm, &np); -+ int nc = np / (nr * npdep); -+ MFEM_VERIFY(nr * nc * npdep == np, -+ "SuperLUSolver: Impossible processor partition!"); -+ return nc; -+} -+ -+namespace mfem -+{ -+ - SuperLURowLocMatrix::SuperLURowLocMatrix(MPI_Comm comm, -- int num_loc_rows, int first_loc_row, -- int glob_nrows, int glob_ncols, -- int *I, int *J, double *data) -- : comm_(comm), -- rowLocPtr_(NULL) -+ int num_loc_rows, -+ HYPRE_BigInt first_loc_row, -+ HYPRE_BigInt glob_nrows, -+ HYPRE_BigInt glob_ncols, -+ int *I, HYPRE_BigInt *J, -+ double *data) -+ : comm_(comm) - { - // Set mfem::Operator member data - height = num_loc_rows; - width = num_loc_rows; - - // Allocate SuperLU's SuperMatrix struct -- rowLocPtr_ = new SuperMatrix; -- SuperMatrix * A = (SuperMatrix*)rowLocPtr_; -- -- A->Store = NULL; -+ rowLocPtr_ = new SuperMatrix; -+ SuperMatrix *A = (SuperMatrix *)rowLocPtr_; -+ A->Store = NULL; - -- int m = glob_nrows; -- int n = glob_ncols; -- int nnz_loc = I[num_loc_rows]; -- int m_loc = num_loc_rows; -- int fst_row = first_loc_row; -+ int_t m = glob_nrows; -+ int_t n = glob_ncols; -+ int_t nnz_loc = I[num_loc_rows]; -+ int_t m_loc = num_loc_rows; -+ int_t fst_row = first_loc_row; - -- double * nzval = NULL; -- int * colind = NULL; -- int * rowptr = NULL; -+ double *nzval = NULL; -+ int_t *colind = NULL; -+ int_t *rowptr = NULL; - -- if ( !(nzval = doubleMalloc_dist(nnz_loc)) ) -+ if (!(nzval = doubleMalloc_dist(nnz_loc))) - { -- ABORT("Malloc fails for nzval[]."); -+ MFEM_ABORT("SuperLURowLocMatrix: Malloc failed for nzval!"); - } -- for (int i=0; iStore = NULL; -+ const HypreParMatrix *APtr = dynamic_cast(&op); -+ MFEM_VERIFY(APtr, "Not a compatible matrix type"); -+ comm_ = APtr->GetComm(); - -- // First cast the parameter to a hypre_ParCSRMatrix -- hypre_ParCSRMatrix * parcsr_op = -- (hypre_ParCSRMatrix *)const_cast(hypParMat); -+ // Set mfem::Operator member data -+ height = op.Height(); -+ width = op.Width(); - -- MFEM_ASSERT(parcsr_op != NULL,"SuperLU: const_cast failed in SetOperator"); -+ // Allocate SuperLU's SuperMatrix struct -+ rowLocPtr_ = new SuperMatrix; -+ SuperMatrix *A = (SuperMatrix *)rowLocPtr_; -+ A->Store = NULL; - -- // Create the SuperMatrix A by borrowing the internal data from a -- // hypre_CSRMatrix. -- hypParMat.HostRead(); -- hypre_CSRMatrix * csr_op = hypre_MergeDiagAndOffd(parcsr_op); -- hypParMat.HypreRead(); -- hypre_CSRMatrixSetDataOwner(csr_op,0); -+ // First cast the parameter to a hypre_ParCSRMatrix -+ hypre_ParCSRMatrix *parcsr_op = -+ (hypre_ParCSRMatrix *)const_cast(*APtr); -+ -+ // Create the SuperMatrix A by taking the internal data from a -+ // hypre_CSRMatrix -+ APtr->HostRead(); -+ hypre_CSRMatrix *csr_op = hypre_MergeDiagAndOffd(parcsr_op); -+ APtr->HypreRead(); -+ HYPRE_Int *Iptr = csr_op->i; - #if MFEM_HYPRE_VERSION >= 21600 -- // For now, this method assumes that HYPRE_BigInt is int. Also, csr_op->num_cols -- // is of type HYPRE_Int, so if we want to check for big indices in -- // csr_op->big_j, we'll have to check all entries and that check will only be -- // necessary in HYPRE_MIXEDINT mode which is not supported at the moment. -- hypre_CSRMatrixBigJtoJ(csr_op); -+ HYPRE_BigInt *Jptr = csr_op->big_j; -+#else -+ HYPRE_Int *Jptr = csr_op->j; - #endif -+ int_t m = parcsr_op->global_num_rows; -+ int_t n = parcsr_op->global_num_cols; -+ int_t fst_row = parcsr_op->first_row_index; -+ int_t nnz_loc = csr_op->num_nonzeros; -+ int_t m_loc = csr_op->num_rows; - -- int m = parcsr_op->global_num_rows; -- int n = parcsr_op->global_num_cols; -- int fst_row = parcsr_op->first_row_index; -- int nnz_loc = csr_op->num_nonzeros; -- int m_loc = csr_op->num_rows; -- -- height = m_loc; -- width = m_loc; -+ double *nzval = csr_op->data; -+ int_t *colind = NULL; -+ int_t *rowptr = NULL; - -- double * nzval = csr_op->data; -- int * colind = csr_op->j; -- int * rowptr = NULL; -+ // Some machines don't like HYPRE_BigInt to int_t -+#if defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT) -+ if (!(colind = intMalloc_dist(nnz_loc))) -+ { -+ MFEM_ABORT("SuperLURowLocMatrix: Malloc failed for colind!") -+ } -+ for (int_t i = 0; i < nnz_loc; i++) -+ { -+ colind[i] = Jptr[i]; -+ } -+#else -+ colind = Jptr; -+#endif - - // The "i" array cannot be stolen from the hypre_CSRMatrix so we'll copy it -- if ( !(rowptr = intMalloc_dist(m_loc+1)) ) -+ if (!(rowptr = intMalloc_dist(m_loc+1))) - { -- ABORT("Malloc fails for rowptr[]."); -+ MFEM_ABORT("SuperLURowLocMatrix: Malloc failed for rowptr!") - } -- for (int i=0; i<=m_loc; i++) -+ for (int_t i = 0; i <= m_loc; i++) - { -- rowptr[i] = (csr_op->i)[i]; -+ rowptr[i] = (int_t)Iptr[i]; // Promotion for HYPRE_MIXEDINT - } - -- // Everything has been copied or abducted so delete the structure -- hypre_CSRMatrixDestroy(csr_op); -- - // Assign he matrix data to SuperLU's SuperMatrix structure - dCreate_CompRowLoc_Matrix_dist(A, m, n, nnz_loc, m_loc, fst_row, - nzval, colind, rowptr, - SLU_NR_loc, SLU_D, SLU_GE); - -- // Save global number of columns (width) of the matrix -- num_global_cols = n; -+ // SuperLU will free the passed CSR data arrays -+ hypre_CSRMatrixSetDataOwner(csr_op, 0); -+ hypre_CSRMatrixDestroy(csr_op); -+#if defined(HYPRE_BIGINT) || defined(HYPRE_MIXEDINT) -+ delete Jptr; -+#endif -+ -+ // Save global number of rows and columns of the matrix -+ num_global_rows_ = m; -+ num_global_cols_ = n; - } - - SuperLURowLocMatrix::~SuperLURowLocMatrix() - { -- SuperMatrix * A = (SuperMatrix*)rowLocPtr_; -- -- // Delete the internal data -+ SuperMatrix *A = (SuperMatrix *)rowLocPtr_; - Destroy_CompRowLoc_Matrix_dist(A); -- -- // Delete the struct -- if ( A != NULL ) { delete A; } -+ delete A; - } - --SuperLUSolver::SuperLUSolver( MPI_Comm comm ) -- : comm_(comm), -+SuperLUSolver::SuperLUSolver(MPI_Comm comm, int npdep) -+ : nprow_(GetGridRows(comm, npdep)), -+ npcol_(GetGridCols(comm, npdep, nprow_)), -+ npdep_(npdep), - APtr_(NULL), -- optionsPtr_(NULL), -- statPtr_(NULL), -- ScalePermstructPtr_(NULL), -- LUstructPtr_(NULL), -- SOLVEstructPtr_(NULL), -- gridPtr_(NULL), -- berr_(NULL), -- perm_r_(NULL), -- nrhs_(1), -- nprow_(0), -- npcol_(0), -- firstSolveWithThisA_(false), -- gridInitialized_(false), -- LUStructInitialized_(false) -+ nrhs_(0) - { -- this->Init(); -+ Init(comm); - } - --SuperLUSolver::SuperLUSolver( SuperLURowLocMatrix & A ) -- : comm_(A.GetComm()), -- APtr_(&A), -- optionsPtr_(NULL), -- statPtr_(NULL), -- ScalePermstructPtr_(NULL), -- LUstructPtr_(NULL), -- SOLVEstructPtr_(NULL), -- gridPtr_(NULL), -- berr_(NULL), -- perm_r_(NULL), -- nrhs_(1), -- nprow_(0), -- npcol_(0), -- firstSolveWithThisA_(true), -- gridInitialized_(false), -- LUStructInitialized_(false) -+SuperLUSolver::SuperLUSolver(SuperLURowLocMatrix &A, int npdep) -+ : SuperLUSolver(A.GetComm(), npdep) - { -- height = A.Height(); -- width = A.Width(); -- -- this->Init(); -+ SetOperator(A); - } - - SuperLUSolver::~SuperLUSolver() - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- SuperLUStat_t * stat = (SuperLUStat_t*)statPtr_; -- ScalePermstruct_t * SPstruct = (ScalePermstruct_t*)ScalePermstructPtr_; -- LUstruct_t * LUstruct = (LUstruct_t*)LUstructPtr_; -- SOLVEstruct_t * SOLVEstruct = (SOLVEstruct_t*)SOLVEstructPtr_; -- gridinfo_t * grid = (gridinfo_t*)gridPtr_; -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; - -- SUPERLU_FREE(berr_); -- PStatFree(stat); -+ ScalePermstruct_t *ScalePermstruct = (ScalePermstruct_t *)ScalePermstructPtr_; -+ LUstruct_t *LUstruct = (LUstruct_t *)LUstructPtr_; -+ SOLVEstruct_t *SOLVEstruct = (SOLVEstruct_t *)SOLVEstructPtr_; - -- if ( LUStructInitialized_ ) -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ if (npdep_ > 1) - { -- ScalePermstructFree(SPstruct); -- Destroy_LU(APtr_->GetGlobalNumColumns(), grid, LUstruct); -- LUstructFree(LUstruct); -- } -+ gridinfo3d_t *grid3d = (gridinfo3d_t *)gridPtr_; - -- if ( options->SolveInitialized ) -+ if (APtr_) -+ { -+ if (grid3d->zscp.Iam == 0) -+ { -+ // Process layer 0 -+ Destroy_LU(APtr_->GetGlobalNumColumns(), &(grid3d->grid2d), -+ LUstruct); -+ SolveFinalize(options, SOLVEstruct); -+ } -+ else -+ { -+ // Process layers not equal 0 -+ DeAllocLlu_3d(APtr_->GetGlobalNumColumns(), LUstruct, grid3d); -+ DeAllocGlu_3d(LUstruct); -+ } -+ Destroy_A3d_gathered_on_2d(SOLVEstruct, grid3d); -+ ScalePermstructFree(ScalePermstruct); -+ LUstructFree(LUstruct); -+ } -+ -+ superlu_gridexit3d(grid3d); -+ delete grid3d; -+ } -+ else -+#endif - { -- dSolveFinalize(options, SOLVEstruct); -+ gridinfo_t *grid = (gridinfo_t *)gridPtr_; -+ -+ if (APtr_) -+ { -+ Destroy_LU(APtr_->GetGlobalNumColumns(), grid, LUstruct); -+ SolveFinalize(options, SOLVEstruct); -+ ScalePermstructFree(ScalePermstruct); -+ LUstructFree(LUstruct); -+ } -+ -+ superlu_gridexit(grid); -+ delete grid; - } - -- if ( options != NULL ) { delete options; } -- if ( stat != NULL ) { delete stat; } -- if ( SPstruct != NULL ) { delete SPstruct; } -- if ( LUstruct != NULL ) { delete LUstruct; } -- if ( SOLVEstruct != NULL ) { delete SOLVEstruct; } -- if ( grid != NULL ) { delete grid; } -- if ( perm_r_ != NULL ) { SUPERLU_FREE(perm_r_); } -+ delete options; -+ delete ScalePermstruct; -+ delete LUstruct; -+ delete SOLVEstruct; - } - --void SuperLUSolver::Init() -+void SuperLUSolver::Init(MPI_Comm comm) - { -- MPI_Comm_size(comm_, &numProcs_); -- MPI_Comm_rank(comm_, &myid_); -- - optionsPtr_ = new superlu_dist_options_t; -- statPtr_ = new SuperLUStat_t; - ScalePermstructPtr_ = new ScalePermstruct_t; - LUstructPtr_ = new LUstruct_t; - SOLVEstructPtr_ = new SOLVEstruct_t; -- gridPtr_ = new gridinfo_t; -- -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- SuperLUStat_t * stat = (SuperLUStat_t*)statPtr_; - -- if ( !(berr_ = doubleMalloc_dist(nrhs_)) ) -+ // Initialize process grid -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ if (npdep_ > 1) -+ { -+ gridPtr_ = new gridinfo3d_t; -+ superlu_gridinit3d(comm, nprow_, npcol_, npdep_, (gridinfo3d_t *)gridPtr_); -+ } -+ else -+#endif - { -- ABORT("Malloc fails for berr[]."); -+ gridPtr_ = new gridinfo_t; -+ MFEM_VERIFY(npdep_ == 1, -+ "SuperLUSolver: 3D partitioning is only available for " -+ "SuperLU_DIST version >= 7.2.0!"); -+ superlu_gridinit(comm, nprow_, npcol_, (gridinfo_t *)gridPtr_); - } - -- // Set default options -+ // Set default options: -+ // options.Fact = DOFACT; -+ // options.Equil = YES; -+ // options.ColPerm = METIS_AT_PLUS_A; -+ // options.RowPerm = LargeDiag_MC64; -+ // options.ReplaceTinyPivot = NO; -+ // options.Trans = NOTRANS; -+ // options.IterRefine = SLU_DOUBLE; -+ // options.SolveInitialized = NO; -+ // options.RefineInitialized = NO; -+ // options.PrintStat = YES; -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; - set_default_options_dist(options); -- -- // Choose nprow and npcol so that the process grid is as square as possible. -- // If the processes cannot be divided evenly, keep the row dimension smaller -- // than the column dimension. -- -- nprow_ = (int)superlu_internal::sqrti((unsigned int)numProcs_); -- while (numProcs_ % nprow_ != 0 && nprow_ > 0) -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ if (npdep_ > 1) - { -- nprow_--; -+ options->Algo3d = YES; - } -- -- npcol_ = (int)(numProcs_ / nprow_); -- MFEM_ASSERT(nprow_ * npcol_ == numProcs_, ""); -- -- PStatInit(stat); // Initialize the statistics variables. -+#endif - } - --void SuperLUSolver::SetPrintStatistics( bool print_stat ) -+void SuperLUSolver::SetPrintStatistics(bool print_stat) +-void STRUMPACKSolver::SetRelTol( double rtol ) ++#if STRUMPACK_VERSION_MAJOR >= 5 ++template ++void STRUMPACKSolverBase:: ++SetCompressionLossyPrecision(int precision) { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- yes_no_t opt = print_stat?YES:NO; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ yes_no_t opt = print_stat ? YES : NO; - options->PrintStat = opt; +- solver_->options().set_rel_tol( rtol ); ++ solver_->options().set_lossy_precision(precision); } --void SuperLUSolver::SetEquilibriate( bool equil ) -+void SuperLUSolver::SetEquilibriate(bool equil) +-void STRUMPACKSolver::SetAbsTol( double atol ) ++template ++void STRUMPACKSolverBase:: ++SetCompressionButterflyLevels(int levels) { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- yes_no_t opt = equil?YES:NO; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ yes_no_t opt = equil ? YES : NO; - options->Equil = opt; +- solver_->options().set_abs_tol( atol ); ++ solver_->options().HODLR_options().set_butterfly_levels(levels); } ++#endif --void SuperLUSolver::SetColumnPermutation( superlu::ColPerm col_perm ) -+void SuperLUSolver::SetColumnPermutation(superlu::ColPerm col_perm) - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; - colperm_t opt = (colperm_t)col_perm; - -+ if (opt == MY_PERMC) -+ { -+ MFEM_ABORT("SuperLUSolver::SetColumnPermutation does not yet support " -+ "MY_PERMC!"); -+ } -+ else if (opt == PARMETIS) -+ { -+ options->ParSymbFact = YES; -+ } - options->ColPerm = opt; - } - --void SuperLUSolver::SetRowPermutation( superlu::RowPerm row_perm, -- Array * perm ) -+void SuperLUSolver::SetRowPermutation(superlu::RowPerm row_perm) +-void STRUMPACKSolver::Mult( const Vector & x, Vector & y ) const ++template ++void STRUMPACKSolverBase:: ++SetOperator(const Operator &op) { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; - rowperm_t opt = (rowperm_t)row_perm; -- -- options->RowPerm = opt; -- -- if ( opt == MY_PERMR ) -+ if (opt == MY_PERMR) +- MFEM_ASSERT(APtr_ != NULL, +- "STRUMPACK Error: The operator must be set before" +- " the system can be solved."); +- MFEM_ASSERT(x.Size() == Width(), "invalid x.Size() = " << x.Size() +- << ", expected size = " << Width()); +- MFEM_ASSERT(y.Size() == Height(), "invalid y.Size() = " << y.Size() +- << ", expected size = " << Height()); ++ // Verify that we have a compatible operator ++ bool first_mat = !APtr_; ++ APtr_ = dynamic_cast(&op); ++ MFEM_VERIFY(APtr_, ++ "STRUMPACK: Operator is not a STRUMPACKRowLocMatrix!"); + +- double* yPtr = y.HostWrite(); +- const double* xPtr = x.HostRead(); ++ // Set mfem::Operator member data ++ height = op.Height(); ++ width = op.Width(); + +- solver_->options().set_verbose( factor_verbose_ ); +- ReturnCode ret = solver_->factor(); +- switch (ret) ++ if (first_mat || !reorder_reuse_) { -- if ( perm == NULL ) +- case ReturnCode::SUCCESS: break; +- case ReturnCode::MATRIX_NOT_SET: - { -- mfem_error("SuperLUSolver::SetRowPermutation :" -- " permutation vector not set!"); +- MFEM_ABORT("STRUMPACK: Matrix was not set!"); - } -- -- if ( !(perm_r_ = intMalloc_dist(perm->Size())) ) +- break; +- case ReturnCode::REORDERING_ERROR: - { -- ABORT("Malloc fails for perm_r[]."); +- MFEM_ABORT("STRUMPACK: Matrix reordering failed!"); - } -- for (int i=0; iSize(); i++) +- break; +- default: - { -- perm_r_[i] = (*perm)[i]; +- MFEM_ABORT("STRUMPACK: 'factor()' error code = " << ret); - } -+ MFEM_ABORT("SuperLUSolver::SetRowPermutation does not yet support " -+ "MY_PERMR!"); ++ solver_->set_matrix(*(APtr_->GetA())); ++ } ++ else ++ { ++ solver_->update_matrix_values(*(APtr_->GetA())); } -+ options->RowPerm = opt; - } +- solver_->options().set_verbose( solve_verbose_ ); +- solver_->solve(xPtr, yPtr); ++} --void SuperLUSolver::SetTranspose( superlu::Trans trans ) --{ -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- trans_t opt = (trans_t)trans; -- -- options->Trans = opt; --} -- --void SuperLUSolver::SetIterativeRefine( superlu::IterRefine iter_ref ) -+void SuperLUSolver::SetIterativeRefine(superlu::IterRefine iter_ref) - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; - IterRefine_t opt = (IterRefine_t)iter_ref; -- - options->IterRefine = opt; ++template ++void STRUMPACKSolverBase:: ++FactorInternal() const ++{ ++ MFEM_ASSERT(APtr_, ++ "STRUMPACK: Operator must be set before the system can be " ++ "solved!"); ++ solver_->options().set_verbose(factor_verbose_); ++ strumpack::ReturnCode ret = solver_->factor(); ++ if (ret != strumpack::ReturnCode::SUCCESS) ++ { ++#if STRUMPACK_VERSION_MAJOR >= 7 ++ MFEM_ABORT("STRUMPACK: Factor failed with return code " << ret << "!"); ++#else ++ MFEM_ABORT("STRUMPACK: Factor failed!"); ++#endif ++ } } --void SuperLUSolver::SetReplaceTinyPivot( bool rtp ) -+void SuperLUSolver::SetReplaceTinyPivot(bool rtp) +-void STRUMPACKSolver::SetOperator( const Operator & op ) ++template ++void STRUMPACKSolverBase:: ++Mult(const Vector &x, Vector &y) const { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- yes_no_t opt = rtp?YES:NO; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ yes_no_t opt = rtp ? YES : NO; - options->ReplaceTinyPivot = opt; - } +- // Verify that we have a compatible operator +- APtr_ = dynamic_cast(&op); +- if ( APtr_ == NULL ) ++ MFEM_ASSERT(x.Size() == Width(), ++ "STRUMPACK: Invalid x.Size() = " << x.Size() << ++ ", expected size = " << Width() << "!"); ++ MFEM_ASSERT(y.Size() == Height(), ++ "STRUMPACK: Invalid y.Size() = " << y.Size() << ++ ", expected size = " << Height() << "!"); ++ ++ const double *xPtr = x.HostRead(); ++ double *yPtr = y.HostReadWrite(); ++ ++ FactorInternal(); ++ solver_->options().set_verbose(solve_verbose_); ++ strumpack::ReturnCode ret = solver_->solve(xPtr, yPtr, false); ++ if (ret != strumpack::ReturnCode::SUCCESS) + { +- mfem_error("STRUMPACKSolver::SetOperator : not STRUMPACKRowLocMatrix!"); ++#if STRUMPACK_VERSION_MAJOR >= 7 ++ MFEM_ABORT("STRUMPACK: Solve failed with return code " << ret << "!"); ++#else ++ MFEM_ABORT("STRUMPACK: Solve failed!"); ++#endif + } ++} --void SuperLUSolver::SetNumLookAheads( int num_lookaheads ) -+void SuperLUSolver::SetNumLookAheads(int num_lookaheads) - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; - options->num_lookaheads = num_lookaheads; - } +- solver_->set_matrix( *(APtr_->getA()) ); ++template ++void STRUMPACKSolverBase:: ++ArrayMult(const Array &X, Array &Y) const ++{ ++ MFEM_ASSERT(X.Size() == Y.Size(), ++ "Number of columns mismatch in STRUMPACK solve!"); ++ if (X.Size() == 1) ++ { ++ nrhs_ = 1; ++ MFEM_ASSERT(X[0] && Y[0], "Missing Vector in STRUMPACK solve!"); ++ Mult(*X[0], *Y[0]); ++ return; ++ } --void SuperLUSolver::SetLookAheadElimTree( bool etree ) -+void SuperLUSolver::SetLookAheadElimTree(bool etree) - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- yes_no_t opt = etree?YES:NO; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ yes_no_t opt = etree ? YES : NO; - options->lookahead_etree = opt; - } +- // Set mfem::Operator member data +- height = op.Height(); +- width = op.Width(); ++ // Multiple RHS case ++ int ldx = Height(); ++ if (nrhs_ != X.Size()) ++ { ++ rhs_.SetSize(X.Size() * ldx); ++ sol_.SetSize(X.Size() * ldx); ++ nrhs_ = X.Size(); ++ } ++ for (int i = 0; i < nrhs_; i++) ++ { ++ MFEM_ASSERT(X[i] && X[i]->Size() == Width(), ++ "STRUMPACK: Missing or invalid sized RHS Vector in solve!"); ++ Vector s(rhs_, i * ldx, ldx); ++ s = *X[i]; ++ } ++ const double *xPtr = rhs_.HostRead(); ++ double *yPtr = sol_.HostReadWrite(); ++ ++ FactorInternal(); ++ solver_->options().set_verbose(solve_verbose_); ++ strumpack::ReturnCode ret = solver_->solve(nrhs_, xPtr, ldx, yPtr, ldx, ++ false); ++ if (ret != strumpack::ReturnCode::SUCCESS) ++ { ++#if STRUMPACK_VERSION_MAJOR >= 7 ++ MFEM_ABORT("STRUMPACK: Solve failed with return code " << ret << "!"); ++#else ++ MFEM_ABORT("STRUMPACK: Solve failed!"); ++#endif ++ } --void SuperLUSolver::SetSymmetricPattern( bool sym ) -+void SuperLUSolver::SetSymmetricPattern(bool sym) - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- yes_no_t opt = sym?YES:NO; -- -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ yes_no_t opt = sym ? YES : NO; - options->SymPattern = opt; ++ for (int i = 0; i < nrhs_; i++) ++ { ++ MFEM_ASSERT(Y[i] && Y[i]->Size() == Width(), ++ "STRUMPACK: Missing or invalid sized solution Vector in solve!"); ++ Vector s(sol_, i * ldx, ldx); ++ *Y[i] = s; ++ } } --void SuperLUSolver::SetParSymbFact( bool par ) -+void SuperLUSolver::SetParSymbFact(bool par) - { -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- -- yes_no_t opt = par?YES:NO; ++STRUMPACKSolver:: ++STRUMPACKSolver(MPI_Comm comm) ++ : STRUMPACKSolverBase> ++ (comm, 0, NULL) {} ++ ++STRUMPACKSolver:: ++STRUMPACKSolver(STRUMPACKRowLocMatrix &A) ++ : STRUMPACKSolverBase> ++ (A, 0, NULL) {} ++ ++STRUMPACKSolver:: ++STRUMPACKSolver(MPI_Comm comm, int argc, char *argv[]) ++ : STRUMPACKSolverBase> ++ (comm, argc, argv) {} ++ ++STRUMPACKSolver:: ++STRUMPACKSolver(STRUMPACKRowLocMatrix &A, int argc, char *argv[]) ++ : STRUMPACKSolverBase> ++ (A, argc, argv) {} ++ ++#if STRUMPACK_VERSION_MAJOR >= 7 ++STRUMPACKMixedPrecisionSolver:: ++STRUMPACKMixedPrecisionSolver(MPI_Comm comm) ++ : STRUMPACKSolverBase> ++ (comm, 0, NULL) {} ++ ++STRUMPACKMixedPrecisionSolver:: ++STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A) ++ : STRUMPACKSolverBase> ++ (A, 0, NULL) {} ++ ++STRUMPACKMixedPrecisionSolver:: ++STRUMPACKMixedPrecisionSolver(MPI_Comm comm, int argc, char *argv[]) ++ : STRUMPACKSolverBase> ++ (comm, argc, argv) {} ++ ++STRUMPACKMixedPrecisionSolver:: ++STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A, int argc, char *argv[]) ++ : STRUMPACKSolverBase> ++ (A, argc, argv) {} ++#endif ++ ++template class STRUMPACKSolverBase>; ++#if STRUMPACK_VERSION_MAJOR >= 7 ++template class STRUMPACKSolverBase>; ++#endif ++ + } // mfem namespace + + #endif // MFEM_USE_MPI +diff --git a/linalg/strumpack.hpp b/linalg/strumpack.hpp +index 300b8415e..6a8ac4c30 100644 +--- a/linalg/strumpack.hpp ++++ b/linalg/strumpack.hpp +@@ -16,12 +16,14 @@ + + #ifdef MFEM_USE_STRUMPACK + #ifdef MFEM_USE_MPI ++ + #include "operator.hpp" + #include "hypre.hpp" - -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ yes_no_t opt = par ? YES : NO; - options->ParSymbFact = opt; - } + #include + ++// STRUMPACK headers + #include "StrumpackSparseSolverMPIDist.hpp" ++#include "StrumpackSparseSolverMixedPrecisionMPIDist.hpp" --void SuperLUSolver::SetupGrid() -+void SuperLUSolver::SetFact(superlu::Fact fact) + namespace mfem { -- gridinfo_t * grid = (gridinfo_t*)gridPtr_; -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ fact_t opt = (fact_t)fact; -+ options->Fact = opt; -+} +@@ -34,63 +36,80 @@ public: + be of size (local) nrows by (global) glob_ncols. The new parallel matrix + contains copies of all input arrays (so they can be deleted). */ + STRUMPACKRowLocMatrix(MPI_Comm comm, +- int num_loc_rows, int first_loc_row, +- int glob_nrows, int glob_ncols, +- int *I, int *J, double *data); ++ int num_loc_rows, HYPRE_BigInt first_loc_row, ++ HYPRE_BigInt glob_nrows, HYPRE_BigInt glob_ncols, ++ int *I, HYPRE_BigInt *J, double *data, ++ bool sym_sparse = false); -- // Make sure the values of nprow and npcol are reasonable -- if ( ((nprow_ * npcol_) > numProcs_) || ((nprow_ * npcol_) < 1) ) -- { -- if ( myid_ == 0 ) -- { -- mfem::err << "Warning: User specified nprow and npcol are such that " -- << "(nprow * npcol) > numProcs or (nprow * npcol) < 1. " -- << "Using default values for nprow and npcol instead." -- << endl; -- } -+void SuperLUSolver::SetOperator(const Operator &op) -+{ -+ // Verify that we have a compatible operator -+ bool LUStructInitialized = (APtr_ != NULL); -+ APtr_ = dynamic_cast(&op); -+ MFEM_VERIFY(APtr_, "SuperLUSolver::SetOperator: Not a SuperLURowLocMatrix!"); + /** Creates a copy of the parallel matrix hypParMat in STRUMPACK's RowLoc + format. All data is copied so the original matrix may be deleted. */ +- STRUMPACKRowLocMatrix(const HypreParMatrix & hypParMat); ++ STRUMPACKRowLocMatrix(const Operator &op, bool sym_sparse = false); -- nprow_ = (int)superlu_internal::sqrti((unsigned int)numProcs_); -- while (numProcs_ % nprow_ != 0 && nprow_ > 0) -- { -- nprow_--; -- } -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; + ~STRUMPACKRowLocMatrix(); -- npcol_ = (int)(numProcs_ / nprow_); -- MFEM_ASSERT(nprow_ * npcol_ == numProcs_, ""); -- } -+ ScalePermstruct_t *ScalePermstruct = (ScalePermstruct_t *)ScalePermstructPtr_; -+ LUstruct_t *LUstruct = (LUstruct_t *)LUstructPtr_; + void Mult(const Vector &x, Vector &y) const + { +- mfem_error("STRUMPACKRowLocMatrix::Mult(...)\n" +- " matrix vector products are not supported."); ++ MFEM_ABORT("STRUMPACKRowLocMatrix::Mult: Matrix vector products are not " ++ "supported!"); + } -- superlu_gridinit(comm_, nprow_, npcol_, grid); -+ gridinfo_t *grid; -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ gridinfo3d_t *grid3d = NULL; -+ if (npdep_ > 1) -+ { -+ grid3d = (gridinfo3d_t *)gridPtr_; -+ grid = NULL; -+ } -+ else -+#endif -+ { -+ grid = (gridinfo_t *)gridPtr_; -+ } +- MPI_Comm GetComm() const { return comm_; } ++ MPI_Comm GetComm() const { return A_->comm(); } -- gridInitialized_ = true; --} -+ // Set mfem::Operator member data -+ MFEM_VERIFY(!LUStructInitialized || -+ (height == op.Height() && width == op.Width()), -+ "SuperLUSolver::SetOperator: Inconsistent new matrix size!"); -+ height = op.Height(); -+ width = op.Width(); +- strumpack::CSRMatrixMPI* getA() const { return A_; } ++ strumpack::CSRMatrixMPI *GetA() const { return A_; } --void SuperLUSolver::DismantleGrid() --{ -- if ( gridInitialized_ ) -+ if (!LUStructInitialized) - { -- gridinfo_t * grid = (gridinfo_t*)gridPtr_; + private: +- MPI_Comm comm_; +- strumpack::CSRMatrixMPI* A_; - -- superlu_gridexit(grid); -+ // Initialize ScalePermstruct and LUstruct once for all operators (must -+ // have same dimensions) -+ ScalePermstructInit(APtr_->GetGlobalNumRows(), -+ APtr_->GetGlobalNumColumns(), ScalePermstruct); -+ LUstructInit(APtr_->GetGlobalNumColumns(), LUstruct); -+ options->Fact = DOFACT; - } -+ else -+ { -+ // A previous matrix has already been set and factored -+ switch (options->Fact) -+ { -+ case DOFACT: -+ MFEM_ABORT("SuperLUSolver::SetOperator: Previous matrix was never used!"); -+ break; -+ case SamePattern_SameRowPerm: -+ { -+ // Just zero the LU factors -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+(SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ if (npdep_ > 1) -+ { -+ if (grid3d->zscp.Iam == 0) -+ { -+ ZeroLblocks(grid3d->iam, APtr_->GetGlobalNumColumns(), -+ &(grid3d->grid2d), LUstruct); -+ ZeroUblocks(grid3d->iam, APtr_->GetGlobalNumColumns(), -+ &(grid3d->grid2d), LUstruct); -+ } -+ } -+ else -+#endif -+ { -+ ZeroLblocks(grid->iam, APtr_->GetGlobalNumColumns(), -+ grid, LUstruct); -+ ZeroUblocks(grid->iam, APtr_->GetGlobalNumColumns(), -+ grid, LUstruct); -+ } -+ } -+ break; -+ case SamePattern: -+ case FACTORED: -+ { -+ // Delete factors from the prior factorization -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+(SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ if (npdep_ > 1) -+ { -+ if (grid3d->zscp.Iam == 0) -+ { -+ Destroy_LU(APtr_->GetGlobalNumColumns(), &(grid3d->grid2d), -+ LUstruct); -+ } -+ else -+ { -+ DeAllocLlu_3d(APtr_->GetGlobalNumColumns(), LUstruct, -+ grid3d); -+ DeAllocGlu_3d(LUstruct); -+ } -+ } -+ else -+#endif -+ { -+ Destroy_LU(APtr_->GetGlobalNumColumns(), grid, LUstruct); -+ } -+ } -+ break; -+ default: -+ MFEM_ABORT("SuperLUSolver::SetOperator: Unexpected value for " -+ "options->Fact!"); -+ break; -+ } -+ if (options->Fact == FACTORED) { options->Fact = DOFACT; } -+ } -+} +-}; // mfem::STRUMPACKRowLocMatrix ++ strumpack::CSRMatrixMPI *A_; ++}; -- gridInitialized_ = false; -+void SuperLUSolver::Mult(const Vector &x, Vector &y) const -+{ -+ Array X(1); -+ Array Y(1); -+ X[0] = &x; -+ Y[0] = &y; -+ ArrayMult(X, Y); - } + /** The MFEM STRUMPACK Direct Solver class. --void SuperLUSolver::Mult( const Vector & x, Vector & y ) const -+void SuperLUSolver::ArrayMult(const Array &X, -+ Array &Y) const + The mfem::STRUMPACKSolver class uses the STRUMPACK library to perform LU + factorization of a parallel sparse matrix. The solver is capable of handling +- double precision types. See http://portal.nersc.gov/project/sparse/strumpack ++ double precision types. See ++ http://portal.nersc.gov/project/sparse/strumpack/. + */ +-class STRUMPACKSolver : public mfem::Solver ++template ++class STRUMPACKSolverBase : public Solver { - MFEM_ASSERT(APtr_ != NULL, - "SuperLU Error: The operator must be set before" - " the system can be solved."); -- -- superlu_dist_options_t * options = (superlu_dist_options_t*)optionsPtr_; -- SuperLUStat_t * stat = (SuperLUStat_t*)statPtr_; -- SuperMatrix * A = (SuperMatrix*)APtr_->InternalData(); -- -- ScalePermstruct_t * SPstruct = (ScalePermstruct_t*)ScalePermstructPtr_; -- LUstruct_t * LUstruct = (LUstruct_t*)LUstructPtr_; -- SOLVEstruct_t * SOLVEstruct = (SOLVEstruct_t*)SOLVEstructPtr_; -- gridinfo_t * grid = (gridinfo_t*)gridPtr_; -- -- if (!firstSolveWithThisA_) -+ SuperMatrix *A = (SuperMatrix *)APtr_->InternalData(); -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ -+ ScalePermstruct_t *ScalePermstruct = (ScalePermstruct_t *)ScalePermstructPtr_; -+ LUstruct_t *LUstruct = (LUstruct_t *)LUstructPtr_; -+ SOLVEstruct_t *SOLVEstruct = (SOLVEstruct_t *)SOLVEstructPtr_; -+ -+ gridinfo_t *grid; -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ gridinfo3d_t *grid3d = NULL; -+ if (npdep_ > 1) - { -- options->Fact = FACTORED; // Indicate the factored form of A is supplied. -+ grid3d = (gridinfo3d_t *)gridPtr_; -+ grid = NULL; - } -- else // This is the first solve with this A -+ else -+#endif - { -- firstSolveWithThisA_ = false; -- -- // Make sure that the parameters have been initialized The only parameter -- // we might have to worry about is ScalePermstruct, if the user is -- // supplying a row or column permutation. -- -- // Initialize ScalePermstruct and LUstruct. -- SPstruct->DiagScale = NOEQUIL; -+ grid = (gridinfo_t *)gridPtr_; -+ } - -- // Transfer ownership of the row permutations if available -- if ( perm_r_ != NULL ) -+ // SuperLU overwrites x with y, so copy x to y and pass that to the solve -+ // routine. Due to issues with repeated solves and changes in the number -+ // of RHS vectors, this is not supported. -+ MFEM_ASSERT(X.Size() == Y.Size(), -+ "Number of columns mismatch in SuperLUSolver::Mult!"); -+ MFEM_VERIFY(nrhs_ < 1 || nrhs_ == X.Size(), -+ "SuperLUSolver does not support multiple solves with different " -+ "numbers of RHS vectors!"); -+ int ldx = Height(); -+ if (X.Size() == 1) -+ { -+ MFEM_ASSERT(X[0] && Y[0], "Missing Vector in SuperLUSolver::Mult!"); -+ sol_.MakeRef(*Y[0], 0, Y[0]->Size()); -+ sol_ = *X[0]; -+ nrhs_ = 1; -+ } -+ else -+ { -+ if (nrhs_ < 1) - { -- SPstruct->perm_r = perm_r_; -- perm_r_ = NULL; -+ sol_.SetSize(X.Size() * ldx); -+ nrhs_ = X.Size(); - } -- else -+ for (int i = 0; i < nrhs_; i++) - { -- if ( !(SPstruct->perm_r = intMalloc_dist(A->nrow)) ) -- { -- ABORT("Malloc fails for perm_r[]."); -- } -+ MFEM_ASSERT(X[i], "Missing Vector in SuperLUSolver::Mult!"); -+ Vector s(sol_, i * ldx, ldx); -+ s = *X[i]; - } -- if ( !(SPstruct->perm_c = intMalloc_dist(A->ncol)) ) -+ } -+ -+ // Solve the system -+ double *B = sol_.HostReadWrite(), *berr; -+ if (!(berr = doubleMalloc_dist(nrhs_))) -+ { -+ MFEM_ABORT("SuperLUSolver::Mult: Malloc failed for berr!"); -+ } -+ SuperLUStat_t stat; -+ PStatInit(&stat); -+ int info = -1; -+#if SUPERLU_DIST_MAJOR_VERSION > 7 || \ -+ (SUPERLU_DIST_MAJOR_VERSION == 7 && SUPERLU_DIST_MINOR_VERSION >= 2) -+ if (npdep_ > 1) -+ { -+ pdgssvx3d(options, A, ScalePermstruct, B, ldx, nrhs_, -+ grid3d, LUstruct, SOLVEstruct, berr, &stat, &info); -+ } -+ else -+#endif -+ { -+ pdgssvx(options, A, ScalePermstruct, B, ldx, nrhs_, -+ grid, LUstruct, SOLVEstruct, berr, &stat, &info); -+ } -+ HandleError(info); -+ SUPERLU_FREE(berr); -+ PStatFree(&stat); -+ options->Fact = FACTORED; -+ -+ // Copy solution into output (no need to do anything for single RHS since -+ // solution is written directly into output Vector) -+ if (nrhs_ > 1) -+ { -+ for (int i = 0; i < nrhs_; i++) - { -- ABORT("Malloc fails for perm_c[]."); -+ MFEM_ASSERT(Y[i], "Missing Vector in SuperLUSolver::Mult!"); -+ Vector s(sol_, i * ldx, ldx); -+ *Y[i] = s; - } -- -- LUstructInit(A->ncol, LUstruct); -- LUStructInitialized_ = true; - } -+} +-public: +- // Constructor with MPI_Comm parameter. +- STRUMPACKSolver( int argc, char* argv[], MPI_Comm comm ); ++protected: ++ // Constructor with MPI_Comm parameter and command line arguments. ++ STRUMPACKSolverBase(MPI_Comm comm, int argc, char *argv[]); -- // SuperLU overwrites x with y, so copy x to y and pass that to the solve -- // routine. -+void SuperLUSolver::MultTranspose(const Vector &x, Vector &y) const -+{ -+ // Set flag for transpose solve -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ options->Trans = TRANS; -+ Mult(x, y); +- // Constructor with STRUMPACK Matrix Object. +- STRUMPACKSolver( STRUMPACKRowLocMatrix & A); ++ // Constructor with STRUMPACK matrix object and command line arguments. ++ STRUMPACKSolverBase(STRUMPACKRowLocMatrix &A, int argc, char *argv[]); -- const double *xPtr = x.HostRead(); -- y = xPtr; -- double * yPtr = y.HostReadWrite(); -+ // Reset the flag -+ options->Trans = NOTRANS; -+} ++public: + // Default destructor. +- ~STRUMPACKSolver( void ); ++ virtual ~STRUMPACKSolverBase(); -- int info = -1, locSize = y.Size(); -+void SuperLUSolver::ArrayMultTranspose(const Array &X, -+ Array &Y) const -+{ -+ // Set flag for transpose solve -+ superlu_dist_options_t *options = (superlu_dist_options_t *)optionsPtr_; -+ options->Trans = TRANS; -+ ArrayMult(X, Y); + // Factor and solve the linear system y = Op^{-1} x. +- void Mult( const Vector & x, Vector & y ) const; ++ void Mult(const Vector &x, Vector &y) const; ++ void ArrayMult(const Array &X, Array &Y) const; -- // Solve the system -- pdgssvx(options, A, SPstruct, yPtr, locSize, nrhs_, grid, -- LUstruct, SOLVEstruct, berr_, stat, &info); -+ // Reset the flag -+ options->Trans = NOTRANS; -+} + // Set the operator. +- void SetOperator( const Operator & op ); ++ void SetOperator(const Operator &op); -- if ( info != 0 ) -+void SuperLUSolver::HandleError(int info) const -+{ -+ if (info != 0) - { -- if ( info < 0 ) -+ SuperMatrix *A = (SuperMatrix *)APtr_->InternalData(); -+ if (info < 0) - { - switch (-info) - { - case 1: -- MFEM_ABORT("SuperLU: SuperLU options are invalid."); -+ MFEM_ABORT("SuperLUSolver: SuperLU options are invalid!"); - break; - case 2: -- MFEM_ABORT("SuperLU: Matrix A (in Ax=b) is invalid."); -+ MFEM_ABORT("SuperLUSolver: Matrix A (in Ax=b) is invalid!"); - break; - case 5: -- MFEM_ABORT("SuperLU: Vector b dimension (in Ax=b) is invalid."); -+ MFEM_ABORT("SuperLUSolver: Vector b dimension (in Ax=b) is " -+ "invalid!"); - break; - case 6: -- MFEM_ABORT("SuperLU: Number of right-hand sides is invalid."); -+ MFEM_ABORT("SuperLUSolver: Number of right-hand sides is " -+ "invalid!"); - break; - default: -- MFEM_ABORT("SuperLU: Parameter with index " -- << -info << "invalid. (1-indexed)"); -+ MFEM_ABORT("SuperLUSolver: Parameter with index " -+ << -info << "invalid (1-indexed)!"); - break; - } - } -- else if ( info <= A->ncol ) -+ else if (info <= A->ncol) - { -- MFEM_ABORT("SuperLU: Found a singular matrix, U(" -- << info << "," << info << ") is exactly zero."); -+ MFEM_ABORT("SuperLUSolver: Found a singular matrix, U(" -+ << info << "," << info << ") is exactly zero!"); - } -- else if ( info > A->ncol ) -+ else if (info > A->ncol) - { -- MFEM_ABORT("SuperLU: Memory allocation error with " -- << info - A->ncol << " bytes already allocated,"); -+ MFEM_ABORT("SuperLUSolver: Memory allocation error with " -+ << info - A->ncol << " bytes already allocated!"); - } - else - { -- MFEM_ABORT("Unknown SuperLU Error"); -+ MFEM_ABORT("Unknown SuperLU error: info = " << info << "!"); - } - } - } + // Set various solver options. Refer to STRUMPACK documentation for + // details. +- void SetFromCommandLine( ); +- void SetPrintFactorStatistics( bool print_stat ); +- void SetPrintSolveStatistics( bool print_stat ); +- void SetRelTol( double rtol ); +- void SetAbsTol( double atol ); ++ void SetFromCommandLine(); ++ void SetPrintFactorStatistics(bool print_stat); ++ void SetPrintSolveStatistics(bool print_stat); ++ ++ // Set tolerances and iterations for iterative solvers. Compression ++ // tolerance is handled below. ++ void SetRelTol(double rtol); ++ void SetAbsTol(double atol); ++ void SetMaxIter(int max_it); ++ ++ // Set the flag controlling reuse of the symbolic factorization for multiple ++ // operators. This method has to be called before repeated calls to ++ // SetOperator. ++ void SetReorderingReuse(bool reuse); ++ ++ // Enable or not GPU off-loading available if STRUMPACK was compiled with CUDA. Note ++ // that input/output from MFEM to STRUMPACK is all still through host memory. ++ void EnableGPU(); ++ void DisableGPU(); --void SuperLUSolver::SetOperator( const Operator & op ) --{ -- // Verify that we have a compatible operator -- APtr_ = dynamic_cast(&op); -- if ( APtr_ == NULL ) -- { -- mfem_error("SuperLUSolver::SetOperator : not SuperLURowLocMatrix!"); -- } -- -- // Everything is OK so finish setting the operator -- firstSolveWithThisA_ = true; -- -- // Set mfem::Operator member data -- height = op.Height(); -- width = op.Width(); -- -- // Initialize the processor grid if necessary -- if (!gridInitialized_) -- { -- this->SetupGrid(); -- } --} -- --} // mfem namespace -+} // namespace mfem + /** + * STRUMPACK is an (approximate) direct solver. It can be used as a direct +@@ -100,70 +119,151 @@ public: + * used without preconditioner. + * + * Supported values are: +- * AUTO: Use iterative refinement if no HSS compression is used, +- * otherwise use GMRes. +- * DIRECT: No outer iterative solver, just a single application of +- * the multifrontal solver. +- * REFINE: Iterative refinement. +- * PREC_GMRES: Preconditioned GMRes. +- * The preconditioner is the (approx) multifrontal solver. +- * GMRES: UN-preconditioned GMRes. (for testing mainly) +- * PREC_BICGSTAB: Preconditioned BiCGStab. +- * The preconditioner is the (approx) multifrontal solver. ++ * AUTO: Use iterative refinement if no HSS compression is ++ * used, otherwise use GMRes ++ * DIRECT: No outer iterative solver, just a single application ++ * of the multifrontal solver ++ * REFINE: Iterative refinement ++ * PREC_GMRES: Preconditioned GMRes ++ * The preconditioner is the (approx) multifrontal solver ++ * GMRES: UN-preconditioned GMRes (for testing mainly) ++ * PREC_BICGSTAB: Preconditioned BiCGStab ++ * The preconditioner is the (approx) multifrontal solver + * BICGSTAB: UN-preconditioned BiCGStab. (for testing mainly) + */ +- void SetKrylovSolver( strumpack::KrylovSolver method ); ++ void SetKrylovSolver(strumpack::KrylovSolver method); - #endif // MFEM_USE_MPI - #endif // MFEM_USE_SUPERLU -diff --git a/linalg/superlu.hpp b/linalg/superlu.hpp -index 1edec0a08..e22020751 100644 ---- a/linalg/superlu.hpp -+++ b/linalg/superlu.hpp -@@ -16,33 +16,30 @@ + /** + * Supported reorderings are: +- * METIS, PARMETIS, SCOTCH, PTSCOTCH, RCM ++ * NATURAL: Do not reorder the system ++ * METIS: Use Metis nested-dissection reordering (default) ++ * PARMETIS: Use ParMetis nested-dissection reordering ++ * SCOTCH: Use Scotch nested-dissection reordering ++ * PTSCOTCH: Use PT-Scotch nested-dissection reordering ++ * RCM: Use RCM reordering ++ * GEOMETRIC: A simple geometric nested dissection code that ++ * only works for regular meshes ++ * AMD: Approximate minimum degree ++ * MMD: Multiple minimum degree ++ * AND: Nested dissection ++ * MLF: Minimum local fill ++ * SPECTRAL: Spectral nested dissection + */ +- void SetReorderingStrategy( strumpack::ReorderingStrategy method ); ++ void SetReorderingStrategy(strumpack::ReorderingStrategy method); - #ifdef MFEM_USE_SUPERLU - #ifdef MFEM_USE_MPI -+ - #include "operator.hpp" - #include "hypre.hpp" + /** +- * Disable static pivoting for stability. The static pivoting in strumpack ++ * Configure static pivoting for stability. The static pivoting in STRUMPACK + * permutes the sparse input matrix in order to get large (nonzero) elements + * on the diagonal. If the input matrix is already diagonally dominant, this + * reordering can be disabled. ++ * ++ * Supported matching algorithms are: ++ * NONE: Don't do anything ++ * MAX_CARDINALITY: Maximum cardinality ++ * MAX_SMALLEST_DIAGONAL: Maximum smallest diagonal value ++ * MAX_SMALLEST_DIAGONAL_2: Same as MAX_SMALLEST_DIAGONAL ++ * but different algorithm ++ * MAX_DIAGONAL_SUM: Maximum sum of diagonal values ++ * MAX_DIAGONAL_PRODUCT_SCALING: Maximum product of diagonal values ++ * and row and column scaling (default) ++ * COMBBLAS: Use AWPM from CombBLAS (only with ++ * version >= 3) + */ +- void DisableMatching(); - - #include - - namespace mfem - { +- /** +- * Enable static pivoting for stability using the MC64 algorithm with +- * job=5. Using a matching algorithm, this will permute the sparse input +- * matrix in order to get nonzero elements (as large as possible) on the +- * diagonal. And will also scale the rows and columns of the matrix. +- */ +- void EnableMatching(); ++ void SetMatching(strumpack::MatchingJob job); --namespace superlu_internal --{ --unsigned int sqrti(const unsigned int & a); --} -- - namespace superlu - { --// Copy selected enumerations from SuperLU -+ -+// Copy selected enumerations from SuperLU (from superlu_enum_consts.h) - #ifdef MFEM_USE_SUPERLU5 --typedef enum {NOROWPERM, LargeDiag, MY_PERMR} RowPerm; -+typedef enum {NOROWPERM, LargeDiag, MY_PERMR} RowPerm; - #else --typedef enum {NOROWPERM, LargeDiag_MC64, LargeDiag_HWPM, MY_PERMR} RowPerm; -+typedef enum {NOROWPERM, LargeDiag_MC64, LargeDiag_HWPM, MY_PERMR} RowPerm; +-#if STRUMPACK_VERSION_MAJOR >= 3 + /** +- * Use the AWPM (approximate weight perfect matching) algorithm from the +- * Combinatorial BLAS library for static pivoting, i.e. getting large +- * nonzeros on the diagonal. This requires that strumpack was compiled with +- * support for Combinatorial BLAS. ++ * Enable support for rank-structured data formats, which can be used ++ * for compression within the sparse solver. ++ * ++ * Supported compression types are: ++ * NONE: No compression, purely direct solver (default) ++ * HSS: HSS compression of frontal matrices ++ * BLR: Block low-rank compression of fronts ++ * HODLR: Hierarchically Off-diagonal Low-Rank ++ * compression of frontal matrices ++ * BLR_HODLR: Block low-rank compression of medium ++ * fronts and Hierarchically Off-diagonal ++ * Low-Rank compression of large fronts ++ * ZFP_BLR_HODLR: ZFP compression for small fronts, ++ * Block low-rank compression of medium ++ * fronts and Hierarchically Off-diagonal ++ * Low-Rank compression of large fronts ++ * LOSSLESS: Lossless compression ++ * LOSSY: Lossy compression ++ * ++ * For versions of STRUMPACK < 5, we support only NONE, HSS, and BLR. ++ * BLR_HODLR and ZPR_BLR_HODLR are supported in STRUMPACK >= 6. + */ +- void EnableParallelMatching(); ++ void SetCompression(strumpack::CompressionType type); ++ void SetCompressionRelTol(double rtol); ++ void SetCompressionAbsTol(double atol); ++#if STRUMPACK_VERSION_MAJOR >= 5 ++ void SetCompressionLossyPrecision(int precision); ++ void SetCompressionButterflyLevels(int levels); #endif - typedef enum {NATURAL, MMD_ATA, MMD_AT_PLUS_A, COLAMD, - METIS_AT_PLUS_A, PARMETIS, ZOLTAN, MY_PERMC -- } ColPerm; --typedef enum {NOTRANS, TRANS, CONJ} Trans; --typedef enum {NOREFINE, SLU_SINGLE=1, SLU_DOUBLE, SLU_EXTRA} IterRefine; --} -+ } ColPerm; -+typedef enum {NOREFINE, SLU_SINGLE=1, SLU_DOUBLE, SLU_EXTRA} IterRefine; -+typedef enum {DOFACT, SamePattern, SamePattern_SameRowPerm, FACTORED} Fact; -+ -+} // namespace superlu - - class SuperLURowLocMatrix : public Operator - { -@@ -52,34 +49,35 @@ public: - be of size (local) nrows by (global) glob_ncols. The new parallel matrix - contains copies of all input arrays (so they can be deleted). */ - SuperLURowLocMatrix(MPI_Comm comm, -- int num_loc_rows, int first_loc_row, -- int glob_nrows, int glob_ncols, -- int *I, int *J, double *data); -+ int num_loc_rows, HYPRE_BigInt first_loc_row, -+ HYPRE_BigInt glob_nrows, HYPRE_BigInt glob_ncols, -+ int *I, HYPRE_BigInt *J, double *data); - - /** Creates a copy of the parallel matrix hypParMat in SuperLU's RowLoc - format. All data is copied so the original matrix may be deleted. */ -- SuperLURowLocMatrix(const HypreParMatrix & hypParMat); -+ SuperLURowLocMatrix(const Operator &op); - ~SuperLURowLocMatrix(); + private: +- void Init( int argc, char* argv[] ); ++ // Helper method for calling the STRUMPACK factoriation routine. ++ void FactorInternal() const; - void Mult(const Vector &x, Vector &y) const - { -- mfem_error("SuperLURowLocMatrix::Mult(...)\n" -- " matrix vector products are not supported."); -+ MFEM_ABORT("SuperLURowLocMatrix::Mult: Matrix vector products are not " -+ "supported!"); - } + protected: +- +- MPI_Comm comm_; +- int numProcs_; +- int myid_; ++ const STRUMPACKRowLocMatrix *APtr_; ++ STRUMPACKSolverType *solver_; -+ void *InternalData() const { return rowLocPtr_; } + bool factor_verbose_; + bool solve_verbose_; ++ bool reorder_reuse_; + - MPI_Comm GetComm() const { return comm_; } - -- void * InternalData() const { return rowLocPtr_; } -+ HYPRE_BigInt GetGlobalNumRows() const { return num_global_rows_; } - -- HYPRE_BigInt GetGlobalNumColumns() const { return num_global_cols; } -+ HYPRE_BigInt GetGlobalNumColumns() const { return num_global_cols_; } - - private: -- MPI_Comm comm_; -- void * rowLocPtr_; -- HYPRE_BigInt num_global_cols; -- --}; // mfem::SuperLURowLocMatrix -+ MPI_Comm comm_; -+ void *rowLocPtr_; -+ HYPRE_BigInt num_global_rows_, num_global_cols_; ++ mutable Vector rhs_, sol_; ++ mutable int nrhs_; +}; - /** The MFEM SuperLU Direct Solver class. - -@@ -88,80 +86,75 @@ private: - double precision types. It is currently maintained by Xiaoye Sherry Li at - NERSC, see http://crd-legacy.lbl.gov/~xiaoye/SuperLU/. - */ --class SuperLUSolver : public mfem::Solver -+class SuperLUSolver : public Solver - { - public: - // Constructor with MPI_Comm parameter. -- SuperLUSolver( MPI_Comm comm ); -+ SuperLUSolver(MPI_Comm comm, int npdep = 1); - -- // Constructor with SuperLU Matrix Object. -- SuperLUSolver( SuperLURowLocMatrix & A); -+ // Constructor with SuperLU matrix object. -+ SuperLUSolver(SuperLURowLocMatrix &A, int npdep = 1); - - // Default destructor. -- ~SuperLUSolver( void ); -+ ~SuperLUSolver(); - -- // Allocate and deallocate the MPI communicators. This routine is called -- // internally by SetOperator(). -- void SetupGrid(); -- // This routing must be called after the solve, but before destruction. -- void DismantleGrid(); -+ // Set the operator. -+ void SetOperator(const Operator &op); +- const STRUMPACKRowLocMatrix * APtr_; +- strumpack::StrumpackSparseSolverMPIDist * solver_; ++class STRUMPACKSolver : ++ public STRUMPACKSolverBase> ++{ ++public: ++ // Constructor with MPI_Comm parameter. ++ STRUMPACKSolver(MPI_Comm comm); ++ ++ // Constructor with STRUMPACK matrix object. ++ STRUMPACKSolver(STRUMPACKRowLocMatrix &A); - // Factor and solve the linear system y = Op^{-1} x. -- void Mult( const Vector & x, Vector & y ) const; -- -- // Set the operator. -- void SetOperator( const Operator & op ); -- -- // Set various solver options. Refer to SuperLU documentation for details. -- void SetPrintStatistics ( bool print_stat ); -- void SetEquilibriate ( bool equil ); -- void SetColumnPermutation( superlu::ColPerm col_perm ); -- void SetRowPermutation ( superlu::RowPerm row_perm, -- Array * perm = NULL ); -- void SetTranspose ( superlu::Trans trans ); -- void SetIterativeRefine ( superlu::IterRefine iter_ref ); -- void SetReplaceTinyPivot ( bool rtp ); -- void SetNumLookAheads ( int num_lookaheads ); -- void SetLookAheadElimTree( bool etree ); -- void SetSymmetricPattern ( bool sym ); -- void SetParSymbFact ( bool par ); -+ // Note: Factorization modifies the operator matrix. -+ void Mult(const Vector &x, Vector &y) const; -+ void ArrayMult(const Array &X, Array &Y) const; +-}; // mfem::STRUMPACKSolver class ++ // Constructor with MPI_Comm parameter and command line arguments. ++ STRUMPACKSolver(MPI_Comm comm, int argc, char *argv[]); + -+ // Factor and solve the linear system y = Op^{-T} x. -+ // Note: Factorization modifies the operator matrix. -+ void MultTranspose(const Vector &x, Vector &y) const; -+ void ArrayMultTranspose(const Array &X, -+ Array &Y) const; ++ // Constructor with STRUMPACK matrix object and command line arguments. ++ STRUMPACKSolver(STRUMPACKRowLocMatrix &A, int argc, char *argv[]); + -+ // Set various solver options. Refer to SuperLU_DIST documentation for -+ // details. -+ void SetPrintStatistics(bool print_stat); -+ void SetEquilibriate(bool equil); -+ void SetColumnPermutation(superlu::ColPerm col_perm); -+ void SetRowPermutation(superlu::RowPerm row_perm); -+ void SetIterativeRefine(superlu::IterRefine iter_ref); -+ void SetReplaceTinyPivot(bool rtp); -+ void SetNumLookAheads(int num_lookaheads); -+ void SetLookAheadElimTree(bool etree); -+ void SetSymmetricPattern(bool sym); -+ void SetParSymbFact(bool par); -+ void SetFact(superlu::Fact fact); ++ // Destructor. ++ ~STRUMPACKSolver() {} ++}; + -+ // Processor grid for SuperLU_DIST. -+ const int nprow_, npcol_, npdep_; - - private: -- void Init(); -+ // Initialize the solver. -+ void Init(MPI_Comm comm); - --protected: -+ // Handle error message from call to SuperLU solver. -+ void HandleError(int info) const; - -- MPI_Comm comm_; -- int numProcs_; -- int myid_; -- -- const SuperLURowLocMatrix * APtr_; -- -- // The actual types of the following pointers are hidden to avoid exposing -- // the SuperLU header files to the entire library. Their types are given in -- // the trailing comments. The reason that this is necessary is that SuperLU -- // defines these structs differently for use with its real and complex -- // solvers. If we want to add support for SuperLU's complex solvers one day -- // we will need to hide these types to avoid name conflicts. -- void* optionsPtr_; // superlu_options_t * -- void* statPtr_; // SuperLUStat_t * -- void* ScalePermstructPtr_; // ScalePermsruct_t * -- void* LUstructPtr_; // LUstruct_t * -- void* SOLVEstructPtr_; // SOLVEstruct_t * -- void* gridPtr_; // gridinfo_t * -- -- double* berr_; -- mutable int* perm_r_; -- int nrhs_; -- int nprow_; -- int npcol_; -- mutable bool firstSolveWithThisA_; -- bool gridInitialized_; -- mutable bool LUStructInitialized_; -- --}; // mfem::SuperLUSolver class -- --} // mfem namespace -+protected: -+ const SuperLURowLocMatrix *APtr_; -+ mutable Vector sol_; -+ mutable int nrhs_; ++#if STRUMPACK_VERSION_MAJOR >= 7 ++class STRUMPACKMixedPrecisionSolver : ++ public STRUMPACKSolverBase> ++{ ++public: ++ // Constructor with MPI_Comm parameter. ++ STRUMPACKMixedPrecisionSolver(MPI_Comm comm); + -+ /** The actual types of the following pointers are hidden to avoid exposing -+ the SuperLU header files to the entire library. Their types are given in -+ the trailing comments. The reason that this is necessary is that SuperLU -+ defines these structs differently for use with its real and complex -+ solvers. If we want to add support for SuperLU's complex solvers one day -+ we will need to hide these types to avoid name conflicts. */ -+ void *optionsPtr_; // superlu_options_t * -+ void *ScalePermstructPtr_; // ScalePermsruct_t * -+ void *LUstructPtr_; // LUstruct_t * -+ void *SOLVEstructPtr_; // SOLVEstruct_t * -+ void *gridPtr_; // gridinfo_t * or gridinfo3d_t * -+}; ++ // Constructor with STRUMPACK matrix object. ++ STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A); ++ ++ // Constructor with MPI_Comm parameter and command line arguments. ++ STRUMPACKMixedPrecisionSolver(MPI_Comm comm, int argc, char *argv[]); ++ ++ // Constructor with STRUMPACK matrix object and command line arguments. ++ STRUMPACKMixedPrecisionSolver(STRUMPACKRowLocMatrix &A, ++ int argc, char *argv[]); + ++ // Destructor. ++ ~STRUMPACKMixedPrecisionSolver() {} ++}; ++#endif + +-} // mfem namespace +} // namespace mfem #endif // MFEM_USE_MPI - #endif // MFEM_USE_SUPERLU + #endif // MFEM_USE_STRUMPACK diff --git a/miniapps/nurbs/nurbs_ex11p.cpp b/miniapps/nurbs/nurbs_ex11p.cpp index 7b8e3bd2d..e5cf95062 100644 --- a/miniapps/nurbs/nurbs_ex11p.cpp @@ -3847,10 +1114,10 @@ index 7b8e3bd2d..e5cf95062 100644 strumpack->SetFromCommandLine(); precond = strumpack; diff --git a/tests/unit/linalg/test_direct_solvers.cpp b/tests/unit/linalg/test_direct_solvers.cpp -index 838bb4009..848cf76df 100644 +index d015a8b94..de2d074c1 100644 --- a/tests/unit/linalg/test_direct_solvers.cpp +++ b/tests/unit/linalg/test_direct_solvers.cpp -@@ -23,14 +23,16 @@ using namespace mfem; +@@ -23,6 +23,9 @@ using namespace mfem; #ifdef MFEM_USE_SUPERLU #define DIRECT_SOLVE_PARALLEL #endif @@ -3860,211 +1127,35 @@ index 838bb4009..848cf76df 100644 #if defined(DIRECT_SOLVE_SERIAL) || defined(DIRECT_SOLVE_PARALLEL) --int dim; --double uexact(const Vector& x) -+double uexact(const Vector &x) - { - double u; -- switch (dim) -+ switch (x.Size()) - { - case 1: - u = 3.0 + 2.0 * x(0) - 0.5 * x(0) * x(0); -@@ -45,16 +47,16 @@ double uexact(const Vector& x) - return u; - } - --void gradexact(const Vector& x, Vector & grad) -+void gradexact(const Vector &x, Vector &grad) - { -- grad.SetSize(dim); -- switch (dim) -+ grad.SetSize(x.Size()); -+ switch (x.Size()) - { - case 1: - grad[0] = 2.0 - x(0); - break; - case 2: -- grad[0] = 0.2 - 0.9 * x(1) + x(1) * x (1); -+ grad[0] = 0.2 - 0.9 * x(1) + x(1) * x(1); - grad[1] = - 0.9 * x(0) + 2.0 * x(0) * x(1); - break; - default: -@@ -68,7 +70,7 @@ void gradexact(const Vector& x, Vector & grad) - double d2uexact(const Vector& x) // returns \Delta u - { - double d2u; -- switch (dim) -+ switch (x.Size()) - { - case 1: - d2u = -1.0; -@@ -83,7 +85,7 @@ double d2uexact(const Vector& x) // returns \Delta u - return d2u; - } - --double fexact(const Vector& x) // returns -\Delta u -+double fexact(const Vector &x) // returns -\Delta u - { - double d2u = d2uexact(x); - return -d2u; -@@ -93,7 +95,7 @@ double fexact(const Vector& x) // returns -\Delta u - - #ifdef DIRECT_SOLVE_SERIAL - --TEST_CASE("direct-serial","[CUDA]") -+TEST_CASE("Serial Direct Solvers", "[CUDA]") - { - const int ne = 2; - for (dim = 1; dim < 4; ++dim) -@@ -114,10 +116,9 @@ TEST_CASE("direct-serial","[CUDA]") - ne, ne, ne, Element::HEXAHEDRON, 1.0, 1.0, 1.0); +@@ -100,7 +103,7 @@ TEST_CASE("Serial Direct Solvers", "[CUDA]") + Mesh mesh; + if (dim == 1) + { +- mesh = Mesh::MakeCartesian1D(ne, 1.0); ++ mesh = Mesh::MakeCartesian1D(ne, 1.0); } - int order = 3; -- FiniteElementCollection* fec = new H1_FECollection(order, dim); -- FiniteElementSpace fespace(&mesh, fec); -- Array ess_tdof_list; -- Array ess_bdr(mesh.bdr_attributes.Max()); -+ H1_FECollection fec(order, dim); -+ FiniteElementSpace fespace(&mesh, &fec); -+ Array ess_tdof_list, ess_bdr(mesh.bdr_attributes.Max()); - ess_bdr = 1; - fespace.GetEssentialTrueDofs(ess_bdr, ess_tdof_list); - -@@ -146,15 +147,14 @@ TEST_CASE("direct-serial","[CUDA]") - umf_solver.Mult(B, X); - - Vector Y(X.Size()); -- A->Mult(X,Y); -- Y-=B; -+ A->Mult(X, Y); -+ Y -= B; - REQUIRE(Y.Norml2() < 1.e-12); - - a.RecoverFEMSolution(X, b, x); -- VectorFunctionCoefficient grad(dim,gradexact); -- double error = x.ComputeH1Error(&uex,&grad); -+ VectorFunctionCoefficient grad(dim, gradexact); -+ double error = x.ComputeH1Error(&uex, &grad); - REQUIRE(error < 1.e-12); -- delete fec; - } - } - -@@ -162,12 +162,12 @@ TEST_CASE("direct-serial","[CUDA]") - - #ifdef DIRECT_SOLVE_PARALLEL - --TEST_CASE("direct-parallel", "[Parallel], [CUDA]") -+TEST_CASE("Parallel Direct Solvers", "[Parallel], [CUDA]") + else if (dim == 2) + { +@@ -163,13 +166,13 @@ TEST_CASE("Parallel Direct Solvers", "[Parallel], [CUDA]") { int rank; MPI_Comm_rank(MPI_COMM_WORLD, &rank); - const int ne = 2; -- for (dim = 1; dim < 4; ++dim) -+ const int ne = 8; -+ for (int dim = 1; dim <= 3; ++dim) ++ const int ne = 4; + for (int dim = 1; dim < 4; ++dim) { Mesh mesh; if (dim == 1) -@@ -185,16 +185,15 @@ TEST_CASE("direct-parallel", "[Parallel], [CUDA]") - ne, ne, ne, Element::HEXAHEDRON, 1.0, 1.0, 1.0); - } - -- ParMesh *pmesh = new ParMesh(MPI_COMM_WORLD, mesh); -+ ParMesh pmesh(MPI_COMM_WORLD, mesh); - mesh.Clear(); - int order = 3; -- FiniteElementCollection* fec = new H1_FECollection(order, dim); -- ParFiniteElementSpace fespace(pmesh, fec); -- Array ess_tdof_list; -- Array ess_bdr; -- if (pmesh->bdr_attributes.Size()) -+ H1_FECollection fec(order, dim); -+ ParFiniteElementSpace fespace(&pmesh, &fec); -+ Array ess_tdof_list, ess_bdr; -+ if (pmesh.bdr_attributes.Size()) { -- ess_bdr.SetSize(pmesh->bdr_attributes.Max()); -+ ess_bdr.SetSize(pmesh.bdr_attributes.Max()); - ess_bdr = 1; - fespace.GetEssentialTrueDofs(ess_bdr, ess_tdof_list); +- mesh = Mesh::MakeCartesian1D(ne, 1.0); ++ mesh = Mesh::MakeCartesian1D(ne, 1.0); } -@@ -217,20 +216,40 @@ TEST_CASE("direct-parallel", "[Parallel], [CUDA]") - Vector B, X; - a.FormLinearSystem(ess_tdof_list, x, b, A, X, B); - -+ Vector B0(X.Size()), B1(X.Size()), X0(X.Size()), X1(X.Size()); -+ B0 = B; -+ B1 = B; -+ B1 *= 2.0; -+ Array BB(2), XX(2); -+ BB[0] = &B0; -+ BB[1] = &B1; -+ XX[0] = &X0; -+ XX[1] = &X1; -+ - #ifdef MFEM_USE_MUMPS + else if (dim == 2) { -- MUMPSSolver mumps; -+ MUMPSSolver mumps(MPI_COMM_WORLD); - mumps.SetPrintLevel(0); - mumps.SetOperator(*A.As()); -- mumps.Mult(B,X); -+ mumps.Mult(B, X); -+ - Vector Y(X.Size()); -- A->Mult(X,Y); -- Y-=B; -+ A->Mult(X, Y); -+ Y -= B; - REQUIRE(Y.Norml2() < 1.e-12); - -+ mumps.ArrayMult(BB,XX); -+ -+ for (int i = 0; i < XX.Size(); i++) -+ { -+ A->Mult(*XX[i], Y); -+ Y -= *BB[i]; -+ REQUIRE(Y.Norml2() < 1.e-12); -+ } -+ - a.RecoverFEMSolution(X, b, x); -- VectorFunctionCoefficient grad(dim,gradexact); -- double error = x.ComputeH1Error(&uex,&grad); -+ VectorFunctionCoefficient grad(dim, gradexact); -+ double error = x.ComputeH1Error(&uex, &grad); +@@ -288,6 +291,39 @@ TEST_CASE("Parallel Direct Solvers", "[Parallel], [CUDA]") REQUIRE(error < 1.e-12); } #endif -@@ -244,18 +263,59 @@ TEST_CASE("direct-parallel", "[Parallel], [CUDA]") - superlu.SetColumnPermutation(superlu::METIS_AT_PLUS_A); - superlu.SetOperator(SA); - superlu.Mult(B, X); -+ - Vector Y(X.Size()); -- A->Mult(X,Y); -- Y-=B; -+ A->Mult(X, Y); -+ Y -= B; - REQUIRE(Y.Norml2() < 1.e-12); -+ -+ // SuperLUSolver requires constant number of RHS across solves -+ SuperLURowLocMatrix SA2(*A.As()); -+ SuperLUSolver superlu2(MPI_COMM_WORLD); -+ superlu2.SetPrintStatistics(false); -+ superlu2.SetSymmetricPattern(false); -+ superlu2.SetColumnPermutation(superlu::METIS_AT_PLUS_A); -+ superlu2.SetOperator(SA2); -+ superlu2.ArrayMult(BB, XX); -+ -+ a.RecoverFEMSolution(X, b, x); -+ VectorFunctionCoefficient grad(dim, gradexact); -+ double error = x.ComputeH1Error(&uex, &grad); -+ REQUIRE(error < 1.e-12); -+ } -+#endif +#ifdef MFEM_USE_STRUMPACK + // Transform to monolithic HypreParMatrix + { @@ -4073,7 +1164,8 @@ index 838bb4009..848cf76df 100644 + strumpack.SetPrintFactorStatistics(false); + strumpack.SetPrintSolveStatistics(false); + strumpack.SetKrylovSolver(strumpack::KrylovSolver::DIRECT); -+ strumpack.SetReorderingStrategy(strumpack::ReorderingStrategy::METIS); ++ strumpack.SetReorderingStrategy(dim > 1 ? strumpack::ReorderingStrategy::METIS : ++ strumpack::ReorderingStrategy::NATURAL); + strumpack.SetOperator(SA); + strumpack.Mult(B, X); + @@ -4091,16 +1183,12 @@ index 838bb4009..848cf76df 100644 + REQUIRE(Y.Norml2() < 1.e-12); + } + - a.RecoverFEMSolution(X, b, x); -- VectorFunctionCoefficient grad(dim,gradexact); -- double error = x.ComputeH1Error(&uex,&grad); ++ a.RecoverFEMSolution(X, b, x); + VectorFunctionCoefficient grad(dim, gradexact); + double error = x.ComputeH1Error(&uex, &grad); - REQUIRE(error < 1.e-12); - } - #endif -- delete fec; -- delete pmesh; ++ REQUIRE(error < 1.e-12); ++ } ++#endif } } diff --git a/palace/deps/patch/mfem/patch_hypre_blocks.diff b/palace/deps/patch/mfem/patch_hypre_blocks.diff deleted file mode 100644 index 346657dcb..000000000 --- a/palace/deps/patch/mfem/patch_hypre_blocks.diff +++ /dev/null @@ -1,41 +0,0 @@ -diff --git a/linalg/hypre.cpp b/linalg/hypre.cpp -index d7069d4e6..689ce675e 100644 ---- a/linalg/hypre.cpp -+++ b/linalg/hypre.cpp -@@ -2996,10 +2996,7 @@ void GatherBlockOffsetData(MPI_Comm comm, const int rank, const int nprocs, - for (int i = 0; i < nprocs; ++i) - { - globalNum += all_num_loc[i]; -- if (rank == 0) -- { -- MFEM_VERIFY(globalNum >= 0, "overflow in global size"); -- } -+ MFEM_VERIFY(globalNum >= 0, "overflow in global size"); - if (i < rank) - { - firstLocal += all_num_loc[i]; -@@ -3064,9 +3061,6 @@ HypreParMatrix * HypreParMatrixFromBlocks(Array2D &blocks, - const int nrows = blocks(i,j)->NumRows(); - const int ncols = blocks(i,j)->NumCols(); - -- MFEM_VERIFY(nrows > 0 && -- ncols > 0, "Invalid block in HypreParMatrixFromBlocks"); -- - if (rowOffsets[i+1] == 0) - { - rowOffsets[i+1] = nrows; -@@ -3088,14 +3082,11 @@ HypreParMatrix * HypreParMatrixFromBlocks(Array2D &blocks, - } - } - } -- -- MFEM_VERIFY(rowOffsets[i+1] > 0, "Invalid input blocks"); - rowOffsets[i+1] += rowOffsets[i]; - } - - for (int j=0; j 0, "Invalid input blocks"); - colOffsets[j+1] += colOffsets[j]; - } - diff --git a/palace/deps/patch/mfem/patch_mesh_part.diff b/palace/deps/patch/mfem/patch_mesh_part.diff index 347ce63a5..55c132611 100644 --- a/palace/deps/patch/mfem/patch_mesh_part.diff +++ b/palace/deps/patch/mfem/patch_mesh_part.diff @@ -1,8 +1,8 @@ diff --git a/.gitignore b/.gitignore -index 553aa8582..e26611bdc 100644 +index dbdb54ce1..4f405c132 100644 --- a/.gitignore +++ b/.gitignore -@@ -218,7 +218,7 @@ miniapps/meshing/mobius-strip.mesh +@@ -220,7 +220,7 @@ miniapps/meshing/mobius-strip.mesh miniapps/meshing/klein-bottle.mesh miniapps/meshing/toroid-*.mesh miniapps/meshing/twist-*.mesh @@ -383,7 +383,7 @@ index 2ed9f4a1b..96373b2d1 100644 /// C = A * B (as boolean matrices) diff --git a/mesh/mesh.cpp b/mesh/mesh.cpp -index de2d80f15..0e0ce23c6 100644 +index 87e606510..a4f4e2069 100644 --- a/mesh/mesh.cpp +++ b/mesh/mesh.cpp @@ -19,6 +19,7 @@ @@ -402,7 +402,7 @@ index de2d80f15..0e0ce23c6 100644 // Include the METIS header, if using version 5. If using METIS 4, the needed // declarations are inlined below, i.e. no header is needed. -@@ -2979,7 +2981,7 @@ void Mesh::FinalizeTopology(bool generate_bdr) +@@ -2986,7 +2988,7 @@ void Mesh::FinalizeTopology(bool generate_bdr) { GetElementToFaceTable(); GenerateFaces(); @@ -411,7 +411,7 @@ index de2d80f15..0e0ce23c6 100644 { GenerateBoundaryElements(); GetElementToFaceTable(); // update be_to_face -@@ -2999,7 +3001,7 @@ void Mesh::FinalizeTopology(bool generate_bdr) +@@ -3006,7 +3008,7 @@ void Mesh::FinalizeTopology(bool generate_bdr) if (Dim == 2) { GenerateFaces(); // 'Faces' in 2D refers to the edges @@ -420,7 +420,7 @@ index de2d80f15..0e0ce23c6 100644 { GenerateBoundaryElements(); } -@@ -5387,6 +5389,12 @@ const FiniteElementSpace *Mesh::GetNodalFESpace() const +@@ -5394,6 +5396,12 @@ const FiniteElementSpace *Mesh::GetNodalFESpace() const void Mesh::SetCurvature(int order, bool discont, int space_dim, int ordering) { @@ -433,7 +433,7 @@ index de2d80f15..0e0ce23c6 100644 space_dim = (space_dim == -1) ? spaceDim : space_dim; FiniteElementCollection* nfec; if (discont) -@@ -12108,6 +12116,878 @@ int Mesh::FindPoints(DenseMatrix &point_mat, Array& elem_ids, +@@ -12230,6 +12238,878 @@ void Mesh::GetGeometricParametersFromJacobian(const DenseMatrix &J, } @@ -1313,7 +1313,7 @@ index de2d80f15..0e0ce23c6 100644 int flags, MemoryType d_mt) { diff --git a/mesh/mesh.hpp b/mesh/mesh.hpp -index 3e0590067..f8aa5706a 100644 +index 64cf55ae4..6bddc43e9 100644 --- a/mesh/mesh.hpp +++ b/mesh/mesh.hpp @@ -27,6 +27,7 @@ @@ -1337,7 +1337,7 @@ index 3e0590067..f8aa5706a 100644 // Counter for Mesh transformations: refinement, derefinement, rebalancing. // Used for checking during Update operations on objects depending on the -@@ -771,7 +774,7 @@ public: +@@ -767,7 +770,7 @@ public: int AddBdrPoint(int v, int attr = 1); @@ -1346,7 +1346,7 @@ index 3e0590067..f8aa5706a 100644 /// Finalize the construction of a triangular Mesh. void FinalizeTriMesh(int generate_edges = 0, int refine = 0, bool fix_orientation = true); -@@ -1978,6 +1981,195 @@ public: +@@ -1995,6 +1998,195 @@ public: std::ostream &operator<<(std::ostream &out, const Mesh &mesh); @@ -1632,22 +1632,20 @@ index c434ae903..b72e5db55 100644 void SetRefinementFlag(int rf) { refinement_flag = rf; } diff --git a/miniapps/meshing/makefile b/miniapps/meshing/makefile -index ce82f238d..b9cd9a30c 100644 +index 315dc4465..656f80d0e 100644 --- a/miniapps/meshing/makefile +++ b/miniapps/meshing/makefile -@@ -118,7 +118,9 @@ clean-build: - rm -rf *.dSYM *.TVD.*breakpoints +@@ -121,7 +121,7 @@ clean-build: + rm -rf *.dSYM *.TVD.*breakpoints clean-exec: -- @rm -f mobius-strip.mesh klein-bottle.mesh mesh-explorer.mesh -+ @rm -f mobius-strip.mesh klein-bottle.mesh mesh-explorer.mesh* -+ @rm -f toroid-*.mesh twist-*.mesh trimmer.mesh -+ @rm -f mobius-strip.mesh klein-bottle.mesh mesh-explorer.mesh* - @rm -f toroid-*.mesh twist-*.mesh trimmer.mesh reflected.mesh - @rm -f partitioning.txt shaper.mesh extruder.mesh - @rm -f optimized* perturbed* polar-nc.mesh +- @rm -f mobius-strip.mesh klein-bottle.mesh mesh-explorer.mesh ++ @rm -f mobius-strip.mesh klein-bottle.mesh mesh-explorer.mesh* + @rm -f toroid-*.mesh twist-*.mesh trimmer.mesh reflected.mesh + @rm -f partitioning.txt shaper.mesh extruder.mesh + @rm -f optimized* perturbed* polar-nc.mesh diff --git a/miniapps/meshing/mesh-explorer.cpp b/miniapps/meshing/mesh-explorer.cpp -index 49f3f9690..068512670 100644 +index f05e18e83..6a2a4e78b 100644 --- a/miniapps/meshing/mesh-explorer.cpp +++ b/miniapps/meshing/mesh-explorer.cpp @@ -308,6 +308,7 @@ int main (int argc, char *argv[]) @@ -1668,19 +1666,7 @@ index 49f3f9690..068512670 100644 "V) Save in VTK format (only linear and quadratic meshes)\n" "D) Save as a DataCollection\n" "q) Quit\n" -@@ -959,9 +961,8 @@ int main (int argc, char *argv[]) - cin >> nxyz[2]; np *= nxyz[2]; - } - } -- int *part = mesh->CartesianPartitioning(nxyz); -- partitioning = Array(part, mesh->GetNE()); -- delete [] part; -+ partitioning.MakeRef(mesh->CartesianPartitioning(nxyz), -+ mesh->GetNE(), true); - recover_bdr_partitioning(mesh, partitioning, bdr_partitioning); - } - else if (pk == 's') -@@ -972,7 +973,7 @@ int main (int argc, char *argv[]) +@@ -984,7 +986,7 @@ int main (int argc, char *argv[]) partitioning.SetSize(mesh->GetNE()); for (int i = 0; i < mesh->GetNE(); i++) { @@ -1689,19 +1675,15 @@ index 49f3f9690..068512670 100644 } recover_bdr_partitioning(mesh, partitioning, bdr_partitioning); } -@@ -985,9 +986,8 @@ int main (int argc, char *argv[]) +@@ -997,6 +999,7 @@ int main (int argc, char *argv[]) } cout << "Enter number of processors: " << flush; cin >> np; -- int *part = mesh->GeneratePartitioning(np, part_method); -- partitioning = Array(part, mesh->GetNE()); -- delete [] part; -+ partitioning.MakeRef(mesh->GeneratePartitioning(np, part_method), -+ mesh->GetNE(), true); - recover_bdr_partitioning(mesh, partitioning, bdr_partitioning); - } - if (partitioning) -@@ -1185,6 +1185,25 @@ int main (int argc, char *argv[]) ++ + int *part = mesh->GeneratePartitioning(np, part_method); + partitioning = Array(part, mesh->GetNE()); + delete [] part; +@@ -1197,6 +1200,25 @@ int main (int argc, char *argv[]) cout << "New mesh file: " << omesh_file << endl; } diff --git a/palace/deps/patch/mfem/patch_pa_libceed.diff b/palace/deps/patch/mfem/patch_pa_libceed.diff deleted file mode 100644 index b62f80880..000000000 --- a/palace/deps/patch/mfem/patch_pa_libceed.diff +++ /dev/null @@ -1,28397 +0,0 @@ -diff --git a/fem/CMakeLists.txt b/fem/CMakeLists.txt -index 6da0cfea3..f11a5a72b 100644 ---- a/fem/CMakeLists.txt -+++ b/fem/CMakeLists.txt -@@ -17,19 +17,25 @@ set(SRCS - integ/bilininteg_convection_mf.cpp - integ/bilininteg_convection_pa.cpp - integ/bilininteg_convection_ea.cpp -+ integ/bilininteg_curlcurl_mf.cpp - integ/bilininteg_curlcurl_pa.cpp - integ/bilininteg_dgtrace_pa.cpp - integ/bilininteg_dgtrace_ea.cpp - integ/bilininteg_diffusion_mf.cpp - integ/bilininteg_diffusion_pa.cpp - integ/bilininteg_diffusion_ea.cpp -+ integ/bilininteg_divdiv_mf.cpp - integ/bilininteg_divdiv_pa.cpp - integ/bilininteg_gradient_pa.cpp -- integ/bilininteg_interp_pa.cpp -+ integ/bilininteg_interp_id_pa.cpp -+ integ/bilininteg_interp_grad_pa.cpp -+ integ/bilininteg_interp_curl_pa.cpp - integ/bilininteg_mass_mf.cpp - integ/bilininteg_mass_pa.cpp - integ/bilininteg_mass_ea.cpp -+ integ/bilininteg_mixedcurl_mf.cpp - integ/bilininteg_mixedcurl_pa.cpp -+ integ/bilininteg_mixedvecgrad_mf.cpp - integ/bilininteg_mixedvecgrad_pa.cpp - integ/bilininteg_transpose_ea.cpp - integ/bilininteg_vecdiffusion_mf.cpp -@@ -38,6 +44,7 @@ set(SRCS - integ/bilininteg_vecmass_mf.cpp - integ/bilininteg_vecmass_pa.cpp - integ/bilininteg_vectorfediv_pa.cpp -+ integ/bilininteg_vectorfemass_mf.cpp - integ/bilininteg_vectorfemass_pa.cpp - integ/lininteg_boundary.cpp - integ/lininteg_boundary_flux.cpp -@@ -72,15 +79,20 @@ set(SRCS - hybridization.cpp - intrules.cpp - ceed/interface/basis.cpp -- ceed/interface/restriction.cpp - ceed/interface/operator.cpp -+ ceed/interface/restriction.cpp - ceed/interface/util.cpp -+ ceed/integrators/mass/mass.cpp - ceed/integrators/convection/convection.cpp - ceed/integrators/diffusion/diffusion.cpp - ceed/integrators/nlconvection/nlconvection.cpp -- ceed/integrators/mass/mass.cpp -+ ceed/integrators/vecfemass/vecfemass.cpp -+ ceed/integrators/divdiv/divdiv.cpp -+ ceed/integrators/curlcurl/curlcurl.cpp -+ ceed/integrators/mixedvecgrad/mixedvecgrad.cpp -+ ceed/integrators/mixedveccurl/mixedveccurl.cpp -+ ceed/integrators/interp/interp.cpp - ceed/solvers/algebraic.cpp -- ceed/solvers/full-assembly.cpp - ceed/solvers/solvers-atpmg.cpp - linearform.cpp - linearform_ext.cpp -@@ -180,18 +192,33 @@ set(HDRS - hybridization.hpp - intrules.hpp - ceed/interface/basis.hpp -+ ceed/interface/ceed.hpp -+ ceed/interface/coefficient.hpp - ceed/interface/integrator.hpp - ceed/interface/interface.hpp -+ ceed/interface/mixed_operator.hpp - ceed/interface/operator.hpp - ceed/interface/restriction.hpp - ceed/interface/util.hpp -+ ceed/integrators/mass/mass.hpp -+ ceed/integrators/mass/mass_qf.h - ceed/integrators/convection/convection.hpp -+ ceed/integrators/convection/convection_qf.h - ceed/integrators/diffusion/diffusion.hpp -- ceed/integrators/mass/mass.hpp -+ ceed/integrators/diffusion/diffusion_qf.h - ceed/integrators/nlconvection/nlconvection.hpp -- ceed/interface/coefficient.hpp -+ ceed/integrators/nlconvection/nlconvection_qf.h -+ ceed/integrators/vecfemass/vecfemass.hpp -+ ceed/integrators/vecfemass/vecfemass_qf.h -+ ceed/integrators/divdiv/divdiv.hpp -+ ceed/integrators/divdiv/divdiv_qf.h -+ ceed/integrators/curlcurl/curlcurl.hpp -+ ceed/integrators/curlcurl/curlcurl_qf.h -+ ceed/integrators/mixedvecgrad/mixedvecgrad.hpp -+ ceed/integrators/mixedveccurl/mixedveccurl.hpp -+ ceed/integrators/interp/interp.hpp -+ ceed/integrators/util/util_qf.h - ceed/solvers/algebraic.hpp -- ceed/solvers/full-assembly.hpp - ceed/solvers/solvers-atpmg.hpp - linearform.hpp - linearform_ext.hpp -diff --git a/fem/bilinearform.cpp b/fem/bilinearform.cpp -index a549d03a7..0a566cba5 100644 ---- a/fem/bilinearform.cpp -+++ b/fem/bilinearform.cpp -@@ -1353,8 +1353,8 @@ void MixedBilinearForm::Assemble(int skip_zeros) - } - for (int k = 0; k < trace_face_integs.Size(); k++) - { -- trace_face_integs[k]->AssembleFaceMatrix(*trial_face_fe, *test_fe1, -- *test_fe2, *ftr, elemmat); -+ trace_face_integs[k]->AssembleFaceMatrix2(*trial_face_fe, *test_fe1, -+ *test_fe2, *ftr, elemmat); - mat->AddSubMatrix(test_vdofs, trial_vdofs, elemmat, skip_zeros); - } - } -@@ -1409,10 +1409,10 @@ void MixedBilinearForm::Assemble(int skip_zeros) - (*boundary_trace_face_integs_marker[k])[bdr_attr-1] == 0) - { continue; } - -- boundary_trace_face_integs[k]->AssembleFaceMatrix(*trial_face_fe, -- *test_fe1, -- *test_fe2, -- *ftr, elemmat); -+ boundary_trace_face_integs[k]->AssembleFaceMatrix2(*trial_face_fe, -+ *test_fe1, -+ *test_fe2, -+ *ftr, elemmat); - mat->AddSubMatrix(test_vdofs, trial_vdofs, elemmat, skip_zeros); - } - } -diff --git a/fem/bilinearform_ext.cpp b/fem/bilinearform_ext.cpp -index 90a1655f4..0c88fdf91 100644 ---- a/fem/bilinearform_ext.cpp -+++ b/fem/bilinearform_ext.cpp -@@ -2131,6 +2131,33 @@ void PADiscreteLinearOperatorExtension::Assemble() - test_multiplicity.Reciprocal(); - } - -+void PADiscreteLinearOperatorExtension::Mult(const Vector &x, Vector &y) const -+{ -+ Array &interpolators = *a->GetDBFI(); -+ if (elem_restrict_trial) -+ { -+ elem_restrict_trial->Mult(x, local_trial); -+ } -+ if (elem_restrict_test) -+ { -+ local_test = 0.0; -+ for (BilinearFormIntegrator *interp : interpolators) -+ { -+ interp->AddMultPA(elem_restrict_trial ? local_trial : x, local_test); -+ } -+ elem_restrict_test->MultTranspose(local_test, y); -+ } -+ else -+ { -+ y = 0.0; -+ for (BilinearFormIntegrator *interp : interpolators) -+ { -+ interp->AddMultPA(elem_restrict_trial ? local_trial : x, y); -+ } -+ } -+ y *= test_multiplicity; -+} -+ - void PADiscreteLinearOperatorExtension::AddMult(const Vector &x, Vector &y, - const double c) const - { -@@ -2152,6 +2179,7 @@ void PADiscreteLinearOperatorExtension::AddMult(const Vector &x, Vector &y, - } - else - { -+ temp_test = 0.0; - for (BilinearFormIntegrator *interp : interpolators) - { - interp->AddMultPA(elem_restrict_trial ? local_trial : x, temp_test); -@@ -2165,8 +2193,10 @@ void PADiscreteLinearOperatorExtension::AddMultTranspose(const Vector &x, - Vector &y, - const double c) const - { -+ MFEM_VERIFY(c == 1.0, -+ "General coefficient case for PADiscreteLinearOperatorExtension::" -+ "AddMultTranspose is not yet supported!"); - Array &interpolators = *a->GetDBFI(); -- temp_test.SetSize(y.Size()); - temp_test.UseDevice(true); - temp_test = x; - temp_test *= test_multiplicity; -@@ -2182,26 +2212,14 @@ void PADiscreteLinearOperatorExtension::AddMultTranspose(const Vector &x, - interp->AddMultTransposePA(elem_restrict_test ? local_test : temp_test, - local_trial); - } -- if (c != 1.0) -- { -- local_trial *= c; -- } - elem_restrict_trial->AddMultTranspose(local_trial, y); - } - else - { - y.UseDevice(true); // typically this is a large vector, so store on device -- if (c != 1.0) -- { -- MFEM_ABORT("General coefficient case for PADiscreteLinearOperatorExtension::" -- "AddMultTranspose is not yet supported!"); -- } -- else -+ for (BilinearFormIntegrator *interp : interpolators) - { -- for (BilinearFormIntegrator *interp : interpolators) -- { -- interp->AddMultTransposePA(elem_restrict_test ? local_test : temp_test, y); -- } -+ interp->AddMultTransposePA(elem_restrict_test ? local_test : temp_test, y); - } - } - } -diff --git a/fem/bilinearform_ext.hpp b/fem/bilinearform_ext.hpp -index db26eb801..aa9271d8f 100644 ---- a/fem/bilinearform_ext.hpp -+++ b/fem/bilinearform_ext.hpp -@@ -227,6 +227,7 @@ public: - PADiscreteLinearOperatorExtension(DiscreteLinearOperator *linop); - - void Assemble(); -+ void Mult(const Vector &x, Vector &y) const; - void AddMult(const Vector &x, Vector &y, const double c = 1.0) const; - void AddMultTranspose(const Vector &x, Vector &y, const double c = 1.0) const; - }; -diff --git a/fem/bilininteg.cpp b/fem/bilininteg.cpp -index e6fc2a6ee..096285723 100644 ---- a/fem/bilininteg.cpp -+++ b/fem/bilininteg.cpp -@@ -141,7 +141,7 @@ void BilinearFormIntegrator::AssembleEA(const FiniteElementSpace&, - " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace &, -+void BilinearFormIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace&, - Vector&, - Vector&) - { -@@ -156,47 +156,50 @@ void BilinearFormIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace&, - " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleElementMatrix( -- const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat) -+void BilinearFormIntegrator::AssembleElementMatrix(const FiniteElement&, -+ ElementTransformation&, -+ DenseMatrix&) - { - MFEM_ABORT("BilinearFormIntegrator::AssembleElementMatrix(...)\n" - " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleElementMatrix2( -- const FiniteElement &el1, const FiniteElement &el2, -- ElementTransformation &Trans, DenseMatrix &elmat) -+void BilinearFormIntegrator::AssembleElementMatrix2(const FiniteElement&, -+ const FiniteElement&, -+ ElementTransformation&, -+ DenseMatrix&) - { - MFEM_ABORT("BilinearFormIntegrator::AssembleElementMatrix2(...)\n" - " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleFaceMatrix( -- const FiniteElement &el1, const FiniteElement &el2, -- FaceElementTransformations &Trans, DenseMatrix &elmat) -+void BilinearFormIntegrator::AssembleFaceMatrix(const FiniteElement&, -+ const FiniteElement&, -+ FaceElementTransformations&Trans, -+ DenseMatrix&) - { - MFEM_ABORT("BilinearFormIntegrator::AssembleFaceMatrix(...)\n" - " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleFaceMatrix( -- const FiniteElement &trial_face_fe, const FiniteElement &test_fe1, -- const FiniteElement &test_fe2, FaceElementTransformations &Trans, -- DenseMatrix &elmat) -+void BilinearFormIntegrator::AssembleFaceMatrix2(const FiniteElement&, -+ const FiniteElement&, -+ const FiniteElement&, -+ FaceElementTransformations&, -+ DenseMatrix&) - { -- MFEM_ABORT("AssembleFaceMatrix (mixed form) is not implemented for this" -- " Integrator class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleFaceMatrix2(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleTraceFaceMatrix (int elem, -- const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe1, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat) -+void BilinearFormIntegrator::AssembleTraceFaceMatrix(int, -+ const FiniteElement&, -+ const FiniteElement&, -+ FaceElementTransformations&, -+ DenseMatrix&) - { -- MFEM_ABORT("AssembleTraceFaceMatrix (DPG form) is not implemented for this" -- " Integrator class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleTraceFaceMatrix(...)\n" -+ " is not implemented for this class."); - } - - void BilinearFormIntegrator::AssembleElementVector( -@@ -221,17 +224,10 @@ void BilinearFormIntegrator::AssembleFaceVector( - elmat.Mult(elfun, elvect); - } - --void TransposeIntegrator::SetIntRule(const IntegrationRule *ir) --{ -- IntRule = ir; -- bfi->SetIntRule(ir); --} -- - void TransposeIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { - bfi->AssembleElementMatrix(el, Trans, bfi_elmat); -- // elmat = bfi_elmat^t - elmat.Transpose(bfi_elmat); - } - -@@ -240,7 +236,6 @@ void TransposeIntegrator::AssembleElementMatrix2( - ElementTransformation &Trans, DenseMatrix &elmat) - { - bfi->AssembleElementMatrix2(test_fe, trial_fe, Trans, bfi_elmat); -- // elmat = bfi_elmat^t - elmat.Transpose(bfi_elmat); - } - -@@ -249,16 +244,9 @@ void TransposeIntegrator::AssembleFaceMatrix( - FaceElementTransformations &Trans, DenseMatrix &elmat) - { - bfi->AssembleFaceMatrix(el1, el2, Trans, bfi_elmat); -- // elmat = bfi_elmat^t - elmat.Transpose(bfi_elmat); - } - --void LumpedIntegrator::SetIntRule(const IntegrationRule *ir) --{ -- IntRule = ir; -- bfi->SetIntRule(ir); --} -- - void LumpedIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -@@ -266,38 +254,41 @@ void LumpedIntegrator::AssembleElementMatrix( - elmat.Lump(); - } - --void InverseIntegrator::SetIntRule(const IntegrationRule *ir) --{ -- IntRule = ir; -- integrator->SetIntRule(ir); --} -- - void InverseIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -- integrator->AssembleElementMatrix(el, Trans, elmat); -+ bfi->AssembleElementMatrix(el, Trans, elmat); - elmat.Invert(); - } - -+bool SumIntegrator::SupportsCeed() const -+{ -+ for (int i = 0; i < bfis.Size(); i++) -+ { -+ if (!bfis[i]->SupportsCeed()) { return false; } -+ } -+ return true; -+} -+ - void SumIntegrator::SetIntRule(const IntegrationRule *ir) - { - IntRule = ir; -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->SetIntRule(ir); -+ bfis[i]->SetIntRule(ir); - } - } - - void SumIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -- MFEM_ASSERT(integrators.Size() > 0, "empty SumIntegrator."); -+ MFEM_ASSERT(bfis.Size() > 0, "empty SumIntegrator."); - -- integrators[0]->AssembleElementMatrix(el, Trans, elmat); -- for (int i = 1; i < integrators.Size(); i++) -+ bfis[0]->AssembleElementMatrix(el, Trans, elmat); -+ for (int i = 1; i < bfis.Size(); i++) - { -- integrators[i]->AssembleElementMatrix(el, Trans, elem_mat); -- elmat += elem_mat; -+ bfis[i]->AssembleElementMatrix(el, Trans, bfi_elmat); -+ elmat += bfi_elmat; - } - } - -@@ -305,13 +296,13 @@ void SumIntegrator::AssembleElementMatrix2( - const FiniteElement &el1, const FiniteElement &el2, - ElementTransformation &Trans, DenseMatrix &elmat) - { -- MFEM_ASSERT(integrators.Size() > 0, "empty SumIntegrator."); -+ MFEM_ASSERT(bfis.Size() > 0, "empty SumIntegrator."); - -- integrators[0]->AssembleElementMatrix2(el1, el2, Trans, elmat); -- for (int i = 1; i < integrators.Size(); i++) -+ bfis[0]->AssembleElementMatrix2(el1, el2, Trans, elmat); -+ for (int i = 1; i < bfis.Size(); i++) - { -- integrators[i]->AssembleElementMatrix2(el1, el2, Trans, elem_mat); -- elmat += elem_mat; -+ bfis[i]->AssembleElementMatrix2(el1, el2, Trans, bfi_elmat); -+ elmat += bfi_elmat; - } - } - -@@ -319,134 +310,134 @@ void SumIntegrator::AssembleFaceMatrix( - const FiniteElement &el1, const FiniteElement &el2, - FaceElementTransformations &Trans, DenseMatrix &elmat) - { -- MFEM_ASSERT(integrators.Size() > 0, "empty SumIntegrator."); -+ MFEM_ASSERT(bfis.Size() > 0, "empty SumIntegrator."); - -- integrators[0]->AssembleFaceMatrix(el1, el2, Trans, elmat); -- for (int i = 1; i < integrators.Size(); i++) -+ bfis[0]->AssembleFaceMatrix(el1, el2, Trans, elmat); -+ for (int i = 1; i < bfis.Size(); i++) - { -- integrators[i]->AssembleFaceMatrix(el1, el2, Trans, elem_mat); -- elmat += elem_mat; -+ bfis[i]->AssembleFaceMatrix(el1, el2, Trans, bfi_elmat); -+ elmat += bfi_elmat; - } - } - --void SumIntegrator::AssembleFaceMatrix( -- const FiniteElement &tr_fe, -- const FiniteElement &te_fe1, const FiniteElement &te_fe2, -- FaceElementTransformations &Trans, DenseMatrix &elmat) -+void SumIntegrator::AssembleFaceMatrix2( -+ const FiniteElement &tr_fe, const FiniteElement &te_fe1, -+ const FiniteElement &te_fe2, FaceElementTransformations &Trans, -+ DenseMatrix &elmat) - { -- MFEM_ASSERT(integrators.Size() > 0, "empty SumIntegrator."); -+ MFEM_ASSERT(bfis.Size() > 0, "empty SumIntegrator."); - -- integrators[0]->AssembleFaceMatrix(tr_fe, te_fe1, te_fe2, Trans, elmat); -- for (int i = 1; i < integrators.Size(); i++) -+ bfis[0]->AssembleFaceMatrix2(tr_fe, te_fe1, te_fe2, Trans, elmat); -+ for (int i = 1; i < bfis.Size(); i++) - { -- integrators[i]->AssembleFaceMatrix(tr_fe, te_fe1, te_fe2, Trans, elem_mat); -- elmat += elem_mat; -+ bfis[i]->AssembleFaceMatrix2(tr_fe, te_fe1, te_fe2, Trans, bfi_elmat); -+ elmat += bfi_elmat; - } - } - - void SumIntegrator::AssemblePA(const FiniteElementSpace& fes) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssemblePA(fes); -+ bfis[i]->AssemblePA(fes); - } - } - - void SumIntegrator::AssemblePA(const FiniteElementSpace& trial_fes, - const FiniteElementSpace& test_fes) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssemblePA(trial_fes, test_fes); -+ bfis[i]->AssemblePA(trial_fes, test_fes); - } - } - - void SumIntegrator::AssembleDiagonalPA(Vector &diag) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleDiagonalPA(diag); -+ bfis[i]->AssembleDiagonalPA(diag); - } - } - - void SumIntegrator::AssemblePAInteriorFaces(const FiniteElementSpace &fes) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssemblePAInteriorFaces(fes); -+ bfis[i]->AssemblePAInteriorFaces(fes); - } - } - - void SumIntegrator::AssemblePABoundaryFaces(const FiniteElementSpace &fes) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssemblePABoundaryFaces(fes); -+ bfis[i]->AssemblePABoundaryFaces(fes); - } - } - - void SumIntegrator::AddMultPA(const Vector& x, Vector& y) const - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AddMultPA(x, y); -+ bfis[i]->AddMultPA(x, y); - } - } - - void SumIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AddMultTransposePA(x, y); -+ bfis[i]->AddMultTransposePA(x, y); - } - } - - void SumIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleMF(fes); -+ bfis[i]->AssembleMF(fes); - } - } - - void SumIntegrator::AssembleMF(const FiniteElementSpace& trial_fes, - const FiniteElementSpace& test_fes) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleMF(trial_fes, test_fes); -+ bfis[i]->AssembleMF(trial_fes, test_fes); - } - } - - void SumIntegrator::AssembleDiagonalMF(Vector &diag) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleDiagonalMF(diag); -+ bfis[i]->AssembleDiagonalMF(diag); - } - } - - void SumIntegrator::AddMultMF(const Vector& x, Vector& y) const - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AddMultTransposeMF(x, y); -+ bfis[i]->AddMultTransposeMF(x, y); - } - } - - void SumIntegrator::AddMultTransposeMF(const Vector &x, Vector &y) const - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AddMultMF(x, y); -+ bfis[i]->AddMultMF(x, y); - } - } - - void SumIntegrator::AssembleEA(const FiniteElementSpace &fes, Vector &emat) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleEA(fes, emat); -+ bfis[i]->AssembleEA(fes, emat); - } - } - -@@ -454,32 +445,41 @@ void SumIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace &fes, - Vector &ea_data_int, - Vector &ea_data_ext) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleEAInteriorFaces(fes, ea_data_int, ea_data_ext); -+ bfis[i]->AssembleEAInteriorFaces(fes, ea_data_int, ea_data_ext); - } - } - - void SumIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace &fes, - Vector &ea_data_bdr) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- integrators[i]->AssembleEABoundaryFaces(fes, ea_data_bdr); -+ bfis[i]->AssembleEABoundaryFaces(fes, ea_data_bdr); - } - } - - SumIntegrator::~SumIntegrator() - { -- if (own_integrators) -+ if (own_bfis) - { -- for (int i = 0; i < integrators.Size(); i++) -+ for (int i = 0; i < bfis.Size(); i++) - { -- delete integrators[i]; -+ delete bfis[i]; - } - } - } - -+const IntegrationRule &MixedScalarIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = this->GetIntegrationOrder(trial_fe, test_fe, Trans); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void MixedScalarIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -507,12 +507,8 @@ void MixedScalarIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_nd, trial_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int ir_order = this->GetIntegrationOrder(trial_fe, test_fe, Trans); -- ir = &IntRules.Get(trial_fe.GetGeomType(), ir_order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -539,6 +535,15 @@ void MixedScalarIntegrator::AssembleElementMatrix2( - #endif - } - -+const IntegrationRule &MixedVectorIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = this->GetIntegrationOrder(trial_fe, test_fe, Trans); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void MixedVectorIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -598,12 +603,8 @@ void MixedVectorIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_nd, trial_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int ir_order = this->GetIntegrationOrder(trial_fe, test_fe, Trans); -- ir = &IntRules.Get(trial_fe.GetGeomType(), ir_order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -713,6 +714,15 @@ void MixedVectorIntegrator::AssembleElementMatrix2( - #endif - } - -+const IntegrationRule &MixedScalarVectorIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = this->GetIntegrationOrder(trial_fe, test_fe, Trans); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void MixedScalarVectorIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -754,12 +764,8 @@ void MixedScalarVectorIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_nd, trial_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int ir_order = this->GetIntegrationOrder(trial_fe, test_fe, Trans); -- ir = &IntRules.Get(trial_fe.GetGeomType(), ir_order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -787,6 +793,15 @@ void MixedScalarVectorIntegrator::AssembleElementMatrix2( - } - } - -+const IntegrationRule &GradientIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = Trans.OrderGrad(&trial_fe) + test_fe.GetOrder() + Trans.OrderJ(); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void GradientIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -843,18 +858,30 @@ void GradientIntegrator::AssembleElementMatrix2( - } - } - --const IntegrationRule &GradientIntegrator::GetRule( -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -+const IntegrationRule &DiffusionIntegrator::GetRuleStatic( -+ const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans) - { -- int order = Trans.OrderGrad(&trial_fe) + test_fe.GetOrder() + Trans.OrderJ(); -+ int order; -+ if (trial_fe.Space() == FunctionSpace::Pk) -+ { -+ order = trial_fe.GetOrder() + test_fe.GetOrder() - 2; -+ } -+ else -+ { -+ // order = 2 * el.GetOrder() - 2; // <-- this seems to work fine too -+ order = trial_fe.GetOrder() + test_fe.GetOrder() + trial_fe.GetDim() - 1; -+ } -+ if (trial_fe.Space() == FunctionSpace::rQk) -+ { -+ return RefinedIntRules.Get(trial_fe.GetGeomType(), order); -+ } - return IntRules.Get(trial_fe.GetGeomType(), order); - } - --void DiffusionIntegrator::AssembleElementMatrix( -- const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat) -+void DiffusionIntegrator::AssembleElementMatrix(const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - dim = el.GetDim(); -@@ -889,7 +916,7 @@ void DiffusionIntegrator::AssembleElementMatrix( - #endif - elmat.SetSize(nd); - -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -970,7 +997,8 @@ void DiffusionIntegrator::AssembleElementMatrix2( - #endif - elmat.SetSize(te_nd, tr_nd); - -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -1048,7 +1076,7 @@ void DiffusionIntegrator::AssembleElementVector( - - elvect.SetSize(nd); - -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - elvect = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -1094,13 +1122,9 @@ void DiffusionIntegrator::AssembleElementVector( - } - - void DiffusionIntegrator::ComputeElementFlux( -- const FiniteElement &el, -- ElementTransformation &Trans, -- Vector &u, -- const FiniteElement &fluxelem, -- Vector &flux, -- bool with_coef, -- const IntegrationRule *ir) -+ const FiniteElement &el, ElementTransformation &Trans, -+ Vector &u, const FiniteElement &fluxelem, Vector &flux, -+ bool with_coef, const IntegrationRule *ir) - { - int nd, spaceDim, fnd; - -@@ -1196,11 +1220,9 @@ void DiffusionIntegrator::ComputeElementFlux( - } - } - --double DiffusionIntegrator::ComputeFluxEnergy( -- const FiniteElement &fluxelem, -- ElementTransformation &Trans, -- Vector &flux, -- Vector* d_energy) -+double DiffusionIntegrator::ComputeFluxEnergy(const FiniteElement &fluxelem, -+ ElementTransformation &Trans, -+ Vector &flux, Vector* d_energy) - { - int nd = fluxelem.GetDof(); - dim = fluxelem.GetDim(); -@@ -1218,7 +1240,7 @@ double DiffusionIntegrator::ComputeFluxEnergy( - if (d_energy) { vec.SetSize(spaceDim); } - if (MQ) { M.SetSize(spaceDim); } - -- int order = 2 * fluxelem.GetOrder(); // <-- -+ int order = 2 * fluxelem.GetOrder(); - const IntegrationRule *ir = &IntRules.Get(fluxelem.GetGeomType(), order); - - double energy = 0.0; -@@ -1274,20 +1296,13 @@ double DiffusionIntegrator::ComputeFluxEnergy( - return energy; - } - --const IntegrationRule &DiffusionIntegrator::GetRule( -+const IntegrationRule &MassIntegrator::GetRuleStatic( - const FiniteElement &trial_fe, -- const FiniteElement &test_fe) -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ int Q_order) - { -- int order; -- if (trial_fe.Space() == FunctionSpace::Pk) -- { -- order = trial_fe.GetOrder() + test_fe.GetOrder() - 2; -- } -- else -- { -- // order = 2*el.GetOrder() - 2; // <-- this seems to work fine too -- order = trial_fe.GetOrder() + test_fe.GetOrder() + trial_fe.GetDim() - 1; -- } -+ int order = trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() + Q_order; - if (trial_fe.Space() == FunctionSpace::rQk) - { - return RefinedIntRules.Get(trial_fe.GetGeomType(), order); -@@ -1295,22 +1310,20 @@ const IntegrationRule &DiffusionIntegrator::GetRule( - return IntRules.Get(trial_fe.GetGeomType(), order); - } - --void MassIntegrator::AssembleElementMatrix( -- const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) -+void MassIntegrator::AssembleElementMatrix(const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); -- // int dim = el.GetDim(); - double w; - - #ifdef MFEM_THREAD_SAFE - Vector shape; - #endif -- shape.SetSize(nd); - elmat.SetSize(nd); -+ shape.SetSize(nd); - -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, Trans); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -1345,8 +1358,8 @@ void MassIntegrator::AssembleElementMatrix2( - shape.SetSize(tr_nd); - te_shape.SetSize(te_nd); - -- const IntegrationRule *ir = IntRule ? IntRule : -- &GetRule(trial_fe, test_fe, Trans); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -1367,18 +1380,13 @@ void MassIntegrator::AssembleElementMatrix2( - } - } - --const IntegrationRule &MassIntegrator::GetRule(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+const IntegrationRule &BoundaryMassIntegrator::GetRule( -+ const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const - { -- // int order = trial_fe.GetOrder() + test_fe.GetOrder(); -- const int order = trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); -- -- if (trial_fe.Space() == FunctionSpace::rQk) -- { -- return RefinedIntRules.Get(trial_fe.GetGeomType(), order); -- } -- return IntRules.Get(trial_fe.GetGeomType(), order); -+ int order = el1.GetOrder() + el2.GetOrder(); -+ return IntRules.Get(Trans.GetGeometryType(), order); - } - - void BoundaryMassIntegrator::AssembleFaceMatrix( -@@ -1397,13 +1405,7 @@ void BoundaryMassIntegrator::AssembleFaceMatrix( - elmat.SetSize(nd1); - shape.SetSize(nd1); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = 2 * el1.GetOrder(); -- -- ir = &IntRules.Get(Trans.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el1, Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -1427,6 +1429,15 @@ void BoundaryMassIntegrator::AssembleFaceMatrix( - } - } - -+const IntegrationRule &ConvectionIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = Trans.OrderGrad(&trial_fe) + Trans.Order() + test_fe.GetOrder(); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void ConvectionIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -@@ -1446,12 +1457,7 @@ void ConvectionIntegrator::AssembleElementMatrix( - - Vector vec1; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = Trans.OrderGrad(&el) + Trans.Order() + el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - Q->Eval(Q_ir, Trans, *ir); - -@@ -1474,18 +1480,8 @@ void ConvectionIntegrator::AssembleElementMatrix( - } - } - --const IntegrationRule &ConvectionIntegrator::GetRule( -- const FiniteElement &fe, -- ElementTransformation &Trans) --{ -- int order = Trans.OrderGrad(&fe) + Trans.Order() + fe.GetOrder(); -- return IntRules.Get(fe.GetGeomType(), order); --} -- - void GroupConvectionIntegrator::AssembleElementMatrix( -- const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) -+ const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { - int nd = el.GetDof(); - int dim = el.GetDim(); -@@ -1496,12 +1492,7 @@ void GroupConvectionIntegrator::AssembleElementMatrix( - shape.SetSize(nd); - grad.SetSize(nd,dim); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = Trans.OrderGrad(&el) + el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - Q->Eval(Q_nodal, Trans, el.GetNodes()); // sets the size of Q_nodal - -@@ -1536,10 +1527,9 @@ void GroupConvectionIntegrator::AssembleElementMatrix( - } - } - --void VectorMassIntegrator::AssembleElementMatrix( -- const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) -+void VectorMassIntegrator::AssembleElementMatrix(const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - int spaceDim = Trans.GetSpaceDim(); -@@ -1561,20 +1551,7 @@ void VectorMassIntegrator::AssembleElementMatrix( - mcoeff.SetSize(vdim); - } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = 2 * el.GetOrder() + Trans.OrderW() + Q_order; -- -- if (el.Space() == FunctionSpace::rQk) -- { -- ir = &RefinedIntRules.Get(el.GetGeomType(), order); -- } -- else -- { -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat = 0.0; - for (int s = 0; s < ir->GetNPoints(); s++) -@@ -1644,21 +1621,8 @@ void VectorMassIntegrator::AssembleElementMatrix2( - mcoeff.SetSize(vdim); - } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = (trial_fe.GetOrder() + test_fe.GetOrder() + -- Trans.OrderW() + Q_order); -- -- if (trial_fe.Space() == FunctionSpace::rQk) -- { -- ir = &RefinedIntRules.Get(trial_fe.GetGeomType(), order); -- } -- else -- { -- ir = &IntRules.Get(trial_fe.GetGeomType(), order); -- } -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (int s = 0; s < ir->GetNPoints(); s++) -@@ -1704,6 +1668,179 @@ void VectorMassIntegrator::AssembleElementMatrix2( - } - } - -+void VectorDiffusionIntegrator::AssembleElementMatrix( -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) -+{ -+ const int dof = el.GetDof(); -+ dim = el.GetDim(); -+ sdim = Trans.GetSpaceDim(); -+ -+ // If vdim is not set, set it to the space dimension; -+ vdim = (vdim <= 0) ? sdim : vdim; -+ const bool square = (dim == sdim); -+ -+ if (VQ) -+ { -+ vcoeff.SetSize(vdim); -+ } -+ else if (MQ) -+ { -+ mcoeff.SetSize(vdim); -+ } -+ -+ dshape.SetSize(dof, dim); -+ dshapedxt.SetSize(dof, sdim); -+ -+ elmat.SetSize(vdim * dof); -+ pelmat.SetSize(dof); -+ -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); -+ -+ elmat = 0.0; -+ -+ for (int i = 0; i < ir->GetNPoints(); i++) -+ { -+ -+ const IntegrationPoint &ip = ir->IntPoint(i); -+ el.CalcDShape(ip, dshape); -+ -+ Trans.SetIntPoint(&ip); -+ double w = Trans.Weight(); -+ w = ip.weight / (square ? w : w*w*w); -+ // AdjugateJacobian = / adj(J), if J is square -+ // \ adj(J^t.J).J^t, otherwise -+ Mult(dshape, Trans.AdjugateJacobian(), dshapedxt); -+ -+ if (VQ) -+ { -+ VQ->Eval(vcoeff, Trans, ip); -+ for (int k = 0; k < vdim; ++k) -+ { -+ Mult_a_AAt(w*vcoeff(k), dshapedxt, pelmat); -+ elmat.AddMatrix(pelmat, dof*k, dof*k); -+ } -+ } -+ else if (MQ) -+ { -+ MQ->Eval(mcoeff, Trans, ip); -+ for (int ii = 0; ii < vdim; ++ii) -+ { -+ for (int jj = 0; jj < vdim; ++jj) -+ { -+ Mult_a_AAt(w*mcoeff(ii,jj), dshapedxt, pelmat); -+ elmat.AddMatrix(pelmat, dof*ii, dof*jj); -+ } -+ } -+ } -+ else -+ { -+ if (Q) { w *= Q->Eval(Trans, ip); } -+ Mult_a_AAt(w, dshapedxt, pelmat); -+ for (int k = 0; k < vdim; ++k) -+ { -+ elmat.AddMatrix(pelmat, dof*k, dof*k); -+ } -+ } -+ } -+} -+ -+void VectorDiffusionIntegrator::AssembleElementVector( -+ const FiniteElement &el, ElementTransformation &Tr, -+ const Vector &elfun, Vector &elvect) -+{ -+ const int dof = el.GetDof(); -+ dim = el.GetDim(); -+ sdim = Tr.GetSpaceDim(); -+ -+ // If vdim is not set, set it to the space dimension; -+ vdim = (vdim <= 0) ? sdim : vdim; -+ const bool square = (dim == sdim); -+ -+ if (VQ) -+ { -+ vcoeff.SetSize(vdim); -+ } -+ else if (MQ) -+ { -+ mcoeff.SetSize(vdim); -+ } -+ -+ dshape.SetSize(dof, dim); -+ dshapedxt.SetSize(dof, dim); -+ // pelmat.SetSize(dim); -+ -+ elvect.SetSize(dim*dof); -+ -+ // NOTE: DenseMatrix is in column-major order. This is consistent with -+ // vectors ordered byNODES. In the resulting DenseMatrix, each column -+ // corresponds to a particular vdim. -+ DenseMatrix mat_in(elfun.GetData(), dof, dim); -+ DenseMatrix mat_out(elvect.GetData(), dof, dim); -+ -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); -+ -+ elvect = 0.0; -+ for (int i = 0; i < ir->GetNPoints(); i++) -+ { -+ const IntegrationPoint &ip = ir->IntPoint(i); -+ el.CalcDShape(ip, dshape); -+ -+ Tr.SetIntPoint(&ip); -+ double w = Tr.Weight(); -+ w = ip.weight / (square ? w : w*w*w); -+ Mult(dshape, Tr.AdjugateJacobian(), dshapedxt); -+ MultAAt(dshapedxt, pelmat); -+ -+ if (VQ) -+ { -+ VQ->Eval(vcoeff, Tr, ip); -+ for (int k = 0; k < vdim; ++k) -+ { -+ pelmat *= w*vcoeff(k); -+ const Vector vec_in(mat_in.GetColumn(k), dof); -+ Vector vec_out(mat_out.GetColumn(k), dof); -+ pelmat.AddMult(vec_in, vec_out); -+ } -+ } -+ else if (MQ) -+ { -+ MQ->Eval(mcoeff, Tr, ip); -+ for (int ii = 0; ii < vdim; ++ii) -+ { -+ Vector vec_out(mat_out.GetColumn(ii), dof); -+ for (int jj = 0; jj < vdim; ++jj) -+ { -+ pelmat *= w*mcoeff(ii,jj); -+ const Vector vec_in(mat_in.GetColumn(jj), dof); -+ pelmat.Mult(vec_in, vec_out); -+ } -+ } -+ } -+ else -+ { -+ if (Q) { w *= Q->Eval(Tr, ip); } -+ pelmat *= w; -+ for (int k = 0; k < vdim; ++k) -+ { -+ const Vector vec_in(mat_in.GetColumn(k), dof); -+ Vector vec_out(mat_out.GetColumn(k), dof); -+ pelmat.AddMult(vec_in, vec_out); -+ } -+ } -+ } -+} -+ -+const IntegrationRule &VectorFEDivergenceIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void VectorFEDivergenceIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -1719,12 +1856,8 @@ void VectorFEDivergenceIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_nd, trial_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; // <-- -- ir = &IntRules.Get(trial_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -1744,6 +1877,46 @@ void VectorFEDivergenceIntegrator::AssembleElementMatrix2( - } - } - -+const IntegrationRule &VectorFEWeakDivergenceIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ // The integrand on the reference element is: -+ // -( Q/det(J) ) u_hat^T adj(J) adj(J)^T grad_hat(v_hat). -+ // -+ // For Trans in (P_k)^d, v_hat in P_l, u_hat in ND_m, and dim=sdim=d>=1 -+ // - J_{ij} is in P_{k-1}, so adj(J)_{ij} is in P_{(d-1)*(k-1)} -+ // - so adj(J)^T grad_hat(v_hat) is in (P_{(d-1)*(k-1)+(l-1)})^d -+ // - u_hat is in (P_m)^d -+ // - adj(J)^T u_hat is in (P_{(d-1)*(k-1)+m})^d -+ // - and u_hat^T adj(J) adj(J)^T grad_hat(v_hat) is in P_n with -+ // n = 2*(d-1)*(k-1)+(l-1)+m -+ // -+ // For Trans in (Q_k)^d, v_hat in Q_l, u_hat in ND_m, and dim=sdim=d>1 -+ // - J_{i*}, J's i-th row, is in ( Q_{k-1,k,k}, Q_{k,k-1,k}, Q_{k,k,k-1} ) -+ // - adj(J)_{*j} is in ( Q_{s,s-1,s-1}, Q_{s-1,s,s-1}, Q_{s-1,s-1,s} ) -+ // with s = (d-1)*k -+ // - adj(J)^T grad_hat(v_hat) is in Q_{(d-1)*k+(l-1)} -+ // - u_hat is in ( Q_{m-1,m,m}, Q_{m,m-1,m}, Q_{m,m,m-1} ) -+ // - adj(J)^T u_hat is in Q_{(d-1)*k+(m-1)} -+ // - and u_hat^T adj(J) adj(J)^T grad_hat(v_hat) is in Q_n with -+ // n = 2*(d-1)*k+(l-1)+(m-1) -+ // -+ // In the next formula we use the expressions for n with k=1, which means -+ // that the term Q/det(J) is disregarded: -+ int order; -+ if (trial_fe.Space() == FunctionSpace::Pk) -+ { -+ order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; -+ } -+ else -+ { -+ order = trial_fe.GetOrder() + test_fe.GetOrder() + 2 * (trial_fe.GetDim() - 2); -+ } -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void VectorFEWeakDivergenceIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -1770,37 +1943,8 @@ void VectorFEWeakDivergenceIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_nd, trial_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // The integrand on the reference element is: -- // -( Q/det(J) ) u_hat^T adj(J) adj(J)^T grad_hat(v_hat). -- // -- // For Trans in (P_k)^d, v_hat in P_l, u_hat in ND_m, and dim=sdim=d>=1 -- // - J_{ij} is in P_{k-1}, so adj(J)_{ij} is in P_{(d-1)*(k-1)} -- // - so adj(J)^T grad_hat(v_hat) is in (P_{(d-1)*(k-1)+(l-1)})^d -- // - u_hat is in (P_m)^d -- // - adj(J)^T u_hat is in (P_{(d-1)*(k-1)+m})^d -- // - and u_hat^T adj(J) adj(J)^T grad_hat(v_hat) is in P_n with -- // n = 2*(d-1)*(k-1)+(l-1)+m -- // -- // For Trans in (Q_k)^d, v_hat in Q_l, u_hat in ND_m, and dim=sdim=d>1 -- // - J_{i*}, J's i-th row, is in ( Q_{k-1,k,k}, Q_{k,k-1,k}, Q_{k,k,k-1} ) -- // - adj(J)_{*j} is in ( Q_{s,s-1,s-1}, Q_{s-1,s,s-1}, Q_{s-1,s-1,s} ) -- // with s = (d-1)*k -- // - adj(J)^T grad_hat(v_hat) is in Q_{(d-1)*k+(l-1)} -- // - u_hat is in ( Q_{m-1,m,m}, Q_{m,m-1,m}, Q_{m,m,m-1} ) -- // - adj(J)^T u_hat is in Q_{(d-1)*k+(m-1)} -- // - and u_hat^T adj(J) adj(J)^T grad_hat(v_hat) is in Q_n with -- // n = 2*(d-1)*k+(l-1)+(m-1) -- // -- // In the next formula we use the expressions for n with k=1, which means -- // that the term Q/det(J) is disregarded: -- int ir_order = (trial_fe.Space() == FunctionSpace::Pk) ? -- (trial_fe.GetOrder() + test_fe.GetOrder() - 1) : -- (trial_fe.GetOrder() + test_fe.GetOrder() + 2*(dim-2)); -- ir = &IntRules.Get(trial_fe.GetGeomType(), ir_order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -1826,6 +1970,15 @@ void VectorFEWeakDivergenceIntegrator::AssembleElementMatrix2( - } - } - -+const IntegrationRule &VectorFECurlIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void VectorFECurlIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -1863,12 +2016,8 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_nd, trial_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; // <-- -- ir = &IntRules.Get(trial_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -1923,6 +2072,27 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - } - } - -+const IntegrationRule &DerivativeIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order; -+ if (trial_fe.Space() == FunctionSpace::Pk) -+ { -+ order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; -+ } -+ else -+ { -+ order = trial_fe.GetOrder() + test_fe.GetOrder() + trial_fe.GetDim(); -+ } -+ if (trial_fe.Space() == FunctionSpace::rQk) -+ { -+ return RefinedIntRules.Get(trial_fe.GetGeomType(), order); -+ } -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void DerivativeIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -1944,28 +2114,8 @@ void DerivativeIntegrator::AssembleElementMatrix2( - invdfdx.SetSize(dim, spaceDim); - shape.SetSize(test_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order; -- if (trial_fe.Space() == FunctionSpace::Pk) -- { -- order = trial_fe.GetOrder() + test_fe.GetOrder() - 1; -- } -- else -- { -- order = trial_fe.GetOrder() + test_fe.GetOrder() + dim; -- } -- -- if (trial_fe.Space() == FunctionSpace::rQk) -- { -- ir = &RefinedIntRules.Get(trial_fe.GetGeomType(), order); -- } -- else -- { -- ir = &IntRules.Get(trial_fe.GetGeomType(), order); -- } -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (i = 0; i < ir->GetNPoints(); i++) -@@ -1991,10 +2141,26 @@ void DerivativeIntegrator::AssembleElementMatrix2( - } - } - --void CurlCurlIntegrator::AssembleElementMatrix( -- const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) -+const IntegrationRule &CurlCurlIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order; -+ if (trial_fe.Space() == FunctionSpace::Pk) -+ { -+ order = test_fe.GetOrder() + trial_fe.GetOrder() - 2; -+ } -+ else -+ { -+ order = test_fe.GetOrder() + trial_fe.GetOrder() + trial_fe.GetDim() - 1; -+ } -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ -+void CurlCurlIntegrator::AssembleElementMatrix(const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - dim = el.GetDim(); -@@ -2009,25 +2175,10 @@ void CurlCurlIntegrator::AssembleElementMatrix( - curlshape_dFt.SetSize(nd,dimc); - #endif - elmat.SetSize(nd); -- - if (MQ) { M.SetSize(dimc); } - if (DQ) { D.SetSize(dimc); } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order; -- if (el.Space() == FunctionSpace::Pk) -- { -- order = 2*el.GetOrder() - 2; -- } -- else -- { -- order = 2*el.GetOrder(); -- } -- -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -2090,20 +2241,8 @@ void CurlCurlIntegrator::AssembleElementMatrix2(const FiniteElement &trial_fe, - if (MQ) { M.SetSize(dimc); } - if (DQ) { D.SetSize(dimc); } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order; -- if (trial_fe.Space() == FunctionSpace::Pk) -- { -- order = test_fe.GetOrder() + trial_fe.GetOrder() - 2; -- } -- else -- { -- order = test_fe.GetOrder() + trial_fe.GetOrder() + trial_fe.GetDim() - 1; -- } -- ir = &IntRules.Get(trial_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -2174,7 +2313,7 @@ double CurlCurlIntegrator::ComputeFluxEnergy(const FiniteElement &fluxelem, - pointflux.SetSize(dim); - if (d_energy) { vec.SetSize(dim); } - -- int order = 2 * fluxelem.GetOrder(); // <-- -+ int order = 2 * fluxelem.GetOrder(); - const IntegrationRule &ir = IntRules.Get(fluxelem.GetGeomType(), order); - - double energy = 0.0; -@@ -2265,10 +2404,18 @@ double CurlCurlIntegrator::ComputeFluxEnergy(const FiniteElement &fluxelem, - return energy; - } - -+const IntegrationRule &VectorCurlCurlIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ // Use the same integration rule as diffusion -+ int order = Trans.OrderGrad(&trial_fe) + Trans.OrderGrad(&test_fe); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void VectorCurlCurlIntegrator::AssembleElementMatrix( -- const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) -+ const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { - int dim = el.GetDim(); - int dof = el.GetDof(); -@@ -2284,13 +2431,7 @@ void VectorCurlCurlIntegrator::AssembleElementMatrix( - Jadj.SetSize(dim); - #endif - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // use the same integration rule as diffusion -- int order = 2 * Trans.OrderGrad(&el); -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat.SetSize(dof*dim); - elmat = 0.0; -@@ -2332,13 +2473,7 @@ double VectorCurlCurlIntegrator::GetElementEnergy( - #endif - DenseMatrix elfun_mat(elfun.GetData(), dof, dim); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // use the same integration rule as diffusion -- int order = 2 * Tr.OrderGrad(&el); -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - double energy = 0.; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -2380,6 +2515,15 @@ double VectorCurlCurlIntegrator::GetElementEnergy( - return 0.5 * energy; - } - -+const IntegrationRule &MixedCurlIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderJ(); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void MixedCurlIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -2415,13 +2559,9 @@ void MixedCurlIntegrator::AssembleElementMatrix2( - - double c; - Vector d_col; -- const IntegrationRule *ir = IntRule; - -- if (ir == NULL) -- { -- int order = trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderJ(); -- ir = &IntRules.Get(trial_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -2458,6 +2598,15 @@ void MixedCurlIntegrator::AssembleElementMatrix2( - } - } - -+const IntegrationRule &VectorFEMassIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = Trans.OrderW() + test_fe.GetOrder() + trial_fe.GetOrder(); -+ return IntRules.Get(test_fe.GetGeomType(), order); -+} -+ - void VectorFEMassIntegrator::AssembleElementMatrix( - const FiniteElement &el, - ElementTransformation &Trans, -@@ -2483,13 +2632,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix( - elmat.SetSize(dof); - elmat = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // int order = 2 * el.GetOrder(); -- int order = Trans.OrderW() + 2 * el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -2517,7 +2660,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix( - { - if (Q) - { -- w *= Q->Eval (Trans, ip); -+ w *= Q->Eval(Trans, ip); - } - AddMult_a_AAt(w, trial_vshape, elmat); - } -@@ -2551,15 +2694,11 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - #endif - - elmat.SetSize(vdim*test_dof, trial_dof); -+ elmat = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = (Trans.OrderW() + test_fe.GetOrder() + trial_fe.GetOrder()); -- ir = &IntRules.Get(test_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - -- elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) - { - const IntegrationPoint &ip = ir->IntPoint(i); -@@ -2652,12 +2791,8 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - - elmat.SetSize(test_dof, trial_dof); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = (Trans.OrderW() + test_fe.GetOrder() + trial_fe.GetOrder()); -- ir = &IntRules.Get(test_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -2687,7 +2822,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - { - if (Q) - { -- w *= Q->Eval (Trans, ip); -+ w *= Q->Eval(Trans, ip); - } - AddMult_a_ABt(w,test_vshape,trial_vshape,elmat); - } -@@ -2700,6 +2835,15 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - } - } - -+const IntegrationRule &VectorDivergenceIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = Trans.OrderGrad(&trial_fe) + test_fe.GetOrder() + Trans.OrderJ(); -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} -+ - void VectorDivergenceIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2741,7 +2885,7 @@ void VectorDivergenceIntegrator::AssembleElementMatrix2( - c = ip.weight; - if (Q) - { -- c *= Q->Eval (Trans, ip); -+ c *= Q->Eval(Trans, ip); - } - - // elmat += c * shape * divshape ^ t -@@ -2750,12 +2894,13 @@ void VectorDivergenceIntegrator::AssembleElementMatrix2( - } - } - --const IntegrationRule &VectorDivergenceIntegrator::GetRule( -+const IntegrationRule &DivDivIntegrator::GetRule( - const FiniteElement &trial_fe, - const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ ElementTransformation &Trans) const - { -- int order = Trans.OrderGrad(&trial_fe) + test_fe.GetOrder() + Trans.OrderJ(); -+ int order = 2 * max(trial_fe.GetOrder(), -+ test_fe.GetOrder()) - 2; // <--- OK for RTk - return IntRules.Get(trial_fe.GetGeomType(), order); - } - -@@ -2774,12 +2919,7 @@ void DivDivIntegrator::AssembleElementMatrix( - #endif - elmat.SetSize(dof); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = 2 * el.GetOrder() - 2; // <--- OK for RTk -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -2793,7 +2933,7 @@ void DivDivIntegrator::AssembleElementMatrix( - - if (Q) - { -- c *= Q->Eval (Trans, ip); -+ c *= Q->Eval(Trans, ip); - } - - // elmat += c * divshape * divshape ^ t -@@ -2820,13 +2960,8 @@ void DivDivIntegrator::AssembleElementMatrix2( - #endif - elmat.SetSize(te_nd,tr_nd); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = 2 * max(test_fe.GetOrder(), -- trial_fe.GetOrder()) - 2; // <--- OK for RTk -- ir = &IntRules.Get(test_fe.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -+ Trans); - - elmat = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -2841,7 +2976,7 @@ void DivDivIntegrator::AssembleElementMatrix2( - - if (Q) - { -- c *= Q->Eval (Trans, ip); -+ c *= Q->Eval(Trans, ip); - } - - te_divshape *= c; -@@ -2849,176 +2984,13 @@ void DivDivIntegrator::AssembleElementMatrix2( - } - } - --void VectorDiffusionIntegrator::AssembleElementMatrix( -- const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) --{ -- const int dof = el.GetDof(); -- dim = el.GetDim(); -- sdim = Trans.GetSpaceDim(); -- -- // If vdim is not set, set it to the space dimension; -- vdim = (vdim <= 0) ? sdim : vdim; -- const bool square = (dim == sdim); -- -- if (VQ) -- { -- vcoeff.SetSize(vdim); -- } -- else if (MQ) -- { -- mcoeff.SetSize(vdim); -- } -- -- dshape.SetSize(dof, dim); -- dshapedxt.SetSize(dof, sdim); -- -- elmat.SetSize(vdim * dof); -- pelmat.SetSize(dof); -- -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- ir = &DiffusionIntegrator::GetRule(el,el); -- } -- -- elmat = 0.0; -- -- for (int i = 0; i < ir->GetNPoints(); i++) -- { -- -- const IntegrationPoint &ip = ir->IntPoint(i); -- el.CalcDShape(ip, dshape); -- -- Trans.SetIntPoint(&ip); -- double w = Trans.Weight(); -- w = ip.weight / (square ? w : w*w*w); -- // AdjugateJacobian = / adj(J), if J is square -- // \ adj(J^t.J).J^t, otherwise -- Mult(dshape, Trans.AdjugateJacobian(), dshapedxt); -- -- if (VQ) -- { -- VQ->Eval(vcoeff, Trans, ip); -- for (int k = 0; k < vdim; ++k) -- { -- Mult_a_AAt(w*vcoeff(k), dshapedxt, pelmat); -- elmat.AddMatrix(pelmat, dof*k, dof*k); -- } -- } -- else if (MQ) -- { -- MQ->Eval(mcoeff, Trans, ip); -- for (int ii = 0; ii < vdim; ++ii) -- { -- for (int jj = 0; jj < vdim; ++jj) -- { -- Mult_a_AAt(w*mcoeff(ii,jj), dshapedxt, pelmat); -- elmat.AddMatrix(pelmat, dof*ii, dof*jj); -- } -- } -- } -- else -- { -- if (Q) { w *= Q->Eval(Trans, ip); } -- Mult_a_AAt(w, dshapedxt, pelmat); -- for (int k = 0; k < vdim; ++k) -- { -- elmat.AddMatrix(pelmat, dof*k, dof*k); -- } -- } -- } --} -- --void VectorDiffusionIntegrator::AssembleElementVector( -- const FiniteElement &el, ElementTransformation &Tr, -- const Vector &elfun, Vector &elvect) -+const IntegrationRule &ElasticityIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { -- const int dof = el.GetDof(); -- dim = el.GetDim(); -- sdim = Tr.GetSpaceDim(); -- -- // If vdim is not set, set it to the space dimension; -- vdim = (vdim <= 0) ? sdim : vdim; -- const bool square = (dim == sdim); -- -- if (VQ) -- { -- vcoeff.SetSize(vdim); -- } -- else if (MQ) -- { -- mcoeff.SetSize(vdim); -- } -- -- dshape.SetSize(dof, dim); -- dshapedxt.SetSize(dof, dim); -- // pelmat.SetSize(dim); -- -- elvect.SetSize(dim*dof); -- -- // NOTE: DenseMatrix is in column-major order. This is consistent with -- // vectors ordered byNODES. In the resulting DenseMatrix, each column -- // corresponds to a particular vdim. -- DenseMatrix mat_in(elfun.GetData(), dof, dim); -- DenseMatrix mat_out(elvect.GetData(), dof, dim); -- -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- ir = &DiffusionIntegrator::GetRule(el,el); -- } -- -- elvect = 0.0; -- for (int i = 0; i < ir->GetNPoints(); i++) -- { -- const IntegrationPoint &ip = ir->IntPoint(i); -- el.CalcDShape(ip, dshape); -- -- Tr.SetIntPoint(&ip); -- double w = Tr.Weight(); -- w = ip.weight / (square ? w : w*w*w); -- Mult(dshape, Tr.AdjugateJacobian(), dshapedxt); -- MultAAt(dshapedxt, pelmat); -- -- if (VQ) -- { -- VQ->Eval(vcoeff, Tr, ip); -- for (int k = 0; k < vdim; ++k) -- { -- pelmat *= w*vcoeff(k); -- const Vector vec_in(mat_in.GetColumn(k), dof); -- Vector vec_out(mat_out.GetColumn(k), dof); -- pelmat.AddMult(vec_in, vec_out); -- } -- } -- else if (MQ) -- { -- MQ->Eval(mcoeff, Tr, ip); -- for (int ii = 0; ii < vdim; ++ii) -- { -- Vector vec_out(mat_out.GetColumn(ii), dof); -- for (int jj = 0; jj < vdim; ++jj) -- { -- pelmat *= w*mcoeff(ii,jj); -- const Vector vec_in(mat_in.GetColumn(jj), dof); -- pelmat.Mult(vec_in, vec_out); -- } -- } -- } -- else -- { -- if (Q) { w *= Q->Eval(Tr, ip); } -- pelmat *= w; -- for (int k = 0; k < vdim; ++k) -- { -- const Vector vec_in(mat_in.GetColumn(k), dof); -- Vector vec_out(mat_out.GetColumn(k), dof); -- pelmat.AddMult(vec_in, vec_out); -- } -- } -- } -+ int order = Trans.OrderGrad(&trial_fe) + Trans.OrderGrad(&test_fe); -+ return IntRules.Get(trial_fe.GetGeomType(), order); - } - - void ElasticityIntegrator::AssembleElementMatrix( -@@ -3042,12 +3014,7 @@ void ElasticityIntegrator::AssembleElementMatrix( - - elmat.SetSize(dof * dim); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = 2 * Trans.OrderGrad(&el); // correct order? -- ir = &IntRules.Get(el.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - - elmat = 0.0; - -@@ -3210,12 +3177,7 @@ double ElasticityIntegrator::ComputeFluxEnergy(const FiniteElement &fluxelem, - // Use the same integration rule as in AssembleElementMatrix, replacing 'el' - // with 'fluxelem' when 'IntRule' is not set. - // Should we be using a different (more accurate) rule here? -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order = 2 * Trans.OrderGrad(&fluxelem); -- ir = &IntRules.Get(fluxelem.GetGeomType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fluxelem, Trans); - - double energy = 0.0; - -@@ -3275,6 +3237,28 @@ double ElasticityIntegrator::ComputeFluxEnergy(const FiniteElement &fluxelem, - return energy; - } - -+const IntegrationRule &DGTraceIntegrator::GetRule( -+ const FiniteElement &el1, const FiniteElement &el2, -+ FaceElementTransformations &Trans) const -+{ -+ // Assuming order(u) == order(mesh) -+ int order; -+ if (Trans.Elem2No >= 0) -+ { -+ order = (min(Trans.Elem1->OrderW(), Trans.Elem2->OrderW()) + -+ 2 * max(el1.GetOrder(), el2.GetOrder())); -+ } -+ else -+ { -+ order = Trans.Elem1->OrderW() + 2 * el1.GetOrder(); -+ } -+ if (el1.Space() == FunctionSpace::Pk) -+ { -+ order++; -+ } -+ return IntRules.Get(Trans.GetGeometryType(), order); -+} -+ - void DGTraceIntegrator::AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -3302,24 +3286,7 @@ void DGTraceIntegrator::AssembleFaceMatrix(const FiniteElement &el1, - elmat.SetSize(ndof1 + ndof2); - elmat = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order; -- // Assuming order(u)==order(mesh) -- if (Trans.Elem2No >= 0) -- order = (min(Trans.Elem1->OrderW(), Trans.Elem2->OrderW()) + -- 2*max(el1.GetOrder(), el2.GetOrder())); -- else -- { -- order = Trans.Elem1->OrderW() + 2*el1.GetOrder(); -- } -- if (el1.Space() == FunctionSpace::Pk) -- { -- order++; -- } -- ir = &IntRules.Get(Trans.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el1, el2, Trans); - - for (int p = 0; p < ir->GetNPoints(); p++) - { -@@ -3408,11 +3375,21 @@ void DGTraceIntegrator::AssembleFaceMatrix(const FiniteElement &el1, - } - } - --const IntegrationRule &DGTraceIntegrator::GetRule( -- Geometry::Type geom, int order, FaceElementTransformations &T) -+const IntegrationRule &DGDiffusionIntegrator::GetRuleStatic( -+ const FiniteElement &el1, const FiniteElement &el2, -+ FaceElementTransformations &Trans) - { -- int int_order = T.Elem1->OrderW() + 2*order; -- return IntRules.Get(geom, int_order); -+ // A simple choice for the integration order; is this OK? -+ int order; -+ if (Trans.Elem2No >= 0) -+ { -+ order = 2 * max(el1.GetOrder(), el2.GetOrder()); -+ } -+ else -+ { -+ order = 2 * el1.GetOrder(); -+ } -+ return IntRules.Get(Trans.GetGeometryType(), order); - } - - void DGDiffusionIntegrator::AssembleFaceMatrix( -@@ -3459,21 +3436,7 @@ void DGDiffusionIntegrator::AssembleFaceMatrix( - jmat = 0.; - } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // a simple choice for the integration order; is this OK? -- int order; -- if (ndof2) -- { -- order = 2*max(el1.GetOrder(), el2.GetOrder()); -- } -- else -- { -- order = 2*el1.GetOrder(); -- } -- ir = &IntRules.Get(Trans.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el1, el2, Trans); - - // assemble: < {(Q \nabla u).n},[v] > --> elmat - // kappa < {h^{-1} Q} [u],[v] > --> jmat -@@ -3749,13 +3712,7 @@ void DGElasticityIntegrator::AssembleFaceMatrix( - dshape2_dnM.SetSize(ndofs2); - } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // a simple choice for the integration order; is this OK? -- const int order = 2 * max(el1.GetOrder(), ndofs2 ? el2.GetOrder() : 0); -- ir = &IntRules.Get(Trans.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el1, el2, Trans); - - for (int pind = 0; pind < ir->GetNPoints(); ++pind) - { -@@ -3872,7 +3829,7 @@ void DGElasticityIntegrator::AssembleFaceMatrix( - } - } - --void TraceJumpIntegrator::AssembleFaceMatrix( -+void TraceJumpIntegrator::AssembleFaceMatrix2( - const FiniteElement &trial_face_fe, const FiniteElement &test_fe1, - const FiniteElement &test_fe2, FaceElementTransformations &Trans, - DenseMatrix &elmat) -@@ -3964,7 +3921,7 @@ void TraceJumpIntegrator::AssembleFaceMatrix( - } - } - --void NormalTraceJumpIntegrator::AssembleFaceMatrix( -+void NormalTraceJumpIntegrator::AssembleFaceMatrix2( - const FiniteElement &trial_face_fe, const FiniteElement &test_fe1, - const FiniteElement &test_fe2, FaceElementTransformations &Trans, - DenseMatrix &elmat) -@@ -4312,11 +4269,11 @@ struct ShapeCoefficient : public VectorCoefficient - - } - --void --ScalarProductInterpolator::AssembleElementMatrix2(const FiniteElement &dom_fe, -- const FiniteElement &ran_fe, -- ElementTransformation &Trans, -- DenseMatrix &elmat) -+void ScalarProductInterpolator::AssembleElementMatrix2( -+ const FiniteElement &dom_fe, -+ const FiniteElement &ran_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - internal::ShapeCoefficient dom_shape_coeff(*Q, dom_fe); - -@@ -4327,8 +4284,7 @@ ScalarProductInterpolator::AssembleElementMatrix2(const FiniteElement &dom_fe, - ran_fe.Project(dom_shape_coeff, Trans, elmat_as_vec); - } - --void --ScalarVectorProductInterpolator::AssembleElementMatrix2( -+void ScalarVectorProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, -@@ -4361,8 +4317,7 @@ ScalarVectorProductInterpolator::AssembleElementMatrix2( - ran_fe.ProjectMatrixCoefficient(dom_shape_coeff, Trans, elmat_as_vec); - } - --void --VectorScalarProductInterpolator::AssembleElementMatrix2( -+void VectorScalarProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, -@@ -4398,8 +4353,7 @@ VectorScalarProductInterpolator::AssembleElementMatrix2( - ran_fe.ProjectMatrixCoefficient(dom_shape_coeff, Trans, elmat_as_vec); - } - --void --ScalarCrossProductInterpolator::AssembleElementMatrix2( -+void ScalarCrossProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, -@@ -4440,8 +4394,7 @@ ScalarCrossProductInterpolator::AssembleElementMatrix2( - ran_fe.Project(dom_shape_coeff, Trans, elmat_as_vec); - } - --void --VectorCrossProductInterpolator::AssembleElementMatrix2( -+void VectorCrossProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, -diff --git a/fem/bilininteg.hpp b/fem/bilininteg.hpp -index 209898714..598da405d 100644 ---- a/fem/bilininteg.hpp -+++ b/fem/bilininteg.hpp -@@ -41,10 +41,6 @@ public: - // TODO: add support for other assembly levels (in addition to PA) and their - // actions. - -- // TODO: for mixed meshes the quadrature rules to be used by methods like -- // AssemblePA() can be given as a QuadratureSpace, e.g. using a new method: -- // SetQuadratureSpace(). -- - // TODO: the methods for the various assembly levels make sense even in the - // base class NonlinearFormIntegrator, except that not all assembly levels - // make sense for the action of the nonlinear operator (but they all make -@@ -159,11 +155,11 @@ public: - - /** Abstract method used for assembling TraceFaceIntegrators in a - MixedBilinearForm. */ -- virtual void AssembleFaceMatrix(const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe1, -- const FiniteElement &test_fe2, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat); -+ virtual void AssembleFaceMatrix2(const FiniteElement &trial_face_fe, -+ const FiniteElement &test_fe1, -+ const FiniteElement &test_fe2, -+ FaceElementTransformations &Trans, -+ DenseMatrix &elmat); - - /** Abstract method used for assembling TraceFaceIntegrators for - DPG weak formulations. */ -@@ -173,7 +169,6 @@ public: - FaceElementTransformations &Trans, - DenseMatrix &elmat); - -- - /// @brief Perform the local action of the BilinearFormIntegrator. - /// Note that the default implementation in the base class is general but not - /// efficient. -@@ -282,7 +277,12 @@ public: - TransposeIntegrator(BilinearFormIntegrator *bfi_, bool own_bfi_ = true) - { bfi = bfi_; own_bfi = own_bfi_; } - -- virtual void SetIntRule(const IntegrationRule *ir); -+ virtual bool SupportsCeed() const { return bfi->SupportsCeed(); } -+ -+ virtual void SetIntRule(const IntegrationRule *ir) -+ { -+ IntRule = ir; bfi->SetIntRule(ir); -+ } - - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, -@@ -293,7 +293,6 @@ public: - ElementTransformation &Trans, - DenseMatrix &elmat); - -- using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -362,7 +361,12 @@ public: - LumpedIntegrator(BilinearFormIntegrator *bfi_, bool own_bfi_ = true) - { bfi = bfi_; own_bfi = own_bfi_; } - -- virtual void SetIntRule(const IntegrationRule *ir); -+ virtual bool SupportsCeed() const { return bfi->SupportsCeed(); } -+ -+ virtual void SetIntRule(const IntegrationRule *ir) -+ { -+ IntRule = ir; bfi->SetIntRule(ir); -+ } - - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, -@@ -375,57 +379,64 @@ public: - class InverseIntegrator : public BilinearFormIntegrator - { - private: -- bool own_integrator; -- BilinearFormIntegrator *integrator; -+ bool own_bfi; -+ BilinearFormIntegrator *bfi; - - public: -- InverseIntegrator(BilinearFormIntegrator *integ, bool own_integ = 1) -- { integrator = integ; own_integrator = own_integ; } -+ InverseIntegrator(BilinearFormIntegrator *bfi_, bool own_bfi_ = true) -+ { bfi = bfi_; own_bfi = own_bfi_; } - -- virtual void SetIntRule(const IntegrationRule *ir); -+ virtual bool SupportsCeed() const { return bfi->SupportsCeed(); } -+ -+ virtual void SetIntRule(const IntegrationRule *ir) -+ { -+ IntRule = ir; bfi->SetIntRule(ir); -+ } - - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); - -- virtual ~InverseIntegrator() { if (own_integrator) { delete integrator; } } -+ virtual ~InverseIntegrator() { if (own_bfi) { delete bfi; } } - }; - - /// Integrator defining a sum of multiple Integrators. - class SumIntegrator : public BilinearFormIntegrator - { - private: -- bool own_integrators; -- mutable DenseMatrix elem_mat; -- Array integrators; -+ bool own_bfis; -+ Array bfis; -+ DenseMatrix bfi_elmat; - - public: -- SumIntegrator(bool own_integs = true) { own_integrators = own_integs; } -+ SumIntegrator(bool own_bfis_ = true) { own_bfis = own_bfis_; } - -- virtual void SetIntRule(const IntegrationRule *ir); -+ void AddIntegrator(BilinearFormIntegrator *bfi) -+ { bfis.Append(bfi); } -+ -+ virtual bool SupportsCeed() const; - -- void AddIntegrator(BilinearFormIntegrator *integ) -- { integrators.Append(integ); } -+ virtual void SetIntRule(const IntegrationRule *ir); - - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); - -- using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, - DenseMatrix &elmat); - -- virtual void AssembleFaceMatrix(const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe1, -- const FiniteElement &test_fe2, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat); -+ virtual void AssembleFaceMatrix2(const FiniteElement &trial_face_fe, -+ const FiniteElement &test_fe1, -+ const FiniteElement &test_fe2, -+ FaceElementTransformations &Trans, -+ DenseMatrix &elmat); - - virtual void AssemblePA(const FiniteElementSpace &fes); - virtual void AssemblePA(const FiniteElementSpace &trial_fes, -@@ -469,6 +480,11 @@ public: - class MixedScalarIntegrator: public BilinearFormIntegrator - { - public: -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -486,39 +502,40 @@ protected: - /// result if given the same FiniteElement. The default is false. - bool same_calc_shape; - -+ Coefficient *Q; -+ - MixedScalarIntegrator() : same_calc_shape(false), Q(NULL) {} - MixedScalarIntegrator(Coefficient &q) : same_calc_shape(false), Q(&q) {} - -- inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement &trial_fe, const FiniteElement &test_fe) const -+ virtual bool VerifyFiniteElementTypes( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarIntegrator: " - "Trial and test spaces must both be scalar fields."; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { test_fe.CalcPhysShape(Trans, shape); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { trial_fe.CalcPhysShape(Trans, shape); } - -- Coefficient *Q; -- - private: - #ifndef MFEM_THREAD_SAFE - Vector test_shape, trial_shape; -@@ -530,6 +547,11 @@ private: - class MixedVectorIntegrator: public BilinearFormIntegrator - { - public: -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -547,6 +569,12 @@ protected: - /// result if given the same FiniteElement. The default is false. - bool same_calc_shape; - -+ int space_dim; -+ Coefficient *Q; -+ VectorCoefficient *VQ; -+ DiagonalMatrixCoefficient *DQ; -+ MatrixCoefficient *MQ; -+ - MixedVectorIntegrator() - : same_calc_shape(false), Q(NULL), VQ(NULL), DQ(NULL), MQ(NULL) {} - MixedVectorIntegrator(Coefficient &q) -@@ -557,47 +585,41 @@ protected: - MixedVectorIntegrator(MatrixCoefficient &mq) - : same_calc_shape(false), Q(NULL), VQ(NULL), DQ(NULL), MQ(&mq) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorIntegrator: " - "Trial and test spaces must both be vector fields"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return std::max(space_dim, test_fe.GetVDim()); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcVShape(Trans, shape); } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return std::max(space_dim, trial_fe.GetVDim()); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcVShape(Trans, shape); } - -- int space_dim; -- Coefficient *Q; -- VectorCoefficient *VQ; -- DiagonalMatrixCoefficient *DQ; -- MatrixCoefficient *MQ; -- - private: - #ifndef MFEM_THREAD_SAFE - Vector V, D; -@@ -611,6 +633,11 @@ private: - class MixedScalarVectorIntegrator: public BilinearFormIntegrator - { - public: -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -627,24 +654,28 @@ public: - { AssembleElementMatrix2(fe, fe, Trans, elmat); } - - protected: -+ VectorCoefficient *VQ; -+ int space_dim; -+ bool transpose; -+ bool cross_2d; // In 2D use a cross product rather than a dot product -+ - MixedScalarVectorIntegrator(VectorCoefficient &vq, bool transpose_ = false, - bool cross_2d_ = false) - : VQ(&vq), transpose(transpose_), cross_2d(cross_2d_) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return ((transpose && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ) || -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR) || - (!transpose && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ) -- ); -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR)); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - if (transpose) - { -@@ -660,35 +691,28 @@ protected: - } - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); } - -- inline virtual int GetVDim(const FiniteElement &vector_fe) -+ virtual int GetVDim(const FiniteElement &vector_fe) const - { return std::max(space_dim, vector_fe.GetVDim()); } - -- inline virtual void CalcVShape(const FiniteElement &vector_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape_) -+ virtual void CalcVShape(const FiniteElement &vector_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape_) - { vector_fe.CalcVShape(Trans, shape_); } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape_) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape_) - { scalar_fe.CalcPhysShape(Trans, shape_); } - -- VectorCoefficient *VQ; -- int space_dim; -- bool transpose; -- bool cross_2d; // In 2D use a cross product rather than a dot product -- - private: - #ifndef MFEM_THREAD_SAFE -- Vector V; -+ Vector V, shape, vshape_tmp; - DenseMatrix vshape; -- Vector shape; -- Vector vshape_tmp; - #endif - }; - -@@ -723,25 +747,25 @@ public: - : MixedScalarIntegrator(q) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 1 && test_fe.GetDim() == 1 && -- trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarDerivativeIntegrator: " - "Trial and test spaces must both be scalar fields in 1D " - "and the trial space must implement CalcDShape."; - } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - trial_fe.CalcPhysDShape(Trans, dshape); -@@ -758,16 +782,16 @@ public: - : MixedScalarIntegrator(q) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 1 && test_fe.GetDim() == 1 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakDerivativeIntegrator: " - "Trial and test spaces must both be scalar fields in 1D " -@@ -775,9 +799,9 @@ protected: - "map type \"VALUE\"."; - } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - test_fe.CalcPhysDShape(Trans, dshape); -@@ -796,29 +820,29 @@ public: - : MixedScalarIntegrator(q) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -- return (trial_fe.GetDerivType() == mfem::FiniteElement::DIV && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ return (trial_fe.GetDerivType() == mfem::FiniteElement::DIV && -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarDivergenceIntegrator: " - "Trial must be H(Div) and the test space must be a " - "scalar field"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { trial_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -832,15 +856,15 @@ public: - : MixedScalarVectorIntegrator(vq) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -- return (trial_fe.GetDerivType() == mfem::FiniteElement::DIV && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ return (trial_fe.GetDerivType() == mfem::FiniteElement::DIV && -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorDivergenceIntegrator: " - "Trial must be H(Div) and the test space must be a " -@@ -849,14 +873,14 @@ protected: - - // Subtract one due to the divergence and add one for the coefficient - // which is assumed to be at least linear. -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1 + 1; } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -871,24 +895,24 @@ public: - : MixedScalarIntegrator(q) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::DIV ); -+ test_fe.GetDerivType() == mfem::FiniteElement::DIV); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakGradientIntegrator: " - "Trial space must be a scalar field " - "and the test space must be H(Div)"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1; } - - virtual void CalcTestShape(const FiniteElement &test_fe, -@@ -911,7 +935,7 @@ public: - : MixedScalarIntegrator(q) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -@@ -920,21 +944,21 @@ protected: - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarCurlIntegrator: " - "Trial must be H(Curl) and the test space must be a " - "scalar field"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - trial_fe.CalcPhysCurlShape(Trans, dshape); -@@ -968,25 +992,25 @@ public: - : MixedScalarIntegrator(q) {} - - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakCurlIntegrator: " - "Trial space must be a scalar field " - "and the test space must be H(Curl)"; - } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - test_fe.CalcPhysCurlShape(Trans, dshape); -@@ -1026,15 +1050,15 @@ public: - MixedDotProductIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedDotProductIntegrator: " - "Trial space must be a vector field " -@@ -1051,16 +1075,16 @@ public: - MixedWeakGradDotIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::DIV ); -+ test_fe.GetDerivType() == mfem::FiniteElement::DIV); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedWeakGradDotIntegrator: " - "Trial space must be a vector field " -@@ -1069,14 +1093,14 @@ public: - - // Subtract one due to the gradient and add one for the coefficient - // which is assumed to be at least linear. -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1 + 1; } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1088,29 +1112,29 @@ public: - MixedWeakDivCrossIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetVDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedWeakDivCrossIntegrator: " - "Trial space must be a vector field in 3D " - "and the test space must be a scalar field with a gradient"; - } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1128,26 +1152,26 @@ public: - MixedGradGradIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) { same_calc_shape = true; } - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedGradGradIntegrator: " - "Trial and test spaces must both be scalar fields " - "with a gradient operator."; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+ virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { - // Same as DiffusionIntegrator - return test_fe.Space() == FunctionSpace::Pk ? -@@ -1155,20 +1179,20 @@ public: - trial_fe.GetOrder() + test_fe.GetOrder() + test_fe.GetDim() - 1; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1180,37 +1204,37 @@ public: - MixedCrossGradGradIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) { same_calc_shape = true; } - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossGradGradIntegrator: " - "Trial and test spaces must both be scalar fields " - "with a gradient operator."; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1228,7 +1252,7 @@ public: - MixedCurlCurlIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) { same_calc_shape = true; } - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -@@ -1236,30 +1260,30 @@ public: - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - trial_fe.GetDerivType() == mfem::FiniteElement::CURL && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCurlCurlIntegrator" - "Trial and test spaces must both be vector fields in 3D " - "with a curl."; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1271,7 +1295,7 @@ public: - MixedCrossCurlCurlIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) { same_calc_shape = true; } - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -@@ -1280,30 +1304,30 @@ public: - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - trial_fe.GetDerivType() == mfem::FiniteElement::CURL && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlCurlIntegrator: " - "Trial and test spaces must both be vector fields in 3D " - "with a curl."; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1315,7 +1339,7 @@ public: - MixedCrossCurlGradIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -@@ -1323,30 +1347,30 @@ public: - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - trial_fe.GetDerivType() == mfem::FiniteElement::CURL && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlGradIntegrator" - "Trial space must be a vector field in 3D with a curl" - "and the test space must be a scalar field with a gradient"; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1358,7 +1382,7 @@ public: - MixedCrossGradCurlIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { -@@ -1366,30 +1390,30 @@ public: - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossGradCurlIntegrator" - "Trial space must be a scalar field in 3D with a gradient" - "and the test space must be a vector field with a curl"; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1402,29 +1426,29 @@ public: - MixedWeakCurlCrossIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetVDim() == 3 && test_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedWeakCurlCrossIntegrator: " - "Trial space must be a vector field in 3D " - "and the test space must be a vector field with a curl"; - } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1437,26 +1461,26 @@ public: - MixedScalarWeakCurlCrossIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakCurlCrossIntegrator: " - "Trial space must be a vector field in 2D " - "and the test space must be a vector field with a curl"; - } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - scalar_fe.CalcPhysCurlShape(Trans, dshape); -@@ -1472,34 +1496,34 @@ public: - MixedCrossGradIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (test_fe.GetVDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossGradIntegrator: " - "Trial space must be a scalar field with a gradient operator" - " and the test space must be a vector field both in 3D."; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { test_fe.CalcVShape(Trans, shape); } - }; - -@@ -1512,29 +1536,29 @@ public: - MixedCrossCurlIntegrator(VectorCoefficient &vq) - : MixedVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && test_fe.GetVDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- trial_fe.GetDerivType() == mfem::FiniteElement::CURL && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ trial_fe.GetDerivType() == mfem::FiniteElement::CURL && -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlIntegrator: " - "Trial space must be a vector field in 3D with a curl " - "and the test space must be a vector field"; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1547,26 +1571,26 @@ public: - MixedScalarCrossCurlIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, false, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- trial_fe.GetDerivType() == mfem::FiniteElement::CURL && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ trial_fe.GetDerivType() == mfem::FiniteElement::CURL && -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlIntegrator: " - "Trial space must be a vector field in 2D with a curl " - "and the test space must be a vector field"; - } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - scalar_fe.CalcPhysCurlShape(Trans, dshape); shape *= -1.0; -@@ -1581,29 +1605,29 @@ public: - MixedScalarCrossGradIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarCrossGradIntegrator: " - "Trial space must be a scalar field in 2D with a gradient " - "and the test space must be a scalar field"; - } - -- inline int GetVDim(const FiniteElement &vector_fe) -+ virtual int GetVDim(const FiniteElement &vector_fe) const - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement &vector_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcVShape(const FiniteElement &vector_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1615,16 +1639,16 @@ public: - MixedScalarCrossProductIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarCrossProductIntegrator: " - "Trial space must be a vector field in 2D " -@@ -1640,25 +1664,25 @@ public: - MixedScalarWeakCrossProductIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, false, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakCrossProductIntegrator: " - "Trial space must be a scalar field in 2D " - "and the test space must be a vector field"; - } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { scalar_fe.CalcPhysShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1670,28 +1694,28 @@ public: - MixedDirectionalDerivativeIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -- test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); -+ trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -+ test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedDirectionalDerivativeIntegrator: " - "Trial space must be a scalar field with a gradient " - "and the test space must be a scalar field"; - } - -- inline virtual int GetVDim(const FiniteElement &vector_fe) -+ virtual int GetVDim(const FiniteElement &vector_fe) const - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement &vector_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcVShape(const FiniteElement &vector_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1703,34 +1727,34 @@ public: - MixedGradDivIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, true) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -+ trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::DIV ); -+ test_fe.GetDerivType() == mfem::FiniteElement::DIV); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedGradDivIntegrator: " - "Trial space must be a scalar field with a gradient" - "and the test space must be a vector field with a divergence"; - } - -- inline virtual int GetVDim(const FiniteElement &vector_fe) -+ virtual int GetVDim(const FiniteElement &vector_fe) const - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement &vector_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcVShape(const FiniteElement &vector_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -1742,35 +1766,34 @@ public: - MixedDivGradIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- trial_fe.GetDerivType() == mfem::FiniteElement::DIV && -+ trial_fe.GetDerivType() == mfem::FiniteElement::DIV && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD -- ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedDivGradIntegrator: " - "Trial space must be a vector field with a divergence" - "and the test space must be a scalar field with a gradient"; - } - -- inline virtual int GetVDim(const FiniteElement &vector_fe) -+ virtual int GetVDim(const FiniteElement &vector_fe) const - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement &vector_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcVShape(const FiniteElement &vector_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - -- inline virtual void CalcShape(const FiniteElement &scalar_fe, -- ElementTransformation &Trans, -- Vector &shape) -+ virtual void CalcShape(const FiniteElement &scalar_fe, -+ ElementTransformation &Trans, -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -1782,28 +1805,28 @@ public: - MixedScalarWeakDivergenceIntegrator(VectorCoefficient &vq) - : MixedScalarVectorIntegrator(vq, false) {} - -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakDivergenceIntegrator: " - "Trial space must be a scalar field " - "and the test space must be a scalar field with a gradient"; - } - -- inline int GetVDim(const FiniteElement &vector_fe) -+ virtual int GetVDim(const FiniteElement &vector_fe) const - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement &vector_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcVShape(const FiniteElement &vector_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1824,40 +1847,57 @@ public: - MixedVectorGradientIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); - -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ - virtual void AddMultPA(const Vector &x, Vector &y) const; - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorGradientIntegrator: " - "Trial spaces must be H1 and the test space must be a " - "vector field in 2D or 3D"; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { - trial_fe.CalcPhysDShape(Trans, shape); - } - -+private: - DenseMatrix Jinv; - - // PA extension -@@ -1882,6 +1922,8 @@ public: - MixedVectorCurlIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); -@@ -1890,29 +1932,35 @@ public: - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && test_fe.GetVDim() == 3 && -- trial_fe.GetDerivType() == mfem::FiniteElement::CURL && -- test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); -+ trial_fe.GetDerivType() == mfem::FiniteElement::CURL && -+ test_fe.GetRangeType() == mfem::FiniteElement::VECTOR); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorCurlIntegrator: " - "Trial space must be H(Curl) and the test space must be a " - "vector field in 3D"; - } - -- inline virtual int GetTrialVDim(const FiniteElement &trial_fe) -+ virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement &trial_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTrialShape(const FiniteElement &trial_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { - trial_fe.CalcPhysCurlShape(Trans, shape); - } -@@ -1942,6 +1990,8 @@ public: - MixedVectorWeakCurlIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); -@@ -1950,29 +2000,35 @@ public: - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetVDim() == 3 && test_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::CURL ); -+ test_fe.GetDerivType() == mfem::FiniteElement::CURL); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorWeakCurlIntegrator: " - "Trial space must be vector field in 3D and the " - "test space must be H(Curl)"; - } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { - test_fe.CalcPhysCurlShape(Trans, shape); - } -@@ -2000,28 +2056,50 @@ public: - MixedVectorWeakDivergenceIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - protected: -- inline virtual bool VerifyFiniteElementTypes( -+ virtual bool VerifyFiniteElementTypes( - const FiniteElement &trial_fe, - const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -- test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); -+ test_fe.GetDerivType() == mfem::FiniteElement::GRAD); - } - -- inline virtual const char *FiniteElementTypeFailureMessage() const -+ virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorWeakDivergenceIntegrator: " - "Trial space must be vector field and the " - "test space must be H1"; - } - -- inline virtual int GetTestVDim(const FiniteElement &test_fe) -+ virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement &test_fe, -- ElementTransformation &Trans, -- DenseMatrix &shape) -+ virtual void CalcTestShape(const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ DenseMatrix &shape) - { - test_fe.CalcPhysDShape(Trans, shape); - shape *= -1.0; -@@ -2063,6 +2141,11 @@ public: - Q{&q}, trial_maps{NULL}, test_maps{NULL}, geom{NULL} - {} - -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -2075,10 +2158,6 @@ public: - virtual void AddMultPA(const Vector &x, Vector &y) const; - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -- -- static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans); - }; - - /** Class for integrating the bilinear form a(u,v) := (Q grad u, grad v) where Q -@@ -2128,6 +2207,18 @@ public: - : BilinearFormIntegrator(ir), - Q(NULL), VQ(NULL), MQ(&q), maps(NULL), geom(NULL) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ static const IntegrationRule &GetRuleStatic(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans); -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+ { return GetRuleStatic(trial_fe, test_fe, Trans); } -+ - /** Given a particular Finite Element computes the element stiffness matrix - elmat. */ - virtual void AssembleElementMatrix(const FiniteElement &el, -@@ -2159,6 +2250,9 @@ public: - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -@@ -2168,6 +2262,9 @@ public: - using BilinearFormIntegrator::AssembleMF; - virtual void AssembleMF(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalMF(Vector &diag); - - virtual void AddMultMF(const Vector &x, Vector &y) const; -@@ -2175,11 +2272,6 @@ public: - using BilinearFormIntegrator::AssembleEA; - virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - -- static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -- const FiniteElement &test_fe); -- -- bool SupportsCeed() const { return DeviceCanUseCeed(); } -- - Coefficient *GetCoefficient() const { return Q; } - }; - -@@ -2210,6 +2302,19 @@ public: - MassIntegrator(Coefficient &q, const IntegrationRule *ir = NULL) - : BilinearFormIntegrator(ir), Q(&q), maps(NULL), geom(NULL) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ static const IntegrationRule &GetRuleStatic(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans, -+ int Q_order = 0); -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+ { return GetRuleStatic(trial_fe, test_fe, Trans); } -+ - /** Given a particular Finite Element computes the element mass matrix - elmat. */ - virtual void AssembleElementMatrix(const FiniteElement &el, -@@ -2236,6 +2341,9 @@ public: - using BilinearFormIntegrator::AssembleMF; - virtual void AssembleMF(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalMF(Vector &diag); - - virtual void AddMultMF(const Vector &x, Vector &y) const; -@@ -2243,12 +2351,6 @@ public: - using BilinearFormIntegrator::AssembleEA; - virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - -- static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans); -- -- bool SupportsCeed() const { return DeviceCanUseCeed(); } -- - const Coefficient *GetCoefficient() const { return Q; } - }; - -@@ -2258,7 +2360,10 @@ class BoundaryMassIntegrator : public MassIntegrator - public: - BoundaryMassIntegrator(Coefficient &q) : MassIntegrator(q) {} - -- using BilinearFormIntegrator::AssembleFaceMatrix; -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const; - - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, -@@ -2289,6 +2394,13 @@ public: - ConvectionIntegrator(VectorCoefficient &q, double a = 1.0) - : Q(&q) { alpha = a; } - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix(const FiniteElement &fes, - ElementTransformation &Trans, - DenseMatrix &elmat); -@@ -2296,6 +2408,9 @@ public: - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -@@ -2305,17 +2420,15 @@ public: - using BilinearFormIntegrator::AssembleMF; - virtual void AssembleMF(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalMF(Vector &diag); - - virtual void AddMultMF(const Vector &x, Vector &y) const; - - using BilinearFormIntegrator::AssembleEA; - virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); -- -- static const IntegrationRule &GetRule(const FiniteElement &fe, -- ElementTransformation &Trans); -- -- bool SupportsCeed() const { return DeviceCanUseCeed(); } - }; - - // Alias for @ConvectionIntegrator. -@@ -2349,33 +2462,154 @@ public: - DenseMatrix &); - }; - --/** Class for integrating the bilinear form a(u,v) := (Q u, v), -- where u=(u1,...,un) and v=(v1,...,vn); ui and vi are defined -- by scalar FE through standard transformation. */ --class VectorMassIntegrator: public BilinearFormIntegrator --{ --private: -- int vdim; -- Vector shape, te_shape, vec; -- DenseMatrix partelmat; -- DenseMatrix mcoeff; -- int Q_order; -+/** Integrator for - -+ (Q grad u, grad v) = sum_i (Q grad u_i, grad v_i) e_i e_i^T -+ -+ for vector FE spaces, where e_i is the unit vector in the i-th direction. -+ The resulting local element matrix is square, of size vdim*dof , -+ where \c vdim is the vector dimension space and \c dof is the local degrees -+ of freedom. The integrator is not aware of the true vector dimension and -+ must use \c VectorCoefficient, \c MatrixCoefficient, or a caller-specified -+ value to determine the vector space. For a scalar coefficient, the caller -+ may manually specify the vector dimension or the vector dimension is assumed -+ to be the spatial dimension (i.e. 2-dimension or 3-dimension). -+*/ -+class VectorDiffusionIntegrator : public BilinearFormIntegrator -+{ - protected: -- Coefficient *Q; -- VectorCoefficient *VQ; -- MatrixCoefficient *MQ; -+ Coefficient *Q = NULL; -+ VectorCoefficient *VQ = NULL; -+ MatrixCoefficient *MQ = NULL; - - // PA extension -- Vector pa_data; - const DofToQuad *maps; ///< Not owned - const GeometricFactors *geom; ///< Not owned -- int dim, ne, nq, dofs1D, quad1D; -+ int dim, sdim, ne, dofs1D, quad1D; -+ Vector pa_data; -+ -+private: -+ DenseMatrix dshape, dshapedxt, pelmat; -+ int vdim = -1; -+ DenseMatrix mcoeff; -+ Vector vcoeff; - - public: -- /// Construct an integrator with coefficient 1.0 -- VectorMassIntegrator() -- : vdim(-1), Q_order(0), Q(NULL), VQ(NULL), MQ(NULL) {} -+ VectorDiffusionIntegrator() {} -+ -+ /** \brief Integrator with unit coefficient for caller-specified vector -+ dimension. -+ -+ If the vector dimension does not match the true dimension of the space, -+ the resulting element matrix will be mathematically invalid. */ -+ VectorDiffusionIntegrator(int vector_dimension) -+ : vdim(vector_dimension) {} -+ -+ VectorDiffusionIntegrator(Coefficient &q) -+ : Q(&q) {} -+ -+ VectorDiffusionIntegrator(Coefficient &q, const IntegrationRule *ir) -+ : BilinearFormIntegrator(ir), Q(&q) {} -+ -+ /** \brief Integrator with scalar coefficient for caller-specified vector -+ dimension. -+ -+ The element matrix is block-diagonal with \c vdim copies of the element -+ matrix integrated with the \c Coefficient. -+ -+ If the vector dimension does not match the true dimension of the space, -+ the resulting element matrix will be mathematically invalid. */ -+ VectorDiffusionIntegrator(Coefficient &q, int vector_dimension) -+ : Q(&q), vdim(vector_dimension) {} -+ -+ /** \brief Integrator with \c VectorCoefficient. The vector dimension of the -+ \c FiniteElementSpace is assumed to be the same as the dimension of the -+ \c Vector. -+ -+ The element matrix is block-diagonal and each block is integrated with -+ coefficient q_i. -+ -+ If the vector dimension does not match the true dimension of the space, -+ the resulting element matrix will be mathematically invalid. */ -+ VectorDiffusionIntegrator(VectorCoefficient &vq) -+ : VQ(&vq), vdim(vq.GetVDim()) {} -+ -+ /** \brief Integrator with \c MatrixCoefficient. The vector dimension of the -+ \c FiniteElementSpace is assumed to be the same as the dimension of the -+ \c Matrix. -+ -+ The element matrix is populated in each block. Each block is integrated -+ with coefficient q_ij. -+ -+ If the vector dimension does not match the true dimension of the space, -+ the resulting element matrix will be mathematically invalid. */ -+ VectorDiffusionIntegrator(MatrixCoefficient& mq) -+ : MQ(&mq), vdim(mq.GetVDim()) {} -+ -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+ { return DiffusionIntegrator::GetRuleStatic(trial_fe, test_fe, Trans); } -+ -+ virtual void AssembleElementMatrix(const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat); -+ -+ virtual void AssembleElementVector(const FiniteElement &el, -+ ElementTransformation &Tr, -+ const Vector &elfun, Vector &elvect); -+ -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &fes); -+ -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalPA(Vector &diag); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); -+ -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalMF(Vector &diag); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+}; -+ -+/** Class for integrating the bilinear form a(u,v) := (Q u, v), -+ where u=(u1,...,un) and v=(v1,...,vn); ui and vi are defined -+ by scalar FE through standard transformation. */ -+class VectorMassIntegrator: public BilinearFormIntegrator -+{ -+private: -+ int vdim; -+ Vector shape, te_shape, vec; -+ DenseMatrix partelmat; -+ DenseMatrix mcoeff; -+ int Q_order; -+ -+protected: -+ Coefficient *Q; -+ VectorCoefficient *VQ; -+ MatrixCoefficient *MQ; -+ -+ // PA extension -+ Vector pa_data; -+ const DofToQuad *maps; ///< Not owned -+ const GeometricFactors *geom; ///< Not owned -+ int dim, ne, nq, dofs1D, quad1D; -+ -+public: -+ /// Construct an integrator with coefficient 1.0 -+ VectorMassIntegrator() -+ : vdim(-1), Q_order(0), Q(NULL), VQ(NULL), MQ(NULL) {} - /** Construct an integrator with scalar coefficient q. If possible, save - memory by using a scalar integrator since the resulting matrix is block - diagonal with the same diagonal block repeated. */ -@@ -2394,6 +2628,14 @@ public: - int GetVDim() const { return vdim; } - void SetVDim(int vdim_) { vdim = vdim_; } - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+ { return MassIntegrator::GetRuleStatic(trial_fe, test_fe, Trans, Q_order); } -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -@@ -2406,6 +2648,9 @@ public: - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -@@ -2413,11 +2658,12 @@ public: - using BilinearFormIntegrator::AssembleMF; - virtual void AssembleMF(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalMF(Vector &diag); - - virtual void AddMultMF(const Vector &x, Vector &y) const; -- -- bool SupportsCeed() const { return DeviceCanUseCeed(); } - }; - - /** Class for integrating (div u, p) where u is a vector field given by -@@ -2430,14 +2676,14 @@ public: - ElementTransformation Trans. */ - class VectorFEDivergenceIntegrator : public BilinearFormIntegrator - { --protected: -- Coefficient *Q; -- - private: - #ifndef MFEM_THREAD_SAFE - Vector divshape, shape; - #endif - -+protected: -+ Coefficient *Q; -+ - // PA extension - Vector pa_data; - const DofToQuad *mapsO; ///< Not owned. DOF-to-quad map, open. -@@ -2449,9 +2695,10 @@ public: - VectorFEDivergenceIntegrator() { Q = NULL; } - VectorFEDivergenceIntegrator(Coefficient &q) { Q = &q; } - -- virtual void AssembleElementMatrix(const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) {} -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2473,9 +2720,6 @@ public: - This is equivalent to a weak divergence of the Nedelec basis functions. */ - class VectorFEWeakDivergenceIntegrator: public BilinearFormIntegrator - { --protected: -- Coefficient *Q; -- - private: - #ifndef MFEM_THREAD_SAFE - DenseMatrix dshape; -@@ -2484,13 +2728,17 @@ private: - DenseMatrix invdfdx; - #endif - -+protected: -+ Coefficient *Q; -+ - public: - VectorFEWeakDivergenceIntegrator() { Q = NULL; } - VectorFEWeakDivergenceIntegrator(Coefficient &q) { Q = &q; } - -- virtual void AssembleElementMatrix(const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) {} -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2502,9 +2750,6 @@ public: - test spaces are switched, assembles the form (u, curl v). */ - class VectorFECurlIntegrator: public BilinearFormIntegrator - { --protected: -- Coefficient *Q; -- - private: - #ifndef MFEM_THREAD_SAFE - DenseMatrix curlshapeTrial; -@@ -2512,13 +2757,17 @@ private: - DenseMatrix curlshapeTrial_dFT; - #endif - -+protected: -+ Coefficient *Q; -+ - public: - VectorFECurlIntegrator() { Q = NULL; } - VectorFECurlIntegrator(Coefficient &q) { Q = &q; } - -- virtual void AssembleElementMatrix(const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat) {} -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2540,6 +2789,11 @@ private: - public: - DerivativeIntegrator(Coefficient &q, int i) : Q(&q), xi(i) {} - -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat) -@@ -2587,6 +2841,13 @@ public: - CurlCurlIntegrator(MatrixCoefficient &mq, const IntegrationRule *ir = NULL) : - BilinearFormIntegrator(ir), Q(NULL), DQ(NULL), MQ(&mq) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - /* Given a particular Finite Element, compute the - element curl-curl matrix elmat */ - virtual void AssembleElementMatrix(const FiniteElement &el, -@@ -2611,10 +2872,23 @@ public: - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); - - virtual void AddMultPA(const Vector &x, Vector &y) const; - -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); -+ -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalMF(Vector &diag); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - const Coefficient *GetCoefficient() const { return Q; } - }; - -@@ -2632,13 +2906,18 @@ protected: - - public: - VectorCurlCurlIntegrator() { Q = NULL; } -- - VectorCurlCurlIntegrator(Coefficient &q) : Q(&q) {} - -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - /// Assemble an element matrix - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - /// Compute element energy: (1/2) (curl u, curl u)_E - virtual double GetElementEnergy(const FiniteElement &el, - ElementTransformation &Tr, -@@ -2663,11 +2942,17 @@ private: - DenseMatrix dshape; - DenseMatrix curlshape; - DenseMatrix elmat_comp; -+ - public: - MixedCurlIntegrator() : Q{NULL} {} - MixedCurlIntegrator(Coefficient *q_) : Q{q_} {} - MixedCurlIntegrator(Coefficient &q) : Q{&q} {} - -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -2681,9 +2966,6 @@ public: - class VectorFEMassIntegrator: public BilinearFormIntegrator - { - private: -- void Init(Coefficient *q, DiagonalMatrixCoefficient *dq, MatrixCoefficient *mq) -- { Q = q; DQ = dq; MQ = mq; } -- - #ifndef MFEM_THREAD_SAFE - Vector shape; - Vector D; -@@ -2693,6 +2975,9 @@ private: - DenseMatrix trial_vshape; - #endif - -+ void Init(Coefficient *q, DiagonalMatrixCoefficient *dq, MatrixCoefficient *mq) -+ { Q = q; DQ = dq; MQ = mq; } -+ - protected: - Coefficient *Q; - DiagonalMatrixCoefficient *DQ; -@@ -2717,6 +3002,13 @@ public: - VectorFEMassIntegrator(MatrixCoefficient *mq_) { Init(NULL, NULL, mq_); } - VectorFEMassIntegrator(MatrixCoefficient &mq) { Init(NULL, NULL, &mq); } - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -@@ -2730,12 +3022,25 @@ public: - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); - -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); - - virtual void AddMultPA(const Vector &x, Vector &y) const; - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); -+ -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalMF(Vector &diag); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - const Coefficient *GetCoefficient() const { return Q; } - }; - -@@ -2762,14 +3067,16 @@ private: - - public: - VectorDivergenceIntegrator() : -- Q(NULL), trial_maps(NULL), test_maps(NULL), geom(NULL) -- { } -+ Q(NULL), trial_maps(NULL), test_maps(NULL), geom(NULL) {} - VectorDivergenceIntegrator(Coefficient *q_) : -- Q(q_), trial_maps(NULL), test_maps(NULL), geom(NULL) -- {} -+ Q(q_), trial_maps(NULL), test_maps(NULL), geom(NULL) {} - VectorDivergenceIntegrator(Coefficient &q) : -- Q(&q), trial_maps(NULL), test_maps(NULL), geom(NULL) -- {} -+ Q(&q), trial_maps(NULL), test_maps(NULL), geom(NULL) {} -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2783,10 +3090,6 @@ public: - virtual void AddMultPA(const Vector &x, Vector &y) const; - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -- -- static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans); - }; - - /// (Q div u, div v) for RT elements -@@ -2812,6 +3115,13 @@ public: - DivDivIntegrator(Coefficient &q, const IntegrationRule *ir = NULL) : - BilinearFormIntegrator(ir), Q(&q) {} - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -@@ -2824,107 +3134,8 @@ public: - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); - -- virtual void AssembleDiagonalPA(Vector &diag); -- -- virtual void AddMultPA(const Vector &x, Vector &y) const; -- -- const Coefficient *GetCoefficient() const { return Q; } --}; -- --/** Integrator for -- -- (Q grad u, grad v) = sum_i (Q grad u_i, grad v_i) e_i e_i^T -- -- for vector FE spaces, where e_i is the unit vector in the i-th direction. -- The resulting local element matrix is square, of size vdim*dof , -- where \c vdim is the vector dimension space and \c dof is the local degrees -- of freedom. The integrator is not aware of the true vector dimension and -- must use \c VectorCoefficient, \c MatrixCoefficient, or a caller-specified -- value to determine the vector space. For a scalar coefficient, the caller -- may manually specify the vector dimension or the vector dimension is assumed -- to be the spatial dimension (i.e. 2-dimension or 3-dimension). --*/ --class VectorDiffusionIntegrator : public BilinearFormIntegrator --{ --protected: -- Coefficient *Q = NULL; -- VectorCoefficient *VQ = NULL; -- MatrixCoefficient *MQ = NULL; -- -- // PA extension -- const DofToQuad *maps; ///< Not owned -- const GeometricFactors *geom; ///< Not owned -- int dim, sdim, ne, dofs1D, quad1D; -- Vector pa_data; -- --private: -- DenseMatrix dshape, dshapedxt, pelmat; -- int vdim = -1; -- DenseMatrix mcoeff; -- Vector vcoeff; -- --public: -- VectorDiffusionIntegrator() {} -- -- /** \brief Integrator with unit coefficient for caller-specified vector -- dimension. -- -- If the vector dimension does not match the true dimension of the space, -- the resulting element matrix will be mathematically invalid. */ -- VectorDiffusionIntegrator(int vector_dimension) -- : vdim(vector_dimension) {} -- -- VectorDiffusionIntegrator(Coefficient &q) -- : Q(&q) {} -- -- VectorDiffusionIntegrator(Coefficient &q, const IntegrationRule *ir) -- : BilinearFormIntegrator(ir), Q(&q) {} -- -- /** \brief Integrator with scalar coefficient for caller-specified vector -- dimension. -- -- The element matrix is block-diagonal with \c vdim copies of the element -- matrix integrated with the \c Coefficient. -- -- If the vector dimension does not match the true dimension of the space, -- the resulting element matrix will be mathematically invalid. */ -- VectorDiffusionIntegrator(Coefficient &q, int vector_dimension) -- : Q(&q), vdim(vector_dimension) {} -- -- /** \brief Integrator with \c VectorCoefficient. The vector dimension of the -- \c FiniteElementSpace is assumed to be the same as the dimension of the -- \c Vector. -- -- The element matrix is block-diagonal and each block is integrated with -- coefficient q_i. -- -- If the vector dimension does not match the true dimension of the space, -- the resulting element matrix will be mathematically invalid. */ -- VectorDiffusionIntegrator(VectorCoefficient &vq) -- : VQ(&vq), vdim(vq.GetVDim()) {} -- -- /** \brief Integrator with \c MatrixCoefficient. The vector dimension of the -- \c FiniteElementSpace is assumed to be the same as the dimension of the -- \c Matrix. -- -- The element matrix is populated in each block. Each block is integrated -- with coefficient q_ij. -- -- If the vector dimension does not match the true dimension of the space, -- the resulting element matrix will be mathematically invalid. */ -- VectorDiffusionIntegrator(MatrixCoefficient& mq) -- : MQ(&mq), vdim(mq.GetVDim()) {} -- -- virtual void AssembleElementMatrix(const FiniteElement &el, -- ElementTransformation &Trans, -- DenseMatrix &elmat); -- -- virtual void AssembleElementVector(const FiniteElement &el, -- ElementTransformation &Tr, -- const Vector &elfun, Vector &elvect); -- -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &fes); -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); - - virtual void AssembleDiagonalPA(Vector &diag); - -@@ -2933,11 +3144,14 @@ public: - using BilinearFormIntegrator::AssembleMF; - virtual void AssembleMF(const FiniteElementSpace &fes); - -+ using BilinearFormIntegrator::AssembleMFBoundary; -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalMF(Vector &diag); - - virtual void AddMultMF(const Vector &x, Vector &y) const; - -- bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ const Coefficient *GetCoefficient() const { return Q; } - }; - - /** Integrator for the linear elasticity form: -@@ -2966,6 +3180,11 @@ public: - ElasticityIntegrator(Coefficient &m, double q_l, double q_m) - { lambda = NULL; mu = &m; q_lambda = q_l; q_mu = q_m; } - -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const; -+ - virtual void AssembleElementMatrix(const FiniteElement &, - ElementTransformation &, - DenseMatrix &); -@@ -3054,7 +3273,11 @@ public: - double a, double b) - { rho = &rho_; u = &u_; alpha = a; beta = b; } - -- using BilinearFormIntegrator::AssembleFaceMatrix; -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const; -+ - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -3075,9 +3298,6 @@ public: - virtual void AssembleEABoundaryFaces(const FiniteElementSpace& fes, - Vector &ea_data_bdr); - -- static const IntegrationRule &GetRule(Geometry::Type geom, int order, -- FaceElementTransformations &T); -- - private: - void SetupPA(const FiniteElementSpace &fes, FaceType type); - }; -@@ -3137,7 +3357,16 @@ public: - DGDiffusionIntegrator(MatrixCoefficient &q, const double s, const double k) - : Q(NULL), MQ(&q), sigma(s), kappa(k) {} - -- using BilinearFormIntegrator::AssembleFaceMatrix; -+ static const IntegrationRule &GetRuleStatic(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans); -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const -+ { return GetRuleStatic(el1, el2, Trans); } -+ - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -3197,7 +3426,12 @@ public: - MFEM_DEPRECATED DGDiffusionBR2Integrator(class FiniteElementSpace *fes, - double e = 1.0); - -- using BilinearFormIntegrator::AssembleFaceMatrix; -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const -+ { return DGDiffusionIntegrator::GetRuleStatic(el1, el2, Trans); } -+ - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -3274,7 +3508,12 @@ public: - double alpha_, double kappa_) - : lambda(&lambda_), mu(&mu_), alpha(alpha_), kappa(kappa_) {} - -- using BilinearFormIntegrator::AssembleFaceMatrix; -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const -+ { return DGDiffusionIntegrator::GetRuleStatic(el1, el2, Trans); } -+ - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -3325,12 +3564,11 @@ private: - public: - TraceJumpIntegrator() {} - -- using BilinearFormIntegrator::AssembleFaceMatrix; -- virtual void AssembleFaceMatrix(const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe1, -- const FiniteElement &test_fe2, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat); -+ virtual void AssembleFaceMatrix2(const FiniteElement &trial_face_fe, -+ const FiniteElement &test_fe1, -+ const FiniteElement &test_fe2, -+ FaceElementTransformations &Trans, -+ DenseMatrix &elmat); - }; - - /** Integrator for the form: < v, [w.n] > over all faces (the interface) where -@@ -3345,12 +3583,11 @@ private: - public: - NormalTraceJumpIntegrator() {} - -- using BilinearFormIntegrator::AssembleFaceMatrix; -- virtual void AssembleFaceMatrix(const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe1, -- const FiniteElement &test_fe2, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat); -+ virtual void AssembleFaceMatrix2(const FiniteElement &trial_face_fe, -+ const FiniteElement &test_fe1, -+ const FiniteElement &test_fe2, -+ FaceElementTransformations &Trans, -+ DenseMatrix &elmat); - }; - - /** Integrator for the DPG form: < v, w > over a face (the interface) where -@@ -3361,13 +3598,15 @@ class TraceIntegrator : public BilinearFormIntegrator - { - private: - Vector face_shape, shape; -+ - public: -- TraceIntegrator() { } -- void AssembleTraceFaceMatrix(int elem, -- const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat); -+ TraceIntegrator() {} -+ -+ virtual void AssembleTraceFaceMatrix(int elem, -+ const FiniteElement &trial_face_fe, -+ const FiniteElement &test_fe, -+ FaceElementTransformations &Trans, -+ DenseMatrix &elmat); - }; - - /** Integrator for the form: < v, w.n > over a face (the interface) where -@@ -3380,15 +3619,15 @@ private: - DenseMatrix shape; - - public: -- NormalTraceIntegrator() { } -- virtual void AssembleTraceFaceMatrix(int ielem, -+ NormalTraceIntegrator() {} -+ -+ virtual void AssembleTraceFaceMatrix(int elem, - const FiniteElement &trial_face_fe, - const FiniteElement &test_fe, - FaceElementTransformations &Trans, - DenseMatrix &elmat); - }; - -- - /** Integrator for the form: < v, w × n > over a face (the interface) - * In 3D the trial variable v is defined on the interface (H^-1/2(curl), trace of H(curl)) - * In 2D it's defined on the interface (H^1/2, trace of H1) -@@ -3426,17 +3665,30 @@ private: - } - - public: -- TangentTraceIntegrator() { } -- void AssembleTraceFaceMatrix(int elem, -- const FiniteElement &trial_face_fe, -- const FiniteElement &test_fe, -- FaceElementTransformations &Trans, -- DenseMatrix &elmat); -+ TangentTraceIntegrator() {} -+ -+ virtual void AssembleTraceFaceMatrix(int elem, -+ const FiniteElement &trial_face_fe, -+ const FiniteElement &test_fe, -+ FaceElementTransformations &Trans, -+ DenseMatrix &elmat); - }; - - /** Abstract class to serve as a base for local interpolators to be used in the - DiscreteLinearOperator class. */ --class DiscreteInterpolator : public BilinearFormIntegrator {}; -+class DiscreteInterpolator : public BilinearFormIntegrator -+{ -+public: -+ // This avoids an error when GetRule is called with an interpolator even if -+ // it is never used. -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+ { -+ return IntRules.Get(0, 0); -+ } -+}; - - /** Class for constructing the gradient as a DiscreteLinearOperator from an - H1-conforming space to an H(curl)-conforming space. The range space can be -@@ -3447,17 +3699,14 @@ public: - GradientInterpolator() : dofquad_fe(NULL) {} - virtual ~GradientInterpolator() { delete dofquad_fe; } - -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ - virtual void AssembleElementMatrix2(const FiniteElement &h1_fe, - const FiniteElement &nd_fe, - ElementTransformation &Trans, - DenseMatrix &elmat) - { nd_fe.ProjectGrad(h1_fe, Trans, elmat); } - -- /** @brief Setup method for PA data. -- -- @param[in] trial_fes H1 Lagrange space -- @param[in] test_fes H(curl) Nedelec space -- */ - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); -@@ -3469,7 +3718,6 @@ public: - private: - /// 1D finite element that generates and owns the 1D DofToQuad maps below - FiniteElement *dofquad_fe; -- - bool B_id; // is the B basis operator (maps_C_C) the identity? - const DofToQuad *maps_C_C; // one-d map with Lobatto rows, Lobatto columns - const DofToQuad *maps_O_C; // one-d map with Legendre rows, Lobatto columns -@@ -3482,7 +3730,9 @@ private: - class IdentityInterpolator : public DiscreteInterpolator - { - public: -- IdentityInterpolator(): dofquad_fe(NULL) { } -+ IdentityInterpolator(): dofquad_fe(NULL) {} -+ -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } - - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, -@@ -3503,11 +3753,9 @@ public: - private: - /// 1D finite element that generates and owns the 1D DofToQuad maps below - FiniteElement *dofquad_fe; -- - const DofToQuad *maps_C_C; // one-d map with Lobatto rows, Lobatto columns - const DofToQuad *maps_O_C; // one-d map with Legendre rows, Lobatto columns - int dim, ne, o_dofs1D, c_dofs1D; -- - Vector pa_data; - }; - -@@ -3517,11 +3765,21 @@ private: - class CurlInterpolator : public DiscreteInterpolator - { - public: -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, - DenseMatrix &elmat) - { ran_fe.ProjectCurl(dom_fe, Trans, elmat); } -+ -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - }; - - /** Class for constructing the (local) discrete divergence matrix which can -@@ -3577,8 +3835,7 @@ protected: - class ScalarVectorProductInterpolator : public DiscreteInterpolator - { - public: -- ScalarVectorProductInterpolator(Coefficient &sc) -- : Q(&sc) {} -+ ScalarVectorProductInterpolator(Coefficient &sc) : Q(&sc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, -@@ -3595,8 +3852,7 @@ protected: - class VectorScalarProductInterpolator : public DiscreteInterpolator - { - public: -- VectorScalarProductInterpolator(VectorCoefficient &vc) -- : VQ(&vc) {} -+ VectorScalarProductInterpolator(VectorCoefficient &vc) : VQ(&vc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, -@@ -3630,8 +3886,7 @@ protected: - class VectorCrossProductInterpolator : public DiscreteInterpolator - { - public: -- VectorCrossProductInterpolator(VectorCoefficient &vc) -- : VQ(&vc) {} -+ VectorCrossProductInterpolator(VectorCoefficient &vc) : VQ(&vc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &nd_fe, - const FiniteElement &rt_fe, -diff --git a/fem/ceed/integrators/convection/convection.cpp b/fem/ceed/integrators/convection/convection.cpp -index c5560f354..c980123ba 100644 ---- a/fem/ceed/integrators/convection/convection.cpp -+++ b/fem/ceed/integrators/convection/convection.cpp -@@ -25,81 +25,94 @@ namespace ceed - #ifdef MFEM_USE_CEED - struct ConvectionOperatorInfo : public OperatorInfo - { -- ConvectionContext ctx; -- ConvectionOperatorInfo(int dim, double alpha) -+ ConvectionContext ctx = {0}; -+ ConvectionOperatorInfo(const mfem::FiniteElementSpace &fes, -+ mfem::VectorCoefficient *VQ, double alpha, -+ bool use_bdr = false, bool use_mf = false) - { -+ MFEM_VERIFY(VQ && VQ->GetVDim() == fes.GetMesh()->SpaceDimension(), -+ "Incorrect coefficient dimensions in ceed::ConvectionOperatorInfo!"); -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ ctx.alpha = alpha; -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_conv"; -+ apply_qf = &f_apply_conv; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (mfem::VectorConstantCoefficient *const_coeff = -+ dynamic_cast(VQ)) -+ { -+ const int vdim = VQ->GetVDim(); -+ MFEM_VERIFY(vdim <= LIBCEED_CONV_COEFF_COMP_MAX, -+ "VectorCoefficient dimension exceeds context storage!"); -+ const mfem::Vector &val = const_coeff->GetVec(); -+ for (int i = 0; i < vdim; i++) -+ { -+ ctx.coeff[i] = val[i]; -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_conv_const"; -+ build_qf = &f_build_conv_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_conv_mf_const"; -+ apply_qf = &f_apply_conv_mf_const; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_conv_quad"; -+ build_qf = &f_build_conv_quad; -+ } -+ else -+ { -+ apply_func = ":f_apply_conv_mf_quad"; -+ apply_qf = &f_apply_conv_mf_quad; -+ } -+ } - header = "/integrators/convection/convection_qf.h"; -- build_func_const = ":f_build_conv_const"; -- build_qf_const = &f_build_conv_const; -- build_func_quad = ":f_build_conv_quad"; -- build_qf_quad = &f_build_conv_quad; -- apply_func = ":f_apply_conv"; -- apply_qf = &f_apply_conv; -- apply_func_mf_const = ":f_apply_conv_mf_const"; -- apply_qf_mf_const = &f_apply_conv_mf_const; -- apply_func_mf_quad = ":f_apply_conv_mf_quad"; -- apply_qf_mf_quad = &f_apply_conv_mf_quad; - trial_op = EvalMode::Grad; - test_op = EvalMode::Interp; -- qdatasize = dim * (dim + 1) / 2; -- ctx.alpha = alpha; -+ qdatasize = ctx.dim; - } - }; - #endif - - PAConvectionIntegrator::PAConvectionIntegrator( -+ const mfem::ConvectionIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::VectorCoefficient *Q, -- const double alpha) -- : PAIntegrator() --{ --#ifdef MFEM_USE_CEED -- ConvectionOperatorInfo info(fes.GetMesh()->Dimension(), alpha); -- Assemble(info, fes, irm, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -- --MixedPAConvectionIntegrator::MixedPAConvectionIntegrator( -- const ConvectionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::VectorCoefficient *Q, -- const double alpha) -+ mfem::VectorCoefficient *VQ, -+ const double alpha, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- ConvectionOperatorInfo info(fes.GetMesh()->Dimension(), alpha); -- Assemble(integ, info, fes, Q); -+ ConvectionOperatorInfo info(fes, VQ, alpha, use_bdr); -+ Assemble(integ, info, fes, VQ, use_bdr); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - - MFConvectionIntegrator::MFConvectionIntegrator( -+ const mfem::ConvectionIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::VectorCoefficient *Q, -- const double alpha) -- : MFIntegrator() --{ --#ifdef MFEM_USE_CEED -- ConvectionOperatorInfo info(fes.GetMesh()->Dimension(), alpha); -- Assemble(info, fes, irm, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -- --MixedMFConvectionIntegrator::MixedMFConvectionIntegrator( -- const ConvectionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::VectorCoefficient *Q, -- const double alpha) -+ mfem::VectorCoefficient *VQ, -+ const double alpha, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- ConvectionOperatorInfo info(fes.GetMesh()->Dimension(), alpha); -- Assemble(integ, info, fes, Q); -+ ConvectionOperatorInfo info(fes, VQ, alpha, use_bdr, true); -+ Assemble(integ, info, fes, VQ, use_bdr, true); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif -diff --git a/fem/ceed/integrators/convection/convection.hpp b/fem/ceed/integrators/convection/convection.hpp -index 1cd968770..713b98c2c 100644 ---- a/fem/ceed/integrators/convection/convection.hpp -+++ b/fem/ceed/integrators/convection/convection.hpp -@@ -13,7 +13,7 @@ - #define MFEM_LIBCEED_CONV_HPP - - #include "../../interface/integrator.hpp" --#include "../../interface/mixed_integrator.hpp" -+#include "../../interface/mixed_operator.hpp" - #include "../../../fespace.hpp" - - namespace mfem -@@ -23,41 +23,25 @@ namespace ceed - { - - /// Represent a ConvectionIntegrator with AssemblyLevel::Partial using libCEED. --class PAConvectionIntegrator : public PAIntegrator -+class PAConvectionIntegrator : public MixedOperator - { - public: -- PAConvectionIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- mfem::VectorCoefficient *Q, -- const double alpha); --}; -- --class MixedPAConvectionIntegrator : public MixedIntegrator --{ --public: -- MixedPAConvectionIntegrator(const ConvectionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::VectorCoefficient *Q, -- const double alpha); -+ PAConvectionIntegrator(const mfem::ConvectionIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::VectorCoefficient *VQ, -+ const double alpha, -+ const bool use_bdr = false); - }; - - /// Represent a ConvectionIntegrator with AssemblyLevel::None using libCEED. --class MFConvectionIntegrator : public MFIntegrator --{ --public: -- MFConvectionIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- mfem::VectorCoefficient *Q, -- const double alpha); --}; -- --class MixedMFConvectionIntegrator : public MixedIntegrator -+class MFConvectionIntegrator : public MixedOperator - { - public: -- MixedMFConvectionIntegrator(const ConvectionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::VectorCoefficient *Q, -- const double alpha); -+ MFConvectionIntegrator(const mfem::ConvectionIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::VectorCoefficient *VQ, -+ const double alpha, -+ const bool use_bdr = false); - }; - - } -diff --git a/fem/ceed/integrators/convection/convection_qf.h b/fem/ceed/integrators/convection/convection_qf.h -index 68e96895e..0dd11387c 100644 ---- a/fem/ceed/integrators/convection/convection_qf.h -+++ b/fem/ceed/integrators/convection/convection_qf.h -@@ -9,207 +9,151 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --/// A structure used to pass additional data to f_build_conv and f_apply_conv --struct ConvectionContext { -- CeedInt dim, space_dim, vdim; -- CeedScalar coeff[3]; -+#ifndef MFEM_LIBCEED_CONV_QF_H -+#define MFEM_LIBCEED_CONV_QF_H -+ -+#include "../util/util_qf.h" -+ -+#define LIBCEED_CONV_COEFF_COMP_MAX 3 -+ -+struct ConvectionContext -+{ -+ CeedInt dim, space_dim; - CeedScalar alpha; -+ CeedScalar coeff[LIBCEED_CONV_COEFF_COMP_MAX]; - }; - --/// libCEED Q-function for building quadrature data for a convection operator -+/// libCEED QFunction for building quadrature data for a convection operator - /// with a constant coefficient - CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- ConvectionContext *bc = (ConvectionContext*)ctx; -- // in[0] is Jacobians with shape [dim, nc=dim, Q] -+ ConvectionContext *bc = (ConvectionContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[1] is quadrature weights, size (Q) - // -- // At every quadrature point, compute and store qw * adj(J). -- const CeedScalar coeff0 = bc->coeff[0]; -- const CeedScalar coeff1 = bc->coeff[1]; -- const CeedScalar coeff2 = bc->coeff[2]; -+ // At every quadrature point, compute and store qw * α * c^T adj(J)^T - const CeedScalar alpha = bc->alpha; -+ const CeedScalar *coeff = bc->coeff; - const CeedScalar *J = in[0], *qw = in[1]; - CeedScalar *qd = out[0]; -- switch (bc->dim + 10 * bc->space_dim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- qd[i] = alpha * coeff0 * qw[i] * J[i]; -+ const CeedScalar coeff0 = coeff[0]; -+ qd[i] = qw[i] * alpha * coeff0 * J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultCtAdjJt21(J + i, Q, coeff, 1, qw[i] * alpha, Q, qd + i); - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultCtAdjJt22(J + i, Q, coeff, 1, qw[i] * alpha, Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * coeff0; -- const CeedScalar wy = w * coeff1; -- qd[i + Q * 0] = wx * J22 - wy * J12; -- qd[i + Q * 1] = -wx * J21 + wy * J11; -+ MultCtAdjJt32(J + i, Q, coeff, 1, qw[i] * alpha, Q, qd + i); - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * coeff0; -- const CeedScalar wy = w * coeff1; -- const CeedScalar wz = w * coeff2; -- qd[i + Q * 0] = wx * A11 + wy * A12 + wz * A13; -- qd[i + Q * 1] = wx * A21 + wy * A22 + wz * A23; -- qd[i + Q * 2] = wx * A31 + wy * A32 + wz * A33; -+ MultCtAdjJt33(J + i, Q, coeff, 1, qw[i] * alpha, Q, qd + i); - } - break; - } - return 0; - } - --/// libCEED Q-function for building quadrature data for a convection operator --/// coefficient evaluated at quadrature points. -+/// libCEED QFunction for building quadrature data for a convection operator -+/// with a coefficient evaluated at quadrature points - CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { - ConvectionContext *bc = (ConvectionContext *)ctx; -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -+ // in[0] is coefficients with shape [ncomp=space_dim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[2] is quadrature weights, size (Q) - // -- // At every quadrature point, compute and store qw * adj(J). -- const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ // At every quadrature point, compute and store qw * α * c^T adj(J)^T - const CeedScalar alpha = bc->alpha; -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; - CeedScalar *qd = out[0]; -- switch (bc->dim + 10 * bc->space_dim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar coeff = c[i]; -- qd[i] = alpha * coeff * qw[i] * J[i]; -+ qd[i] = qw[i] * alpha * c[i] * J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultCtAdjJt21(J + i, Q, c + i, Q, qw[i] * alpha, Q, qd + i); - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * c[i + Q * 0]; -- const CeedScalar wy = w * c[i + Q * 1]; -- qd[i + Q * 0] = wx * J22 - wy * J12; -- qd[i + Q * 1] = -wx * J21 + wy * J11; -+ MultCtAdjJt22(J + i, Q, c + i, Q, qw[i] * alpha, Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultCtAdjJt32(J + i, Q, c + i, Q, qw[i] * alpha, Q, qd + i); - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * c[i + Q * 0]; -- const CeedScalar wy = w * c[i + Q * 1]; -- const CeedScalar wz = w * c[i + Q * 2]; -- qd[i + Q * 0] = wx * A11 + wy * A12 + wz * A13; -- qd[i + Q * 1] = wx * A21 + wy * A22 + wz * A23; -- qd[i + Q * 2] = wx * A31 + wy * A32 + wz * A33; -+ MultCtAdjJt33(J + i, Q, c + i, Q, qw[i] * alpha, Q, qd + i); - } - break; - } - return 0; - } - --/// libCEED Q-function for applying a conv operator -+/// libCEED QFunction for applying a convection operator - CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { - ConvectionContext *bc = (ConvectionContext *)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -+ // in[0] has shape [dim, ncomp=1, Q] -+ // out[0] has shape [ncomp=1, Q] - const CeedScalar *ug = in[0], *qd = in[1]; - CeedScalar *vg = out[0]; -- switch (10*bc->dim + bc->vdim) -+ switch (bc->dim) - { -- case 11: -- for (CeedInt i = 0; i < Q; i++) -+ case 1: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- vg[i] = ug[i] * qd[i]; -+ vg[i] = qd[i] * ug[i]; - } - break; -- case 21: -- for (CeedInt i = 0; i < Q; i++) -+ case 2: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - vg[i] = qd[i + Q * 0] * ug0 + qd[i + Q * 1] * ug1; - } - break; -- case 22: -- for (CeedInt i = 0; i < Q; i++) -- { -- const CeedScalar qd0 = qd[i + Q * 0]; -- const CeedScalar qd1 = qd[i + Q * 1]; -- for (CeedInt c = 0; c < 2; c++) -- { -- const CeedScalar ug0 = ug[i + Q * (c+2*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+2*1)]; -- vg[i + Q * c] = qd0 * ug0 + qd1 * ug1; -- } -- } -- break; -- case 31: -- for (CeedInt i = 0; i < Q; i++) -+ case 3: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; -@@ -217,313 +161,147 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, - vg[i] = qd[i + Q * 0] * ug0 + qd[i + Q * 1] * ug1 + qd[i + Q * 2] * ug2; - } - break; -- case 33: -- for (CeedInt i = 0; i < Q; i++) -- { -- const CeedScalar qd0 = qd[i + Q * 0]; -- const CeedScalar qd1 = qd[i + Q * 1]; -- const CeedScalar qd2 = qd[i + Q * 2]; -- for (CeedInt c = 0; c < 3; c++) -- { -- const CeedScalar ug0 = ug[i + Q * (c+3*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+3*1)]; -- const CeedScalar ug2 = ug[i + Q * (c+3*2)]; -- vg[i + Q * c] = qd0 * ug0 + qd1 * ug1 + qd2 * ug2; -- } -- } -- break; - } - return 0; - } - --/// libCEED Q-function for applying a conv operator -+/// libCEED QFunction for applying a convection operator with a constant -+/// coefficient - CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- ConvectionContext *bc = (ConvectionContext*)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -+ ConvectionContext *bc = (ConvectionContext *)ctx; -+ // in[0] has shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[2] is quadrature weights, size (Q) -+ // out[0] has shape [ncomp=1, Q] - // -- // At every quadrature point, compute qw * adj(J). -- const CeedScalar coeff0 = bc->coeff[0]; -- const CeedScalar coeff1 = bc->coeff[1]; -- const CeedScalar coeff2 = bc->coeff[2]; -+ // At every quadrature point, compute qw * α * c^T adj(J)^T - const CeedScalar alpha = bc->alpha; -+ const CeedScalar *coeff = bc->coeff; - const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; - CeedScalar *vg = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar qd = alpha * coeff0 * qw[i] * J[i]; -- vg[i] = ug[i] * qd; -+ const CeedScalar coeff0 = coeff[0]; -+ const CeedScalar qd = qw[i] * alpha * coeff0 * J[i]; -+ vg[i] = qd * ug[i]; - } - break; - case 21: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * coeff0; -- const CeedScalar wy = w * coeff1; -- const CeedScalar qd0 = wx * J22 - wy * J12; -- const CeedScalar qd1 = -wx * J21 + wy * J11; -- const CeedScalar ug0 = ug[i + Q * 0]; -- const CeedScalar ug1 = ug[i + Q * 1]; -- vg[i] = qd0 * ug0 + qd1 * ug1; -+ CeedScalar qd; -+ MultCtAdjJt21(J + i, Q, coeff, 1, qw[i] * alpha, 1, &qd); -+ vg[i] = qd * ug[i]; - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * coeff0; -- const CeedScalar wy = w * coeff1; -- const CeedScalar qd0 = wx * J22 - wy * J12; -- const CeedScalar qd1 = -wx * J21 + wy * J11; -- for (CeedInt c = 0; c < 2; c++) -- { -- const CeedScalar ug0 = ug[i + Q * (c+2*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+2*1)]; -- vg[i + Q * c] = qd0 * ug0 + qd1 * ug1; -- } -+ CeedScalar qd[2]; -+ MultCtAdjJt22(J + i, Q, coeff, 1, qw[i] * alpha, 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i] = qd[0] * ug0 + qd[1] * ug1; - } - break; -- case 31: -- for (CeedInt i = 0; i < Q; i++) -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * coeff0; -- const CeedScalar wy = w * coeff1; -- const CeedScalar wz = w * coeff2; -- const CeedScalar qd0 = wx * A11 + wy * A12 + wz * A13; -- const CeedScalar qd1 = wx * A21 + wy * A22 + wz * A23; -- const CeedScalar qd2 = wx * A31 + wy * A32 + wz * A33; -+ CeedScalar qd[2]; -+ MultCtAdjJt32(J + i, Q, coeff, 1, qw[i] * alpha, 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; -- const CeedScalar ug2 = ug[i + Q * 2]; -- vg[i] = qd0 * ug0 + qd1 * ug1 + qd2 * ug2; -+ vg[i] = qd[0] * ug0 + qd[1] * ug1; - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * coeff0; -- const CeedScalar wy = w * coeff1; -- const CeedScalar wz = w * coeff2; -- const CeedScalar qd0 = wx * A11 + wy * A12 + wz * A13; -- const CeedScalar qd1 = wx * A21 + wy * A22 + wz * A23; -- const CeedScalar qd2 = wx * A31 + wy * A32 + wz * A33; -- for (CeedInt c = 0; c < 3; c++) -- { -- const CeedScalar ug0 = ug[i + Q * (c+3*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+3*1)]; -- const CeedScalar ug2 = ug[i + Q * (c+3*2)]; -- vg[i + Q * c] = qd0 * ug0 + qd1 * ug1 + qd2 * ug2; -- } -+ CeedScalar qd[3]; -+ MultCtAdjJt33(J + i, Q, coeff, 1, qw[i] * alpha, 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ const CeedScalar ug2 = ug[i + Q * 2]; -+ vg[i] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - } - break; - } - return 0; - } - -+/// libCEED QFunction for applying a convection operator with a coefficient -+/// evaluated at quadrature points - CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- ConvectionContext *bc = (ConvectionContext*)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -- // in[2] is quadrature weights, size (Q) -+ ConvectionContext *bc = (ConvectionContext *)ctx; -+ // in[0] has shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // out[0] has shape [ncomp=1, Q] - // -- // At every quadrature point, compute qw * adj(J). -- const CeedScalar *c = in[0], *ug = in[1], *J = in[2], *qw = in[3]; -+ // At every quadrature point, compute qw * α * c^T adj(J)^T - const CeedScalar alpha = bc->alpha; -+ const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; - CeedScalar *vg = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar qd = alpha * c[i] * qw[i] * J[i]; -- vg[i] = ug[i] * qd; -+ const CeedScalar qd = qw[i] * alpha * c[i] * J[i]; -+ vg[i] = qd * ug[i]; - } - break; - case 21: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * c[i + Q * 0]; -- const CeedScalar wy = w * c[i + Q * 1]; -- const CeedScalar qd0 = wx * J22 - wy * J12; -- const CeedScalar qd1 = -wx * J21 + wy * J11; -- const CeedScalar ug0 = ug[i + Q * 0]; -- const CeedScalar ug1 = ug[i + Q * 1]; -- vg[i] = qd0 * ug0 + qd1 * ug1; -+ CeedScalar qd; -+ MultCtAdjJt21(J + i, Q, c + i, Q, qw[i] * alpha, 1, &qd); -+ vg[i] = qd * ug[i]; - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * c[i + Q * 0]; -- const CeedScalar wy = w * c[i + Q * 1]; -- const CeedScalar qd0 = wx * J22 - wy * J12; -- const CeedScalar qd1 = -wx * J21 + wy * J11; -- for (CeedInt d = 0; d < 2; d++) -- { -- const CeedScalar ug0 = ug[i + Q * (d+2*0)]; -- const CeedScalar ug1 = ug[i + Q * (d+2*1)]; -- vg[i + Q * d] = qd0 * ug0 + qd1 * ug1; -- } -+ CeedScalar qd[2]; -+ MultCtAdjJt22(J + i, Q, c + i, Q, qw[i] * alpha, 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i] = qd[0] * ug0 + qd[1] * ug1; - } - break; -- case 31: -- for (CeedInt i = 0; i < Q; i++) -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * c[i + Q * 0]; -- const CeedScalar wy = w * c[i + Q * 1]; -- const CeedScalar wz = w * c[i + Q * 2]; -- const CeedScalar qd0 = wx * A11 + wy * A12 + wz * A13; -- const CeedScalar qd1 = wx * A21 + wy * A22 + wz * A23; -- const CeedScalar qd2 = wx * A31 + wy * A32 + wz * A33; -+ CeedScalar qd[2]; -+ MultCtAdjJt32(J + i, Q, c + i, Q, qw[i] * alpha, 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; -- const CeedScalar ug2 = ug[i + Q * 2]; -- vg[i] = qd0 * ug0 + qd1 * ug1 + qd2 * ug2; -+ vg[i] = qd[0] * ug0 + qd[1] * ug1; - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = alpha * qw[i]; -- const CeedScalar wx = w * c[i + Q * 0]; -- const CeedScalar wy = w * c[i + Q * 1]; -- const CeedScalar wz = w * c[i + Q * 2]; -- const CeedScalar qd0 = wx * A11 + wy * A12 + wz * A13; -- const CeedScalar qd1 = wx * A21 + wy * A22 + wz * A23; -- const CeedScalar qd2 = wx * A31 + wy * A32 + wz * A33; -- for (CeedInt d = 0; d < 3; d++) -- { -- const CeedScalar ug0 = ug[i + Q * (d+3*0)]; -- const CeedScalar ug1 = ug[i + Q * (d+3*1)]; -- const CeedScalar ug2 = ug[i + Q * (d+3*2)]; -- vg[i + Q * d] = qd0 * ug0 + qd1 * ug1 + qd2 * ug2; -- } -+ CeedScalar qd[3]; -+ MultCtAdjJt33(J + i, Q, c + i, Q, qw[i] * alpha, 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ const CeedScalar ug2 = ug[i + Q * 2]; -+ vg[i] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; - } - break; - } - return 0; - } -+ -+#endif // MFEM_LIBCEED_CONV_QF_H -diff --git a/fem/ceed/integrators/curlcurl/curlcurl.cpp b/fem/ceed/integrators/curlcurl/curlcurl.cpp -new file mode 100644 -index 000000000..ace7c18e4 ---- /dev/null -+++ b/fem/ceed/integrators/curlcurl/curlcurl.cpp -@@ -0,0 +1,244 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "curlcurl.hpp" -+ -+#include "../../../../config/config.hpp" -+#ifdef MFEM_USE_CEED -+#include "curlcurl_qf.h" -+#endif -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+#ifdef MFEM_USE_CEED -+struct CurlCurlOperatorInfo : public OperatorInfo -+{ -+ CurlCurlContext ctx = {0}; -+ template -+ CurlCurlOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q, -+ bool use_bdr = false, bool use_mf = false) -+ { -+ MFEM_VERIFY(fes.GetVDim() == 1, -+ "libCEED interface for vector FE does not support vdim > 1!"); -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ MFEM_VERIFY(ctx.dim == 2 || ctx.dim == 3, -+ "CurlCurlIntegrator requires dim == 2 or dim == 3!"); -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ ctx.curl_dim = (ctx.dim < 3) ? 1 : ctx.dim; -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_curlcurl"; -+ apply_qf = &f_apply_curlcurl; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff[0] = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_scalar"; -+ build_qf = &f_build_curlcurl_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_scalar"; -+ apply_qf = &f_apply_curlcurl_mf_const_scalar; -+ } -+ } -+ else -+ { -+ InitCoefficient(*Q, use_mf); -+ } -+ header = "/integrators/curlcurl/curlcurl_qf.h"; -+ trial_op = EvalMode::Curl; -+ test_op = EvalMode::Curl; -+ qdatasize = (ctx.curl_dim * (ctx.curl_dim + 1)) / 2; -+ } -+ void InitCoefficient(mfem::Coefficient &Q, bool use_mf) -+ { -+ if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(&Q)) -+ { -+ ctx.coeff[0] = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_scalar"; -+ build_qf = &f_build_curlcurl_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_scalar"; -+ apply_qf = &f_apply_curlcurl_mf_const_scalar; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_quad_scalar"; -+ build_qf = &f_build_curlcurl_quad_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_quad_scalar"; -+ apply_qf = &f_apply_curlcurl_mf_quad_scalar; -+ } -+ } -+ } -+ void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) -+ { -+ if (mfem::VectorConstantCoefficient *const_coeff = -+ dynamic_cast(&VQ)) -+ { -+ const int vdim = VQ.GetVDim(); -+ MFEM_VERIFY(vdim <= LIBCEED_CURLCURL_COEFF_COMP_MAX, -+ "VectorCoefficient dimension exceeds context storage!"); -+ const mfem::Vector &val = const_coeff->GetVec(); -+ for (int i = 0; i < vdim; i++) -+ { -+ ctx.coeff[i] = val[i]; -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_vector"; -+ build_qf = &f_build_curlcurl_const_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_vector"; -+ apply_qf = &f_apply_curlcurl_mf_const_vector; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_quad_vector"; -+ build_qf = &f_build_curlcurl_quad_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_quad_vector"; -+ apply_qf = &f_apply_curlcurl_mf_quad_vector; -+ } -+ } -+ } -+ void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) -+ { -+ // Assumes matrix coefficient is symmetric -+ if (mfem::MatrixConstantCoefficient *const_coeff = -+ dynamic_cast(&MQ)) -+ { -+ const int vdim = MQ.GetVDim(); -+ MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_CURLCURL_COEFF_COMP_MAX, -+ "MatrixCoefficient dimensions exceed context storage!"); -+ const mfem::DenseMatrix &val = const_coeff->GetMatrix(); -+ for (int j = 0; j < vdim; j++) -+ { -+ for (int i = j; i < vdim; i++) -+ { -+ const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; -+ ctx.coeff[idx] = val(i, j); -+ } -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_matrix"; -+ build_qf = &f_build_curlcurl_const_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_matrix"; -+ apply_qf = &f_apply_curlcurl_mf_const_matrix; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_quad_matrix"; -+ build_qf = &f_build_curlcurl_quad_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_quad_matrix"; -+ apply_qf = &f_apply_curlcurl_mf_quad_matrix; -+ } -+ } -+ } -+}; -+#endif -+ -+template -+PACurlCurlIntegrator::PACurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ CurlCurlOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+MFCurlCurlIntegrator::MFCurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ CurlCurlOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+// @cond DOXYGEN_SKIP -+ -+template PACurlCurlIntegrator::PACurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template PACurlCurlIntegrator::PACurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template PACurlCurlIntegrator::PACurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+template MFCurlCurlIntegrator::MFCurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template MFCurlCurlIntegrator::MFCurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template MFCurlCurlIntegrator::MFCurlCurlIntegrator( -+ const mfem::CurlCurlIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+// @endcond -+ -+} // namespace ceed -+ -+} // namespace mfem -diff --git a/fem/ceed/integrators/curlcurl/curlcurl.hpp b/fem/ceed/integrators/curlcurl/curlcurl.hpp -new file mode 100644 -index 000000000..71d62c915 ---- /dev/null -+++ b/fem/ceed/integrators/curlcurl/curlcurl.hpp -@@ -0,0 +1,51 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_CURLCURL_HPP -+#define MFEM_LIBCEED_CURLCURL_HPP -+ -+#include "../../interface/integrator.hpp" -+#include "../../interface/mixed_operator.hpp" -+#include "../../../fespace.hpp" -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+/// Represent a CurlCurlIntegrator with AssemblyLevel::Partial using libCEED. -+class PACurlCurlIntegrator : public MixedOperator -+{ -+public: -+ template -+ PACurlCurlIntegrator(const mfem::CurlCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/// Represent a CurlCurlIntegrator with AssemblyLevel::None using libCEED. -+class MFCurlCurlIntegrator : public MixedOperator -+{ -+public: -+ template -+ MFCurlCurlIntegrator(const mfem::CurlCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+} -+ -+} -+ -+#endif // MFEM_LIBCEED_CURLCURL_HPP -diff --git a/fem/ceed/integrators/curlcurl/curlcurl_qf.h b/fem/ceed/integrators/curlcurl/curlcurl_qf.h -new file mode 100644 -index 000000000..6fbace54b ---- /dev/null -+++ b/fem/ceed/integrators/curlcurl/curlcurl_qf.h -@@ -0,0 +1,479 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_CURLCURL_QF_H -+#define MFEM_LIBCEED_CURLCURL_QF_H -+ -+#include "../util/util_qf.h" -+ -+#define LIBCEED_CURLCURL_COEFF_COMP_MAX 6 -+ -+struct CurlCurlContext -+{ -+ CeedInt dim, space_dim, curl_dim; -+ CeedScalar coeff[LIBCEED_CURLCURL_COEFF_COMP_MAX]; -+}; -+ -+/// libCEED QFunction for building quadrature data for a curl-curl operator -+/// with a scalar constant coefficient -+CEED_QFUNCTION(f_build_curlcurl_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J and store the -+ // symmetric part of the result. In 2D, compute and store qw * c / det(J) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ qd[i] = qw[i] * coeff0 / DetJ22(J + i, Q); -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ qd[i] = qw[i] * coeff0 / DetJ32(J + i, Q); -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a curl-curl operator -+/// with a vector constant coefficient -+CEED_QFUNCTION(f_build_curlcurl_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J and store the -+ // symmetric part of the result. In 2D, compute and store qw * c / det(J) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a curl-curl operator -+/// with a matrix constant coefficient -+CEED_QFUNCTION(f_build_curlcurl_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J and store the -+ // symmetric part of the result. In 2D, compute and store qw * c / det(J) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a curl-curl operator -+/// with a scalar coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_curlcurl_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J and store the -+ // symmetric part of the result. In 2D, compute and store qw * c / det(J) -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / DetJ22(J + i, Q); -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / DetJ32(J + i, Q); -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a curl-curl operator -+/// with a vector coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_curlcurl_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J and store the -+ // symmetric part of the result. In 2D, compute and store qw * c / det(J) -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a curl-curl operator -+/// with a matrix coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_curlcurl_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J and store the -+ // symmetric part of the result. In 2D, compute and store qw * c / det(J) -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator -+CEED_QFUNCTION(f_apply_curlcurl)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ const CeedScalar *uc = in[0], *qd = in[1]; -+ CeedScalar *vc = out[0]; -+ switch (10 * bc->dim + bc->curl_dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ vc[i] = qd[i] * uc[i]; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[i + Q * 0] * uc0 + qd[i + Q * 1] * uc1 + qd[i + Q * 2] * uc2; -+ vc[i + Q * 1] = qd[i + Q * 1] * uc0 + qd[i + Q * 3] * uc1 + qd[i + Q * 4] * uc2; -+ vc[i + Q * 2] = qd[i + Q * 2] * uc0 + qd[i + Q * 4] * uc1 + qd[i + Q * 5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator with a scalar constant -+/// coefficient -+CEED_QFUNCTION(f_apply_curlcurl_mf_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *uc = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *vc = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ const CeedScalar qd = qw[i] * coeff0 / DetJ22(J + i, Q); -+ vc[i] = qd * uc[i]; -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ const CeedScalar qd = qw[i] * coeff0 / DetJ32(J + i, Q); -+ vc[i] = qd * uc[i]; -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[0] * uc0 + qd[1] * uc1 + qd[2] * uc2; -+ vc[i + Q * 1] = qd[1] * uc0 + qd[3] * uc1 + qd[4] * uc2; -+ vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator with a vector constant -+/// coefficient -+CEED_QFUNCTION(f_apply_curlcurl_mf_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *uc = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *vc = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[0] * uc0 + qd[1] * uc1 + qd[2] * uc2; -+ vc[i + Q * 1] = qd[1] * uc0 + qd[3] * uc1 + qd[4] * uc2; -+ vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator with a matrix constant -+/// coefficient -+CEED_QFUNCTION(f_apply_curlcurl_mf_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *uc = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *vc = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[0] * uc0 + qd[1] * uc1 + qd[2] * uc2; -+ vc[i + Q * 1] = qd[1] * uc0 + qd[3] * uc1 + qd[4] * uc2; -+ vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator with a scalar -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_curlcurl_mf_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=1, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J -+ const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vc = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / DetJ22(J + i, Q); -+ vc[i] = qd * uc[i]; -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / DetJ32(J + i, Q); -+ vc[i] = qd * uc[i]; -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[0] * uc0 + qd[1] * uc1 + qd[2] * uc2; -+ vc[i + Q * 1] = qd[1] * uc0 + qd[3] * uc1 + qd[4] * uc2; -+ vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator with a vector -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_curlcurl_mf_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J -+ const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vc = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[0] * uc0 + qd[1] * uc1 + qd[2] * uc2; -+ vc[i + Q * 1] = qd[1] * uc0 + qd[3] * uc1 + qd[4] * uc2; -+ vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a curl-curl operator with a matrix -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_curlcurl_mf_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ CurlCurlContext *bc = (CurlCurlContext *)ctx; -+ // in[0], out[0] have shape [curl_dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) J^T C J -+ const CeedScalar *uc = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vc = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->curl_dim) -+ { -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar uc0 = uc[i + Q * 0]; -+ const CeedScalar uc1 = uc[i + Q * 1]; -+ const CeedScalar uc2 = uc[i + Q * 2]; -+ vc[i + Q * 0] = qd[0] * uc0 + qd[1] * uc1 + qd[2] * uc2; -+ vc[i + Q * 1] = qd[1] * uc0 + qd[3] * uc1 + qd[4] * uc2; -+ vc[i + Q * 2] = qd[2] * uc0 + qd[4] * uc1 + qd[5] * uc2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+#endif // MFEM_LIBCEED_CURLCURL_QF_H -diff --git a/fem/ceed/integrators/diffusion/diffusion.cpp b/fem/ceed/integrators/diffusion/diffusion.cpp -index 4cd68669f..d2f56db82 100644 ---- a/fem/ceed/integrators/diffusion/diffusion.cpp -+++ b/fem/ceed/integrators/diffusion/diffusion.cpp -@@ -25,106 +25,253 @@ namespace ceed - #ifdef MFEM_USE_CEED - struct DiffusionOperatorInfo : public OperatorInfo - { -- DiffusionContext ctx; -- DiffusionOperatorInfo(int dim) -+ DiffusionContext ctx = {0}; -+ template -+ DiffusionOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q, -+ bool use_bdr = false, bool use_mf = false) - { -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ ctx.vdim = fes.GetVDim(); -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_diff"; -+ apply_qf = &f_apply_diff; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff[0] = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_scalar"; -+ build_qf = &f_build_diff_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_scalar"; -+ apply_qf = &f_apply_diff_mf_const_scalar; -+ } -+ } -+ else -+ { -+ InitCoefficient(*Q, use_mf); -+ } - header = "/integrators/diffusion/diffusion_qf.h"; -- build_func_const = ":f_build_diff_const"; -- build_qf_const = &f_build_diff_const; -- build_func_quad = ":f_build_diff_quad"; -- build_qf_quad = &f_build_diff_quad; -- apply_func = ":f_apply_diff"; -- apply_qf = &f_apply_diff; -- apply_func_mf_const = ":f_apply_diff_mf_const"; -- apply_qf_mf_const = &f_apply_diff_mf_const; -- apply_func_mf_quad = ":f_apply_diff_mf_quad"; -- apply_qf_mf_quad = &f_apply_diff_mf_quad; - trial_op = EvalMode::Grad; - test_op = EvalMode::Grad; -- qdatasize = dim*(dim+1)/2; -+ qdatasize = (ctx.dim * (ctx.dim + 1)) / 2; -+ } -+ void InitCoefficient(mfem::Coefficient &Q, bool use_mf) -+ { -+ if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(&Q)) -+ { -+ ctx.coeff[0] = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_scalar"; -+ build_qf = &f_build_diff_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_scalar"; -+ apply_qf = &f_apply_diff_mf_const_scalar; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_quad_scalar"; -+ build_qf = &f_build_diff_quad_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_quad_scalar"; -+ apply_qf = &f_apply_diff_mf_quad_scalar; -+ } -+ } -+ } -+ void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) -+ { -+ if (mfem::VectorConstantCoefficient *const_coeff = -+ dynamic_cast(&VQ)) -+ { -+ const int vdim = VQ.GetVDim(); -+ MFEM_VERIFY(vdim <= LIBCEED_DIFF_COEFF_COMP_MAX, -+ "VectorCoefficient dimension exceeds context storage!"); -+ const mfem::Vector &val = const_coeff->GetVec(); -+ for (int i = 0; i < vdim; i++) -+ { -+ ctx.coeff[i] = val[i]; -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_vector"; -+ build_qf = &f_build_diff_const_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_vector"; -+ apply_qf = &f_apply_diff_mf_const_vector; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_quad_vector"; -+ build_qf = &f_build_diff_quad_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_quad_vector"; -+ apply_qf = &f_apply_diff_mf_quad_vector; -+ } -+ } -+ } -+ void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) -+ { -+ // Assumes matrix coefficient is symmetric -+ if (mfem::MatrixConstantCoefficient *const_coeff = -+ dynamic_cast(&MQ)) -+ { -+ const int vdim = MQ.GetVDim(); -+ MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_DIFF_COEFF_COMP_MAX, -+ "MatrixCoefficient dimensions exceed context storage!"); -+ const mfem::DenseMatrix &val = const_coeff->GetMatrix(); -+ for (int j = 0; j < vdim; j++) -+ { -+ for (int i = j; i < vdim; i++) -+ { -+ const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; -+ ctx.coeff[idx] = val(i, j); -+ } -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_matrix"; -+ build_qf = &f_build_diff_const_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_matrix"; -+ apply_qf = &f_apply_diff_mf_const_matrix; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_quad_matrix"; -+ build_qf = &f_build_diff_quad_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_quad_matrix"; -+ apply_qf = &f_apply_diff_mf_quad_matrix; -+ } -+ } - } - }; - #endif - -+template - PADiffusionIntegrator::PADiffusionIntegrator( -+ const mfem::DiffusionIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *Q) -- : PAIntegrator() -+ CoeffType *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- DiffusionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(info, fes, irm, Q); -+ DiffusionOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MixedPADiffusionIntegrator::MixedPADiffusionIntegrator( -- const DiffusionIntegrator &integ, -+template -+PADiffusionIntegrator::PADiffusionIntegrator( -+ const mfem::VectorDiffusionIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) -+ CoeffType *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- DiffusionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(integ, info, fes, Q); -+ DiffusionOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MixedPADiffusionIntegrator::MixedPADiffusionIntegrator( -- const VectorDiffusionIntegrator &integ, -+template -+MFDiffusionIntegrator::MFDiffusionIntegrator( -+ const mfem::DiffusionIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) -+ CoeffType *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- DiffusionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(integ, info, fes, Q); -+ DiffusionOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - -+template - MFDiffusionIntegrator::MFDiffusionIntegrator( -+ const mfem::VectorDiffusionIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *Q) -- : MFIntegrator() -+ CoeffType *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- DiffusionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(info, fes, irm, Q); -+ DiffusionOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MixedMFDiffusionIntegrator::MixedMFDiffusionIntegrator( -- const DiffusionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) --{ --#ifdef MFEM_USE_CEED -- DiffusionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(integ, info, fes, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -+// @cond DOXYGEN_SKIP - --MixedMFDiffusionIntegrator::MixedMFDiffusionIntegrator( -- const VectorDiffusionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) --{ --#ifdef MFEM_USE_CEED -- DiffusionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(integ, info, fes, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -+template PADiffusionIntegrator::PADiffusionIntegrator( -+ const mfem::DiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template PADiffusionIntegrator::PADiffusionIntegrator( -+ const mfem::DiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template PADiffusionIntegrator::PADiffusionIntegrator( -+ const mfem::DiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+template PADiffusionIntegrator::PADiffusionIntegrator( -+ const mfem::VectorDiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+ -+template MFDiffusionIntegrator::MFDiffusionIntegrator( -+ const mfem::DiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template MFDiffusionIntegrator::MFDiffusionIntegrator( -+ const mfem::DiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template MFDiffusionIntegrator::MFDiffusionIntegrator( -+ const mfem::DiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+template MFDiffusionIntegrator::MFDiffusionIntegrator( -+ const mfem::VectorDiffusionIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+ -+// @endcond - - } // namespace ceed - -diff --git a/fem/ceed/integrators/diffusion/diffusion.hpp b/fem/ceed/integrators/diffusion/diffusion.hpp -index dd28c9d16..b92710bad 100644 ---- a/fem/ceed/integrators/diffusion/diffusion.hpp -+++ b/fem/ceed/integrators/diffusion/diffusion.hpp -@@ -13,7 +13,7 @@ - #define MFEM_LIBCEED_DIFF_HPP - - #include "../../interface/integrator.hpp" --#include "../../interface/mixed_integrator.hpp" -+#include "../../interface/mixed_operator.hpp" - #include "../../../fespace.hpp" - - namespace mfem -@@ -23,45 +23,37 @@ namespace ceed - { - - /// Represent a DiffusionIntegrator with AssemblyLevel::Partial using libCEED. --class PADiffusionIntegrator : public PAIntegrator -+class PADiffusionIntegrator : public MixedOperator - { - public: -- PADiffusionIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- mfem::Coefficient *Q); --}; -- --class MixedPADiffusionIntegrator : public MixedIntegrator --{ --public: -- MixedPADiffusionIntegrator(const DiffusionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -- -- MixedPADiffusionIntegrator(const VectorDiffusionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -+ template -+ PADiffusionIntegrator(const mfem::DiffusionIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+ -+ template -+ PADiffusionIntegrator(const mfem::VectorDiffusionIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); - }; - - /// Represent a DiffusionIntegrator with AssemblyLevel::None using libCEED. --class MFDiffusionIntegrator : public MFIntegrator -+class MFDiffusionIntegrator : public MixedOperator - { - public: -- MFDiffusionIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- mfem::Coefficient *Q); --}; -- --class MixedMFDiffusionIntegrator : public MixedIntegrator --{ --public: -- MixedMFDiffusionIntegrator(const DiffusionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -- -- MixedMFDiffusionIntegrator(const VectorDiffusionIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -+ template -+ MFDiffusionIntegrator(const mfem::DiffusionIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+ -+ template -+ MFDiffusionIntegrator(const mfem::VectorDiffusionIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); - }; - - } -diff --git a/fem/ceed/integrators/diffusion/diffusion_qf.h b/fem/ceed/integrators/diffusion/diffusion_qf.h -index aa4850e37..9ab50a3ed 100644 ---- a/fem/ceed/integrators/diffusion/diffusion_qf.h -+++ b/fem/ceed/integrators/diffusion/diffusion_qf.h -@@ -9,180 +9,331 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - -+#ifndef MFEM_LIBCEED_DIFF_QF_H -+#define MFEM_LIBCEED_DIFF_QF_H - --/// A structure used to pass additional data to f_build_diff and f_apply_diff --struct DiffusionContext { CeedInt dim, space_dim, vdim; CeedScalar coeff; }; -+#include "../util/util_qf.h" - --/// libCEED Q-function for building quadrature data for a diffusion operator --/// with a constant coefficient --CEED_QFUNCTION(f_build_diff_const)(void *ctx, CeedInt Q, -- const CeedScalar *const *in, -- CeedScalar *const *out) -+#define LIBCEED_DIFF_COEFF_COMP_MAX 6 -+ -+struct DiffusionContext -+{ -+ CeedInt dim, space_dim, vdim; -+ CeedScalar coeff[LIBCEED_DIFF_COEFF_COMP_MAX]; -+}; -+ -+/// libCEED QFunction for building quadrature data for a diffusion operator -+/// with a scalar constant coefficient -+CEED_QFUNCTION(f_build_diff_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) - { -- DiffusionContext *bc = (DiffusionContext*)ctx; -- // in[0] is Jacobians with shape [dim, nc=dim, Q] -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[1] is quadrature weights, size (Q) - // -- // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T and store -- // the symmetric part of the result. -- const CeedScalar coeff = bc->coeff; -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store -+ // the symmetric part of the result -+ const CeedScalar *coeff = bc->coeff; - const CeedScalar *J = in[0], *qw = in[1]; - CeedScalar *qd = out[0]; -- switch (bc->dim + 10 * bc->space_dim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ qd[i] = qw[i] * coeff0 / J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a diffusion operator -+/// with a vector constant coefficient -+CEED_QFUNCTION(f_build_diff_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store -+ // the symmetric part of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a diffusion operator -+/// with a matrix constant coefficient -+CEED_QFUNCTION(f_build_diff_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store -+ // the symmetric part of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- qd[i] = coeff * qw[i] / J[i]; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] / (J11 * J22 - J21 * J12); -- qd[i + Q * 0] = coeff * w * (J12 * J12 + J22 * J22); -- qd[i + Q * 1] = - coeff * w * (J11 * J12 + J21 * J22); -- qd[i + Q * 2] = coeff * w * (J11 * J11 + J21 * J21); -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] / (J11 * A11 + J21 * A12 + J31 * A13); -- qd[i + Q * 0] = coeff * w * (A11 * A11 + A12 * A12 + A13 * A13); -- qd[i + Q * 1] = coeff * w * (A11 * A21 + A12 * A22 + A13 * A23); -- qd[i + Q * 2] = coeff * w * (A11 * A31 + A12 * A32 + A13 * A33); -- qd[i + Q * 3] = coeff * w * (A21 * A21 + A22 * A22 + A23 * A23); -- qd[i + Q * 4] = coeff * w * (A21 * A31 + A22 * A32 + A23 * A33); -- qd[i + Q * 5] = coeff * w * (A31 * A31 + A32 * A32 + A33 * A33); -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); - } - break; - } - return 0; - } - --/// libCEED Q-function for building quadrature data for a diffusion operator --/// coefficient evaluated at quadrature points. --CEED_QFUNCTION(f_build_diff_quad)(void *ctx, CeedInt Q, -- const CeedScalar *const *in, -- CeedScalar *const *out) -+/// libCEED QFunction for building quadrature data for a diffusion operator -+/// with a scalar coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_diff_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) - { - DiffusionContext *bc = (DiffusionContext *)ctx; -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -+ // in[0] is coefficients with shape [ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[2] is quadrature weights, size (Q) - // -- // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T and store -- // the symmetric part of the result. -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store -+ // the symmetric part of the result - const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; - CeedScalar *qd = out[0]; -- switch (bc->dim + 10 * bc->space_dim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- qd[i] = c[i] * qw[i] / J[i]; -+ qd[i] = qw[i] * c[i] / J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar coeff = c[i]; -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] / (J11 * J22 - J21 * J12); -- qd[i + Q * 0] = coeff * w * (J12 * J12 + J22 * J22); -- qd[i + Q * 1] = - coeff * w * (J11 * J12 + J21 * J22); -- qd[i + Q * 2] = coeff * w * (J11 * J11 + J21 * J21); -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar coeff = c[i]; -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] / (J11 * A11 + J21 * A12 + J31 * A13); -- qd[i + Q * 0] = coeff * w * (A11 * A11 + A12 * A12 + A13 * A13); -- qd[i + Q * 1] = coeff * w * (A11 * A21 + A12 * A22 + A13 * A23); -- qd[i + Q * 2] = coeff * w * (A11 * A31 + A12 * A32 + A13 * A33); -- qd[i + Q * 3] = coeff * w * (A21 * A21 + A22 * A22 + A23 * A23); -- qd[i + Q * 4] = coeff * w * (A21 * A31 + A22 * A32 + A23 * A33); -- qd[i + Q * 5] = coeff * w * (A31 * A31 + A32 * A32 + A33 * A33); -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); - } - break; - } - return 0; - } - --/// libCEED Q-function for applying a diff operator -+/// libCEED QFunction for building quadrature data for a diffusion operator -+/// with a vector coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_diff_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store -+ // the symmetric part of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a diffusion operator -+/// with a matrix coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_diff_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T and store -+ // the symmetric part of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a diffusion operator - CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { - DiffusionContext *bc = (DiffusionContext *)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] - const CeedScalar *ug = in[0], *qd = in[1]; - CeedScalar *vg = out[0]; -- switch (10*bc->dim + bc->vdim) -+ switch (10 * bc->dim + bc->vdim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- vg[i] = ug[i] * qd[i]; -+ vg[i] = qd[i] * ug[i]; -+ } -+ break; -+ case 12: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd0 = qd[i]; -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd0 * ug[i + Q * d]; -+ } - } - break; - case 21: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; -@@ -191,23 +342,23 @@ CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar qd00 = qd[i + Q * 0]; - const CeedScalar qd01 = qd[i + Q * 1]; - const CeedScalar qd10 = qd01; - const CeedScalar qd11 = qd[i + Q * 2]; -- for (CeedInt c = 0; c < 2; c++) -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) - { -- const CeedScalar ug0 = ug[i + Q * (c+2*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+2*1)]; -- vg[i + Q * (c+2*0)] = qd00 * ug0 + qd01 * ug1; -- vg[i + Q * (c+2*1)] = qd10 * ug0 + qd11 * ug1; -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd00 * ug0 + qd01 * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd10 * ug0 + qd11 * ug1; - } - } - break; - case 31: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; -@@ -217,8 +368,24 @@ CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, - vg[i + Q * 2] = qd[i + Q * 2] * ug0 + qd[i + Q * 4] * ug1 + qd[i + Q * 5] * ug2; - } - break; -+ case 23: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd00 = qd[i + Q * 0]; -+ const CeedScalar qd01 = qd[i + Q * 1]; -+ const CeedScalar qd10 = qd01; -+ const CeedScalar qd11 = qd[i + Q * 2]; -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd00 * ug0 + qd01 * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd10 * ug0 + qd11 * ug1; -+ } -+ } -+ break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar qd00 = qd[i + Q * 0]; - const CeedScalar qd01 = qd[i + Q * 1]; -@@ -229,14 +396,14 @@ CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, - const CeedScalar qd20 = qd02; - const CeedScalar qd21 = qd12; - const CeedScalar qd22 = qd[i + Q * 5]; -- for (CeedInt c = 0; c < 3; c++) -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) - { -- const CeedScalar ug0 = ug[i + Q * (c+3*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+3*1)]; -- const CeedScalar ug2 = ug[i + Q * (c+3*2)]; -- vg[i + Q * (c+3*0)] = qd00 * ug0 + qd01 * ug1 + qd02 * ug2; -- vg[i + Q * (c+3*1)] = qd10 * ug0 + qd11 * ug1 + qd12 * ug2; -- vg[i + Q * (c+3*2)] = qd20 * ug0 + qd21 * ug1 + qd22 * ug2; -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd00 * ug0 + qd01 * ug1 + qd02 * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd10 * ug0 + qd11 * ug1 + qd12 * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd20 * ug0 + qd21 * ug1 + qd22 * ug2; - } - } - break; -@@ -244,104 +411,105 @@ CEED_QFUNCTION(f_apply_diff)(void *ctx, CeedInt Q, - return 0; - } - --/// libCEED Q-function for applying a diff operator --CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, -- const CeedScalar *const *in, -- CeedScalar *const *out) -+/// libCEED QFunction for applying a diffusion operator with a scalar constant -+/// coefficient -+CEED_QFUNCTION(f_apply_diff_mf_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) - { -- DiffusionContext *bc = (DiffusionContext*)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[2] is quadrature weights, size (Q) - // -- // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T -- const CeedScalar coeff = bc->coeff; -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T -+ const CeedScalar *coeff = bc->coeff; - const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; - CeedScalar *vg = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) - { -- case 11: -- for (CeedInt i = 0; i < Q; i++) -+ case 111: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar qd = coeff * qw[i] / J[i]; -- vg[i] = ug[i] * qd; -+ const CeedScalar coeff0 = coeff[0]; -+ const CeedScalar qd = qw[i] * coeff0 / J[i]; -+ vg[i] = qd * ug[i]; - } - break; -- case 21: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] / (J11 * J22 - J21 * J12); -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); -+ vg[i] = qd * ug[i]; -+ } -+ break; -+ case 212: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd * ug[i + Q * d]; -+ } -+ } -+ break; -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[3]; -- qd[0] = coeff * w * (J12 * J12 + J22 * J22); -- qd[1] = - coeff * w * (J11 * J12 + J21 * J22); -- qd[2] = coeff * w * (J11 * J11 + J21 * J21); -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; - } - break; -- case 22: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] / (J11 * J22 - J21 * J12); -+ case 222: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[3]; -- qd[0] = coeff * w * (J12 * J12 + J22 * J22); -- qd[1] = - coeff * w * (J11 * J12 + J21 * J22); -- qd[2] = coeff * w * (J11 * J11 + J21 * J21); -- for (CeedInt c = 0; c < 2; c++) -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) - { -- const CeedScalar ug0 = ug[i + Q * (c+2*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+2*1)]; -- vg[i + Q * (c+2*0)] = qd[0] * ug0 + qd[1] * ug1; -- vg[i + Q * (c+2*1)] = qd[1] * ug0 + qd[2] * ug1; -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; - } - } - break; -- case 31: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] / (J11 * A11 + J21 * A12 + J31 * A13); -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 323: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 331: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[6]; -- qd[0] = coeff * w * (A11 * A11 + A12 * A12 + A13 * A13); -- qd[1] = coeff * w * (A11 * A21 + A12 * A22 + A13 * A23); -- qd[2] = coeff * w * (A11 * A31 + A12 * A32 + A13 * A33); -- qd[3] = coeff * w * (A21 * A21 + A22 * A22 + A23 * A23); -- qd[4] = coeff * w * (A21 * A31 + A22 * A32 + A23 * A33); -- qd[5] = coeff * w * (A31 * A31 + A32 * A32 + A33 * A33); -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - const CeedScalar ug2 = ug[i + Q * 2]; -@@ -350,46 +518,19 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, - vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; - } - break; -- case 33: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] / (J11 * A11 + J21 * A12 + J31 * A13); -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[6]; -- qd[0] = coeff * w * (A11 * A11 + A12 * A12 + A13 * A13); -- qd[1] = coeff * w * (A11 * A21 + A12 * A22 + A13 * A23); -- qd[2] = coeff * w * (A11 * A31 + A12 * A32 + A13 * A33); -- qd[3] = coeff * w * (A21 * A21 + A22 * A22 + A23 * A23); -- qd[4] = coeff * w * (A21 * A31 + A22 * A32 + A23 * A33); -- qd[5] = coeff * w * (A31 * A31 + A32 * A32 + A33 * A33); -- for (CeedInt c = 0; c < 3; c++) -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) - { -- const CeedScalar ug0 = ug[i + Q * (c+3*0)]; -- const CeedScalar ug1 = ug[i + Q * (c+3*1)]; -- const CeedScalar ug2 = ug[i + Q * (c+3*2)]; -- vg[i + Q * (c+3*0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -- vg[i + Q * (c+3*1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -- vg[i + Q * (c+3*2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; - } - } - break; -@@ -397,105 +538,97 @@ CEED_QFUNCTION(f_apply_diff_mf_const)(void *ctx, CeedInt Q, - return 0; - } - --CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, -- const CeedScalar *const *in, -- CeedScalar *const *out) -+/// libCEED QFunction for applying a diffusion operator with a vector constant -+/// coefficient -+CEED_QFUNCTION(f_apply_diff_mf_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) - { -- DiffusionContext *bc = (DiffusionContext*)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[2] is quadrature weights, size (Q) - // -- // At every quadrature point, compute qw/det(J).adj(J).adj(J)^T -- const CeedScalar *c = in[0], *ug = in[1], *J = in[2], *qw = in[3]; -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; - CeedScalar *vg = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) - { -- case 11: -- for (CeedInt i = 0; i < Q; i++) -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar qd = c[i] * qw[i] / J[i]; -- vg[i] = ug[i] * qd; -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); -+ vg[i] = qd * ug[i]; - } - break; -- case 21: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] / (J11 * J22 - J21 * J12); -+ case 212: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd * ug[i + Q * d]; -+ } -+ } -+ break; -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[3]; -- const CeedScalar coeff = c[i]; -- qd[0] = coeff * w * (J12 * J12 + J22 * J22); -- qd[1] = - coeff * w * (J11 * J12 + J21 * J22); -- qd[2] = coeff * w * (J11 * J11 + J21 * J21); -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; - vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; - } - break; -- case 22: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 2 qd: 0 1 adj(J): J22 -J12 -- // 1 3 1 2 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] / (J11 * J22 - J21 * J12); -+ case 222: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[3]; -- const CeedScalar coeff = c[i]; -- qd[0] = coeff * w * (J12 * J12 + J22 * J22); -- qd[1] = - coeff * w * (J11 * J12 + J21 * J22); -- qd[2] = coeff * w * (J11 * J11 + J21 * J21); -- for (CeedInt d = 0; d < 2; d++) -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) - { -- const CeedScalar ug0 = ug[i + Q * (d+2*0)]; -- const CeedScalar ug1 = ug[i + Q * (d+2*1)]; -- vg[i + Q * (d+2*0)] = qd[0] * ug0 + qd[1] * ug1; -- vg[i + Q * (d+2*1)] = qd[1] * ug0 + qd[2] * ug1; -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; - } - } - break; -- case 31: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] / (J11 * A11 + J21 * A12 + J31 * A13); -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 323: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 331: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[6]; -- const CeedScalar coeff = c[i]; -- qd[0] = coeff * w * (A11 * A11 + A12 * A12 + A13 * A13); -- qd[1] = coeff * w * (A11 * A21 + A12 * A22 + A13 * A23); -- qd[2] = coeff * w * (A11 * A31 + A12 * A32 + A13 * A33); -- qd[3] = coeff * w * (A21 * A21 + A22 * A22 + A23 * A23); -- qd[4] = coeff * w * (A21 * A31 + A22 * A32 + A23 * A33); -- qd[5] = coeff * w * (A31 * A31 + A32 * A32 + A33 * A33); -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); - const CeedScalar ug0 = ug[i + Q * 0]; - const CeedScalar ug1 = ug[i + Q * 1]; - const CeedScalar ug2 = ug[i + Q * 2]; -@@ -504,50 +637,507 @@ CEED_QFUNCTION(f_apply_diff_mf_quad)(void *ctx, CeedInt Q, - vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; - } - break; -- case 33: -- for (CeedInt i = 0; i < Q; i++) -- { -- // J: 0 3 6 qd: 0 1 2 -- // 1 4 7 1 3 4 -- // 2 5 8 2 4 5 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] / (J11 * A11 + J21 * A12 + J31 * A13); -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { - CeedScalar qd[6]; -- const CeedScalar coeff = c[i]; -- qd[0] = coeff * w * (A11 * A11 + A12 * A12 + A13 * A13); -- qd[1] = coeff * w * (A11 * A21 + A12 * A22 + A13 * A23); -- qd[2] = coeff * w * (A11 * A31 + A12 * A32 + A13 * A33); -- qd[3] = coeff * w * (A21 * A21 + A22 * A22 + A23 * A23); -- qd[4] = coeff * w * (A21 * A31 + A22 * A32 + A23 * A33); -- qd[5] = coeff * w * (A31 * A31 + A32 * A32 + A33 * A33); -- for (CeedInt d = 0; d < 3; d++) -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) - { -- const CeedScalar ug0 = ug[i + Q * (d+3*0)]; -- const CeedScalar ug1 = ug[i + Q * (d+3*1)]; -- const CeedScalar ug2 = ug[i + Q * (d+3*2)]; -- vg[i + Q * (d+3*0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -- vg[i + Q * (d+3*1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -- vg[i + Q * (d+3*2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; - } - } - break; - } - return 0; - } -+ -+/// libCEED QFunction for applying a diffusion operator with a matrix constant -+/// coefficient -+CEED_QFUNCTION(f_apply_diff_mf_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *ug = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *vg = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) -+ { -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); -+ vg[i] = qd * ug[i]; -+ } -+ break; -+ case 212: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd * ug[i + Q * d]; -+ } -+ } -+ break; -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 222: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 323: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 331: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ const CeedScalar ug2 = ug[i + Q * 2]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a diffusion operator with a scalar -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_diff_mf_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] -+ // in[1] is coefficients with shape [ncomp=1, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T -+ const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vg = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) -+ { -+ case 111: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / J[i]; -+ vg[i] = qd * ug[i]; -+ } -+ break; -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], 1, &qd); -+ vg[i] = qd * ug[i]; -+ } -+ break; -+ case 212: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], 1, &qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd * ug[i + Q * d]; -+ } -+ } -+ break; -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 222: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 323: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 331: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ const CeedScalar ug2 = ug[i + Q * 2]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a diffusion operator with a vector -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_diff_mf_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T -+ const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vg = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) -+ { -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); -+ vg[i] = qd * ug[i]; -+ } -+ break; -+ case 212: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd * ug[i + Q * d]; -+ } -+ } -+ break; -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 222: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 323: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 331: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ const CeedScalar ug2 = ug[i + Q * 2]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a diffusion operator with a matrix -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_diff_mf_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DiffusionContext *bc = (DiffusionContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=vdim, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T -+ const CeedScalar *ug = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vg = out[0]; -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) -+ { -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); -+ vg[i] = qd * ug[i]; -+ } -+ break; -+ case 212: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ vg[i + Q * d] = qd * ug[i + Q * d]; -+ } -+ } -+ break; -+ case 221: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 222: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 2; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 2 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 2 * 1)]; -+ vg[i + Q * (d + 2 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 2 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 321: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ break; -+ case 323: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[2] * ug1; -+ } -+ } -+ break; -+ case 331: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar ug0 = ug[i + Q * 0]; -+ const CeedScalar ug1 = ug[i + Q * 1]; -+ const CeedScalar ug2 = ug[i + Q * 2]; -+ vg[i + Q * 0] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * 1] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * 2] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ break; -+ case 333: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ CeedPragmaSIMD for (CeedInt d = 0; d < 3; d++) -+ { -+ const CeedScalar ug0 = ug[i + Q * (d + 3 * 0)]; -+ const CeedScalar ug1 = ug[i + Q * (d + 3 * 1)]; -+ const CeedScalar ug2 = ug[i + Q * (d + 3 * 2)]; -+ vg[i + Q * (d + 3 * 0)] = qd[0] * ug0 + qd[1] * ug1 + qd[2] * ug2; -+ vg[i + Q * (d + 3 * 1)] = qd[1] * ug0 + qd[3] * ug1 + qd[4] * ug2; -+ vg[i + Q * (d + 3 * 2)] = qd[2] * ug0 + qd[4] * ug1 + qd[5] * ug2; -+ } -+ } -+ break; -+ } -+ return 0; -+} -+ -+#endif // MFEM_LIBCEED_DIFF_QF_H -diff --git a/fem/ceed/integrators/divdiv/divdiv.cpp b/fem/ceed/integrators/divdiv/divdiv.cpp -new file mode 100644 -index 000000000..f574c0f4e ---- /dev/null -+++ b/fem/ceed/integrators/divdiv/divdiv.cpp -@@ -0,0 +1,124 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "divdiv.hpp" -+ -+#include "../../../../config/config.hpp" -+#ifdef MFEM_USE_CEED -+#include "divdiv_qf.h" -+#endif -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+#ifdef MFEM_USE_CEED -+struct DivDivOperatorInfo : public OperatorInfo -+{ -+ DivDivContext ctx = {0}; -+ DivDivOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::Coefficient *Q, -+ bool use_bdr = false, bool use_mf = false) -+ { -+ MFEM_VERIFY(fes.GetVDim() == 1, -+ "libCEED interface for vector FE does not support vdim > 1!"); -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_divdiv"; -+ apply_qf = &f_apply_divdiv; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_divdiv_const"; -+ build_qf = &f_build_divdiv_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_divdiv_mf_const"; -+ apply_qf = &f_apply_divdiv_mf_const; -+ } -+ } -+ else if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(Q)) -+ { -+ ctx.coeff = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_divdiv_const"; -+ build_qf = &f_build_divdiv_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_divdiv_mf_const"; -+ apply_qf = &f_apply_divdiv_mf_const; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_divdiv_quad"; -+ build_qf = &f_build_divdiv_quad; -+ } -+ else -+ { -+ apply_func = ":f_apply_divdiv_mf_quad"; -+ apply_qf = &f_apply_divdiv_mf_quad; -+ } -+ } -+ header = "/integrators/divdiv/divdiv_qf.h"; -+ trial_op = EvalMode::Div; -+ test_op = EvalMode::Div; -+ qdatasize = 1; -+ } -+}; -+#endif -+ -+PADivDivIntegrator::PADivDivIntegrator(const mfem::DivDivIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ DivDivOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+MFDivDivIntegrator::MFDivDivIntegrator(const mfem::DivDivIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ DivDivOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+} // namespace ceed -+ -+} // namespace mfem -diff --git a/fem/ceed/integrators/divdiv/divdiv.hpp b/fem/ceed/integrators/divdiv/divdiv.hpp -new file mode 100644 -index 000000000..1e5bf163e ---- /dev/null -+++ b/fem/ceed/integrators/divdiv/divdiv.hpp -@@ -0,0 +1,49 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_DIVDIV_HPP -+#define MFEM_LIBCEED_DIVDIV_HPP -+ -+#include "../../interface/integrator.hpp" -+#include "../../interface/mixed_operator.hpp" -+#include "../../../fespace.hpp" -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+/// Represent a DivDivIntegrator with AssemblyLevel::Partial using libCEED. -+class PADivDivIntegrator : public MixedOperator -+{ -+public: -+ PADivDivIntegrator(const mfem::DivDivIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); -+}; -+ -+/// Represent a DivDivIntegrator with AssemblyLevel::None using libCEED. -+class MFDivDivIntegrator : public MixedOperator -+{ -+public: -+ MFDivDivIntegrator(const mfem::DivDivIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); -+}; -+ -+} -+ -+} -+ -+#endif // MFEM_LIBCEED_DIVDIV_HPP -diff --git a/fem/ceed/integrators/divdiv/divdiv_qf.h b/fem/ceed/integrators/divdiv/divdiv_qf.h -new file mode 100644 -index 000000000..853aa0011 ---- /dev/null -+++ b/fem/ceed/integrators/divdiv/divdiv_qf.h -@@ -0,0 +1,250 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_DIVDIV_QF_H -+#define MFEM_LIBCEED_DIVDIV_QF_H -+ -+#include "../util/util_qf.h" -+ -+struct DivDivContext -+{ -+ CeedInt dim, space_dim; -+ CeedScalar coeff; -+}; -+ -+/// libCEED QFunction for building quadrature data for a div-div operator -+/// with a constant coefficient -+CEED_QFUNCTION(f_build_divdiv_const)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DivDivContext *bc = (DivDivContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute and store qw * c / det(J) -+ const CeedScalar coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * coeff / J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * coeff / DetJ21(J + i, Q); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * coeff / DetJ22(J + i, Q); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * coeff / DetJ32(J + i, Q); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * coeff / DetJ33(J + i, Q); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for a div-div operator -+/// with a coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_divdiv_quad)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DivDivContext *bc = (DivDivContext *)ctx; -+ // in[0] is coefficients, size (Q) -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute and store qw * c / det(J) -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / DetJ21(J + i, Q); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / DetJ22(J + i, Q); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / DetJ32(J + i, Q); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / DetJ33(J + i, Q); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a div-div operator -+CEED_QFUNCTION(f_apply_divdiv)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ // in[0], out[0] have shape [ncomp=1, Q] -+ const CeedScalar *ud = in[0], *qd = in[1]; -+ CeedScalar *vd = out[0]; -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ vd[i] = qd[i] * ud[i]; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a div-div operator with a constant -+/// coefficient -+CEED_QFUNCTION(f_apply_divdiv_mf_const)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DivDivContext *bc = (DivDivContext *)ctx; -+ // in[0], out[0] have shape [ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw * c / det(J) -+ const CeedScalar coeff = bc->coeff; -+ const CeedScalar *ud = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *vd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * coeff / J[i]; -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * coeff / DetJ21(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * coeff / DetJ22(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * coeff / DetJ32(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * coeff / DetJ33(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a div-div operator with a coefficient -+/// evaluated at quadrature points -+CEED_QFUNCTION(f_apply_divdiv_mf_quad)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ DivDivContext *bc = (DivDivContext *)ctx; -+ // in[0], out[0] have shape [ncomp=1, Q] -+ // in[0] is coefficients, size (Q) -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw * c / det(J) -+ const CeedScalar *ud = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *vd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / J[i]; -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / DetJ21(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / DetJ22(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / DetJ32(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / DetJ33(J + i, Q); -+ vd[i] = qd * ud[i]; -+ } -+ break; -+ } -+ return 0; -+} -+ -+#endif // MFEM_LIBCEED_DIVDIV_QF_H -diff --git a/fem/ceed/integrators/interp/interp.cpp b/fem/ceed/integrators/interp/interp.cpp -new file mode 100644 -index 000000000..6aab4f47f ---- /dev/null -+++ b/fem/ceed/integrators/interp/interp.cpp -@@ -0,0 +1,58 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "interp.hpp" -+ -+#include "../../../../config/config.hpp" -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+#ifdef MFEM_USE_CEED -+struct DiscreteInterpolatorOperatorInfo : public OperatorInfo -+{ -+ DiscreteInterpolatorOperatorInfo() -+ { -+ // Discrete interpolators use a built-in QFunction -+ header = ""; -+ header = ""; -+ build_func = ""; -+ build_qf = nullptr; -+ apply_func = ""; -+ apply_qf = nullptr; -+ apply_func_mf = ""; -+ apply_qf_mf = nullptr; -+ trial_op = EvalMode::Interp; -+ test_op = EvalMode::None; -+ qdatasize = 0; -+ } -+}; -+#endif -+ -+PADiscreteInterpolator::PADiscreteInterpolator( -+ const mfem::DiscreteInterpolator &interp, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes) -+{ -+#ifdef MFEM_USE_CEED -+ DiscreteInterpolatorOperatorInfo info; -+ Assemble(interp, info, trial_fes, test_fes, (mfem::Coefficient *)nullptr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+} // namespace ceed -+ -+} // namespace mfem -diff --git a/fem/ceed/solvers/full-assembly.hpp b/fem/ceed/integrators/interp/interp.hpp -similarity index 50% -rename from fem/ceed/solvers/full-assembly.hpp -rename to fem/ceed/integrators/interp/interp.hpp -index ed338f725..a923df572 100644 ---- a/fem/ceed/solvers/full-assembly.hpp -+++ b/fem/ceed/integrators/interp/interp.hpp -@@ -9,12 +9,12 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#ifndef MFEM_CEED_ASSEMBLE_HPP --#define MFEM_CEED_ASSEMBLE_HPP -+#ifndef MFEM_LIBCEED_INTERP_HPP -+#define MFEM_LIBCEED_INTERP_HPP - --#include "../interface/ceed.hpp" -- --#ifdef MFEM_USE_CEED -+#include "../../interface/integrator.hpp" -+#include "../../interface/mixed_operator.hpp" -+#include "../../../fespace.hpp" - - namespace mfem - { -@@ -22,18 +22,19 @@ namespace mfem - namespace ceed - { - --/** @brief Assembles a CeedOperator as an mfem::SparseMatrix -- -- In parallel, this assembles independently on each processor, that is, it -- assembles at the L-vector level. The assembly procedure is always performed -- on the host, but this works also for operators stored on device by copying -- memory. */ --int CeedOperatorFullAssemble(CeedOperator op, SparseMatrix **mat); -- --} // namespace ceed -+/** Represent DiscreteInterpolator classes with AssemblyLevel::Partial -+ using libCEED. */ -+class PADiscreteInterpolator : public MixedOperator -+{ -+public: -+ PADiscreteInterpolator( -+ const mfem::DiscreteInterpolator &interp, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes); -+}; - --} // namespace mfem -+} - --#endif -+} - --#endif -+#endif // MFEM_LIBCEED_INTERP_HPP -diff --git a/fem/ceed/integrators/mass/mass.cpp b/fem/ceed/integrators/mass/mass.cpp -index dfcc9a8ce..6a8d67ddc 100644 ---- a/fem/ceed/integrators/mass/mass.cpp -+++ b/fem/ceed/integrators/mass/mass.cpp -@@ -25,20 +25,66 @@ namespace ceed - #ifdef MFEM_USE_CEED - struct MassOperatorInfo : public OperatorInfo - { -- MassContext ctx; -- MassOperatorInfo() -+ MassContext ctx = {0}; -+ MassOperatorInfo(const mfem::FiniteElementSpace &fes, mfem::Coefficient *Q, -+ bool use_bdr = false, bool use_mf = false) - { -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ ctx.vdim = fes.GetVDim(); -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_mass"; -+ apply_qf = &f_apply_mass; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_mass_const"; -+ build_qf = &f_build_mass_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_mass_mf_const"; -+ apply_qf = &f_apply_mass_mf_const; -+ } -+ } -+ else if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(Q)) -+ { -+ ctx.coeff = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_mass_const"; -+ build_qf = &f_build_mass_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_mass_mf_const"; -+ apply_qf = &f_apply_mass_mf_const; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_mass_quad"; -+ build_qf = &f_build_mass_quad; -+ } -+ else -+ { -+ apply_func = ":f_apply_mass_mf_quad"; -+ apply_qf = &f_apply_mass_mf_quad; -+ } -+ } - header = "/integrators/mass/mass_qf.h"; -- build_func_const = ":f_build_mass_const"; -- build_qf_const = &f_build_mass_const; -- build_func_quad = ":f_build_mass_quad"; -- build_qf_quad = &f_build_mass_quad; -- apply_func = ":f_apply_mass"; -- apply_qf = &f_apply_mass; -- apply_func_mf_const = ":f_apply_mass_mf_const"; -- apply_qf_mf_const = &f_apply_mass_mf_const; -- apply_func_mf_quad = ":f_apply_mass_mf_quad"; -- apply_qf_mf_quad = &f_apply_mass_mf_quad; - trial_op = EvalMode::Interp; - test_op = EvalMode::Interp; - qdatasize = 1; -@@ -46,75 +92,53 @@ struct MassOperatorInfo : public OperatorInfo - }; - #endif - --PAMassIntegrator::PAMassIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *Q) -- : PAIntegrator() -+PAMassIntegrator::PAMassIntegrator(const mfem::MassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- MassOperatorInfo info; -- Assemble(info, fes, irm, Q); -+ MassOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MixedPAMassIntegrator::MixedPAMassIntegrator(const MassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) -+PAMassIntegrator::PAMassIntegrator(const mfem::VectorMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- MassOperatorInfo info; -- Assemble(integ, info, fes, Q); -+ MassOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MixedPAMassIntegrator::MixedPAMassIntegrator(const VectorMassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) -+MFMassIntegrator::MFMassIntegrator(const mfem::MassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- MassOperatorInfo info; -- Assemble(integ, info, fes, Q); -+ MassOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MFMassIntegrator::MFMassIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *Q) -- : MFIntegrator() -+MFMassIntegrator::MFMassIntegrator(const mfem::VectorMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- MassOperatorInfo info; -- Assemble(info, fes, irm, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -- --MixedMFMassIntegrator::MixedMFMassIntegrator(const MassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) --{ --#ifdef MFEM_USE_CEED -- MassOperatorInfo info; -- Assemble(integ, info, fes, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -- --MixedMFMassIntegrator::MixedMFMassIntegrator(const VectorMassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) --{ --#ifdef MFEM_USE_CEED -- MassOperatorInfo info; -- Assemble(integ, info, fes, Q); -+ MassOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif -diff --git a/fem/ceed/integrators/mass/mass.hpp b/fem/ceed/integrators/mass/mass.hpp -index 696f8c3dc..4125fc6ed 100644 ---- a/fem/ceed/integrators/mass/mass.hpp -+++ b/fem/ceed/integrators/mass/mass.hpp -@@ -13,7 +13,7 @@ - #define MFEM_LIBCEED_MASS_HPP - - #include "../../interface/integrator.hpp" --#include "../../interface/mixed_integrator.hpp" -+#include "../../interface/mixed_operator.hpp" - #include "../../../fespace.hpp" - - namespace mfem -@@ -23,45 +23,33 @@ namespace ceed - { - - /// Represent a MassIntegrator with AssemblyLevel::Partial using libCEED. --class PAMassIntegrator : public PAIntegrator -+class PAMassIntegrator : public MixedOperator - { - public: -- PAMassIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- mfem::Coefficient *Q); --}; -- --class MixedPAMassIntegrator : public MixedIntegrator --{ --public: -- MixedPAMassIntegrator(const MassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -- -- MixedPAMassIntegrator(const VectorMassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -+ PAMassIntegrator(const mfem::MassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); -+ -+ PAMassIntegrator(const mfem::VectorMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); - }; - - /// Represent a MassIntegrator with AssemblyLevel::None using libCEED. --class MFMassIntegrator : public MFIntegrator -+class MFMassIntegrator : public MixedOperator - { - public: -- MFMassIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- mfem::Coefficient *Q); --}; -- --class MixedMFMassIntegrator : public MixedIntegrator --{ --public: -- MixedMFMassIntegrator(const MassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -- -- MixedMFMassIntegrator(const VectorMassIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -+ MFMassIntegrator(const mfem::MassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); -+ -+ MFMassIntegrator(const mfem::VectorMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); - }; - - } -diff --git a/fem/ceed/integrators/mass/mass_qf.h b/fem/ceed/integrators/mass/mass_qf.h -index 85002ae04..3cdd3b5e3 100644 ---- a/fem/ceed/integrators/mass/mass_qf.h -+++ b/fem/ceed/integrators/mass/mass_qf.h -@@ -9,128 +9,151 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - -+#ifndef MFEM_LIBCEED_MASS_QF_H -+#define MFEM_LIBCEED_MASS_QF_H - --/// A structure used to pass additional data to f_build_diff and f_apply_diff --struct MassContext { CeedInt dim, space_dim, vdim; CeedScalar coeff; }; -+#include "../util/util_qf.h" - --/// libCEED Q-function for building quadrature data for a mass operator with a --/// constant coefficient -+struct MassContext -+{ -+ CeedInt dim, space_dim, vdim; -+ CeedScalar coeff; -+}; -+ -+/// libCEED QFunction for building quadrature data for a mass operator -+/// with a constant coefficient - CEED_QFUNCTION(f_build_mass_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- // in[0] is Jacobians with shape [dim, nc=dim, Q] -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute and store qw * c * det(J) - MassContext *bc = (MassContext *)ctx; - const CeedScalar coeff = bc->coeff; - const CeedScalar *J = in[0], *qw = in[1]; -- CeedScalar *rho = out[0]; -- switch (bc->dim + 10*bc->space_dim) -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i=0; idim + 10*bc->space_dim) -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i=0; ivdim) - { - case 1: -- for (CeedInt i=0; icoeff; - const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; - CeedScalar *v = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) - { -- case 11: -- for (CeedInt i = 0; i < Q; i++) -+ case 111: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar rho = coeff * qw[i] / J[i]; -- v[i] = rho * u[i]; -+ const CeedScalar qd = qw[i] * coeff * J[i]; -+ v[i] = qd * u[i]; - } - break; -- case 21: -- for (CeedInt i = 0; i < Q; i++) -+ case 211: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar rho = coeff * (J[i+Q*0]*J[i+Q*3] - J[i+Q*1]*J[i+Q*2]) * qw[i]; -- v[i] = rho * u[i]; -+ const CeedScalar qd = qw[i] * coeff * DetJ21(J + i, Q); -+ v[i] = qd * u[i]; - } - break; -- case 22: -- for (CeedInt i=0; idim + bc->vdim) -+ switch (100 * bc->space_dim + 10 * bc->dim + bc->vdim) - { -- case 11: -- for (CeedInt i=0; i -+ MixedVectorCurlOperatorInfoBase(const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, bool use_bdr = false, -+ bool use_mf = false) -+ { -+ // Reuse H(div) quadrature functions for CurlCurlIntegrator -+ MFEM_VERIFY(trial_fes.GetVDim() == 1 && test_fes.GetVDim() == 1, -+ "libCEED interface for vector FE does not support vdim > 1!"); -+ ctx.dim = trial_fes.GetMesh()->Dimension() - use_bdr; -+ MFEM_VERIFY(ctx.dim == 3, -+ "MixedVectorCurlIntegrator and MixedVectorWeakCurlIntegrator " -+ "require dim == 3!"); -+ ctx.space_dim = trial_fes.GetMesh()->SpaceDimension(); -+ ctx.curl_dim = (ctx.dim < 3) ? 1 : ctx.dim; -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_curlcurl"; -+ apply_qf = &f_apply_curlcurl; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff[0] = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_scalar"; -+ build_qf = &f_build_curlcurl_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_scalar"; -+ apply_qf = &f_apply_curlcurl_mf_const_scalar; -+ } -+ } -+ else -+ { -+ InitCoefficient(*Q, use_mf); -+ } -+ header = "/integrators/curlcurl/curlcurl_qf.h"; -+ qdatasize = (ctx.curl_dim * (ctx.curl_dim + 1)) / 2; -+ } -+ void InitCoefficient(mfem::Coefficient &Q, bool use_mf) -+ { -+ if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(&Q)) -+ { -+ ctx.coeff[0] = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_scalar"; -+ build_qf = &f_build_curlcurl_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_scalar"; -+ apply_qf = &f_apply_curlcurl_mf_const_scalar; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_quad_scalar"; -+ build_qf = &f_build_curlcurl_quad_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_quad_scalar"; -+ apply_qf = &f_apply_curlcurl_mf_quad_scalar; -+ } -+ } -+ } -+ void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) -+ { -+ if (mfem::VectorConstantCoefficient *const_coeff = -+ dynamic_cast(&VQ)) -+ { -+ const int vdim = VQ.GetVDim(); -+ MFEM_VERIFY(vdim <= LIBCEED_CURLCURL_COEFF_COMP_MAX, -+ "VectorCoefficient dimension exceeds context storage!"); -+ const mfem::Vector &val = const_coeff->GetVec(); -+ for (int i = 0; i < vdim; i++) -+ { -+ ctx.coeff[i] = val[i]; -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_vector"; -+ build_qf = &f_build_curlcurl_const_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_vector"; -+ apply_qf = &f_apply_curlcurl_mf_const_vector; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_quad_vector"; -+ build_qf = &f_build_curlcurl_quad_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_quad_vector"; -+ apply_qf = &f_apply_curlcurl_mf_quad_vector; -+ } -+ } -+ } -+ void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) -+ { -+ // Assumes matrix coefficient is symmetric -+ if (mfem::MatrixConstantCoefficient *const_coeff = -+ dynamic_cast(&MQ)) -+ { -+ const int vdim = MQ.GetVDim(); -+ MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_CURLCURL_COEFF_COMP_MAX, -+ "MatrixCoefficient dimensions exceed context storage!"); -+ const mfem::DenseMatrix &val = const_coeff->GetMatrix(); -+ for (int j = 0; j < vdim; j++) -+ { -+ for (int i = j; i < vdim; i++) -+ { -+ const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; -+ ctx.coeff[idx] = val(i, j); -+ } -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_const_matrix"; -+ build_qf = &f_build_curlcurl_const_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_const_matrix"; -+ apply_qf = &f_apply_curlcurl_mf_const_matrix; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_curlcurl_quad_matrix"; -+ build_qf = &f_build_curlcurl_quad_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_curlcurl_mf_quad_matrix"; -+ apply_qf = &f_apply_curlcurl_mf_quad_matrix; -+ } -+ } -+ } -+}; -+ -+struct MixedVectorCurlOperatorInfo : public MixedVectorCurlOperatorInfoBase -+{ -+ template -+ MixedVectorCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, bool use_bdr = false, -+ bool use_mf = false) -+ : MixedVectorCurlOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) -+ { -+ MFEM_VERIFY( -+ trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV && -+ test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV, -+ "libCEED interface for MixedVectorCurlIntegrator requires " -+ "H(curl) domain and H(div) range FE spaces!"); -+ trial_op = EvalMode::Curl; -+ test_op = EvalMode::Interp; -+ } -+}; -+ -+struct MixedVectorWeakCurlOperatorInfo : public MixedVectorCurlOperatorInfoBase -+{ -+ template -+ MixedVectorWeakCurlOperatorInfo(const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, bool use_bdr = false, -+ bool use_mf = false) -+ : MixedVectorCurlOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) -+ { -+ MFEM_VERIFY( -+ trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_DIV && -+ test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_DIV, -+ "libCEED interface for MixedVectorWeakCurlIntegrator requires " -+ "H(div) domain and H(curl) range FE spaces!"); -+ trial_op = EvalMode::Interp; -+ test_op = EvalMode::Curl; -+ } -+}; -+#endif -+ -+template -+PAMixedVectorCurlIntegrator::PAMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr); -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+MFMixedVectorCurlIntegrator::MFMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+PAMixedVectorWeakCurlIntegrator::PAMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorWeakCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr); -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+MFMixedVectorWeakCurlIntegrator::MFMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorWeakCurlOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+// @cond DOXYGEN_SKIP -+ -+template PAMixedVectorCurlIntegrator::PAMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); -+template PAMixedVectorCurlIntegrator::PAMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::VectorCoefficient *, const bool); -+template PAMixedVectorCurlIntegrator::PAMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); -+ -+template MFMixedVectorCurlIntegrator::MFMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); -+template MFMixedVectorCurlIntegrator::MFMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::VectorCoefficient *, const bool); -+template MFMixedVectorCurlIntegrator::MFMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); -+ -+template PAMixedVectorWeakCurlIntegrator::PAMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); -+template PAMixedVectorWeakCurlIntegrator::PAMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::VectorCoefficient *, const bool); -+template PAMixedVectorWeakCurlIntegrator::PAMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); -+ -+template MFMixedVectorWeakCurlIntegrator::MFMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); -+template MFMixedVectorWeakCurlIntegrator::MFMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::VectorCoefficient *, const bool); -+template MFMixedVectorWeakCurlIntegrator::MFMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); -+ -+// @endcond -+ -+} // namespace ceed -+ -+} // namespace mfem -diff --git a/fem/ceed/integrators/mixedveccurl/mixedveccurl.hpp b/fem/ceed/integrators/mixedveccurl/mixedveccurl.hpp -new file mode 100644 -index 000000000..3bae19e0c ---- /dev/null -+++ b/fem/ceed/integrators/mixedveccurl/mixedveccurl.hpp -@@ -0,0 +1,85 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_MIXEDVECCURL_HPP -+#define MFEM_LIBCEED_MIXEDVECCURL_HPP -+ -+#include "../../interface/integrator.hpp" -+#include "../../interface/mixed_operator.hpp" -+#include "../../../fespace.hpp" -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+/** Represent a MixedVectorCurlIntegrator with AssemblyLevel::Partial -+ using libCEED. */ -+class PAMixedVectorCurlIntegrator : public MixedOperator -+{ -+public: -+ template -+ PAMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/** Represent a MixedVectorCurlIntegrator with AssemblyLevel::None -+ using libCEED. */ -+class MFMixedVectorCurlIntegrator : public MixedOperator -+{ -+public: -+ template -+ MFMixedVectorCurlIntegrator( -+ const mfem::MixedVectorCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/** Represent a MixedVectorWeakCurlIntegrator with AssemblyLevel::Partial -+ using libCEED. */ -+class PAMixedVectorWeakCurlIntegrator : public MixedOperator -+{ -+public: -+ template -+ PAMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/** Represent a MixedVectorWeakCurlIntegrator with AssemblyLevel::None -+ using libCEED. */ -+class MFMixedVectorWeakCurlIntegrator : public MixedOperator -+{ -+public: -+ template -+ MFMixedVectorWeakCurlIntegrator( -+ const mfem::MixedVectorWeakCurlIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+} -+ -+} -+ -+#endif // MFEM_LIBCEED_MIXEDVECCURL_HPP -diff --git a/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp -new file mode 100644 -index 000000000..f8f708bab ---- /dev/null -+++ b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.cpp -@@ -0,0 +1,396 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "mixedvecgrad.hpp" -+ -+#include "../../../../config/config.hpp" -+#ifdef MFEM_USE_CEED -+#include "../diffusion/diffusion_qf.h" -+#endif -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+#ifdef MFEM_USE_CEED -+struct MixedVectorGradientOperatorInfoBase : public OperatorInfo -+{ -+ DiffusionContext ctx = {0}; -+ template -+ MixedVectorGradientOperatorInfoBase(const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, bool use_bdr = false, -+ bool use_mf = false) -+ { -+ // Reuse H(curl) quadrature functions for DiffusionIntegrator -+ MFEM_VERIFY(trial_fes.GetVDim() == 1 && test_fes.GetVDim() == 1, -+ "libCEED interface for vector FE does not support vdim > 1!"); -+ ctx.dim = trial_fes.GetMesh()->Dimension() - use_bdr; -+ MFEM_VERIFY(ctx.dim == 2 || ctx.dim == 3, -+ "MixedVectorGradientIntegrator and MixedVectorWeakDivergenceIntegrator " -+ "require dim == 2 or dim == 3!"); -+ ctx.space_dim = trial_fes.GetMesh()->SpaceDimension(); -+ ctx.vdim = 1; -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_diff"; -+ apply_qf = &f_apply_diff; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff[0] = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_scalar"; -+ build_qf = &f_build_diff_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_scalar"; -+ apply_qf = &f_apply_diff_mf_const_scalar; -+ } -+ } -+ else -+ { -+ InitCoefficient(*Q, use_mf); -+ } -+ header = "/integrators/diffusion/diffusion_qf.h"; -+ qdatasize = (ctx.dim * (ctx.dim + 1)) / 2; -+ } -+ void InitCoefficient(mfem::Coefficient &Q, bool use_mf) -+ { -+ if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(&Q)) -+ { -+ ctx.coeff[0] = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_scalar"; -+ build_qf = &f_build_diff_const_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_scalar"; -+ apply_qf = &f_apply_diff_mf_const_scalar; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_quad_scalar"; -+ build_qf = &f_build_diff_quad_scalar; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_quad_scalar"; -+ apply_qf = &f_apply_diff_mf_quad_scalar; -+ } -+ } -+ } -+ void InitCoefficient(mfem::VectorCoefficient &VQ, bool use_mf) -+ { -+ if (mfem::VectorConstantCoefficient *const_coeff = -+ dynamic_cast(&VQ)) -+ { -+ const int vdim = VQ.GetVDim(); -+ MFEM_VERIFY(vdim <= LIBCEED_DIFF_COEFF_COMP_MAX, -+ "VectorCoefficient dimension exceeds context storage!"); -+ const mfem::Vector &val = const_coeff->GetVec(); -+ for (int i = 0; i < vdim; i++) -+ { -+ ctx.coeff[i] = val[i]; -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_vector"; -+ build_qf = &f_build_diff_const_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_vector"; -+ apply_qf = &f_apply_diff_mf_const_vector; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_quad_vector"; -+ build_qf = &f_build_diff_quad_vector; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_quad_vector"; -+ apply_qf = &f_apply_diff_mf_quad_vector; -+ } -+ } -+ } -+ void InitCoefficient(mfem::MatrixCoefficient &MQ, bool use_mf) -+ { -+ // Assumes matrix coefficient is symmetric -+ if (mfem::MatrixConstantCoefficient *const_coeff = -+ dynamic_cast(&MQ)) -+ { -+ const int vdim = MQ.GetVDim(); -+ MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_DIFF_COEFF_COMP_MAX, -+ "MatrixCoefficient dimensions exceed context storage!"); -+ const mfem::DenseMatrix &val = const_coeff->GetMatrix(); -+ for (int j = 0; j < vdim; j++) -+ { -+ for (int i = j; i < vdim; i++) -+ { -+ const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; -+ ctx.coeff[idx] = val(i, j); -+ } -+ } -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_const_matrix"; -+ build_qf = &f_build_diff_const_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_const_matrix"; -+ apply_qf = &f_apply_diff_mf_const_matrix; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_diff_quad_matrix"; -+ build_qf = &f_build_diff_quad_matrix; -+ } -+ else -+ { -+ apply_func = ":f_apply_diff_mf_quad_matrix"; -+ apply_qf = &f_apply_diff_mf_quad_matrix; -+ } -+ } -+ } -+}; -+ -+struct MixedVectorGradientOperatorInfo : -+ public MixedVectorGradientOperatorInfoBase -+{ -+ template -+ MixedVectorGradientOperatorInfo(const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, bool use_bdr = false, -+ bool use_mf = false) -+ : MixedVectorGradientOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) -+ { -+ MFEM_VERIFY( -+ (trial_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL && -+ test_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL), -+ "libCEED interface for MixedVectorGradientIntegrator requires " -+ "H^1 domain and H(curl) range FE spaces!"); -+ trial_op = EvalMode::Grad; -+ test_op = EvalMode::Interp; -+ } -+}; -+ -+struct MixedVectorWeakDivergenceOperatorInfo : -+ public MixedVectorGradientOperatorInfoBase -+{ -+ template -+ MixedVectorWeakDivergenceOperatorInfo(const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, bool use_bdr = false, -+ bool use_mf = false) -+ : MixedVectorGradientOperatorInfoBase(trial_fes, test_fes, Q, use_bdr, use_mf) -+ { -+ MFEM_VERIFY( -+ (trial_fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL && -+ test_fes.FEColl()->GetDerivMapType(ctx.dim) == mfem::FiniteElement::H_CURL), -+ "libCEED interface for MixedVectorWeakDivergenceIntegrator requires " -+ "H(curl) domain and H^1 range FE spaces!"); -+ trial_op = EvalMode::Interp; -+ test_op = EvalMode::Grad; -+ for (int i = 0; i < LIBCEED_DIFF_COEFF_COMP_MAX; i++) -+ { -+ ctx.coeff[i] *= -1.0; -+ } -+ } -+}; -+#endif -+ -+template -+PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr); -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorGradientOperatorInfo info(trial_fes, test_fes, Q, use_bdr, true); -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+namespace -+{ -+ -+#ifdef MFEM_USE_CEED -+mfem::Coefficient *NegativeCoeff(mfem::Coefficient &Q) -+{ -+ return (dynamic_cast(&Q) != nullptr) ? -+ nullptr : new mfem::ProductCoefficient(-1.0, Q); -+} -+ -+mfem::VectorCoefficient *NegativeCoeff(mfem::VectorCoefficient &Q) -+{ -+ return (dynamic_cast(&Q) != nullptr) ? -+ nullptr : new mfem::ScalarVectorProductCoefficient(-1.0, Q); -+} -+ -+mfem::MatrixCoefficient *NegativeCoeff(mfem::MatrixCoefficient &Q) -+{ -+ return (dynamic_cast(&Q) != nullptr) ? -+ nullptr : new mfem::ScalarMatrixProductCoefficient(-1.0, Q); -+} -+#endif -+ -+} // namespace -+ -+template -+PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorWeakDivergenceOperatorInfo info(trial_fes, test_fes, Q, use_bdr); -+ if (Q) -+ { -+ // Does not inherit ownership of old Q -+ auto *nQ = NegativeCoeff(*Q); -+ Assemble(integ, info, trial_fes, test_fes, nQ, use_bdr); -+ delete nQ; -+ } -+ else -+ { -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr); -+ } -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ MixedVectorWeakDivergenceOperatorInfo info(trial_fes, test_fes, Q, use_bdr, -+ true); -+ if (Q) -+ { -+ // Does not inherit ownership of old Q -+ auto *nQ = NegativeCoeff(*Q); -+ Assemble(integ, info, trial_fes, test_fes, nQ, use_bdr, true); -+ delete nQ; -+ } -+ else -+ { -+ Assemble(integ, info, trial_fes, test_fes, Q, use_bdr, true); -+ } -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+// @cond DOXYGEN_SKIP -+ -+template PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); -+template PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::VectorCoefficient *, const bool); -+template PAMixedVectorGradientIntegrator::PAMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); -+ -+template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &, -+ const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &, -+ const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template PAMixedVectorWeakDivergenceIntegrator::PAMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &, -+ const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::Coefficient *, const bool); -+template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::VectorCoefficient *, const bool); -+template MFMixedVectorGradientIntegrator::MFMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &, const mfem::FiniteElementSpace &, -+ const mfem::FiniteElementSpace &, mfem::MatrixCoefficient *, const bool); -+ -+template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &, -+ const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &, -+ const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template MFMixedVectorWeakDivergenceIntegrator::MFMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &, -+ const mfem::FiniteElementSpace &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+// @endcond -+ -+} // namespace ceed -+ -+} // namespace mfem -diff --git a/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.hpp b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.hpp -new file mode 100644 -index 000000000..c4220ea0f ---- /dev/null -+++ b/fem/ceed/integrators/mixedvecgrad/mixedvecgrad.hpp -@@ -0,0 +1,85 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_MIXEDVECGRAD_HPP -+#define MFEM_LIBCEED_MIXEDVECGRAD_HPP -+ -+#include "../../interface/integrator.hpp" -+#include "../../interface/mixed_operator.hpp" -+#include "../../../fespace.hpp" -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+/** Represent a MixedVectorGradientIntegrator with AssemblyLevel::Partial -+ using libCEED. */ -+class PAMixedVectorGradientIntegrator : public MixedOperator -+{ -+public: -+ template -+ PAMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/** Represent a MixedVectorGradientIntegrator with AssemblyLevel::None -+ using libCEED. */ -+class MFMixedVectorGradientIntegrator : public MixedOperator -+{ -+public: -+ template -+ MFMixedVectorGradientIntegrator( -+ const mfem::MixedVectorGradientIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/** Represent a MixedVectorWeakDivergenceIntegrator with AssemblyLevel::Partial -+ using libCEED. */ -+class PAMixedVectorWeakDivergenceIntegrator : public MixedOperator -+{ -+public: -+ template -+ PAMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/** Represent a MixedVectorWeakDivergenceIntegrator with AssemblyLevel::None -+ using libCEED. */ -+class MFMixedVectorWeakDivergenceIntegrator : public MixedOperator -+{ -+public: -+ template -+ MFMixedVectorWeakDivergenceIntegrator( -+ const mfem::MixedVectorWeakDivergenceIntegrator &integ, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+} -+ -+} -+ -+#endif // MFEM_LIBCEED_MIXEDVECGRAD_HPP -diff --git a/fem/ceed/integrators/nlconvection/nlconvection.cpp b/fem/ceed/integrators/nlconvection/nlconvection.cpp -index ba4a274dc..c285051ee 100644 ---- a/fem/ceed/integrators/nlconvection/nlconvection.cpp -+++ b/fem/ceed/integrators/nlconvection/nlconvection.cpp -@@ -25,76 +25,98 @@ namespace ceed - #ifdef MFEM_USE_CEED - struct NLConvectionOperatorInfo : public OperatorInfo - { -- NLConvectionContext ctx; -- NLConvectionOperatorInfo(int dim) -+ NLConvectionContext ctx = {0}; -+ NLConvectionOperatorInfo(const mfem::FiniteElementSpace &fes, -+ mfem::Coefficient *Q, bool use_bdr = false, -+ bool use_mf = false) - { -+ MFEM_VERIFY(fes.GetVDim() == fes.GetMesh()->SpaceDimension(), -+ "Missing coefficient in ceed::NLConvectionOperatorInfo!"); -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_conv"; -+ apply_qf = &f_apply_conv; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff = 1.0; -+ if (!use_mf) -+ { -+ build_func = ":f_build_conv_const"; -+ build_qf = &f_build_conv_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_conv_mf_const"; -+ apply_qf = &f_apply_conv_mf_const; -+ } -+ } -+ else if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(Q)) -+ { -+ ctx.coeff = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = ":f_build_conv_const"; -+ build_qf = &f_build_conv_const; -+ } -+ else -+ { -+ apply_func = ":f_apply_conv_mf_const"; -+ apply_qf = &f_apply_conv_mf_const; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = ":f_build_conv_quad"; -+ build_qf = &f_build_conv_quad; -+ } -+ else -+ { -+ apply_func = ":f_apply_conv_mf_quad"; -+ apply_qf = &f_apply_conv_mf_quad; -+ } -+ } - header = "/integrators/nlconvection/nlconvection_qf.h"; -- build_func_const = ":f_build_conv_const"; -- build_qf_const = &f_build_conv_const; -- build_func_quad = ":f_build_conv_quad"; -- build_qf_quad = &f_build_conv_quad; -- apply_func = ":f_apply_conv"; -- apply_qf = &f_apply_conv; -- apply_func_mf_const = ":f_apply_conv_mf_const"; -- apply_qf_mf_const = &f_apply_conv_mf_const; -- apply_func_mf_quad = ":f_apply_conv_mf_quad"; -- apply_qf_mf_quad = &f_apply_conv_mf_quad; - trial_op = EvalMode::InterpAndGrad; - test_op = EvalMode::Interp; -- qdatasize = dim * dim; -+ qdatasize = ctx.dim * ctx.space_dim; - } - }; - #endif - --PAVectorConvectionNLFIntegrator::PAVectorConvectionNLFIntegrator( -+PAVectorConvectionNLIntegrator::PAVectorConvectionNLIntegrator( -+ const mfem::VectorConvectionNLFIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *Q) -- : PAIntegrator() -+ mfem::Coefficient *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- NLConvectionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(info, fes, irm, Q); -+ NLConvectionOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif - } - --MixedPAVectorConvectionNLIntegrator::MixedPAVectorConvectionNLIntegrator( -- const VectorConvectionNLFIntegrator &integ, -+MFVectorConvectionNLIntegrator::MFVectorConvectionNLIntegrator( -+ const mfem::VectorConvectionNLFIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) -+ mfem::Coefficient *Q, -+ const bool use_bdr) - { - #ifdef MFEM_USE_CEED -- NLConvectionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(integ, info, fes, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -- --MFVectorConvectionNLFIntegrator::MFVectorConvectionNLFIntegrator( -- const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *Q) -- : MFIntegrator() --{ --#ifdef MFEM_USE_CEED -- NLConvectionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(info, fes, irm, Q); --#else -- MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); --#endif --} -- --MixedMFVectorConvectionNLIntegrator::MixedMFVectorConvectionNLIntegrator( -- const VectorConvectionNLFIntegrator &integ, -- const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q) --{ --#ifdef MFEM_USE_CEED -- NLConvectionOperatorInfo info(fes.GetMesh()->Dimension()); -- Assemble(integ, info, fes, Q); -+ NLConvectionOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif -diff --git a/fem/ceed/integrators/nlconvection/nlconvection.hpp b/fem/ceed/integrators/nlconvection/nlconvection.hpp -index 3efe88728..cf245322a 100644 ---- a/fem/ceed/integrators/nlconvection/nlconvection.hpp -+++ b/fem/ceed/integrators/nlconvection/nlconvection.hpp -@@ -13,7 +13,7 @@ - #define MFEM_LIBCEED_NLCONV_HPP - - #include "../../interface/integrator.hpp" --#include "../../interface/mixed_integrator.hpp" -+#include "../../interface/mixed_operator.hpp" - #include "../../../fespace.hpp" - - namespace mfem -@@ -24,40 +24,26 @@ namespace ceed - - /** Represent a VectorConvectionNLFIntegrator with AssemblyLevel::Partial - using libCEED. */ --class PAVectorConvectionNLFIntegrator : public PAIntegrator -+class PAVectorConvectionNLIntegrator : public MixedOperator - { - public: -- PAVectorConvectionNLFIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *coeff); --}; -- --class MixedPAVectorConvectionNLIntegrator : public MixedIntegrator --{ --public: -- MixedPAVectorConvectionNLIntegrator( -- const VectorConvectionNLFIntegrator &integ, -+ PAVectorConvectionNLIntegrator( -+ const mfem::VectorConvectionNLFIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); - }; - - /** Represent a VectorConvectionNLFIntegrator with AssemblyLevel::None - using libCEED. */ --class MFVectorConvectionNLFIntegrator : public MFIntegrator --{ --public: -- MFVectorConvectionNLFIntegrator(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &irm, -- mfem::Coefficient *coeff); --}; -- --class MixedMFVectorConvectionNLIntegrator : public MixedIntegrator -+class MFVectorConvectionNLIntegrator : public MixedOperator - { - public: -- MixedMFVectorConvectionNLIntegrator( -- const VectorConvectionNLFIntegrator &integ, -+ MFVectorConvectionNLIntegrator( -+ const mfem::VectorConvectionNLFIntegrator &integ, - const mfem::FiniteElementSpace &fes, -- mfem::Coefficient *Q); -+ mfem::Coefficient *Q, -+ const bool use_bdr = false); - }; - - } -diff --git a/fem/ceed/integrators/nlconvection/nlconvection_qf.h b/fem/ceed/integrators/nlconvection/nlconvection_qf.h -index ef0d41327..ee1782784 100644 ---- a/fem/ceed/integrators/nlconvection/nlconvection_qf.h -+++ b/fem/ceed/integrators/nlconvection/nlconvection_qf.h -@@ -9,186 +9,155 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --/// A structure used to pass additional data to f_build_conv and f_apply_conv --struct NLConvectionContext { CeedInt dim, space_dim, vdim; CeedScalar coeff; }; -+#ifndef MFEM_LIBCEED_NLCONV_QF_H -+#define MFEM_LIBCEED_NLCONV_QF_H - --/// libCEED Q-function for building quadrature data for a convection operator --/// with a constant coefficient -+#include "../util/util_qf.h" -+ -+struct NLConvectionContext -+{ -+ CeedInt dim, space_dim; -+ CeedScalar coeff; -+}; -+ -+/// libCEED QFunction for building quadrature data for a convection -+/// operator with a constant coefficient - CEED_QFUNCTION(f_build_conv_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- NLConvectionContext *bc = (NLConvectionContext*)ctx; -- // in[0] is Jacobians with shape [dim, nc=dim, Q] -+ NLConvectionContext *bc = (NLConvectionContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[1] is quadrature weights, size (Q) - // -- // At every quadrature point, compute and store qw * adj(J). -+ // At every quadrature point, compute and store qw * c * adj(J)^T - const CeedScalar coeff = bc->coeff; - const CeedScalar *J = in[0], *qw = in[1]; - CeedScalar *qd = out[0]; -- switch (bc->dim + 10 * bc->space_dim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * coeff * J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- qd[i] = coeff * qw[i] * J[i]; -+ MultAdjJt21(J + i, Q, qw[i] * coeff, Q, qd + i); - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 2 adj(J): J22 -J12 -- // 1 3 1 3 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] * coeff; -- qd[i + Q * 0] = w * J22; -- qd[i + Q * 1] = -w * J21; -- qd[i + Q * 2] = -w * J12; -- qd[i + Q * 3] = w * J11; -+ MultAdjJt22(J + i, Q, qw[i] * coeff, Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJt32(J + i, Q, qw[i] * coeff, Q, qd + i); - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 3 6 -- // 1 4 7 1 4 7 -- // 2 5 8 2 5 8 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] * coeff; -- qd[i + Q * 0] = w * A11; -- qd[i + Q * 1] = w * A21; -- qd[i + Q * 2] = w * A31; -- qd[i + Q * 3] = w * A12; -- qd[i + Q * 4] = w * A22; -- qd[i + Q * 5] = w * A32; -- qd[i + Q * 6] = w * A13; -- qd[i + Q * 7] = w * A23; -- qd[i + Q * 8] = w * A33; -+ MultAdjJt33(J + i, Q, qw[i] * coeff, Q, qd + i); - } - break; - } - return 0; - } - --/// libCEED Q-function for building quadrature data for a convection operator --/// coefficient evaluated at quadrature points. -+/// libCEED QFunction for building quadrature data for a convection -+/// operator with a coefficient evaluated at quadrature points - CEED_QFUNCTION(f_build_conv_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { - NLConvectionContext *bc = (NLConvectionContext *)ctx; -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -+ // in[0] is coefficients, size (Q) -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] - // in[2] is quadrature weights, size (Q) - // -- // At every quadrature point, compute and store qw * adj(J). -+ // At every quadrature point, compute and store qw * c * adj(J)^T - const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; - CeedScalar *qd = out[0]; -- switch (bc->dim + 10 * bc->space_dim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] * J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar coeff = c[i]; -- qd[i] = coeff * qw[i] * J[i]; -+ MultAdjJt21(J + i, Q, qw[i] * c[i], Q, qd + i); - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 2 adj(J): J22 -J12 -- // 1 3 1 3 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar coeff = c[i]; -- const CeedScalar w = qw[i] * coeff; -- qd[i + Q * 0] = w * J22; -- qd[i + Q * 1] = -w * J21; -- qd[i + Q * 2] = -w * J12; -- qd[i + Q * 3] = w * J11; -+ MultAdjJt22(J + i, Q, qw[i] * c[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJt32(J + i, Q, qw[i] * c[i], Q, qd + i); - } - break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 3 6 -- // 1 4 7 1 4 7 -- // 2 5 8 2 5 8 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar coeff = c[i]; -- const CeedScalar w = qw[i] * coeff; -- qd[i + Q * 0] = w * A11; -- qd[i + Q * 1] = w * A21; -- qd[i + Q * 2] = w * A31; -- qd[i + Q * 3] = w * A12; -- qd[i + Q * 4] = w * A22; -- qd[i + Q * 5] = w * A32; -- qd[i + Q * 6] = w * A13; -- qd[i + Q * 7] = w * A23; -- qd[i + Q * 8] = w * A33; -+ MultAdjJt33(J + i, Q, qw[i] * c[i], Q, qd + i); - } - break; - } - return 0; - } - --/// libCEED Q-function for applying a conv operator -+/// libCEED QFunction for applying a convection operator - CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { - NLConvectionContext *bc = (NLConvectionContext *)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -+ // in[0] has shape [ncomp=space_dim, Q] -+ // in[1] has shape [dim, ncomp=space_dim, Q] -+ // out[0] has shape [ncomp=space_dim, Q] - const CeedScalar *u = in[0], *ug = in[1], *qd = in[2]; - CeedScalar *vg = out[0]; -- switch (10*bc->dim + bc->vdim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ vg[i] = qd[i] * u[i] * ug[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- vg[i] = u[i] * ug[i] * qd[i]; -+ const CeedScalar qd00 = qd[i + Q * 0]; -+ const CeedScalar qd10 = qd[i + Q * 1]; -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar ug00 = ug[i + Q * 0]; -+ const CeedScalar ug10 = ug[i + Q * 1]; -+ const CeedScalar Dxu0 = qd00 * ug00; -+ const CeedScalar Dyu0 = qd10 * ug00; -+ const CeedScalar Dxu1 = qd00 * ug10; -+ const CeedScalar Dyu1 = qd10 * ug10; -+ vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0; -+ vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1; - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar qd00 = qd[i + Q * 0]; - const CeedScalar qd10 = qd[i + Q * 1]; -@@ -200,16 +169,47 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, - const CeedScalar ug10 = ug[i + Q * 1]; - const CeedScalar ug01 = ug[i + Q * 2]; - const CeedScalar ug11 = ug[i + Q * 3]; -- const CeedScalar Dxu0 = ug00 * qd00 + ug01 * qd10; -- const CeedScalar Dyu0 = ug00 * qd01 + ug01 * qd11; -- const CeedScalar Dxu1 = ug10 * qd00 + ug11 * qd10; -- const CeedScalar Dyu1 = ug10 * qd01 + ug11 * qd11; -+ const CeedScalar Dxu0 = qd00 * ug00 + qd01 * ug01; -+ const CeedScalar Dyu0 = qd10 * ug00 + qd11 * ug01; -+ const CeedScalar Dxu1 = qd00 * ug10 + qd01 * ug11; -+ const CeedScalar Dyu1 = qd10 * ug10 + qd11 * ug11; - vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0; - vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1; - } - break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd00 = qd[i + Q * 0]; -+ const CeedScalar qd10 = qd[i + Q * 1]; -+ const CeedScalar qd20 = qd[i + Q * 2]; -+ const CeedScalar qd01 = qd[i + Q * 3]; -+ const CeedScalar qd11 = qd[i + Q * 4]; -+ const CeedScalar qd21 = qd[i + Q * 5]; -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ const CeedScalar ug00 = ug[i + Q * 0]; -+ const CeedScalar ug10 = ug[i + Q * 1]; -+ const CeedScalar ug20 = ug[i + Q * 2]; -+ const CeedScalar ug01 = ug[i + Q * 3]; -+ const CeedScalar ug11 = ug[i + Q * 4]; -+ const CeedScalar ug21 = ug[i + Q * 5]; -+ const CeedScalar Dxu0 = qd00 * ug00 + qd01 * ug01; -+ const CeedScalar Dyu0 = qd10 * ug00 + qd11 * ug01; -+ const CeedScalar Dzu0 = qd20 * ug00 + qd21 * ug01; -+ const CeedScalar Dxu1 = qd00 * ug10 + qd01 * ug11; -+ const CeedScalar Dyu1 = qd10 * ug10 + qd11 * ug11; -+ const CeedScalar Dzu1 = qd20 * ug10 + qd21 * ug11; -+ const CeedScalar Dxu2 = qd00 * ug20 + qd01 * ug21; -+ const CeedScalar Dyu2 = qd10 * ug20 + qd11 * ug21; -+ const CeedScalar Dzu2 = qd20 * ug20 + qd21 * ug21; -+ vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0 + u2 * Dzu0; -+ vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1 + u2 * Dzu1; -+ vg[i + Q * 2] = u0 * Dxu2 + u1 * Dyu2 + u2 * Dzu2; -+ } - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { - const CeedScalar qd00 = qd[i + Q * 0]; - const CeedScalar qd10 = qd[i + Q * 1]; -@@ -232,15 +232,15 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, - const CeedScalar ug02 = ug[i + Q * 6]; - const CeedScalar ug12 = ug[i + Q * 7]; - const CeedScalar ug22 = ug[i + Q * 8]; -- const CeedScalar Dxu0 = ug00 * qd00 + ug01 * qd10 + ug02 * qd20; -- const CeedScalar Dyu0 = ug00 * qd01 + ug01 * qd11 + ug02 * qd21; -- const CeedScalar Dzu0 = ug00 * qd02 + ug01 * qd12 + ug02 * qd22; -- const CeedScalar Dxu1 = ug10 * qd00 + ug11 * qd10 + ug12 * qd20; -- const CeedScalar Dyu1 = ug10 * qd01 + ug11 * qd11 + ug12 * qd21; -- const CeedScalar Dzu1 = ug10 * qd02 + ug11 * qd12 + ug12 * qd22; -- const CeedScalar Dxu2 = ug20 * qd00 + ug21 * qd10 + ug22 * qd20; -- const CeedScalar Dyu2 = ug20 * qd01 + ug21 * qd11 + ug22 * qd21; -- const CeedScalar Dzu2 = ug20 * qd02 + ug21 * qd12 + ug22 * qd22; -+ const CeedScalar Dxu0 = qd00 * ug00 + qd01 * ug01 + qd02 * ug02; -+ const CeedScalar Dyu0 = qd10 * ug00 + qd11 * ug01 + qd12 * ug02; -+ const CeedScalar Dzu0 = qd20 * ug00 + qd21 * ug01 + qd22 * ug02; -+ const CeedScalar Dxu1 = qd00 * ug10 + qd01 * ug11 + qd02 * ug12; -+ const CeedScalar Dyu1 = qd10 * ug10 + qd11 * ug11 + qd12 * ug12; -+ const CeedScalar Dzu1 = qd20 * ug10 + qd21 * ug11 + qd22 * ug12; -+ const CeedScalar Dxu2 = qd00 * ug20 + qd01 * ug21 + qd02 * ug22; -+ const CeedScalar Dyu2 = qd10 * ug20 + qd11 * ug21 + qd12 * ug22; -+ const CeedScalar Dzu2 = qd20 * ug20 + qd21 * ug21 + qd22 * ug22; - vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0 + u2 * Dzu0; - vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1 + u2 * Dzu1; - vg[i + Q * 2] = u0 * Dxu2 + u1 * Dyu2 + u2 * Dzu2; -@@ -250,91 +250,101 @@ CEED_QFUNCTION(f_apply_conv)(void *ctx, CeedInt Q, - return 0; - } - --/// libCEED Q-function for applying a conv operator -+/// libCEED QFunction for applying a convection operator with a constant -+/// coefficient - CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- NLConvectionContext *bc = (NLConvectionContext*)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -- // in[2] is quadrature weights, size (Q) -+ NLConvectionContext *bc = (NLConvectionContext *)ctx; -+ // in[0] has shape [ncomp=space_dim, Q] -+ // in[1] has shape [dim, ncomp=space_dim, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // out[0] has shape [ncomp=space_dim, Q] - // -- // At every quadrature point, compute qw * adj(J). -+ // At every quadrature point, compute qw * c * adj(J)^T - const CeedScalar coeff = bc->coeff; - const CeedScalar *u = in[0], *ug = in[1], *J = in[2], *qw = in[3]; - CeedScalar *vg = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * coeff * J[i]; -+ vg[i] = u[i] * qd * ug[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar qd = coeff * qw[i] * J[i]; -- vg[i] = u[i] * ug[i] * qd; -+ CeedScalar qd[2]; -+ MultAdjJt21(J + i, Q, qw[i] * coeff, 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar ug00 = ug[i + Q * 0]; -+ const CeedScalar ug10 = ug[i + Q * 1]; -+ const CeedScalar Dxu0 = qd[0] * ug00; -+ const CeedScalar Dyu0 = qd[1] * ug00; -+ const CeedScalar Dxu1 = qd[0] * ug10; -+ const CeedScalar Dyu1 = qd[1] * ug10; -+ vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0; -+ vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1; - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 2 adj(J): J22 -J12 -- // 1 3 1 3 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] * coeff; -- const CeedScalar qd00 = w * J22; -- const CeedScalar qd10 = -w * J21; -- const CeedScalar qd01 = -w * J12; -- const CeedScalar qd11 = w * J11; -+ CeedScalar qd[4]; -+ MultAdjJt22(J + i, Q, qw[i] * coeff, 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar ug00 = ug[i + Q * 0]; - const CeedScalar ug10 = ug[i + Q * 1]; - const CeedScalar ug01 = ug[i + Q * 2]; - const CeedScalar ug11 = ug[i + Q * 3]; -- const CeedScalar Dxu0 = ug00 * qd00 + ug01 * qd10; -- const CeedScalar Dyu0 = ug00 * qd01 + ug01 * qd11; -- const CeedScalar Dxu1 = ug10 * qd00 + ug11 * qd10; -- const CeedScalar Dyu1 = ug10 * qd01 + ug11 * qd11; -+ const CeedScalar Dxu0 = qd[0] * ug00 + qd[2] * ug01; -+ const CeedScalar Dyu0 = qd[1] * ug00 + qd[3] * ug01; -+ const CeedScalar Dxu1 = qd[0] * ug10 + qd[2] * ug11; -+ const CeedScalar Dyu1 = qd[1] * ug10 + qd[3] * ug11; - vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0; - vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1; - } - break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJt32(J + i, Q, qw[i] * coeff, 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ const CeedScalar ug00 = ug[i + Q * 0]; -+ const CeedScalar ug10 = ug[i + Q * 1]; -+ const CeedScalar ug20 = ug[i + Q * 2]; -+ const CeedScalar ug01 = ug[i + Q * 3]; -+ const CeedScalar ug11 = ug[i + Q * 4]; -+ const CeedScalar ug21 = ug[i + Q * 5]; -+ const CeedScalar Dxu0 = qd[0] * ug00 + qd[3] * ug01; -+ const CeedScalar Dyu0 = qd[1] * ug00 + qd[4] * ug01; -+ const CeedScalar Dzu0 = qd[2] * ug00 + qd[5] * ug01; -+ const CeedScalar Dxu1 = qd[0] * ug10 + qd[3] * ug11; -+ const CeedScalar Dyu1 = qd[1] * ug10 + qd[4] * ug11; -+ const CeedScalar Dzu1 = qd[2] * ug10 + qd[5] * ug11; -+ const CeedScalar Dxu2 = qd[0] * ug20 + qd[3] * ug21; -+ const CeedScalar Dyu2 = qd[1] * ug20 + qd[4] * ug21; -+ const CeedScalar Dzu2 = qd[2] * ug20 + qd[5] * ug21; -+ vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0 + u2 * Dzu0; -+ vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1 + u2 * Dzu1; -+ vg[i + Q * 2] = u0 * Dxu2 + u1 * Dyu2 + u2 * Dzu2; -+ } -+ break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 3 6 -- // 1 4 7 1 4 7 -- // 2 5 8 2 5 8 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] * coeff; -- const CeedScalar qd00 = w * A11; -- const CeedScalar qd10 = w * A21; -- const CeedScalar qd20 = w * A31; -- const CeedScalar qd01 = w * A12; -- const CeedScalar qd11 = w * A22; -- const CeedScalar qd21 = w * A32; -- const CeedScalar qd02 = w * A13; -- const CeedScalar qd12 = w * A23; -- const CeedScalar qd22 = w * A33; -+ CeedScalar qd[9]; -+ MultAdjJt33(J + i, Q, qw[i] * coeff, 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; -@@ -347,15 +357,15 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, - const CeedScalar ug02 = ug[i + Q * 6]; - const CeedScalar ug12 = ug[i + Q * 7]; - const CeedScalar ug22 = ug[i + Q * 8]; -- const CeedScalar Dxu0 = ug00 * qd00 + ug01 * qd10 + ug02 * qd20; -- const CeedScalar Dyu0 = ug00 * qd01 + ug01 * qd11 + ug02 * qd21; -- const CeedScalar Dzu0 = ug00 * qd02 + ug01 * qd12 + ug02 * qd22; -- const CeedScalar Dxu1 = ug10 * qd00 + ug11 * qd10 + ug12 * qd20; -- const CeedScalar Dyu1 = ug10 * qd01 + ug11 * qd11 + ug12 * qd21; -- const CeedScalar Dzu1 = ug10 * qd02 + ug11 * qd12 + ug12 * qd22; -- const CeedScalar Dxu2 = ug20 * qd00 + ug21 * qd10 + ug22 * qd20; -- const CeedScalar Dyu2 = ug20 * qd01 + ug21 * qd11 + ug22 * qd21; -- const CeedScalar Dzu2 = ug20 * qd02 + ug21 * qd12 + ug22 * qd22; -+ const CeedScalar Dxu0 = qd[0] * ug00 + qd[3] * ug01 + qd[6] * ug02; -+ const CeedScalar Dyu0 = qd[1] * ug00 + qd[4] * ug01 + qd[7] * ug02; -+ const CeedScalar Dzu0 = qd[2] * ug00 + qd[5] * ug01 + qd[8] * ug02; -+ const CeedScalar Dxu1 = qd[0] * ug10 + qd[3] * ug11 + qd[6] * ug12; -+ const CeedScalar Dyu1 = qd[1] * ug10 + qd[4] * ug11 + qd[7] * ug12; -+ const CeedScalar Dzu1 = qd[2] * ug10 + qd[5] * ug11 + qd[8] * ug12; -+ const CeedScalar Dxu2 = qd[0] * ug20 + qd[3] * ug21 + qd[6] * ug22; -+ const CeedScalar Dyu2 = qd[1] * ug20 + qd[4] * ug21 + qd[7] * ug22; -+ const CeedScalar Dzu2 = qd[2] * ug20 + qd[5] * ug21 + qd[8] * ug22; - vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0 + u2 * Dzu0; - vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1 + u2 * Dzu1; - vg[i + Q * 2] = u0 * Dxu2 + u1 * Dyu2 + u2 * Dzu2; -@@ -365,89 +375,101 @@ CEED_QFUNCTION(f_apply_conv_mf_const)(void *ctx, CeedInt Q, - return 0; - } - -+/// libCEED QFunction for applying a convection operator with a coefficient -+/// evaluated at quadrature points - CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar *const *in, - CeedScalar *const *out) - { -- NLConvectionContext *bc = (NLConvectionContext*)ctx; -- // in[0], out[0] have shape [dim, nc=1, Q] -- // in[1] is Jacobians with shape [dim, nc=dim, Q] -- // in[2] is quadrature weights, size (Q) -+ NLConvectionContext *bc = (NLConvectionContext *)ctx; -+ // in[0] has shape [ncomp=space_dim, Q] -+ // in[1] has shape [dim, ncomp=space_dim, Q] -+ // in[2] is coefficients, size (Q) -+ // in[3] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[4] is quadrature weights, size (Q) -+ // out[0] has shape [ncomp=space_dim, Q] - // -- // At every quadrature point, compute qw * adj(J). -- const CeedScalar *c = in[0], *u = in[1], *ug = in[2], *J = in[3], *qw = in[4]; -+ // At every quadrature point, compute qw * c * adj(J)^T -+ const CeedScalar *u = in[0], *ug = in[1], *c = in[2], *J = in[3], *qw = in[4]; - CeedScalar *vg = out[0]; -- switch (10 * bc->dim + bc->vdim) -+ switch (10 * bc->space_dim + bc->dim) - { - case 11: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- const CeedScalar qd = c[i] * qw[i] * J[i]; -- vg[i] = u[i] * ug[i] * qd; -+ const CeedScalar qd = qw[i] * c[i] * J[i]; -+ vg[i] = u[i] * qd * ug[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[2]; -+ MultAdjJt21(J + i, Q, qw[i] * c[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar ug00 = ug[i + Q * 0]; -+ const CeedScalar ug10 = ug[i + Q * 1]; -+ const CeedScalar Dxu0 = qd[0] * ug00; -+ const CeedScalar Dyu0 = qd[1] * ug00; -+ const CeedScalar Dxu1 = qd[0] * ug10; -+ const CeedScalar Dyu1 = qd[1] * ug10; -+ vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0; -+ vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1; - } - break; - case 22: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 2 qd: 0 2 adj(J): J22 -J12 -- // 1 3 1 3 -J21 J11 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J12 = J[i + Q * 2]; -- const CeedScalar J22 = J[i + Q * 3]; -- const CeedScalar w = qw[i] * c[i]; -- const CeedScalar qd00 = w * J22; -- const CeedScalar qd10 = -w * J21; -- const CeedScalar qd01 = -w * J12; -- const CeedScalar qd11 = w * J11; -+ CeedScalar qd[4]; -+ MultAdjJt22(J + i, Q, qw[i] * c[i], 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar ug00 = ug[i + Q * 0]; - const CeedScalar ug10 = ug[i + Q * 1]; - const CeedScalar ug01 = ug[i + Q * 2]; - const CeedScalar ug11 = ug[i + Q * 3]; -- const CeedScalar Dxu0 = ug00 * qd00 + ug01 * qd10; -- const CeedScalar Dyu0 = ug00 * qd01 + ug01 * qd11; -- const CeedScalar Dxu1 = ug10 * qd00 + ug11 * qd10; -- const CeedScalar Dyu1 = ug10 * qd01 + ug11 * qd11; -+ const CeedScalar Dxu0 = qd[0] * ug00 + qd[2] * ug01; -+ const CeedScalar Dyu0 = qd[1] * ug00 + qd[3] * ug01; -+ const CeedScalar Dxu1 = qd[0] * ug10 + qd[2] * ug11; -+ const CeedScalar Dyu1 = qd[1] * ug10 + qd[3] * ug11; - vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0; - vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1; - } - break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJt32(J + i, Q, qw[i] * c[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ const CeedScalar ug00 = ug[i + Q * 0]; -+ const CeedScalar ug10 = ug[i + Q * 1]; -+ const CeedScalar ug20 = ug[i + Q * 2]; -+ const CeedScalar ug01 = ug[i + Q * 3]; -+ const CeedScalar ug11 = ug[i + Q * 4]; -+ const CeedScalar ug21 = ug[i + Q * 5]; -+ const CeedScalar Dxu0 = qd[0] * ug00 + qd[3] * ug01; -+ const CeedScalar Dyu0 = qd[1] * ug00 + qd[4] * ug01; -+ const CeedScalar Dzu0 = qd[2] * ug00 + qd[5] * ug01; -+ const CeedScalar Dxu1 = qd[0] * ug10 + qd[3] * ug11; -+ const CeedScalar Dyu1 = qd[1] * ug10 + qd[4] * ug11; -+ const CeedScalar Dzu1 = qd[2] * ug10 + qd[5] * ug11; -+ const CeedScalar Dxu2 = qd[0] * ug20 + qd[3] * ug21; -+ const CeedScalar Dyu2 = qd[1] * ug20 + qd[4] * ug21; -+ const CeedScalar Dzu2 = qd[2] * ug20 + qd[5] * ug21; -+ vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0 + u2 * Dzu0; -+ vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1 + u2 * Dzu1; -+ vg[i + Q * 2] = u0 * Dxu2 + u1 * Dyu2 + u2 * Dzu2; -+ } -+ break; - case 33: -- for (CeedInt i = 0; i < Q; i++) -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) - { -- // J: 0 3 6 qd: 0 3 6 -- // 1 4 7 1 4 7 -- // 2 5 8 2 5 8 -- const CeedScalar J11 = J[i + Q * 0]; -- const CeedScalar J21 = J[i + Q * 1]; -- const CeedScalar J31 = J[i + Q * 2]; -- const CeedScalar J12 = J[i + Q * 3]; -- const CeedScalar J22 = J[i + Q * 4]; -- const CeedScalar J32 = J[i + Q * 5]; -- const CeedScalar J13 = J[i + Q * 6]; -- const CeedScalar J23 = J[i + Q * 7]; -- const CeedScalar J33 = J[i + Q * 8]; -- const CeedScalar A11 = J22 * J33 - J23 * J32; -- const CeedScalar A12 = J13 * J32 - J12 * J33; -- const CeedScalar A13 = J12 * J23 - J13 * J22; -- const CeedScalar A21 = J23 * J31 - J21 * J33; -- const CeedScalar A22 = J11 * J33 - J13 * J31; -- const CeedScalar A23 = J13 * J21 - J11 * J23; -- const CeedScalar A31 = J21 * J32 - J22 * J31; -- const CeedScalar A32 = J12 * J31 - J11 * J32; -- const CeedScalar A33 = J11 * J22 - J12 * J21; -- const CeedScalar w = qw[i] * c[i]; -- const CeedScalar qd00 = w * A11; -- const CeedScalar qd10 = w * A21; -- const CeedScalar qd20 = w * A31; -- const CeedScalar qd01 = w * A12; -- const CeedScalar qd11 = w * A22; -- const CeedScalar qd21 = w * A32; -- const CeedScalar qd02 = w * A13; -- const CeedScalar qd12 = w * A23; -- const CeedScalar qd22 = w * A33; -+ CeedScalar qd[9]; -+ MultAdjJt33(J + i, Q, qw[i] * c[i], 1, qd); - const CeedScalar u0 = u[i + Q * 0]; - const CeedScalar u1 = u[i + Q * 1]; - const CeedScalar u2 = u[i + Q * 2]; -@@ -460,15 +482,15 @@ CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, - const CeedScalar ug02 = ug[i + Q * 6]; - const CeedScalar ug12 = ug[i + Q * 7]; - const CeedScalar ug22 = ug[i + Q * 8]; -- const CeedScalar Dxu0 = ug00 * qd00 + ug01 * qd10 + ug02 * qd20; -- const CeedScalar Dyu0 = ug00 * qd01 + ug01 * qd11 + ug02 * qd21; -- const CeedScalar Dzu0 = ug00 * qd02 + ug01 * qd12 + ug02 * qd22; -- const CeedScalar Dxu1 = ug10 * qd00 + ug11 * qd10 + ug12 * qd20; -- const CeedScalar Dyu1 = ug10 * qd01 + ug11 * qd11 + ug12 * qd21; -- const CeedScalar Dzu1 = ug10 * qd02 + ug11 * qd12 + ug12 * qd22; -- const CeedScalar Dxu2 = ug20 * qd00 + ug21 * qd10 + ug22 * qd20; -- const CeedScalar Dyu2 = ug20 * qd01 + ug21 * qd11 + ug22 * qd21; -- const CeedScalar Dzu2 = ug20 * qd02 + ug21 * qd12 + ug22 * qd22; -+ const CeedScalar Dxu0 = qd[0] * ug00 + qd[3] * ug01 + qd[6] * ug02; -+ const CeedScalar Dyu0 = qd[1] * ug00 + qd[4] * ug01 + qd[7] * ug02; -+ const CeedScalar Dzu0 = qd[2] * ug00 + qd[5] * ug01 + qd[8] * ug02; -+ const CeedScalar Dxu1 = qd[0] * ug10 + qd[3] * ug11 + qd[6] * ug12; -+ const CeedScalar Dyu1 = qd[1] * ug10 + qd[4] * ug11 + qd[7] * ug12; -+ const CeedScalar Dzu1 = qd[2] * ug10 + qd[5] * ug11 + qd[8] * ug12; -+ const CeedScalar Dxu2 = qd[0] * ug20 + qd[3] * ug21 + qd[6] * ug22; -+ const CeedScalar Dyu2 = qd[1] * ug20 + qd[4] * ug21 + qd[7] * ug22; -+ const CeedScalar Dzu2 = qd[2] * ug20 + qd[5] * ug21 + qd[8] * ug22; - vg[i + Q * 0] = u0 * Dxu0 + u1 * Dyu0 + u2 * Dzu0; - vg[i + Q * 1] = u0 * Dxu1 + u1 * Dyu1 + u2 * Dzu1; - vg[i + Q * 2] = u0 * Dxu2 + u1 * Dyu2 + u2 * Dzu2; -@@ -477,3 +499,5 @@ CEED_QFUNCTION(f_apply_conv_mf_quad)(void *ctx, CeedInt Q, - } - return 0; - } -+ -+#endif // MFEM_LIBCEED_NLCONV_QF_H -diff --git a/fem/ceed/integrators/util/util_qf.h b/fem/ceed/integrators/util/util_qf.h -new file mode 100644 -index 000000000..fa7ca763b ---- /dev/null -+++ b/fem/ceed/integrators/util/util_qf.h -@@ -0,0 +1,855 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_UTIL_QF_H -+#define MFEM_LIBCEED_UTIL_QF_H -+ -+#include -+ -+CEED_QFUNCTION_HELPER CeedScalar DetJ22(const CeedScalar *J, -+ const CeedInt J_stride) -+{ -+ // J: 0 2 -+ // 1 3 -+ return J[J_stride * 0] * J[J_stride * 3] - -+ J[J_stride * 1] * J[J_stride * 2]; -+} -+ -+CEED_QFUNCTION_HELPER CeedScalar DetJ21(const CeedScalar *J, -+ const CeedInt J_stride) -+{ -+ // J: 0 -+ // 1 -+ return sqrt(J[J_stride * 0] * J[J_stride * 0] + -+ J[J_stride * 1] * J[J_stride * 1]); -+} -+ -+CEED_QFUNCTION_HELPER CeedScalar DetJ33(const CeedScalar *J, -+ const CeedInt J_stride) -+{ -+ // J: 0 3 6 -+ // 1 4 7 -+ // 2 5 8 -+ return J[J_stride * 0] * (J[J_stride * 4] * J[J_stride * 8] - -+ J[J_stride * 5] * J[J_stride * 7]) - -+ J[J_stride * 1] * (J[J_stride * 3] * J[J_stride * 8] - -+ J[J_stride * 5] * J[J_stride * 6]) + -+ J[J_stride * 2] * (J[J_stride * 3] * J[J_stride * 7] - -+ J[J_stride * 4] * J[J_stride * 6]); -+} -+ -+CEED_QFUNCTION_HELPER CeedScalar DetJ32(const CeedScalar *J, -+ const CeedInt J_stride) -+{ -+ // J: 0 3 -+ // 1 4 -+ // 2 5 -+ const CeedScalar E = J[J_stride * 0] * J[J_stride * 0] + -+ J[J_stride * 1] * J[J_stride * 1] + -+ J[J_stride * 2] * J[J_stride * 2]; -+ const CeedScalar G = J[J_stride * 3] * J[J_stride * 3] + -+ J[J_stride * 4] * J[J_stride * 4] + -+ J[J_stride * 5] * J[J_stride * 5]; -+ const CeedScalar F = J[J_stride * 0] * J[J_stride * 3] + -+ J[J_stride * 1] * J[J_stride * 4] + -+ J[J_stride * 2] * J[J_stride * 5]; -+ return sqrt(E * G - F * F); -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJCAdjJt22(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result -+ // J: 0 2 adj(J): J22 -J12 qd: 0 1 -+ // 1 3 -J21 J11 1 2 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J12 = J[J_stride * 2]; -+ const CeedScalar J22 = J[J_stride * 3]; -+ const CeedScalar w = qw / (J11 * J22 - J21 * J12); -+ if (c_comp == 3) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C adj(J)^T -+ // c: 0 1 -+ // 1 2 -+ const CeedScalar R11 = c[c_stride * 0] * J22 - c[c_stride * 1] * J12; -+ const CeedScalar R21 = c[c_stride * 1] * J22 - c[c_stride * 2] * J12; -+ const CeedScalar R12 = -c[c_stride * 0] * J21 + c[c_stride * 1] * J11; -+ const CeedScalar R22 = -c[c_stride * 1] * J21 + c[c_stride * 2] * J11; -+ qd[qd_stride * 0] = w * (J22 * R11 - J12 * R21); -+ qd[qd_stride * 1] = w * (J11 * R21 - J21 * R11); -+ qd[qd_stride * 2] = w * (J11 * R22 - J21 * R12); -+ } -+ else if (c_comp == 2) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ qd[qd_stride * 0] = w * (c[c_stride * 1] * J12 * J12 + -+ c[c_stride * 0] * J22 * J22); -+ qd[qd_stride * 1] = -w * (c[c_stride * 1] * J11 * J12 + -+ c[c_stride * 0] * J21 * J22); -+ qd[qd_stride * 2] = w * (c[c_stride * 1] * J11 * J11 + -+ c[c_stride * 0] * J21 * J21); -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = w * c[c_stride * 0] * (J12 * J12 + J22 * J22); -+ qd[qd_stride * 1] = -w * c[c_stride * 0] * (J11 * J12 + J21 * J22); -+ qd[qd_stride * 2] = w * c[c_stride * 0] * (J11 * J11 + J21 * J21); -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJCAdjJt21(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result -+ // J: 0 adj(J): 1/sqrt(J^T J) J^T qd: 0 -+ // 1 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar d = J11 * J11 + J21 * J21; -+ const CeedScalar w = qw / sqrt(d); -+ if (c_comp == 3) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C adj(J)^T -+ // c: 0 1 -+ // 1 2 -+ const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21; -+ const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21; -+ qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21) / d; -+ } -+ else if (c_comp == 2) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + -+ c[c_stride * 1] * J21 * J21) / d; -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = w * c[c_stride * 0]; -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJCAdjJt33(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result -+ // J: 0 3 6 qd: 0 1 2 -+ // 1 4 7 1 3 4 -+ // 2 5 8 2 4 5 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar J13 = J[J_stride * 6]; -+ const CeedScalar J23 = J[J_stride * 7]; -+ const CeedScalar J33 = J[J_stride * 8]; -+ const CeedScalar A11 = J22 * J33 - J23 * J32; -+ const CeedScalar A12 = J13 * J32 - J12 * J33; -+ const CeedScalar A13 = J12 * J23 - J13 * J22; -+ const CeedScalar A21 = J23 * J31 - J21 * J33; -+ const CeedScalar A22 = J11 * J33 - J13 * J31; -+ const CeedScalar A23 = J13 * J21 - J11 * J23; -+ const CeedScalar A31 = J21 * J32 - J22 * J31; -+ const CeedScalar A32 = J12 * J31 - J11 * J32; -+ const CeedScalar A33 = J11 * J22 - J12 * J21; -+ const CeedScalar w = qw / (J11 * A11 + J21 * A12 + J31 * A13); -+ if (c_comp == 6) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C adj(J)^T -+ // c: 0 1 2 -+ // 1 3 4 -+ // 2 4 5 -+ const CeedScalar R11 = c[c_stride * 0] * A11 + -+ c[c_stride * 1] * A12 + -+ c[c_stride * 2] * A13; -+ const CeedScalar R12 = c[c_stride * 0] * A21 + -+ c[c_stride * 1] * A22 + -+ c[c_stride * 2] * A23; -+ const CeedScalar R13 = c[c_stride * 0] * A31 + -+ c[c_stride * 1] * A32 + -+ c[c_stride * 2] * A33; -+ const CeedScalar R21 = c[c_stride * 1] * A11 + -+ c[c_stride * 3] * A12 + -+ c[c_stride * 4] * A13; -+ const CeedScalar R22 = c[c_stride * 1] * A21 + -+ c[c_stride * 3] * A22 + -+ c[c_stride * 4] * A23; -+ const CeedScalar R23 = c[c_stride * 1] * A31 + -+ c[c_stride * 3] * A32 + -+ c[c_stride * 4] * A33; -+ const CeedScalar R31 = c[c_stride * 2] * A11 + -+ c[c_stride * 4] * A12 + -+ c[c_stride * 5] * A13; -+ const CeedScalar R32 = c[c_stride * 2] * A21 + -+ c[c_stride * 4] * A22 + -+ c[c_stride * 5] * A23; -+ const CeedScalar R33 = c[c_stride * 2] * A31 + -+ c[c_stride * 4] * A32 + -+ c[c_stride * 5] * A33; -+ qd[qd_stride * 0] = w * (A11 * R11 + A12 * R21 + A13 * R31); -+ qd[qd_stride * 1] = w * (A11 * R12 + A12 * R22 + A13 * R32); -+ qd[qd_stride * 2] = w * (A11 * R13 + A12 * R23 + A13 * R33); -+ qd[qd_stride * 3] = w * (A21 * R12 + A22 * R22 + A23 * R32); -+ qd[qd_stride * 4] = w * (A21 * R13 + A22 * R23 + A23 * R33); -+ qd[qd_stride * 5] = w * (A31 * R13 + A32 * R23 + A33 * R33); -+ } -+ else if (c_comp == 3) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ // 2 -+ qd[qd_stride * 0] = w * (c[c_stride * 0] * A11 * A11 + -+ c[c_stride * 1] * A12 * A12 + -+ c[c_stride * 2] * A13 * A13); -+ qd[qd_stride * 1] = w * (c[c_stride * 0] * A11 * A21 + -+ c[c_stride * 1] * A12 * A22 + -+ c[c_stride * 2] * A13 * A23); -+ qd[qd_stride * 2] = w * (c[c_stride * 0] * A11 * A31 + -+ c[c_stride * 1] * A12 * A32 + -+ c[c_stride * 2] * A13 * A33); -+ qd[qd_stride * 3] = w * (c[c_stride * 0] * A21 * A21 + -+ c[c_stride * 1] * A22 * A22 + -+ c[c_stride * 2] * A23 * A23); -+ qd[qd_stride * 4] = w * (c[c_stride * 0] * A21 * A31 + -+ c[c_stride * 1] * A22 * A32 + -+ c[c_stride * 2] * A23 * A33); -+ qd[qd_stride * 5] = w * (c[c_stride * 0] * A31 * A31 + -+ c[c_stride * 1] * A32 * A32 + -+ c[c_stride * 2] * A33 * A33); -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = -+ w * c[c_stride * 0] * (A11 * A11 + A12 * A12 + A13 * A13); -+ qd[qd_stride * 1] = -+ w * c[c_stride * 0] * (A11 * A21 + A12 * A22 + A13 * A23); -+ qd[qd_stride * 2] = -+ w * c[c_stride * 0] * (A11 * A31 + A12 * A32 + A13 * A33); -+ qd[qd_stride * 3] = -+ w * c[c_stride * 0] * (A21 * A21 + A22 * A22 + A23 * A23); -+ qd[qd_stride * 4] = -+ w * c[c_stride * 0] * (A21 * A31 + A22 * A32 + A23 * A33); -+ qd[qd_stride * 5] = -+ w * c[c_stride * 0] * (A31 * A31 + A32 * A32 + A33 * A33); -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJCAdjJt32(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) adj(J) C adj(J)^T and store the symmetric part of the result -+ // J: 0 3 qd: 0 1 -+ // 1 4 1 2 -+ // 2 5 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31; -+ const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32; -+ const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32; -+ const CeedScalar d = E * G - F * F; -+ const CeedScalar w = qw / sqrt(d); -+ if (c_comp == 6) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C adj(J)^T -+ // c: 0 1 2 -+ // 1 3 4 -+ // 2 4 5 -+ const CeedScalar R11 = G * (c[c_stride * 0] * J11 + -+ c[c_stride * 1] * J21 + -+ c[c_stride * 2] * J31) - -+ F * (c[c_stride * 0] * J12 + -+ c[c_stride * 1] * J22 + -+ c[c_stride * 2] * J32); -+ const CeedScalar R21 = G * (c[c_stride * 1] * J11 + -+ c[c_stride * 3] * J21 + -+ c[c_stride * 4] * J31) - -+ F * (c[c_stride * 1] * J12 + -+ c[c_stride * 3] * J22 + -+ c[c_stride * 4] * J32); -+ const CeedScalar R31 = G * (c[c_stride * 2] * J11 + -+ c[c_stride * 4] * J21 + -+ c[c_stride * 5] * J31) - -+ F * (c[c_stride * 2] * J12 + -+ c[c_stride * 4] * J22 + -+ c[c_stride * 5] * J32); -+ const CeedScalar R12 = E * (c[c_stride * 0] * J12 + -+ c[c_stride * 1] * J22 + -+ c[c_stride * 2] * J32) - -+ F * (c[c_stride * 0] * J11 + -+ c[c_stride * 1] * J21 + -+ c[c_stride * 2] * J31); -+ const CeedScalar R22 = E * (c[c_stride * 1] * J12 + -+ c[c_stride * 3] * J22 + -+ c[c_stride * 4] * J32) - -+ F * (c[c_stride * 1] * J11 + -+ c[c_stride * 3] * J21 + -+ c[c_stride * 4] * J31); -+ const CeedScalar R32 = E * (c[c_stride * 2] * J12 + -+ c[c_stride * 4] * J22 + -+ c[c_stride * 5] * J32) - -+ F * (c[c_stride * 2] * J11 + -+ c[c_stride * 4] * J21 + -+ c[c_stride * 5] * J31); -+ qd[qd_stride * 0] = w * (G * (J11 * R11 + J21 * R21 + J31 * R31) - -+ F * (J12 * R11 + J22 * R21 + J32 * R31)) / d; -+ qd[qd_stride * 1] = w * (G * (J11 * R12 + J21 * R22 + J31 * R32) - -+ F * (J12 * R12 + J22 * R22 + J32 * R32)) / d; -+ qd[qd_stride * 2] = w * (E * (J12 * R12 + J22 * R22 + J32 * R32) - -+ F * (J11 * R12 + J21 * R22 + J31 * R32)) / d; -+ } -+ else if (c_comp == 3) // Vector coefficient -+ { -+ // First compute entries of R = C adj(J)^T -+ // c: 0 -+ // 1 -+ // 2 -+ const CeedScalar R11 = c[c_stride * 0] * (G * J11 - F * J12); -+ const CeedScalar R21 = c[c_stride * 1] * (G * J21 - F * J22); -+ const CeedScalar R31 = c[c_stride * 2] * (G * J31 - F * J32); -+ const CeedScalar R12 = c[c_stride * 0] * (E * J12 - F * J11); -+ const CeedScalar R22 = c[c_stride * 1] * (E * J22 - F * J21); -+ const CeedScalar R32 = c[c_stride * 2] * (E * J32 - F * J31); -+ qd[qd_stride * 0] = w * (G * (J11 * R11 + J21 * R21 + J31 * R31) - -+ F * (J12 * R11 + J22 * R21 + J32 * R31)) / d; -+ qd[qd_stride * 1] = w * (G * (J11 * R12 + J21 * R22 + J31 * R32) - -+ F * (J12 * R12 + J22 * R22 + J32 * R32)) / d; -+ qd[qd_stride * 2] = w * (E * (J12 * R12 + J22 * R22 + J32 * R32) - -+ F * (J11 * R12 + J21 * R22 + J31 * R32)) / d; -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = w * c[c_stride * 0] * G; -+ qd[qd_stride * 1] = -w * c[c_stride * 0] * F; -+ qd[qd_stride * 2] = w * c[c_stride * 0] * E; -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultJtCJ22(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) J^T C J and store the symmetric part of the result -+ // J: 0 2 qd: 0 1 -+ // 1 3 1 2 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J12 = J[J_stride * 2]; -+ const CeedScalar J22 = J[J_stride * 3]; -+ const CeedScalar w = qw / (J11 * J22 - J21 * J12); -+ if (c_comp == 3) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C J -+ // c: 0 1 -+ // 1 2 -+ const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21; -+ const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21; -+ const CeedScalar R12 = c[c_stride * 0] * J12 + c[c_stride * 1] * J22; -+ const CeedScalar R22 = c[c_stride * 1] * J12 + c[c_stride * 2] * J22; -+ qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21); -+ qd[qd_stride * 1] = w * (J11 * R12 + J21 * R22); -+ qd[qd_stride * 2] = w * (J12 * R12 + J22 * R22); -+ } -+ else if (c_comp == 2) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + -+ c[c_stride * 1] * J21 * J21); -+ qd[qd_stride * 1] = w * (c[c_stride * 0] * J11 * J12 + -+ c[c_stride * 1] * J21 * J22); -+ qd[qd_stride * 2] = w * (c[c_stride * 0] * J12 * J12 + -+ c[c_stride * 1] * J22 * J22); -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = w * c[c_stride * 0] * (J11 * J11 + J21 * J21); -+ qd[qd_stride * 1] = w * c[c_stride * 0] * (J11 * J12 + J21 * J22); -+ qd[qd_stride * 2] = w * c[c_stride * 0] * (J12 * J12 + J22 * J22); -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultJtCJ21(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) J^T C J and store the symmetric part of the result -+ // J: 0 qd: 0 -+ // 1 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ if (c_comp == 3) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C J -+ // c: 0 1 -+ // 1 2 -+ const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21); -+ const CeedScalar R11 = c[c_stride * 0] * J11 + c[c_stride * 1] * J21; -+ const CeedScalar R21 = c[c_stride * 1] * J11 + c[c_stride * 2] * J21; -+ qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21); -+ } -+ else if (c_comp == 2) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21); -+ qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + -+ c[c_stride * 1] * J21 * J21); -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = qw * c[c_stride * 0] * sqrt(J11 * J11 + J21 * J21); -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultJtCJ33(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) J^T C J and store the symmetric part of the result -+ // J: 0 3 6 qd: 0 1 2 -+ // 1 4 7 1 3 4 -+ // 2 5 8 2 4 5 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar J13 = J[J_stride * 6]; -+ const CeedScalar J23 = J[J_stride * 7]; -+ const CeedScalar J33 = J[J_stride * 8]; -+ const CeedScalar w = qw / (J11 * (J22 * J33 - J23 * J32) + -+ J21 * (J13 * J32 - J12 * J33) + -+ J31 * (J12 * J23 - J13 * J22)); -+ if (c_comp == 6) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C J -+ // c: 0 1 2 -+ // 1 3 4 -+ // 2 4 5 -+ const CeedScalar R11 = c[c_stride * 0] * J11 + -+ c[c_stride * 1] * J21 + -+ c[c_stride * 2] * J31; -+ const CeedScalar R12 = c[c_stride * 0] * J12 + -+ c[c_stride * 1] * J22 + -+ c[c_stride * 2] * J32; -+ const CeedScalar R13 = c[c_stride * 0] * J13 + -+ c[c_stride * 1] * J23 + -+ c[c_stride * 2] * J33; -+ const CeedScalar R21 = c[c_stride * 1] * J11 + -+ c[c_stride * 3] * J21 + -+ c[c_stride * 4] * J31; -+ const CeedScalar R22 = c[c_stride * 1] * J12 + -+ c[c_stride * 3] * J22 + -+ c[c_stride * 4] * J32; -+ const CeedScalar R23 = c[c_stride * 1] * J13 + -+ c[c_stride * 3] * J23 + -+ c[c_stride * 4] * J33; -+ const CeedScalar R31 = c[c_stride * 2] * J11 + -+ c[c_stride * 4] * J21 + -+ c[c_stride * 5] * J31; -+ const CeedScalar R32 = c[c_stride * 2] * J12 + -+ c[c_stride * 4] * J22 + -+ c[c_stride * 5] * J32; -+ const CeedScalar R33 = c[c_stride * 2] * J13 + -+ c[c_stride * 4] * J23 + -+ c[c_stride * 5] * J33; -+ qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31); -+ qd[qd_stride * 1] = w * (J11 * R12 + J21 * R22 + J31 * R32); -+ qd[qd_stride * 2] = w * (J11 * R13 + J21 * R23 + J31 * R33); -+ qd[qd_stride * 3] = w * (J12 * R12 + J22 * R22 + J32 * R32); -+ qd[qd_stride * 4] = w * (J12 * R13 + J22 * R23 + J32 * R33); -+ qd[qd_stride * 5] = w * (J13 * R13 + J23 * R23 + J33 * R33); -+ } -+ else if (c_comp == 3) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ // 2 -+ qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + -+ c[c_stride * 1] * J21 * J21 + -+ c[c_stride * 2] * J31 * J31); -+ qd[qd_stride * 1] = w * (c[c_stride * 0] * J11 * J12 + -+ c[c_stride * 1] * J21 * J22 + -+ c[c_stride * 2] * J31 * J32); -+ qd[qd_stride * 2] = w * (c[c_stride * 0] * J11 * J13 + -+ c[c_stride * 1] * J21 * J23 + -+ c[c_stride * 2] * J31 * J33); -+ qd[qd_stride * 3] = w * (c[c_stride * 0] * J12 * J12 + -+ c[c_stride * 1] * J22 * J22 + -+ c[c_stride * 2] * J32 * J32); -+ qd[qd_stride * 4] = w * (c[c_stride * 0] * J12 * J13 + -+ c[c_stride * 1] * J22 * J23 + -+ c[c_stride * 2] * J32 * J33); -+ qd[qd_stride * 5] = w * (c[c_stride * 0] * J13 * J13 + -+ c[c_stride * 1] * J23 * J23 + -+ c[c_stride * 2] * J33 * J33); -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = -+ w * c[c_stride * 0] * (J11 * J11 + J21 * J21 + J31 * J31); -+ qd[qd_stride * 1] = -+ w * c[c_stride * 0] * (J11 * J12 + J21 * J22 + J31 * J32); -+ qd[qd_stride * 2] = -+ w * c[c_stride * 0] * (J11 * J13 + J21 * J23 + J31 * J33); -+ qd[qd_stride * 3] = -+ w * c[c_stride * 0] * (J12 * J12 + J22 * J22 + J32 * J32); -+ qd[qd_stride * 4] = -+ w * c[c_stride * 0] * (J12 * J13 + J22 * J23 + J32 * J33); -+ qd[qd_stride * 5] = -+ w * c[c_stride * 0] * (J13 * J13 + J23 * J23 + J33 * J33); -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultJtCJ32(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedInt c_comp, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw/det(J) J^T C J and store the symmetric part of the result -+ // J: 0 3 qd: 0 1 -+ // 1 4 1 2 -+ // 2 5 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31; -+ const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32; -+ const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32; -+ const CeedScalar w = qw / sqrt(E * G - F * F); -+ if (c_comp == 6) // Matrix coefficient (symmetric) -+ { -+ // First compute entries of R = C J -+ // c: 0 1 2 -+ // 1 3 4 -+ // 2 4 5 -+ const CeedScalar R11 = c[c_stride * 0] * J11 + -+ c[c_stride * 1] * J21 + -+ c[c_stride * 2] * J31; -+ const CeedScalar R21 = c[c_stride * 1] * J11 + -+ c[c_stride * 3] * J21 + -+ c[c_stride * 4] * J31; -+ const CeedScalar R31 = c[c_stride * 2] * J11 + -+ c[c_stride * 4] * J21 + -+ c[c_stride * 5] * J31; -+ const CeedScalar R12 = c[c_stride * 0] * J12 + -+ c[c_stride * 1] * J22 + -+ c[c_stride * 2] * J32; -+ const CeedScalar R22 = c[c_stride * 1] * J12 + -+ c[c_stride * 3] * J22 + -+ c[c_stride * 4] * J32; -+ const CeedScalar R32 = c[c_stride * 2] * J12 + -+ c[c_stride * 4] * J22 + -+ c[c_stride * 5] * J32; -+ qd[qd_stride * 0] = w * (J11 * R11 + J21 * R21 + J31 * R31); -+ qd[qd_stride * 1] = w * (J11 * R12 + J21 * R22 + J31 * R32); -+ qd[qd_stride * 2] = w * (J12 * R12 + J22 * R22 + J32 * R32); -+ } -+ else if (c_comp == 3) // Vector coefficient -+ { -+ // c: 0 -+ // 1 -+ // 2 -+ qd[qd_stride * 0] = w * (c[c_stride * 0] * J11 * J11 + -+ c[c_stride * 1] * J21 * J21 + -+ c[c_stride * 2] * J31 * J31); -+ qd[qd_stride * 1] = w * (c[c_stride * 0] * J11 * J12 + -+ c[c_stride * 1] * J21 * J22 + -+ c[c_stride * 2] * J31 * J32); -+ qd[qd_stride * 2] = w * (c[c_stride * 0] * J12 * J12 + -+ c[c_stride * 1] * J22 * J22 + -+ c[c_stride * 2] * J32 * J32); -+ } -+ else // Scalar coefficient -+ { -+ qd[qd_stride * 0] = w * c[c_stride * 0] * E; -+ qd[qd_stride * 1] = w * c[c_stride * 0] * F; -+ qd[qd_stride * 2] = w * c[c_stride * 0] * G; -+ } -+} -+ -+CEED_QFUNCTION_HELPER void MultCtAdjJt22(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw c^T adj(J)^T and store the result vector -+ // J: 0 2 adj(J): J22 -J12 -+ // 1 3 -J21 J11 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J12 = J[J_stride * 2]; -+ const CeedScalar J22 = J[J_stride * 3]; -+ const CeedScalar w1 = qw * c[c_stride * 0]; -+ const CeedScalar w2 = qw * c[c_stride * 1]; -+ qd[qd_stride * 0] = w1 * J22 - w2 * J12; -+ qd[qd_stride * 1] = -w1 * J21 + w2 * J11; -+} -+ -+CEED_QFUNCTION_HELPER void MultCtAdjJt21(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw c^T adj(J)^T and store the result vector -+ // J: 0 adj(J): 1/sqrt(J^T J) J^T -+ // 1 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21); -+ const CeedScalar w1 = w * c[c_stride * 0]; -+ const CeedScalar w2 = w * c[c_stride * 1]; -+ qd[qd_stride * 0] = w1 * J11 + w2 * J21; -+} -+ -+CEED_QFUNCTION_HELPER void MultCtAdjJt33(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw c^T adj(J)^T and store the result vector -+ // J: 0 3 6 -+ // 1 4 7 -+ // 2 5 8 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar J13 = J[J_stride * 6]; -+ const CeedScalar J23 = J[J_stride * 7]; -+ const CeedScalar J33 = J[J_stride * 8]; -+ const CeedScalar A11 = J22 * J33 - J23 * J32; -+ const CeedScalar A12 = J13 * J32 - J12 * J33; -+ const CeedScalar A13 = J12 * J23 - J13 * J22; -+ const CeedScalar A21 = J23 * J31 - J21 * J33; -+ const CeedScalar A22 = J11 * J33 - J13 * J31; -+ const CeedScalar A23 = J13 * J21 - J11 * J23; -+ const CeedScalar A31 = J21 * J32 - J22 * J31; -+ const CeedScalar A32 = J12 * J31 - J11 * J32; -+ const CeedScalar A33 = J11 * J22 - J12 * J21; -+ const CeedScalar w1 = qw * c[c_stride * 0]; -+ const CeedScalar w2 = qw * c[c_stride * 1]; -+ const CeedScalar w3 = qw * c[c_stride * 2]; -+ qd[qd_stride * 0] = w1 * A11 + w2 * A12 + w3 * A13; -+ qd[qd_stride * 1] = w1 * A21 + w2 * A22 + w3 * A23; -+ qd[qd_stride * 2] = w1 * A31 + w2 * A32 + w3 * A33; -+} -+ -+CEED_QFUNCTION_HELPER void MultCtAdjJt32(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar *c, -+ const CeedInt c_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw c^T adj(J)^T and store the result vector -+ // J: 0 3 -+ // 1 4 -+ // 2 5 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31; -+ const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32; -+ const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32; -+ const CeedScalar A11 = G * J11 - F * J12; -+ const CeedScalar A21 = E * J12 - F * J11; -+ const CeedScalar A12 = G * J21 - F * J22; -+ const CeedScalar A22 = E * J22 - F * J21; -+ const CeedScalar A13 = G * J31 - F * J32; -+ const CeedScalar A23 = E * J32 - F * J31; -+ const CeedScalar w = qw / sqrt(E * G - F * F); -+ const CeedScalar w1 = w * c[c_stride * 0]; -+ const CeedScalar w2 = w * c[c_stride * 1]; -+ const CeedScalar w3 = w * c[c_stride * 2]; -+ qd[qd_stride * 0] = w1 * A11 + w2 * A12 + w3 * A13; -+ qd[qd_stride * 1] = w1 * A21 + w2 * A22 + w3 * A23; -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJt22(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw adj(J)^T and store the result matrix -+ // J: 0 2 adj(J): J22 -J12 qd: 0 2 -+ // 1 3 -J21 J11 1 3 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J12 = J[J_stride * 2]; -+ const CeedScalar J22 = J[J_stride * 3]; -+ qd[qd_stride * 0] = qw * J22; -+ qd[qd_stride * 1] = -qw * J12; -+ qd[qd_stride * 2] = -qw * J21; -+ qd[qd_stride * 3] = qw * J11; -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJt21(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw adj(J)^T and store the result matrix -+ // J: 0 adj(J): 1/sqrt(J^T J) J^T qd: 0 -+ // 1 1 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar w = qw / sqrt(J11 * J11 + J21 * J21); -+ qd[qd_stride * 0] = w * J11; -+ qd[qd_stride * 1] = w * J21; -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJt33(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw adj(J)^T and store the result matrix -+ // J: 0 3 6 qd: 0 3 6 -+ // 1 4 7 1 4 7 -+ // 2 5 8 2 5 8 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar J13 = J[J_stride * 6]; -+ const CeedScalar J23 = J[J_stride * 7]; -+ const CeedScalar J33 = J[J_stride * 8]; -+ const CeedScalar A11 = J22 * J33 - J23 * J32; -+ const CeedScalar A12 = J13 * J32 - J12 * J33; -+ const CeedScalar A13 = J12 * J23 - J13 * J22; -+ const CeedScalar A21 = J23 * J31 - J21 * J33; -+ const CeedScalar A22 = J11 * J33 - J13 * J31; -+ const CeedScalar A23 = J13 * J21 - J11 * J23; -+ const CeedScalar A31 = J21 * J32 - J22 * J31; -+ const CeedScalar A32 = J12 * J31 - J11 * J32; -+ const CeedScalar A33 = J11 * J22 - J12 * J21; -+ qd[qd_stride * 0] = qw * A11; -+ qd[qd_stride * 1] = qw * A12; -+ qd[qd_stride * 2] = qw * A13; -+ qd[qd_stride * 3] = qw * A21; -+ qd[qd_stride * 4] = qw * A22; -+ qd[qd_stride * 5] = qw * A23; -+ qd[qd_stride * 6] = qw * A31; -+ qd[qd_stride * 7] = qw * A32; -+ qd[qd_stride * 8] = qw * A33; -+} -+ -+CEED_QFUNCTION_HELPER void MultAdjJt32(const CeedScalar *J, -+ const CeedInt J_stride, -+ const CeedScalar qw, -+ const CeedInt qd_stride, -+ CeedScalar *qd) -+{ -+ // compute qw adj(J)^T and store the result matrix -+ // J: 0 3 qd: 0 3 -+ // 1 4 1 4 -+ // 2 5 2 5 -+ const CeedScalar J11 = J[J_stride * 0]; -+ const CeedScalar J21 = J[J_stride * 1]; -+ const CeedScalar J31 = J[J_stride * 2]; -+ const CeedScalar J12 = J[J_stride * 3]; -+ const CeedScalar J22 = J[J_stride * 4]; -+ const CeedScalar J32 = J[J_stride * 5]; -+ const CeedScalar E = J11 * J11 + J21 * J21 + J31 * J31; -+ const CeedScalar G = J12 * J12 + J22 * J22 + J32 * J32; -+ const CeedScalar F = J11 * J12 + J21 * J22 + J31 * J32; -+ const CeedScalar A11 = G * J11 - F * J12; -+ const CeedScalar A21 = E * J12 - F * J11; -+ const CeedScalar A12 = G * J21 - F * J22; -+ const CeedScalar A22 = E * J22 - F * J21; -+ const CeedScalar A13 = G * J31 - F * J32; -+ const CeedScalar A23 = E * J32 - F * J31; -+ const CeedScalar w = qw / sqrt(E * G - F * F); -+ qd[qd_stride * 0] = w * A11; -+ qd[qd_stride * 1] = w * A12; -+ qd[qd_stride * 2] = w * A13; -+ qd[qd_stride * 3] = w * A21; -+ qd[qd_stride * 4] = w * A22; -+ qd[qd_stride * 5] = w * A23; -+} -+ -+#endif // MFEM_LIBCEED_UTIL_QF_H -diff --git a/fem/ceed/integrators/vecfemass/vecfemass.cpp b/fem/ceed/integrators/vecfemass/vecfemass.cpp -new file mode 100644 -index 000000000..87d624eb6 ---- /dev/null -+++ b/fem/ceed/integrators/vecfemass/vecfemass.cpp -@@ -0,0 +1,274 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "vecfemass.hpp" -+ -+#include "../../../../config/config.hpp" -+#ifdef MFEM_USE_CEED -+#include "vecfemass_qf.h" -+#endif -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+#ifdef MFEM_USE_CEED -+struct VectorFEMassOperatorInfo : public OperatorInfo -+{ -+ VectorFEMassContext ctx = {0}; -+ template -+ VectorFEMassOperatorInfo(const mfem::FiniteElementSpace &fes, CoeffType *Q, -+ bool use_bdr = false, bool use_mf = false) -+ { -+ MFEM_VERIFY(fes.GetVDim() == 1, -+ "libCEED interface for vector FE does not support vdim > 1!"); -+ ctx.dim = fes.GetMesh()->Dimension() - use_bdr; -+ ctx.space_dim = fes.GetMesh()->SpaceDimension(); -+ bool is_hdiv = (fes.FEColl()->GetMapType(ctx.dim) == -+ mfem::FiniteElement::H_DIV); -+ MFEM_VERIFY(is_hdiv || -+ fes.FEColl()->GetMapType(ctx.dim) == mfem::FiniteElement::H_CURL, -+ "VectorFEMassIntegrator requires H(div) or H(curl) FE space!"); -+ if (!use_mf) -+ { -+ apply_func = ":f_apply_vecfemass"; -+ apply_qf = &f_apply_vecfemass; -+ } -+ else -+ { -+ build_func = ""; -+ build_qf = nullptr; -+ } -+ if (Q == nullptr) -+ { -+ ctx.coeff[0] = 1.0; -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_const_scalar" : -+ ":f_build_hcurlmass_const_scalar"; -+ build_qf = is_hdiv ? &f_build_hdivmass_const_scalar : -+ &f_build_hcurlmass_const_scalar; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_scalar" : -+ ":f_apply_hcurlmass_mf_const_scalar"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_scalar : -+ &f_apply_hcurlmass_mf_const_scalar; -+ } -+ } -+ else -+ { -+ InitCoefficient(*Q, is_hdiv, use_mf); -+ } -+ header = "/integrators/vecfemass/vecfemass_qf.h"; -+ trial_op = EvalMode::Interp; -+ test_op = EvalMode::Interp; -+ qdatasize = (ctx.dim * (ctx.dim + 1)) / 2; -+ } -+ void InitCoefficient(mfem::Coefficient &Q, bool is_hdiv, bool use_mf) -+ { -+ if (mfem::ConstantCoefficient *const_coeff = -+ dynamic_cast(&Q)) -+ { -+ ctx.coeff[0] = const_coeff->constant; -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_const_scalar" : -+ ":f_build_hcurlmass_const_scalar"; -+ build_qf = is_hdiv ? &f_build_hdivmass_const_scalar : -+ &f_build_hcurlmass_const_scalar; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_scalar" : -+ ":f_apply_hcurlmass_mf_const_scalar"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_scalar : -+ &f_apply_hcurlmass_mf_const_scalar; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_quad_scalar" : -+ ":f_build_hcurlmass_quad_scalar"; -+ build_qf = is_hdiv ? &f_build_hdivmass_quad_scalar : -+ &f_build_hcurlmass_quad_scalar; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_scalar" : -+ ":f_apply_hcurlmass_mf_quad_scalar"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_scalar : -+ &f_apply_hcurlmass_mf_quad_scalar; -+ } -+ } -+ } -+ void InitCoefficient(mfem::VectorCoefficient &VQ, bool is_hdiv, bool use_mf) -+ { -+ if (mfem::VectorConstantCoefficient *const_coeff = -+ dynamic_cast(&VQ)) -+ { -+ const int vdim = VQ.GetVDim(); -+ MFEM_VERIFY(vdim <= LIBCEED_VECFEMASS_COEFF_COMP_MAX, -+ "VectorCoefficient dimension exceeds context storage!"); -+ const mfem::Vector &val = const_coeff->GetVec(); -+ for (int i = 0; i < vdim; i++) -+ { -+ ctx.coeff[i] = val[i]; -+ } -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_const_vector" : -+ ":f_build_hcurlmass_const_vector"; -+ build_qf = is_hdiv ? &f_build_hdivmass_const_vector : -+ &f_build_hcurlmass_const_vector; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_vector" : -+ ":f_apply_hcurlmass_mf_const_vector"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_vector : -+ &f_apply_hcurlmass_mf_const_vector; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_quad_vector" : -+ ":f_build_hcurlmass_quad_vector"; -+ build_qf = is_hdiv ? &f_build_hdivmass_quad_vector : -+ &f_build_hcurlmass_quad_vector; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_vector" : -+ ":f_apply_hcurlmass_mf_quad_vector"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_vector : -+ &f_apply_hcurlmass_mf_quad_vector; -+ } -+ } -+ } -+ void InitCoefficient(mfem::MatrixCoefficient &MQ, bool is_hdiv, bool use_mf) -+ { -+ // Assumes matrix coefficient is symmetric -+ if (mfem::MatrixConstantCoefficient *const_coeff = -+ dynamic_cast(&MQ)) -+ { -+ const int vdim = MQ.GetVDim(); -+ MFEM_VERIFY((vdim * (vdim + 1)) / 2 <= LIBCEED_VECFEMASS_COEFF_COMP_MAX, -+ "MatrixCoefficient dimensions exceed context storage!"); -+ const mfem::DenseMatrix &val = const_coeff->GetMatrix(); -+ for (int j = 0; j < vdim; j++) -+ { -+ for (int i = j; i < vdim; i++) -+ { -+ const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; -+ ctx.coeff[idx] = val(i, j); -+ } -+ } -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_const_matrix" : -+ ":f_build_hcurlmass_const_matrix"; -+ build_qf = is_hdiv ? &f_build_hdivmass_const_matrix : -+ &f_build_hcurlmass_const_matrix; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_const_matrix" : -+ ":f_apply_hcurlmass_mf_const_matrix"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_const_matrix : -+ &f_apply_hcurlmass_mf_const_matrix; -+ } -+ } -+ else -+ { -+ if (!use_mf) -+ { -+ build_func = is_hdiv ? ":f_build_hdivmass_quad_matrix" : -+ ":f_build_hcurlmass_quad_matrix"; -+ build_qf = is_hdiv ? &f_build_hdivmass_quad_matrix : -+ &f_build_hcurlmass_quad_matrix; -+ } -+ else -+ { -+ apply_func = is_hdiv ? ":f_apply_hdivmass_mf_quad_matrix" : -+ ":f_apply_hcurlmass_mf_quad_matrix"; -+ apply_qf = is_hdiv ? &f_apply_hdivmass_mf_quad_matrix : -+ &f_apply_hcurlmass_mf_quad_matrix; -+ } -+ } -+ } -+}; -+#endif -+ -+template -+PAVectorFEMassIntegrator::PAVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ VectorFEMassOperatorInfo info(fes, Q, use_bdr); -+ Assemble(integ, info, fes, Q, use_bdr); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+template -+MFVectorFEMassIntegrator::MFVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr) -+{ -+#ifdef MFEM_USE_CEED -+ VectorFEMassOperatorInfo info(fes, Q, use_bdr, true); -+ Assemble(integ, info, fes, Q, use_bdr, true); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} -+ -+// @cond DOXYGEN_SKIP -+ -+template PAVectorFEMassIntegrator::PAVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template PAVectorFEMassIntegrator::PAVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template PAVectorFEMassIntegrator::PAVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+template MFVectorFEMassIntegrator::MFVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::Coefficient *, const bool); -+template MFVectorFEMassIntegrator::MFVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::VectorCoefficient *, const bool); -+template MFVectorFEMassIntegrator::MFVectorFEMassIntegrator( -+ const mfem::VectorFEMassIntegrator &, const mfem::FiniteElementSpace &, -+ mfem::MatrixCoefficient *, const bool); -+ -+// @endcond -+ -+} // namespace ceed -+ -+} // namespace mfem -diff --git a/fem/ceed/integrators/vecfemass/vecfemass.hpp b/fem/ceed/integrators/vecfemass/vecfemass.hpp -new file mode 100644 -index 000000000..aa0ca2ea3 ---- /dev/null -+++ b/fem/ceed/integrators/vecfemass/vecfemass.hpp -@@ -0,0 +1,51 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_VECFEMASS_HPP -+#define MFEM_LIBCEED_VECFEMASS_HPP -+ -+#include "../../interface/integrator.hpp" -+#include "../../interface/mixed_operator.hpp" -+#include "../../../fespace.hpp" -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+/// Represent a VectorFEMassIntegrator with AssemblyLevel::Partial using libCEED. -+class PAVectorFEMassIntegrator : public MixedOperator -+{ -+public: -+ template -+ PAVectorFEMassIntegrator(const mfem::VectorFEMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+/// Represent a VectorFEMassIntegrator with AssemblyLevel::None using libCEED. -+class MFVectorFEMassIntegrator : public MixedOperator -+{ -+public: -+ template -+ MFVectorFEMassIntegrator(const mfem::VectorFEMassIntegrator &integ, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false); -+}; -+ -+} -+ -+} -+ -+#endif // MFEM_LIBCEED_VECFEMASS_HPP -diff --git a/fem/ceed/integrators/vecfemass/vecfemass_qf.h b/fem/ceed/integrators/vecfemass/vecfemass_qf.h -new file mode 100644 -index 000000000..571316ba8 ---- /dev/null -+++ b/fem/ceed/integrators/vecfemass/vecfemass_qf.h -@@ -0,0 +1,1454 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_VECFEMASS_QF_H -+#define MFEM_LIBCEED_VECFEMASS_QF_H -+ -+#include "../util/util_qf.h" -+ -+#define LIBCEED_VECFEMASS_COEFF_COMP_MAX 6 -+ -+struct VectorFEMassContext -+{ -+ CeedInt dim, space_dim; -+ CeedScalar coeff[LIBCEED_VECFEMASS_COEFF_COMP_MAX]; -+}; -+ -+/// libCEED QFunction for building quadrature data for an H(div) mass operator -+/// with a scalar constant coefficient -+CEED_QFUNCTION(f_build_hdivmass_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ qd[i] = qw[i] * coeff0 * J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(div) mass operator -+/// with a vector constant coefficient -+CEED_QFUNCTION(f_build_hdivmass_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(div) mass operator -+/// with a matrix constant coefficient -+CEED_QFUNCTION(f_build_hdivmass_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(curl) mass operator -+/// with a scalar constant coefficient -+CEED_QFUNCTION(f_build_hcurlmass_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ qd[i] = qw[i] * coeff0 / J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(curl) mass operator -+/// with a vector constant coefficient -+CEED_QFUNCTION(f_build_hcurlmass_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(curl) mass operator -+/// with a matrix constant coefficient -+CEED_QFUNCTION(f_build_hcurlmass_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[1] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *J = in[0], *qw = in[1]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(div) mass operator -+/// with a scalar coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_hdivmass_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] * J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(div) mass operator -+/// with a vector coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_hdivmass_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(div) mass operator -+/// with a matrix coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_hdivmass_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(curl) mass operator -+/// with a scalar coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_hcurlmass_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ qd[i] = qw[i] * c[i] / J[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(curl) mass operator -+/// with a vector coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_hcurlmass_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for building quadrature data for an H(curl) mass operator -+/// with a matrix coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_build_hcurlmass_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) and store the symmetric part -+ // of the result -+ const CeedScalar *c = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *qd = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], Q, qd + i); -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], Q, qd + i); -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying a vector FE mass operator -+CEED_QFUNCTION(f_apply_vecfemass)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ const CeedScalar *u = in[0], *qd = in[1]; -+ CeedScalar *v = out[0]; -+ switch (bc->dim) -+ { -+ case 1: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ v[i] = qd[i] * u[i]; -+ } -+ break; -+ case 2: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1; -+ v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 2] * u1; -+ } -+ break; -+ case 3: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[i + Q * 0] * u0 + qd[i + Q * 1] * u1 + qd[i + Q * 2] * u2; -+ v[i + Q * 1] = qd[i + Q * 1] * u0 + qd[i + Q * 3] * u1 + qd[i + Q * 4] * u2; -+ v[i + Q * 2] = qd[i + Q * 2] * u0 + qd[i + Q * 4] * u1 + qd[i + Q * 5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(div) mass operator with a scalar -+/// constant coefficient -+CEED_QFUNCTION(f_apply_hdivmass_mf_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ const CeedScalar qd = qw[i] * coeff0 * J[i]; -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultJtCJ21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(div) mass operator with a vector -+/// constant coefficient -+CEED_QFUNCTION(f_apply_hdivmass_mf_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultJtCJ21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(div) mass operator with a matrix -+/// constant coefficient -+CEED_QFUNCTION(f_apply_hdivmass_mf_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultJtCJ21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(curl) mass operator with a scalar -+/// constant coefficient -+CEED_QFUNCTION(f_apply_hcurlmass_mf_const_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar coeff0 = coeff[0]; -+ const CeedScalar qd = qw[i] * coeff0 / J[i]; -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 1, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(curl) mass operator with a vector -+/// constant coefficient -+CEED_QFUNCTION(f_apply_hcurlmass_mf_const_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 2, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 2, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(curl) mass operator with a matrix -+/// constant coefficient -+CEED_QFUNCTION(f_apply_hcurlmass_mf_const_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[2] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *coeff = bc->coeff; -+ const CeedScalar *u = in[0], *J = in[1], *qw = in[2]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, coeff, 1, 3, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, coeff, 1, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, coeff, 1, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(div) operator with a scalar -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_hdivmass_mf_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=1, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] * J[i]; -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultJtCJ21(J + i, Q, c + i, Q, 1, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(div) operator with a vector -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_hdivmass_mf_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultJtCJ21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(div) operator with a matrix -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_hdivmass_mf_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultJtCJ21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultJtCJ32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultJtCJ33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(curl) operator with a scalar -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_scalar)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=1, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 11: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ const CeedScalar qd = qw[i] * c[i] / J[i]; -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 1, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 1, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(curl) operator with a vector -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_vector)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 2, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 2, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+/// libCEED QFunction for applying an H(curl) operator with a matrix -+/// coefficient evaluated at quadrature points -+CEED_QFUNCTION(f_apply_hcurlmass_mf_quad_matrix)(void *ctx, CeedInt Q, -+ const CeedScalar *const *in, -+ CeedScalar *const *out) -+{ -+ VectorFEMassContext *bc = (VectorFEMassContext *)ctx; -+ // in[0], out[0] have shape [dim, ncomp=1, Q] -+ // in[1] is coefficients with shape [ncomp=space_dim*(space_dim+1)/2, Q] -+ // in[2] is Jacobians with shape [dim, ncomp=space_dim, Q] -+ // in[3] is quadrature weights, size (Q) -+ // -+ // At every quadrature point, compute qw/det(J) adj(J) C adj(J)^T (for -+ // H(curl)) or qw/det(J) J^T C J (for H(div)) -+ const CeedScalar *u = in[0], *c = in[1], *J = in[2], *qw = in[3]; -+ CeedScalar *v = out[0]; -+ switch (10 * bc->space_dim + bc->dim) -+ { -+ case 21: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd; -+ MultAdjJCAdjJt21(J + i, Q, c + i, Q, 3, qw[i], 1, &qd); -+ v[i] = qd * u[i]; -+ } -+ break; -+ case 22: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt22(J + i, Q, c + i, Q, 3, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 32: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[3]; -+ MultAdjJCAdjJt32(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1; -+ v[i + Q * 1] = qd[1] * u0 + qd[2] * u1; -+ } -+ break; -+ case 33: -+ CeedPragmaSIMD for (CeedInt i = 0; i < Q; i++) -+ { -+ CeedScalar qd[6]; -+ MultAdjJCAdjJt33(J + i, Q, c + i, Q, 6, qw[i], 1, qd); -+ const CeedScalar u0 = u[i + Q * 0]; -+ const CeedScalar u1 = u[i + Q * 1]; -+ const CeedScalar u2 = u[i + Q * 2]; -+ v[i + Q * 0] = qd[0] * u0 + qd[1] * u1 + qd[2] * u2; -+ v[i + Q * 1] = qd[1] * u0 + qd[3] * u1 + qd[4] * u2; -+ v[i + Q * 2] = qd[2] * u0 + qd[4] * u1 + qd[5] * u2; -+ } -+ break; -+ } -+ return 0; -+} -+ -+#endif // MFEM_LIBCEED_VECFEMASS_QF_H -diff --git a/fem/ceed/interface/basis.cpp b/fem/ceed/interface/basis.cpp -index 37858cb78..487108ef4 100644 ---- a/fem/ceed/interface/basis.cpp -+++ b/fem/ceed/interface/basis.cpp -@@ -9,7 +9,8 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../../gridfunc.hpp" -+#include "basis.hpp" -+ - #include "util.hpp" - - namespace mfem -@@ -47,75 +48,171 @@ static CeedElemTopology GetCeedTopology(Geometry::Type geom) - static void InitNonTensorBasis(const mfem::FiniteElementSpace &fes, - const mfem::FiniteElement &fe, - const mfem::IntegrationRule &ir, -- Ceed ceed, CeedBasis *basis) -+ Ceed ceed, -+ CeedBasis *basis) - { - const mfem::DofToQuad &maps = fe.GetDofToQuad(ir, mfem::DofToQuad::FULL); -- mfem::Mesh *mesh = fes.GetMesh(); -- const int dim = mesh->Dimension(); -- const int ndofs = maps.ndof; -- const int nqpts = maps.nqpt; -- mfem::DenseMatrix qX(dim,nqpts); -- mfem::Vector qW(nqpts); -- for (int i = 0; i < nqpts; i++) -+ const int dim = fe.GetDim(); -+ const int ncomp = fes.GetVDim(); -+ const int P = maps.ndof; -+ const int Q = maps.nqpt; -+ mfem::DenseMatrix qX(dim, Q); -+ mfem::Vector qW(Q); -+ for (int i = 0; i < Q; i++) - { - const mfem::IntegrationPoint &ip = ir.IntPoint(i); -- qX(0,i) = ip.x; -- if (dim>1) { qX(1,i) = ip.y; } -- if (dim>2) { qX(2,i) = ip.z; } -+ qX(0, i) = ip.x; -+ if (dim > 1) { qX(1, i) = ip.y; } -+ if (dim > 2) { qX(2, i) = ip.z; } - qW(i) = ip.weight; - } -- CeedBasisCreateH1(ceed, GetCeedTopology(fe.GetGeomType()), -- fes.GetVDim(), ndofs, nqpts, -- maps.Bt.GetData(), maps.Gt.GetData(), -- qX.GetData(), qW.GetData(), basis); -+ if (fe.GetMapType() == mfem::FiniteElement::H_DIV) -+ { -+ CeedBasisCreateHdiv(ceed, GetCeedTopology(fe.GetGeomType()), ncomp, P, Q, -+ maps.Bt.GetData(), maps.Gt.GetData(), -+ qX.GetData(), qW.GetData(), basis); -+ } -+ else if (fe.GetMapType() == mfem::FiniteElement::H_CURL) -+ { -+ CeedBasisCreateHcurl(ceed, GetCeedTopology(fe.GetGeomType()), ncomp, P, Q, -+ maps.Bt.GetData(), maps.Gt.GetData(), -+ qX.GetData(), qW.GetData(), basis); -+ } -+ else -+ { -+ CeedBasisCreateH1(ceed, GetCeedTopology(fe.GetGeomType()), ncomp, P, Q, -+ maps.Bt.GetData(), maps.Gt.GetData(), -+ qX.GetData(), qW.GetData(), basis); -+ } - } - - static void InitTensorBasis(const mfem::FiniteElementSpace &fes, - const mfem::FiniteElement &fe, - const mfem::IntegrationRule &ir, -- Ceed ceed, CeedBasis *basis) -+ Ceed ceed, -+ CeedBasis *basis) - { - const mfem::DofToQuad &maps = fe.GetDofToQuad(ir, mfem::DofToQuad::TENSOR); -- mfem::Mesh *mesh = fes.GetMesh(); -- const int ndofs = maps.ndof; -- const int nqpts = maps.nqpt; -- mfem::Vector qX(nqpts), qW(nqpts); -- // The x-coordinates of the first `nqpts` points of the integration rule are -+ const int dim = fe.GetDim(); -+ const int ncomp = fes.GetVDim(); -+ const int P = maps.ndof; -+ const int Q = maps.nqpt; -+ mfem::Vector qX(Q), qW(Q); -+ // The x-coordinates of the first `Q` points of the integration rule are - // the points of the corresponding 1D rule. We also scale the weights - // accordingly. - double w_sum = 0.0; -- for (int i = 0; i < nqpts; i++) -+ for (int i = 0; i < Q; i++) - { - const mfem::IntegrationPoint &ip = ir.IntPoint(i); - qX(i) = ip.x; - qW(i) = ip.weight; - w_sum += ip.weight; - } -- qW *= 1.0/w_sum; -- CeedBasisCreateTensorH1(ceed, mesh->Dimension(), fes.GetVDim(), ndofs, -- nqpts, maps.Bt.GetData(), -- maps.Gt.GetData(), qX.GetData(), -- qW.GetData(), basis); -+ qW *= 1.0 / w_sum; -+ CeedBasisCreateTensorH1(ceed, dim, ncomp, P, Q, -+ maps.Bt.GetData(), maps.Gt.GetData(), -+ qX.GetData(), qW.GetData(), basis); -+} -+ -+#if 0 -+static void InitCeedInterpolatorBasis(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ Ceed ceed, -+ CeedBasis *basis) -+{ -+ // Basis projection operator using libCEED -+ CeedBasis trial_basis, test_basis; -+ const int P = std::max(trial_fe.GetDof(), test_fe.GetDof()), ir_order_max = 100; -+ int ir_order = std::max(trial_fe.GetOrder(), test_fe.GetOrder()); -+ for (; ir_order < ir_order_max; ir_order++) -+ { -+ if (IntRules.Get(trial_fe.GetGeomType(), ir_order).GetNPoints() >= P) { break; } -+ } -+ const IntegrationRule &ir = IntRules.Get(trial_fe.GetGeomType(), ir_order); -+ InitBasis(trial_fes, trial_fe, ir, ceed, &trial_basis); -+ InitBasis(test_fes, test_fe, ir, ceed, &test_basis); -+ CeedBasisCreateProjection(trial_basis, test_basis, basis); -+} -+#endif -+ -+static void InitMfemInterpolatorBasis(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ Ceed ceed, -+ CeedBasis *basis) -+{ -+ MFEM_VERIFY(trial_fes.GetVDim() == test_fes.GetVDim(), -+ "libCEED discrete linear operator requires same vdim for trial " -+ "and test FE spaces."); -+ const int dim = trial_fe.GetDim(); -+ const int ncomp = trial_fes.GetVDim(); -+ const int trial_P = trial_fe.GetDof(); -+ const int test_P = test_fe.GetDof(); -+ mfem::DenseMatrix qX(dim, test_P), Gt(trial_P, test_P * dim), Bt; -+ mfem::Vector qW(test_P); -+ mfem::IsoparametricTransformation dummy; -+ dummy.SetIdentityTransformation(trial_fe.GetGeomType()); -+ if (trial_fe.GetMapType() == test_fe.GetMapType()) -+ { -+ // Prolongation -+ test_fe.GetTransferMatrix(trial_fe, dummy, Bt); -+ } -+ else if (trial_fe.GetMapType() == mfem::FiniteElement::VALUE && -+ test_fe.GetMapType() == mfem::FiniteElement::H_CURL) -+ { -+ // Discrete gradient interpolator -+ test_fe.ProjectGrad(trial_fe, dummy, Bt); -+ } -+ else if (trial_fe.GetMapType() == mfem::FiniteElement::H_CURL && -+ test_fe.GetMapType() == mfem::FiniteElement::H_DIV) -+ { -+ // Discrete curl interpolator -+ test_fe.ProjectCurl(trial_fe, dummy, Bt); -+ } -+ else if (trial_fe.GetMapType() == mfem::FiniteElement::H_DIV && -+ test_fe.GetMapType() == mfem::FiniteElement::INTEGRAL) -+ { -+ // Discrete divergence interpolator -+ test_fe.ProjectDiv(trial_fe, dummy, Bt); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported trial/test FE spaces for libCEED discrete " -+ "linear operator"); -+ } -+ Bt.Transpose(); -+ Gt = 0.0; -+ qX = 0.0; -+ qW = 0.0; -+ CeedBasisCreateH1(ceed, GetCeedTopology(trial_fe.GetGeomType()), ncomp, -+ trial_P, test_P, Bt.GetData(), Gt.GetData(), -+ qX.GetData(), qW.GetData(), basis); - } - --static void InitBasisImpl(const FiniteElementSpace &fes, -- const FiniteElement &fe, -- const IntegrationRule &ir, -- Ceed ceed, CeedBasis *basis) -+void InitBasis(const FiniteElementSpace &fes, -+ const FiniteElement &fe, -+ const IntegrationRule &ir, -+ Ceed ceed, -+ CeedBasis *basis) - { -- // Check for FES -> basis, restriction in hash tables -+ // Check for fes -> basis in hash table -+ const int ncomp = fes.GetVDim(); - const int P = fe.GetDof(); - const int Q = ir.GetNPoints(); -- const int ncomp = fes.GetVDim(); -- BasisKey basis_key(&fes, &ir, ncomp, P, Q); -+ BasisKey basis_key(&fes, nullptr, &ir, {ncomp, P, Q}); - auto basis_itr = mfem::internal::ceed_basis_map.find(basis_key); -- const bool tensor = dynamic_cast -- (&fe) != nullptr; - - // Init or retrieve key values - if (basis_itr == mfem::internal::ceed_basis_map.end()) - { -- if ( tensor ) -+ const bool tensor = -+ dynamic_cast(&fe) != nullptr; -+ const bool vector = fe.GetRangeType() == mfem::FiniteElement::VECTOR; -+ if (tensor && !vector) - { - InitTensorBasis(fes, fe, ir, ceed, basis); - } -@@ -131,22 +228,41 @@ static void InitBasisImpl(const FiniteElementSpace &fes, - } - } - --void InitBasis(const FiniteElementSpace &fes, -- const IntegrationRule &ir, -- Ceed ceed, CeedBasis *basis) -+void InitInterpolatorBasis(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ Ceed ceed, -+ CeedBasis *basis) - { -- const mfem::FiniteElement &fe = *fes.GetFE(0); -- InitBasisImpl(fes, fe, ir, ceed, basis); --} -+ // Check for fes -> basis in hash table -+ const int ncomp = trial_fes.GetVDim() + test_fes.GetVDim(); -+ const int P = trial_fe.GetDof(); -+ const int Q = test_fe.GetDof(); -+ BasisKey basis_key(&trial_fes, &test_fes, nullptr, {ncomp, P, Q}); -+ auto basis_itr = mfem::internal::ceed_basis_map.find(basis_key); - --void InitBasisWithIndices(const FiniteElementSpace &fes, -- const IntegrationRule &ir, -- int nelem, -- const int* indices, -- Ceed ceed, CeedBasis *basis) --{ -- const mfem::FiniteElement &fe = *fes.GetFE(indices[0]); -- InitBasisImpl(fes, fe, ir, ceed, basis); -+ // Init or retrieve key values -+ if (basis_itr == mfem::internal::ceed_basis_map.end()) -+ { -+#if 0 -+ if (trial_fe.GetMapType() == test_fe.GetMapType()) -+ { -+ InitCeedInterpolatorBasis(trial_fes, test_fes, trial_fe, test_fe, -+ ceed, basis); -+ } -+ else -+#endif -+ { -+ InitMfemInterpolatorBasis(trial_fes, test_fes, trial_fe, test_fe, -+ ceed, basis); -+ } -+ mfem::internal::ceed_basis_map[basis_key] = *basis; -+ } -+ else -+ { -+ *basis = basis_itr->second; -+ } - } - - #endif -diff --git a/fem/ceed/interface/basis.hpp b/fem/ceed/interface/basis.hpp -index 3781f4cf7..361f97863 100644 ---- a/fem/ceed/interface/basis.hpp -+++ b/fem/ceed/interface/basis.hpp -@@ -12,6 +12,7 @@ - #ifndef MFEM_LIBCEED_BASIS - #define MFEM_LIBCEED_BASIS - -+#include "../../fespace.hpp" - #include "ceed.hpp" - - namespace mfem -@@ -22,31 +23,117 @@ namespace ceed - - #ifdef MFEM_USE_CEED - --/** @brief Initialize a CeedBasis for non-mixed meshes. -+/** @brief Initialize a CeedBasis based on an mfem::FiniteElementSpace @a fes, -+ an mfem::FiniteElement @a fe, and an mfem::IntegrationRule @a ir. - -- @param[in] fes Input finite element space. -- @param[in] ir Input integration rule. -- @param[in] ceed Input Ceed object. -- @param[out] basis The address of the initialized CeedBasis object. --*/ -+ @param[in] fes The finite element space. -+ @param[in] fe The finite element. -+ @param[in] ir The integration rule. -+ @param[in] ceed The Ceed object. -+ @param[out] basis The `CeedBasis` to initialize. */ - void InitBasis(const FiniteElementSpace &fes, -+ const FiniteElement &fe, - const IntegrationRule &ir, -- Ceed ceed, CeedBasis *basis); -+ Ceed ceed, -+ CeedBasis *basis); - --/** @brief Initialize a CeedBasis for mixed meshes. -+/** @brief Initialize a CeedBasis based on an mfem::FiniteElementSpace @a fes, -+ an mfem::IntegrationRule @a ir, and an optional list of element indices -+ @a indices. - - @param[in] fes The finite element space. -- @param[in] ir is the integration rule for the operator. -- @param[in] nelem The number of elements. -+ @param[in] ir The integration rule. -+ @param[in] use_bdr Create the basis and restriction for boundary elements. -+ @param[in] indices The indices of the elements of same type in the -+ `FiniteElementSpace`. If `indices == nullptr`, assumes -+ that the `FiniteElementSpace` is not mixed. -+ @param[in] ceed The Ceed object. -+ @param[out] basis The `CeedBasis` to initialize. */ -+inline void InitBasis(const FiniteElementSpace &fes, -+ const IntegrationRule &ir, -+ bool use_bdr, -+ const int *indices, -+ Ceed ceed, -+ CeedBasis *basis) -+{ -+ const mfem::FiniteElement *fe; -+ if (indices) -+ { -+ fe = use_bdr ? fes.GetBE(indices[0]) : fes.GetFE(indices[0]); -+ } -+ else -+ { -+ fe = use_bdr ? fes.GetBE(0) : fes.GetFE(0); -+ } -+ InitBasis(fes, *fe, ir, ceed, basis); -+} -+ -+inline void InitBasis(const FiniteElementSpace &fes, -+ const IntegrationRule &ir, -+ bool use_bdr, -+ Ceed ceed, -+ CeedBasis *basis) -+{ -+ InitBasis(fes, ir, use_bdr, nullptr, ceed, basis); -+} -+ -+/** @brief Initialize a CeedBasis based on an interpolation from -+ mfem::FiniteElementSpace @a trial_fes to @a test_fes. The type of -+ interpolation will be chosen based on the map type of the provided -+ mfem::FiniteElement objects. -+ -+ @param[in] trial_fes The trial finite element space. -+ @param[in] test_fes The test finite element space. -+ @param[in] trial_fe The trial finite element. -+ @param[in] test_fe The test finite element. -+ @param[in] ceed The Ceed object. -+ @param[out] basis The `CeedBasis` to initialize. */ -+void InitInterpolatorBasis(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ Ceed ceed, -+ CeedBasis *basis); -+ -+/** @brief Initialize a CeedBasis based on an interpolation from -+ mfem::FiniteElementSpace @a trial_fes to @a test_fes, with an optional list -+ of element indices @a indices. The type of interpolation will be chosen -+ based on the map type of the provided spaces. -+ -+ @param[in] trial_fes The trial finite element space. -+ @param[in] test_fes The test finite element space. - @param[in] indices The indices of the elements of same type in the -- `FiniteElementSpace`. -+ `FiniteElementSpace`. If `indices == nullptr`, assumes -+ that the `FiniteElementSpace` is not mixed. - @param[in] ceed The Ceed object. - @param[out] basis The `CeedBasis` to initialize. */ --void InitBasisWithIndices(const FiniteElementSpace &fes, -- const IntegrationRule &ir, -- int nelem, -- const int* indices, -- Ceed ceed, CeedBasis *basis); -+inline void InitInterpolatorBasis(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ const int *indices, -+ Ceed ceed, -+ CeedBasis *basis) -+{ -+ const mfem::FiniteElement *trial_fe, *test_fe; -+ if (indices) -+ { -+ trial_fe = trial_fes.GetFE(indices[0]); -+ test_fe = test_fes.GetFE(indices[0]); -+ } -+ else -+ { -+ trial_fe = trial_fes.GetFE(0); -+ test_fe = test_fes.GetFE(0); -+ } -+ InitInterpolatorBasis(trial_fes, test_fes, *trial_fe, *test_fe, ceed, basis); -+} -+ -+inline void InitInterpolatorBasis(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ Ceed ceed, -+ CeedBasis *basis) -+{ -+ InitInterpolatorBasis(trial_fes, test_fes, nullptr, ceed, basis); -+} - - #endif - -diff --git a/fem/ceed/interface/ceed.hpp b/fem/ceed/interface/ceed.hpp -index 1e06d9ab5..1945ce3e5 100644 ---- a/fem/ceed/interface/ceed.hpp -+++ b/fem/ceed/interface/ceed.hpp -@@ -12,6 +12,7 @@ - #ifndef MFEM_LIBCEED_CEED - #define MFEM_LIBCEED_CEED - -+#include "../../../config/config.hpp" - #ifdef MFEM_USE_CEED - #include - #if !CEED_VERSION_GE(0,10,0) -@@ -24,6 +25,7 @@ namespace mfem - namespace internal - { - -+// Definition in general/device.cpp. - extern Ceed ceed; - - } // namespace internal -diff --git a/fem/ceed/interface/coefficient.hpp b/fem/ceed/interface/coefficient.hpp -index abb70e8b8..d4e067d7a 100644 ---- a/fem/ceed/interface/coefficient.hpp -+++ b/fem/ceed/interface/coefficient.hpp -@@ -15,7 +15,6 @@ - #ifdef MFEM_USE_CEED - - #include "../../../general/forall.hpp" --#include "../../../config/config.hpp" - #include "../../../linalg/vector.hpp" - #include "../../../linalg/dtensor.hpp" - #include "../../../mesh/mesh.hpp" -@@ -27,54 +26,38 @@ - namespace mfem - { - --class Mesh; --class IntegrationRule; --class Coefficient; --class VectorCoefficient; --class GridFunction; -- - namespace ceed - { - - struct Coefficient - { -+ CeedVector coeff_vector = nullptr; - const int ncomp; -- Coefficient(int ncomp_) : ncomp(ncomp_) { } -- virtual bool IsConstant() const { return true; } -- virtual ~Coefficient() { } --}; -- --struct VariableCoefficient : Coefficient --{ -- CeedVector coeffVector = nullptr; - const CeedEvalMode emode; -- VariableCoefficient(int ncomp_, CeedEvalMode emode_) -- : Coefficient(ncomp_), emode(emode_) { } -- virtual bool IsConstant() const override { return false; } -- ~VariableCoefficient() -+ Coefficient(int ncomp_, CeedEvalMode emode_) : ncomp(ncomp_), emode(emode_) {} -+ virtual ~Coefficient() - { -- CeedVectorDestroy(&coeffVector); -+ CeedVectorDestroy(&coeff_vector); - } - }; - --struct GridCoefficient : VariableCoefficient -+struct GridCoefficient : Coefficient - { - const mfem::GridFunction &gf; -- CeedBasis basis; -- CeedElemRestriction restr; -+ CeedBasis basis = nullptr; -+ CeedElemRestriction restr = nullptr; - GridCoefficient(const mfem::GridFunction &gf_) -- : VariableCoefficient(gf_.VectorDim(), CEED_EVAL_INTERP), -- gf(gf_) -+ : Coefficient(gf_.VectorDim(), CEED_EVAL_INTERP), gf(gf_) - { -- InitVector(gf, coeffVector); -+ InitVector(gf, coeff_vector); - } - }; - --struct QuadCoefficient : VariableCoefficient -+struct QuadCoefficient : Coefficient - { -- mfem::Vector coeff; -- CeedElemRestriction restr; -- QuadCoefficient(int ncomp_) : VariableCoefficient(ncomp_, CEED_EVAL_NONE) { } -+ mfem::Vector vector; -+ CeedElemRestriction restr = nullptr; -+ QuadCoefficient(int ncomp_) : Coefficient(ncomp_, CEED_EVAL_NONE) {} - }; - - /** @brief Initializes an mfem::ceed::Coefficient @a coeff_ptr from an -@@ -84,73 +67,65 @@ struct QuadCoefficient : VariableCoefficient - @param[in] Q is the coefficient from the `Integrator`. - @param[in] mesh is the mesh. - @param[in] ir is the integration rule. -+ @param[in] use_bdr is a flag to construct the coefficient on mesh boundaries. - @param[out] coeff_ptr is the structure to store the coefficient for the -- `CeedOperator`. -- @param[out] ctx is the Context associated to the QFunction. */ --template --void InitCoefficient(mfem::Coefficient *Q, mfem::Mesh &mesh, -- const mfem::IntegrationRule &ir, -- Coefficient*& coeff_ptr, Context &ctx) -+ `CeedOperator`. */ -+inline void InitCoefficient(mfem::Coefficient *Q, mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, bool use_bdr, -+ Coefficient *&coeff_ptr) - { -- if ( Q == nullptr ) -+ if (Q == nullptr || dynamic_cast(Q)) - { -- Coefficient *ceedCoeff = new Coefficient(1); -- ctx.coeff = 1.0; -- coeff_ptr = ceedCoeff; -+ // The constant coefficient case is handled by the QFunction context -+ coeff_ptr = nullptr; - } -- else if (ConstantCoefficient *const_coeff = -- dynamic_cast(Q)) -+ else if (mfem::GridFunctionCoefficient *gf_coeff = -+ dynamic_cast(Q)) - { -- Coefficient *ceedCoeff = new Coefficient(1); -- ctx.coeff = const_coeff->constant; -- coeff_ptr = ceedCoeff; -- } -- else if (GridFunctionCoefficient* gf_coeff = -- dynamic_cast(Q)) -- { -- GridCoefficient *ceedCoeff = -+ GridCoefficient *ceed_coeff = - new GridCoefficient(*gf_coeff->GetGridFunction()); -- coeff_ptr = ceedCoeff; -+ coeff_ptr = ceed_coeff; - } -- else if (QuadratureFunctionCoefficient *cQ = -- dynamic_cast(Q)) -+ else if (mfem::QuadratureFunctionCoefficient *qf_coeff = -+ dynamic_cast(Q)) - { -- QuadCoefficient *ceedCoeff = new QuadCoefficient(1); -- const int ne = mesh.GetNE(); -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); - const int nq = ir.GetNPoints(); -- const mfem::QuadratureFunction &qFun = cQ->GetQuadFunction(); -- MFEM_VERIFY(qFun.Size() == nq * ne, -- "Incompatible QuadratureFunction dimension \n"); -- -- MFEM_VERIFY(&ir == &qFun.GetSpace()->GetIntRule(0), -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(1); -+ const mfem::QuadratureFunction &qfunc = qf_coeff->GetQuadFunction(); -+ MFEM_VERIFY(qfunc.Size() == nq * ne, -+ "Incompatible QuadratureFunction dimension."); -+ MFEM_VERIFY(&ir == &qfunc.GetSpace()->GetIntRule(0), - "IntegrationRule used within integrator and in" -- " QuadratureFunction appear to be different"); -- qFun.Read(); -- ceedCoeff->coeff.MakeRef(const_cast(qFun),0); -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ " QuadratureFunction appear to be different."); -+ qfunc.Read(); -+ ceed_coeff->vector.MakeRef(const_cast(qfunc), 0); -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - else - { -- QuadCoefficient *ceedCoeff = new QuadCoefficient(1); -- const int ne = mesh.GetNE(); -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); - const int nq = ir.GetNPoints(); -- ceedCoeff->coeff.SetSize(nq * ne); -- auto C = Reshape(ceedCoeff->coeff.HostWrite(), nq, ne); -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(1); -+ ceed_coeff->vector.SetSize(nq * ne); -+ auto C = Reshape(ceed_coeff->vector.HostWrite(), nq, ne); - for (int e = 0; e < ne; ++e) - { -- mfem::ElementTransformation &T = *mesh.GetElementTransformation(e); -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(e) : -+ *mesh.GetElementTransformation(e); - for (int q = 0; q < nq; ++q) - { -- C(q,e) = Q->Eval(T, ir.IntPoint(q)); -+ const IntegrationPoint &ip = ir.IntPoint(q); -+ T.SetIntPoint(&ip); -+ C(q, e) = Q->Eval(T, ip); - } - } -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - } - -- - /** @brief Initializes an mfem::ceed::Coefficient @a coeff_ptr from an - mfem::VectorCoefficient @a VQ, an mfem::Mesh @a mesh, and an - mfem::IntegrationRule @a ir. -@@ -158,75 +133,121 @@ void InitCoefficient(mfem::Coefficient *Q, mfem::Mesh &mesh, - @param[in] VQ is the vector coefficient from the `Integrator`. - @param[in] mesh is the mesh. - @param[in] ir is the integration rule. -+ @param[in] use_bdr is a flag to construct the coefficient on mesh boundaries. - @param[out] coeff_ptr is the structure to store the coefficient for the -- `CeedOperator`. -- @param[out] ctx is the Context associated to the QFunction. */ --template --void InitCoefficient(mfem::VectorCoefficient *VQ, mfem::Mesh &mesh, -- const mfem::IntegrationRule &ir, -- Coefficient *&coeff_ptr, Context &ctx) -+ `CeedOperator`. */ -+inline void InitCoefficient(mfem::VectorCoefficient *VQ, mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, bool use_bdr, -+ Coefficient *&coeff_ptr) - { -- if (VectorConstantCoefficient *const_coeff = -- dynamic_cast(VQ)) -+ if (VQ == nullptr || dynamic_cast(VQ)) - { -- const int vdim = const_coeff->GetVDim(); -- const mfem::Vector &val = const_coeff->GetVec(); -- Coefficient *ceedCoeff = new Coefficient(vdim); -- for (int i = 0; i < vdim; i++) -- { -- ctx.coeff[i] = val[i]; -- } -- coeff_ptr = ceedCoeff; -+ // The constant coefficient case is handled by the QFunction context -+ coeff_ptr = nullptr; - } -- else if (VectorGridFunctionCoefficient* vgf_coeff = -- dynamic_cast(VQ)) -+ else if (mfem::VectorGridFunctionCoefficient *vgf_coeff = -+ dynamic_cast(VQ)) - { -- GridCoefficient *ceedCoeff = -+ GridCoefficient *ceed_coeff = - new GridCoefficient(*vgf_coeff->GetGridFunction()); -- coeff_ptr = ceedCoeff; -+ coeff_ptr = ceed_coeff; - } -- else if (VectorQuadratureFunctionCoefficient *cQ = -- dynamic_cast(VQ)) -+ else if (mfem::VectorQuadratureFunctionCoefficient *vqf_coeff = -+ dynamic_cast(VQ)) - { -- QuadCoefficient *ceedCoeff = new QuadCoefficient(cQ->GetVDim()); -- const int dim = mesh.Dimension(); -- const int ne = mesh.GetNE(); -+ const int vdim = vqf_coeff->GetVDim(); -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); - const int nq = ir.GetNPoints(); -- const mfem::QuadratureFunction &qFun = cQ->GetQuadFunction(); -- MFEM_VERIFY(qFun.Size() == dim * nq * ne, -- "Incompatible QuadratureFunction dimension \n"); -- -- MFEM_VERIFY(&ir == &qFun.GetSpace()->GetIntRule(0), -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(vdim); -+ const mfem::QuadratureFunction &qfunc = vqf_coeff->GetQuadFunction(); -+ MFEM_VERIFY(qfunc.Size() == vdim * nq * ne, -+ "Incompatible QuadratureFunction dimension."); -+ MFEM_VERIFY(&ir == &qfunc.GetSpace()->GetIntRule(0), - "IntegrationRule used within integrator and in" -- " QuadratureFunction appear to be different"); -- qFun.Read(); -- ceedCoeff->coeff.MakeRef(const_cast(qFun),0); -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ " QuadratureFunction appear to be different."); -+ qfunc.Read(); -+ ceed_coeff->vector.MakeRef(const_cast(qfunc), 0); -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - else - { -- const int dim = mesh.Dimension(); -- QuadCoefficient *ceedCoeff = new QuadCoefficient(dim); -- const int ne = mesh.GetNE(); -+ const int vdim = VQ->GetVDim(); -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); - const int nq = ir.GetNPoints(); -- ceedCoeff->coeff.SetSize(dim * nq * ne); -- auto C = Reshape(ceedCoeff->coeff.HostWrite(), dim, nq, ne); -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(vdim); -+ ceed_coeff->vector.SetSize(vdim * nq * ne); -+ auto C = Reshape(ceed_coeff->vector.HostWrite(), vdim, nq, ne); - mfem::DenseMatrix Q_ir; - for (int e = 0; e < ne; ++e) - { -- mfem::ElementTransformation &T = *mesh.GetElementTransformation(e); -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(e) : -+ *mesh.GetElementTransformation(e); - VQ->Eval(Q_ir, T, ir); - for (int q = 0; q < nq; ++q) - { -- for (int i = 0; i < dim; ++i) -+ for (int i = 0; i < vdim; ++i) -+ { -+ C(i, q, e) = Q_ir(i, q); -+ } -+ } -+ } -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; -+ } -+} -+ -+/** @brief Initializes an mfem::ceed::Coefficient @a coeff_ptr from an -+ mfem::MatrixCoefficient @a MQ, an mfem::Mesh @a mesh, and an -+ mfem::IntegrationRule @a ir. -+ -+ @param[in] MQ is the matrix coefficient from the `Integrator`. -+ @param[in] mesh is the mesh. -+ @param[in] ir is the integration rule. -+ @param[in] use_bdr is a flag to construct the coefficient on mesh boundaries. -+ @param[out] coeff_ptr is the structure to store the coefficient for the -+ `CeedOperator`. */ -+inline void InitCoefficient(mfem::MatrixCoefficient *MQ, mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, bool use_bdr, -+ Coefficient *&coeff_ptr) -+{ -+ if (MQ == nullptr || dynamic_cast(MQ)) -+ { -+ // The constant coefficient case is handled by the QFunction context -+ coeff_ptr = nullptr; -+ } -+ else -+ { -+ // Assumes matrix coefficient is symmetric -+ const int vdim = MQ->GetVDim(); -+ const int ncomp = (vdim * (vdim + 1)) / 2; -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); -+ const int nq = ir.GetNPoints(); -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(ncomp); -+ ceed_coeff->vector.SetSize(ncomp * nq * ne); -+ auto C = Reshape(ceed_coeff->vector.HostWrite(), ncomp, nq, ne); -+ mfem::DenseMatrix Q_ip; -+ for (int e = 0; e < ne; ++e) -+ { -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(e) : -+ *mesh.GetElementTransformation(e); -+ for (int q = 0; q < nq; ++q) -+ { -+ const IntegrationPoint &ip = ir.IntPoint(q); -+ T.SetIntPoint(&ip); -+ MQ->Eval(Q_ip, T, ip); -+ for (int j = 0; j < vdim; ++j) - { -- C(i,q,e) = Q_ir(i,q); -+ for (int i = j; i < vdim; ++i) -+ { -+ const int idx = (j * vdim) - (((j - 1) * j) / 2) + i - j; -+ C(idx, q, e) = Q_ip(i, j); // Column-major -+ } - } - } - } -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - } - -@@ -237,57 +258,49 @@ void InitCoefficient(mfem::VectorCoefficient *VQ, mfem::Mesh &mesh, - @param[in] Q is the coefficient from the `Integrator`. - @param[in] mesh is the mesh. - @param[in] ir is the integration rule. -- @param[in] nelem The number of elements. -- @param[in] indices The indices of the elements of same type in the -+ @param[in] use_bdr is a flag to construct the coefficient on mesh boundaries. -+ @param[in] nelem is the number of elements. -+ @param[in] indices are the indices of the elements of same type in the - `FiniteElementSpace`. - @param[out] coeff_ptr is the structure to store the coefficient for the -- `CeedOperator`. -- @param[out] ctx is the Context associated to the QFunction. */ --template --void InitCoefficientWithIndices(mfem::Coefficient *Q, mfem::Mesh &mesh, -- const mfem::IntegrationRule &ir, -- int nelem, -- const int* indices, -- Coefficient*& coeff_ptr, Context &ctx) -+ `CeedOperator`. */ -+inline void InitCoefficientWithIndices(mfem::Coefficient *Q, -+ mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, -+ bool use_bdr, -+ int nelem, -+ const int *indices, -+ Coefficient *&coeff_ptr) - { -- if ( Q == nullptr ) -- { -- Coefficient *ceedCoeff = new Coefficient(1); -- ctx.coeff = 1.0; -- coeff_ptr = ceedCoeff; -- } -- else if (ConstantCoefficient *const_coeff = -- dynamic_cast(Q)) -+ if (Q == nullptr || dynamic_cast(Q)) - { -- Coefficient *ceedCoeff = new Coefficient(1); -- ctx.coeff = const_coeff->constant; -- coeff_ptr = ceedCoeff; -+ // The constant coefficient case is handled by the QFunction context -+ coeff_ptr = nullptr; - } -- else if (GridFunctionCoefficient* gf_coeff = -- dynamic_cast(Q)) -+ else if (mfem::GridFunctionCoefficient *gf_coeff = -+ dynamic_cast(Q)) - { -- GridCoefficient *ceedCoeff = -+ GridCoefficient *ceed_coeff = - new GridCoefficient(*gf_coeff->GetGridFunction()); -- coeff_ptr = ceedCoeff; -+ coeff_ptr = ceed_coeff; - } -- else if (QuadratureFunctionCoefficient *cQ = -- dynamic_cast(Q)) -+ else if (mfem::QuadratureFunctionCoefficient *qf_coeff = -+ dynamic_cast(Q)) - { -- QuadCoefficient *ceedCoeff = new QuadCoefficient(1); -- const int ne = mesh.GetNE(); -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); - const int nq = ir.GetNPoints(); -- const mfem::QuadratureFunction &qFun = cQ->GetQuadFunction(); -- MFEM_VERIFY(qFun.Size() == nq * ne, -- "Incompatible QuadratureFunction dimension \n"); -- -- MFEM_VERIFY(&ir == &qFun.GetSpace()->GetIntRule(0), -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(1); -+ ceed_coeff->vector.SetSize(nq * nelem); -+ const mfem::QuadratureFunction &qfunc = qf_coeff->GetQuadFunction(); -+ MFEM_VERIFY(qfunc.Size() == nq * ne, -+ "Incompatible QuadratureFunction dimension."); -+ MFEM_VERIFY(&ir == &qfunc.GetSpace()->GetIntRule(0), - "IntegrationRule used within integrator and in" -- " QuadratureFunction appear to be different"); -- ceedCoeff->coeff.SetSize(nq * nelem); -+ " QuadratureFunction appear to be different."); - Memory m_indices((int*)indices, nelem, false); -- auto in = Reshape(qFun.Read(), nq, ne); -+ auto in = Reshape(qfunc.Read(), nq, ne); - auto d_indices = Read(m_indices, nelem); -- auto out = Reshape(ceedCoeff->coeff.Write(), nq, nelem); -+ auto out = Reshape(ceed_coeff->vector.Write(), nq, nelem); - mfem::forall(nelem * nq, [=] MFEM_HOST_DEVICE (int i) - { - const int q = i%nq; -@@ -296,30 +309,32 @@ void InitCoefficientWithIndices(mfem::Coefficient *Q, mfem::Mesh &mesh, - out(q, sub_e) = in(q, e); - }); - m_indices.DeleteDevice(); -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - else - { -- QuadCoefficient *ceedCoeff = new QuadCoefficient(1); - const int nq = ir.GetNPoints(); -- ceedCoeff->coeff.SetSize(nq * nelem); -- auto C = Reshape(ceedCoeff->coeff.HostWrite(), nq, nelem); -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(1); -+ ceed_coeff->vector.SetSize(nq * nelem); -+ auto C = Reshape(ceed_coeff->vector.HostWrite(), nq, nelem); - for (int i = 0; i < nelem; ++i) - { - const int e = indices[i]; -- mfem::ElementTransformation &T = *mesh.GetElementTransformation(e); -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(e) : -+ *mesh.GetElementTransformation(e); - for (int q = 0; q < nq; ++q) - { -- C(q, i) = Q->Eval(T, ir.IntPoint(q)); -+ const IntegrationPoint &ip = ir.IntPoint(q); -+ T.SetIntPoint(&ip); -+ C(q, i) = Q->Eval(T, ip); - } - } -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - } - -- - /** @brief Initializes an mfem::ceed::Coefficient @a coeff_ptr from an - mfem::VectorCoefficient @a Q, an mfem::Mesh @a mesh, and an - mfem::IntegrationRule @a ir for the elements given by the indices @a indices. -@@ -327,109 +342,165 @@ void InitCoefficientWithIndices(mfem::Coefficient *Q, mfem::Mesh &mesh, - @param[in] VQ is the vector coefficient from the `Integrator`. - @param[in] mesh is the mesh. - @param[in] ir is the integration rule. -- @param[in] nelem The number of elements. -- @param[in] indices The indices of the elements of same type in the -+ @param[in] use_bdr is a flag to construct the coefficient on mesh boundaries. -+ @param[in] nelem is the number of elements. -+ @param[in] indices are the indices of the elements of same type in the - `FiniteElementSpace`. - @param[out] coeff_ptr is the structure to store the coefficient for the -- `CeedOperator`. -- @param[out] ctx is the Context associated to the QFunction. */ --template --void InitCoefficientWithIndices(mfem::VectorCoefficient *VQ, mfem::Mesh &mesh, -- const mfem::IntegrationRule &ir, -- int nelem, -- const int* indices, -- Coefficient *&coeff_ptr, Context &ctx) -+ `CeedOperator`. */ -+inline void InitCoefficientWithIndices(mfem::VectorCoefficient *VQ, -+ mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, -+ bool use_bdr, -+ int nelem, const int *indices, -+ Coefficient *&coeff_ptr) - { -- if (VectorConstantCoefficient *const_coeff = -- dynamic_cast(VQ)) -+ if (VQ == nullptr || dynamic_cast(VQ)) - { -- const int vdim = const_coeff->GetVDim(); -- const mfem::Vector &val = const_coeff->GetVec(); -- Coefficient *ceedCoeff = new Coefficient(vdim); -- for (int i = 0; i < vdim; i++) -- { -- ctx.coeff[i] = val[i]; -- } -- coeff_ptr = ceedCoeff; -+ // The constant coefficient case is handled by the QFunction context -+ coeff_ptr = nullptr; - } -- else if (VectorGridFunctionCoefficient* vgf_coeff = -- dynamic_cast(VQ)) -+ else if (mfem::VectorGridFunctionCoefficient *vgf_coeff = -+ dynamic_cast(VQ)) - { -- GridCoefficient *ceedCoeff = -+ GridCoefficient *ceed_coeff = - new GridCoefficient(*vgf_coeff->GetGridFunction()); -- coeff_ptr = ceedCoeff; -+ coeff_ptr = ceed_coeff; - } -- else if (VectorQuadratureFunctionCoefficient *cQ = -- dynamic_cast(VQ)) -+ else if (mfem::VectorQuadratureFunctionCoefficient *vqf_coeff = -+ dynamic_cast(VQ)) - { -- QuadCoefficient *ceedCoeff = new QuadCoefficient(cQ->GetVDim()); -- const int dim = mesh.Dimension(); -- const int ne = mesh.GetNE(); -+ const int vdim = vqf_coeff->GetVDim(); -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); - const int nq = ir.GetNPoints(); -- const mfem::QuadratureFunction &qFun = cQ->GetQuadFunction(); -- MFEM_VERIFY(qFun.Size() == dim * nq * ne, -- "Incompatible QuadratureFunction dimension \n"); -- -- MFEM_VERIFY(&ir == &qFun.GetSpace()->GetIntRule(0), -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(vdim); -+ ceed_coeff->vector.SetSize(vdim * nq * nelem); -+ const mfem::QuadratureFunction &qfunc = vqf_coeff->GetQuadFunction(); -+ MFEM_VERIFY(qfunc.Size() == vdim * nq * ne, -+ "Incompatible QuadratureFunction dimension."); -+ MFEM_VERIFY(&ir == &qfunc.GetSpace()->GetIntRule(0), - "IntegrationRule used within integrator and in" -- " QuadratureFunction appear to be different"); -- ceedCoeff->coeff.SetSize(dim * nq * nelem); -+ " QuadratureFunction appear to be different."); - Memory m_indices((int*)indices, nelem, false); -- auto in = Reshape(qFun.Read(), dim, nq, ne); -+ auto in = Reshape(qfunc.Read(), vdim, nq, ne); - auto d_indices = Read(m_indices, nelem); -- auto out = Reshape(ceedCoeff->coeff.Write(), dim, nq, nelem); -+ auto out = Reshape(ceed_coeff->vector.Write(), vdim, nq, nelem); - mfem::forall(nelem * nq, [=] MFEM_HOST_DEVICE (int i) - { - const int q = i%nq; - const int sub_e = i/nq; - const int e = d_indices[sub_e]; -- for (int d = 0; d < dim; d++) -+ for (int d = 0; d < vdim; d++) - { - out(d, q, sub_e) = in(d, q, e); - } - }); - m_indices.DeleteDevice(); -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - else - { -- const int dim = mesh.Dimension(); -- QuadCoefficient *ceedCoeff = new QuadCoefficient(dim); -+ const int vdim = VQ->GetVDim(); - const int nq = ir.GetNPoints(); -- ceedCoeff->coeff.SetSize(dim * nq * nelem); -- auto C = Reshape(ceedCoeff->coeff.HostWrite(), dim, nq, nelem); -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(vdim); -+ ceed_coeff->vector.SetSize(vdim * nq * nelem); -+ auto C = Reshape(ceed_coeff->vector.HostWrite(), vdim, nq, nelem); - mfem::DenseMatrix Q_ir; - for (int i = 0; i < nelem; ++i) - { - const int e = indices[i]; -- mfem::ElementTransformation &T = *mesh.GetElementTransformation(e); -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(e) : -+ *mesh.GetElementTransformation(e); - VQ->Eval(Q_ir, T, ir); - for (int q = 0; q < nq; ++q) - { -- for (int d = 0; d < dim; ++d) -+ for (int d = 0; d < vdim; ++d) - { - C(d, q, i) = Q_ir(d, q); - } - } - } -- InitVector(ceedCoeff->coeff, ceedCoeff->coeffVector); -- coeff_ptr = ceedCoeff; -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; -+ } -+} -+ -+/** @brief Initializes an mfem::ceed::Coefficient @a coeff_ptr from an -+ mfem::MatrixCoefficient @a Q, an mfem::Mesh @a mesh, and an -+ mfem::IntegrationRule @a ir for the elements given by the indices @a indices. -+ -+ @param[in] MQ is the matrix coefficient from the `Integrator`. -+ @param[in] mesh is the mesh. -+ @param[in] ir is the integration rule. -+ @param[in] use_bdr is a flag to construct the coefficient on mesh boundaries. -+ @param[in] nelem is the number of elements. -+ @param[in] indices are the indices of the elements of same type in the -+ `FiniteElementSpace`. -+ @param[out] coeff_ptr is the structure to store the coefficient for the -+ `CeedOperator`. */ -+inline void InitCoefficientWithIndices(mfem::MatrixCoefficient *MQ, -+ mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, -+ bool use_bdr, -+ int nelem, const int *indices, -+ Coefficient *&coeff_ptr) -+{ -+ if (MQ == nullptr || dynamic_cast(MQ)) -+ { -+ // The constant coefficient case is handled by the QFunction context -+ coeff_ptr = nullptr; -+ } -+ else -+ { -+ // Assumes matrix coefficient is symmetric -+ const int vdim = MQ->GetVDim(); -+ const int ncomp = (vdim * (vdim + 1)) / 2; -+ const int nq = ir.GetNPoints(); -+ QuadCoefficient *ceed_coeff = new QuadCoefficient(ncomp); -+ ceed_coeff->vector.SetSize(ncomp * nq * nelem); -+ auto C = Reshape(ceed_coeff->vector.HostWrite(), ncomp, nq, nelem); -+ mfem::DenseMatrix Q_ip; -+ for (int i = 0; i < nelem; ++i) -+ { -+ const int e = indices[i]; -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(e) : -+ *mesh.GetElementTransformation(e); -+ for (int q = 0; q < nq; ++q) -+ { -+ const IntegrationPoint &ip = ir.IntPoint(q); -+ T.SetIntPoint(&ip); -+ MQ->Eval(Q_ip, T, ip); -+ for (int dj = 0; dj < vdim; ++dj) -+ { -+ for (int di = dj; di < vdim; ++di) -+ { -+ const int idx = (dj * vdim) - (((dj - 1) * dj) / 2) + di - dj; -+ C(idx, q, i) = Q_ip(di, dj); // Column-major -+ } -+ } -+ } -+ } -+ InitVector(ceed_coeff->vector, ceed_coeff->coeff_vector); -+ coeff_ptr = ceed_coeff; - } - } - --template --void InitCoefficient(Coeff *Q, mfem::Mesh &mesh, -- const mfem::IntegrationRule &ir, int nelem, -- const int* indices, Coefficient *&coeff_ptr, Context &ctx) -+template -+inline void InitCoefficient(CoeffType *Q, mfem::Mesh &mesh, -+ const mfem::IntegrationRule &ir, -+ bool use_bdr, -+ int nelem, -+ const int *indices, -+ Coefficient *&coeff_ptr) - { - if (indices) - { -- InitCoefficientWithIndices(Q, mesh, ir, nelem, indices, coeff_ptr, ctx); -+ InitCoefficientWithIndices(Q, mesh, ir, use_bdr, nelem, indices, coeff_ptr); - } - else - { -- InitCoefficient(Q, mesh, ir, coeff_ptr, ctx); -+ InitCoefficient(Q, mesh, ir, use_bdr, coeff_ptr); - } - } - -diff --git a/fem/ceed/interface/integrator.hpp b/fem/ceed/interface/integrator.hpp -index eea79388d..aa24ec207 100644 ---- a/fem/ceed/interface/integrator.hpp -+++ b/fem/ceed/interface/integrator.hpp -@@ -9,14 +9,14 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#ifndef MFEM_LIBCEED_INTEG --#define MFEM_LIBCEED_INTEG -+#ifndef MFEM_LIBCEED_INTEGRATOR -+#define MFEM_LIBCEED_INTEGRATOR - --#include "../../../config/config.hpp" - #include "../../fespace.hpp" - #include "../../gridfunc.hpp" --#include "operator.hpp" -+#include "basis.hpp" - #include "coefficient.hpp" -+#include "operator.hpp" - #include "restriction.hpp" - #include "util.hpp" - #include "ceed.hpp" -@@ -28,43 +28,27 @@ namespace ceed - { - - /** The different evaluation modes available for PA and MF CeedIntegrator. */ --enum class EvalMode { None, Interp, Grad, InterpAndGrad }; -+enum class EvalMode { None, Interp, Grad, InterpAndGrad, Div, Curl }; - - #ifdef MFEM_USE_CEED - /** This structure is a template interface for the Assemble methods of - PAIntegrator and MFIntegrator. See ceed/mass.cpp for an example. */ - struct OperatorInfo - { -- /** The path to the qFunction header. */ -+ /** The path to the QFunction header. */ - const char *header; -- /** The name of the qFunction to build a partially assembled CeedOperator -- with a constant Coefficient. */ -- const char *build_func_const; -- /** The qFunction to build a partially assembled CeedOperator with a constant -- Coefficient. */ -- CeedQFunctionUser build_qf_const; -- /** The name of the qFunction to build a partially assembled CeedOperator -- with a variable Coefficient. */ -- const char *build_func_quad; -- /** The qFunction to build a partially assembled CeedOperator with a variable -- Coefficient. */ -- CeedQFunctionUser build_qf_quad; -- /** The name of the qFunction to apply a partially assembled CeedOperator. */ -+ /** The name of the QFunction to build a partially assembled CeedOperator. */ -+ const char *build_func; -+ /** The QFunction to build a partially assembled CeedOperator. */ -+ CeedQFunctionUser build_qf; -+ /** The name of the QFunction to apply a partially assembled CeedOperator. */ - const char *apply_func; -- /** The qFunction to apply a partially assembled CeedOperator. */ -+ /** The QFunction to apply a partially assembled CeedOperator. */ - CeedQFunctionUser apply_qf; -- /** The name of the qFunction to apply a matrix-free CeedOperator with a -- constant Coefficient. */ -- const char *apply_func_mf_const; -- /** The qFunction to apply a matrix-free CeedOperator with a constant -- Coefficient. */ -- CeedQFunctionUser apply_qf_mf_const; -- /** The name of the qFunction to apply a matrix-free CeedOperator with a -- variable Coefficient. */ -- const char *apply_func_mf_quad; -- /** The qFunction to apply a matrix-free CeedOperator with a variable -- Coefficient. */ -- CeedQFunctionUser apply_qf_mf_quad; -+ /** The name of the QFunction to apply a matrix-free CeedOperator. */ -+ const char *apply_func_mf; -+ /** The QFunction to apply a matrix-free CeedOperator. */ -+ CeedQFunctionUser apply_qf_mf; - /** The EvalMode on the trial basis functions. */ - EvalMode trial_op; - /** The EvalMode on the test basis functions. */ -@@ -74,284 +58,347 @@ struct OperatorInfo - }; - #endif - --/** This class represent a partially assembled operator using libCEED. */ --class PAIntegrator : public ceed::Operator -+/** This class represents a matrix-free or partially assembled bilinear, -+ mixed bilinear, or nonlinear form operator using libCEED. */ -+class Integrator : public Operator - { - #ifdef MFEM_USE_CEED - protected: -- CeedBasis trial_basis, test_basis, mesh_basis; -- CeedElemRestriction trial_restr, test_restr, mesh_restr, restr_i; -- CeedQFunction build_qfunc, apply_qfunc; -+ CeedBasis trial_basis, test_basis, mesh_basis; -+ CeedElemRestriction trial_restr, test_restr, mesh_restr, qdata_restr; -+ CeedQFunction apply_qfunc; -+ CeedQFunctionContext apply_ctx; - CeedVector node_coords, qdata; - Coefficient *coeff; -- CeedQFunctionContext build_ctx; -- CeedOperator build_oper; - - public: -- PAIntegrator() -+ Integrator() - : Operator(), - trial_basis(nullptr), test_basis(nullptr), mesh_basis(nullptr), - trial_restr(nullptr), test_restr(nullptr), mesh_restr(nullptr), -- restr_i(nullptr), -- build_qfunc(nullptr), apply_qfunc(nullptr), node_coords(nullptr), -- qdata(nullptr), coeff(nullptr), build_ctx(nullptr), build_oper(nullptr) -- { } -+ qdata_restr(nullptr), -+ apply_qfunc(nullptr), apply_ctx(nullptr), -+ node_coords(nullptr), qdata(nullptr), coeff(nullptr) {} - -- /** @brief This method assembles the `PAIntegrator` with the given -+ /** @brief This method assembles the `Integrator` with the given - `CeedOperatorInfo` @a info, an `mfem::FiniteElementSpace` @a fes, an -- `mfem::IntegrationRule` @a ir, and `mfem::Coefficient` or -- `mfem::VectorCoefficient` @a Q. -+ `mfem::IntegrationRule` @a ir, and `mfem::Coefficient`, -+ `mfem::VectorCoefficient`, or `mfem::MatrixCoefficient` @a Q. - The `CeedOperatorInfo` type is expected to inherit from `OperatorInfo`, -- and contain a `Context` type relevant to the qFunctions. -- -- @param[in] info is the structure describing the CeedOperator to assemble. -- @param[in] fes is the finite element space. -- @param[in] ir is the integration rule for the operator. -- @param[in] Q is the coefficient from the `Integrator`. */ -+ and contain a `Context` type relevant to the QFunctions. -+ -+ @param[in] info The structure describing the CeedOperator to assemble. -+ @param[in] fes The finite element space. -+ @param[in] ir The integration rule for the operator. -+ @param[in] Q The coefficient from the `Integrator`. -+ @param[in] use_bdr Controls whether to construct the operator for the domain -+ or domain boundary. -+ @param[in] use_mf Controls whether to construct a matrix-free or partially -+ assembled operator. */ - template - void Assemble(CeedOperatorInfo &info, - const mfem::FiniteElementSpace &fes, - const mfem::IntegrationRule &ir, -- CoeffType *Q) -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) - { -- Assemble(info, fes, ir, fes.GetNE(), nullptr, Q); -+ Assemble(info, fes, fes, ir, -+ use_bdr ? fes.GetNBE() : fes.GetNE(), -+ nullptr, Q, use_bdr, use_mf); - } - -- /** @brief This method assembles the `PAIntegrator` with the given -+ /** @brief This method assembles the `Integrator` with the given - `CeedOperatorInfo` @a info, an `mfem::FiniteElementSpace` @a fes, an -- `mfem::IntegrationRule` @a ir, and `mfem::Coefficient` or -- `mfem::VectorCoefficient` @a Q for the elements given by the indices -- @a indices. -+ `mfem::IntegrationRule` @a ir, and `mfem::Coefficient`, -+ `mfem::VectorCoefficient`, or `mfem::MatrixCoefficient` @a Q for the -+ elements given by the indices @a indices. - The `CeedOperatorInfo` type is expected to inherit from `OperatorInfo`, -- and contain a `Context` type relevant to the qFunctions. -+ and contain a `Context` type relevant to the QFunctions. - -- @param[in] info is the structure describing the CeedOperator to assemble. -- @param[in] fes is the finite element space. -- @param[in] ir is the integration rule for the operator. -+ @param[in] info The structure describing the CeedOperator to assemble. -+ @param[in] fes The finite element space. -+ @param[in] ir The integration rule for the operator. - @param[in] nelem The number of elements. - @param[in] indices The indices of the elements of same type in the - `FiniteElementSpace`. If `indices == nullptr`, assumes - that the `FiniteElementSpace` is not mixed. -- @param[in] Q is the coefficient from the `Integrator`. */ -+ @param[in] Q The coefficient from the `Integrator`. -+ @param[in] use_bdr Controls whether to construct the operator for the domain -+ or domain boundary. -+ @param[in] use_mf Controls whether to construct a matrix-free or partially -+ assembled operator. */ - template - void Assemble(CeedOperatorInfo &info, - const mfem::FiniteElementSpace &fes, - const mfem::IntegrationRule &ir, - int nelem, -- const int* indices, -- CoeffType *Q) -+ const int *indices, -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) - { -- Assemble(info, fes, fes, ir, nelem, indices, Q); -+ Assemble(info, fes, fes, ir, nelem, indices, Q, use_bdr, use_mf); - } - -- /** This method assembles the PAIntegrator for mixed forms. -+ /** This method assembles the `Integrator` for mixed forms. - -- @param[in] info the `CeedOperatorInfo` describing the `CeedOperator`, -+ @param[in] info The `CeedOperatorInfo` describing the `CeedOperator`, - the `CeedOperatorInfo` type is expected to inherit from - `OperatorInfo` and contain a `Context` type relevant to -- the qFunctions. -- @param[in] trial_fes the trial `FiniteElementSpace` for the form, -- @param[in] test_fes the test `FiniteElementSpace` for the form, -- @param[in] ir the `IntegrationRule` for the numerical integration, -- @param[in] Q `Coefficient` or `VectorCoefficient`. */ -+ the QFunctions. -+ @param[in] trial_fes The trial `FiniteElementSpace` for the form. -+ @param[in] test_fes The test `FiniteElementSpace` for the form. -+ @param[in] ir The `IntegrationRule` for the numerical integration. -+ @param[in] Q `Coefficient`, `VectorCoefficient`, or -+ `MatrixCoefficient`. -+ @param[in] use_bdr Controls whether to construct the operator for the domain -+ or domain boundary. -+ @param[in] use_mf Controls whether to construct a matrix-free or partially -+ assembled operator. */ - template - void Assemble(CeedOperatorInfo &info, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - const mfem::IntegrationRule &ir, -- CoeffType *Q) -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) - { -- Assemble(info, trial_fes, test_fes, ir, trial_fes.GetNE(), nullptr, Q); -+ Assemble(info, trial_fes, test_fes, ir, -+ use_bdr ? trial_fes.GetNBE() : trial_fes.GetNE(), -+ nullptr, Q, use_bdr, use_mf); - } - -- /** This method assembles the PAIntegrator for mixed forms on mixed meshes. -+ /** This method assembles the `Integrator` for mixed forms on mixed meshes. - -- @param[in] info the `CeedOperatorInfo` describing the `CeedOperator`, -+ @param[in] info The `CeedOperatorInfo` describing the `CeedOperator`, - the `CeedOperatorInfo` type is expected to inherit from - `OperatorInfo` and contain a `Context` type relevant to -- the qFunctions. -- @param[in] trial_fes the trial `FiniteElementSpace` for the form, -- @param[in] test_fes the test `FiniteElementSpace` for the form, -- @param[in] ir the `IntegrationRule` for the numerical integration, -- @param[in] nelem The number of elements, -+ the QFunctions. -+ @param[in] trial_fes The trial `FiniteElementSpace` for the form. -+ @param[in] test_fes The test `FiniteElementSpace` for the form. -+ @param[in] ir The `IntegrationRule` for the numerical integration. -+ @param[in] nelem The number of elements. - @param[in] indices The indices of the elements of same type in the - `FiniteElementSpace`. If `indices == nullptr`, assumes -- that the `FiniteElementSpace` is not mixed, -- @param[in] Q `Coefficient` or `VectorCoefficient`. */ -+ that the `FiniteElementSpace` is not mixed. -+ @param[in] Q `Coefficient`, `VectorCoefficient`, or -+ `MatrixCoefficient`. -+ @param[in] use_bdr Controls whether to construct the operator for the domain -+ or domain boundary. -+ @param[in] use_mf Controls whether to construct a matrix-free or partially -+ assembled operator. */ - template - void Assemble(CeedOperatorInfo &info, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - const mfem::IntegrationRule &ir, - int nelem, -- const int* indices, -- CoeffType *Q) -+ const int *indices, -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) - { - Ceed ceed(internal::ceed); - mfem::Mesh &mesh = *trial_fes.GetMesh(); -- MFEM_VERIFY(!(!indices && mesh.GetNumGeometries(mesh.Dimension()) > 1), -- "Use ceed::MixedIntegrator on mixed meshes."); -- InitCoefficient(Q, mesh, ir, nelem, indices, coeff, info.ctx); -- bool const_coeff = coeff->IsConstant(); -- std::string build_func = const_coeff ? info.build_func_const -- : info.build_func_quad; -- CeedQFunctionUser build_qf = const_coeff ? info.build_qf_const -- : info.build_qf_quad; -- PAOperator op {info.qdatasize, info.header, -- build_func, build_qf, -- info.apply_func, info.apply_qf, -- info.trial_op, -- info.test_op -- }; -- CeedInt dim = mesh.SpaceDimension(); -+ CeedInt dim = mesh.Dimension() - use_bdr; -+ CeedInt space_dim = mesh.SpaceDimension(); -+ CeedInt curl_dim = (dim < 3) ? 1 : dim; - CeedInt trial_vdim = trial_fes.GetVDim(); - CeedInt test_vdim = test_fes.GetVDim(); -- -- mesh.EnsureNodes(); -- if ( &trial_fes == &test_fes ) -+ bool trial_vectorfe = -+ (trial_fes.FEColl()->GetRangeType(dim) == mfem::FiniteElement::VECTOR); -+ bool test_vectorfe = -+ (test_fes.FEColl()->GetRangeType(dim) == mfem::FiniteElement::VECTOR); -+ MFEM_VERIFY(!(!indices && mesh.GetNumGeometries(dim) > 1), -+ "Use ceed::MixedOperator on mixed meshes."); -+ InitCoefficient(Q, mesh, ir, use_bdr, nelem, indices, coeff); -+ -+ if (&trial_fes == &test_fes) - { -- InitBasisAndRestriction(trial_fes, ir, nelem, indices, -- ceed, &trial_basis, &trial_restr); -- test_basis = trial_basis; -- test_restr = trial_restr; -+ InitBasis(trial_fes, ir, use_bdr, indices, ceed, -+ &trial_basis); -+ InitRestriction(trial_fes, use_bdr, nelem, indices, ceed, -+ &trial_restr); -+ CeedBasisReferenceCopy(trial_basis, &test_basis); -+ CeedElemRestrictionReferenceCopy(trial_restr, &test_restr); - } - else - { -- InitBasisAndRestriction(trial_fes, ir, nelem, indices, -- ceed, &trial_basis, &trial_restr); -- InitBasisAndRestriction(test_fes, ir, nelem, indices, -- ceed, &test_basis, &test_restr); -+ InitBasis(trial_fes, ir, use_bdr, indices, ceed, -+ &trial_basis); -+ InitBasis(test_fes, ir, use_bdr, indices, ceed, -+ &test_basis); -+ InitRestriction(trial_fes, use_bdr, nelem, indices, ceed, -+ &trial_restr); -+ InitRestriction(test_fes, use_bdr, nelem, indices, ceed, -+ &test_restr); - } - -- const mfem::FiniteElementSpace *mesh_fes = mesh.GetNodalFESpace(); -- MFEM_VERIFY(mesh_fes, "the Mesh has no nodal FE space"); -- InitBasisAndRestriction(*mesh_fes, ir, nelem, indices, -- ceed, &mesh_basis, &mesh_restr); -- - CeedInt trial_nqpts, test_nqpts; - CeedBasisGetNumQuadraturePoints(trial_basis, &trial_nqpts); - CeedBasisGetNumQuadraturePoints(test_basis, &test_nqpts); - MFEM_VERIFY(trial_nqpts == test_nqpts, - "Trial and test basis must have the same number of quadrature" - " points."); -- CeedInt nqpts = trial_nqpts; -- -- const int qdatasize = op.qdatasize; -- InitStridedRestriction(*mesh_fes, nelem, nqpts, qdatasize, -- CEED_STRIDES_BACKEND, -- &restr_i); -+ const CeedInt nqpts = trial_nqpts; - -+ mesh.EnsureNodes(); -+ const mfem::FiniteElementSpace *mesh_fes = mesh.GetNodalFESpace(); -+ MFEM_VERIFY(mesh_fes, "The mesh has no nodal FE space."); -+ InitBasis(*mesh_fes, ir, use_bdr, indices, ceed, &mesh_basis); -+ InitRestriction(*mesh_fes, use_bdr, nelem, indices, ceed, &mesh_restr); - InitVector(*mesh.GetNodes(), node_coords); - -- CeedVectorCreate(ceed, nelem * nqpts * qdatasize, &qdata); -- -- // Context data to be passed to the Q-function. -- info.ctx.dim = mesh.Dimension(); -- info.ctx.space_dim = mesh.SpaceDimension(); -- info.ctx.vdim = trial_fes.GetVDim(); -- -- std::string qf_file = GetCeedPath() + op.header; -- std::string qf = qf_file + op.build_func; -- CeedQFunctionCreateInterior(ceed, 1, op.build_qf, qf.c_str(), -- &build_qfunc); -- -- // Create the Q-function that builds the operator (i.e. computes its -- // quadrature data) and set its context data. -- if (VariableCoefficient *var_coeff = -- dynamic_cast(coeff)) -- { -- CeedQFunctionAddInput(build_qfunc, "coeff", coeff->ncomp, -- var_coeff->emode); -- } -- CeedQFunctionAddInput(build_qfunc, "dx", dim * dim, CEED_EVAL_GRAD); -- CeedQFunctionAddInput(build_qfunc, "weights", 1, CEED_EVAL_WEIGHT); -- CeedQFunctionAddOutput(build_qfunc, "qdata", qdatasize, CEED_EVAL_NONE); -- -- CeedQFunctionContextCreate(ceed, &build_ctx); -- CeedQFunctionContextSetData(build_ctx, CEED_MEM_HOST, -+ CeedQFunctionContextCreate(ceed, &apply_ctx); -+ CeedQFunctionContextSetData(apply_ctx, CEED_MEM_HOST, - CEED_COPY_VALUES, - sizeof(info.ctx), - &info.ctx); -- CeedQFunctionSetContext(build_qfunc, build_ctx); - -- // Create the operator that builds the quadrature data for the operator. -- CeedOperatorCreate(ceed, build_qfunc, NULL, NULL, &build_oper); -- if (GridCoefficient *gridCoeff = dynamic_cast(coeff)) -- { -- InitBasisAndRestriction(*gridCoeff->gf.FESpace(), ir, -- nelem, indices, ceed, -- &gridCoeff->basis, -- &gridCoeff->restr); -- CeedOperatorSetField(build_oper, "coeff", gridCoeff->restr, -- gridCoeff->basis, gridCoeff->coeffVector); -- } -- else if (QuadCoefficient *quadCoeff = -- dynamic_cast(coeff)) -+ if (!use_mf) - { -- const int ncomp = quadCoeff->ncomp; -- CeedInt strides[3] = {ncomp, 1, ncomp*nqpts}; -- InitStridedRestriction(*mesh.GetNodalFESpace(), -- nelem, nqpts, ncomp, strides, -- &quadCoeff->restr); -- CeedOperatorSetField(build_oper, "coeff", quadCoeff->restr, -- CEED_BASIS_COLLOCATED, quadCoeff->coeffVector); -+ const int qdatasize = info.qdatasize; -+ InitStridedRestriction(*mesh_fes, nelem, nqpts, qdatasize, -+ CEED_STRIDES_BACKEND, ceed, -+ &qdata_restr); -+ CeedVectorCreate(ceed, nelem * nqpts * qdatasize, &qdata); -+ -+ // Create the QFunction that builds the operator (i.e. computes its -+ // quadrature data) and set its context data. -+ CeedQFunction build_qfunc; -+ std::string qf = GetCeedPath() + info.header + info.build_func; -+ CeedQFunctionCreateInterior(ceed, 1, info.build_qf, qf.c_str(), -+ &build_qfunc); -+ if (coeff) -+ { -+ CeedQFunctionAddInput(build_qfunc, "coeff", coeff->ncomp, coeff->emode); -+ } -+ CeedQFunctionAddInput(build_qfunc, "dx", dim * space_dim, CEED_EVAL_GRAD); -+ CeedQFunctionAddInput(build_qfunc, "weights", 1, CEED_EVAL_WEIGHT); -+ CeedQFunctionAddOutput(build_qfunc, "qdata", qdatasize, CEED_EVAL_NONE); -+ CeedQFunctionSetContext(build_qfunc, apply_ctx); -+ -+ // Create the operator that builds the quadrature data for the operator. -+ CeedOperator build_oper; -+ CeedOperatorCreate(ceed, build_qfunc, NULL, NULL, &build_oper); -+ if (GridCoefficient *grid_coeff = dynamic_cast(coeff)) -+ { -+ const mfem::FiniteElementSpace *coeff_fes = grid_coeff->gf.FESpace(); -+ InitBasis(*coeff_fes, ir, use_bdr, indices, ceed, -+ &grid_coeff->basis); -+ InitRestriction(*coeff_fes, use_bdr, nelem, indices, ceed, -+ &grid_coeff->restr); -+ CeedOperatorSetField(build_oper, "coeff", grid_coeff->restr, -+ grid_coeff->basis, grid_coeff->coeff_vector); -+ } -+ else if (QuadCoefficient *quad_coeff = dynamic_cast(coeff)) -+ { -+ const int ncomp = quad_coeff->ncomp; -+ CeedInt strides[3] = {ncomp, 1, ncomp * nqpts}; -+ InitStridedRestriction(*mesh_fes, nelem, nqpts, ncomp, strides, ceed, -+ &quad_coeff->restr); -+ CeedOperatorSetField(build_oper, "coeff", quad_coeff->restr, -+ CEED_BASIS_COLLOCATED, quad_coeff->coeff_vector); -+ } -+ CeedOperatorSetField(build_oper, "dx", mesh_restr, -+ mesh_basis, CEED_VECTOR_ACTIVE); -+ CeedOperatorSetField(build_oper, "weights", CEED_ELEMRESTRICTION_NONE, -+ mesh_basis, CEED_VECTOR_NONE); -+ CeedOperatorSetField(build_oper, "qdata", qdata_restr, -+ CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); -+ CeedOperatorCheckReady(build_oper); -+ -+ // Compute the quadrature data for the operator. -+ CeedOperatorApply(build_oper, node_coords, qdata, CEED_REQUEST_IMMEDIATE); -+ -+ CeedOperatorDestroy(&build_oper); -+ CeedQFunctionDestroy(&build_qfunc); -+ -+ CeedVectorDestroy(&node_coords); -+ node_coords = nullptr; -+ delete coeff; -+ coeff = nullptr; - } -- CeedOperatorSetField(build_oper, "dx", mesh_restr, -- mesh_basis, CEED_VECTOR_ACTIVE); -- CeedOperatorSetField(build_oper, "weights", CEED_ELEMRESTRICTION_NONE, -- mesh_basis, CEED_VECTOR_NONE); -- CeedOperatorSetField(build_oper, "qdata", restr_i, -- CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); -- -- // Compute the quadrature data for the operator. -- CeedOperatorApply(build_oper, node_coords, qdata, CEED_REQUEST_IMMEDIATE); -- -- // Create the Q-function that defines the action of the operator. -- qf = qf_file + op.apply_func; -- CeedQFunctionCreateInterior(ceed, 1, op.apply_qf, qf.c_str(), -+ -+ // Create the QFunction that defines the action of the operator. -+ std::string qf = GetCeedPath() + info.header + info.apply_func; -+ CeedQFunctionCreateInterior(ceed, 1, info.apply_qf, qf.c_str(), - &apply_qfunc); - // input -- switch (op.trial_op) -+ switch (info.trial_op) - { - case EvalMode::None: - CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim, CEED_EVAL_NONE); - break; - case EvalMode::Interp: -- CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim, CEED_EVAL_INTERP); -+ CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim * (trial_vectorfe ? dim : 1), -+ CEED_EVAL_INTERP); - break; - case EvalMode::Grad: -- CeedQFunctionAddInput(apply_qfunc, "gu", trial_vdim*dim, CEED_EVAL_GRAD); -+ CeedQFunctionAddInput(apply_qfunc, "gu", trial_vdim * dim, CEED_EVAL_GRAD); - break; - case EvalMode::InterpAndGrad: -+ MFEM_VERIFY(!trial_vectorfe, -+ "EvalMode::InterpAndGrad is not intended for vector FE."); - CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim, CEED_EVAL_INTERP); -- CeedQFunctionAddInput(apply_qfunc, "gu", trial_vdim*dim, CEED_EVAL_GRAD); -+ CeedQFunctionAddInput(apply_qfunc, "gu", trial_vdim * dim, CEED_EVAL_GRAD); -+ break; -+ case EvalMode::Div: -+ CeedQFunctionAddInput(apply_qfunc, "du", trial_vdim, CEED_EVAL_DIV); -+ break; -+ case EvalMode::Curl: -+ CeedQFunctionAddInput(apply_qfunc, "cu", trial_vdim * curl_dim, CEED_EVAL_CURL); - break; - } -- // qdata -- CeedQFunctionAddInput(apply_qfunc, "qdata", qdatasize, CEED_EVAL_NONE); -+ if (use_mf) -+ { -+ if (coeff) -+ { -+ // coefficient -+ CeedQFunctionAddInput(apply_qfunc, "coeff", coeff->ncomp, coeff->emode); -+ } -+ CeedQFunctionAddInput(apply_qfunc, "dx", dim * space_dim, CEED_EVAL_GRAD); -+ CeedQFunctionAddInput(apply_qfunc, "weights", 1, CEED_EVAL_WEIGHT); -+ } -+ else -+ { -+ // qdata -+ CeedQFunctionAddInput(apply_qfunc, "qdata", info.qdatasize, CEED_EVAL_NONE); -+ } - // output -- switch (op.test_op) -+ switch (info.test_op) - { - case EvalMode::None: - CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim, CEED_EVAL_NONE); - break; - case EvalMode::Interp: -- CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim, CEED_EVAL_INTERP); -+ CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim * (test_vectorfe ? dim : 1), -+ CEED_EVAL_INTERP); - break; - case EvalMode::Grad: -- CeedQFunctionAddOutput(apply_qfunc, "gv", test_vdim*dim, CEED_EVAL_GRAD); -+ CeedQFunctionAddOutput(apply_qfunc, "gv", test_vdim * dim, CEED_EVAL_GRAD); - break; - case EvalMode::InterpAndGrad: -+ MFEM_VERIFY(!test_vectorfe, -+ "EvalMode::InterpAndGrad is not intended for vector FE."); - CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim, CEED_EVAL_INTERP); -- CeedQFunctionAddOutput(apply_qfunc, "gv", test_vdim*dim, CEED_EVAL_GRAD); -+ CeedQFunctionAddOutput(apply_qfunc, "gv", test_vdim * dim, CEED_EVAL_GRAD); -+ break; -+ case EvalMode::Div: -+ CeedQFunctionAddOutput(apply_qfunc, "dv", test_vdim, CEED_EVAL_DIV); -+ break; -+ case EvalMode::Curl: -+ CeedQFunctionAddOutput(apply_qfunc, "cv", test_vdim * curl_dim, CEED_EVAL_CURL); - break; - } -- CeedQFunctionSetContext(apply_qfunc, build_ctx); -+ CeedQFunctionSetContext(apply_qfunc, apply_ctx); - - // Create the operator. - CeedOperatorCreate(ceed, apply_qfunc, NULL, NULL, &oper); - // input -- switch (op.trial_op) -+ switch (info.trial_op) - { - case EvalMode::None: - CeedOperatorSetField(oper, "u", trial_restr, -@@ -367,12 +414,46 @@ public: - CeedOperatorSetField(oper, "u", trial_restr, trial_basis, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(oper, "gu", trial_restr, trial_basis, CEED_VECTOR_ACTIVE); - break; -+ case EvalMode::Div: -+ CeedOperatorSetField(oper, "du", trial_restr, trial_basis, CEED_VECTOR_ACTIVE); -+ break; -+ case EvalMode::Curl: -+ CeedOperatorSetField(oper, "cu", trial_restr, trial_basis, CEED_VECTOR_ACTIVE); -+ break; -+ } -+ if (use_mf) -+ { -+ // coefficient -+ if (GridCoefficient *grid_coeff = dynamic_cast(coeff)) -+ { -+ const mfem::FiniteElementSpace *coeff_fes = grid_coeff->gf.FESpace(); -+ InitBasis(*coeff_fes, ir, use_bdr, indices, ceed, -+ &grid_coeff->basis); -+ InitRestriction(*coeff_fes, use_bdr, nelem, indices, ceed, -+ &grid_coeff->restr); -+ CeedOperatorSetField(oper, "coeff", grid_coeff->restr, -+ grid_coeff->basis, grid_coeff->coeff_vector); -+ } -+ else if (QuadCoefficient *quad_coeff = dynamic_cast(coeff)) -+ { -+ const int ncomp = quad_coeff->ncomp; -+ CeedInt strides[3] = {ncomp, 1, ncomp * nqpts}; -+ InitStridedRestriction(*mesh_fes, nelem, nqpts, ncomp, strides, ceed, -+ &quad_coeff->restr); -+ CeedOperatorSetField(oper, "coeff", quad_coeff->restr, -+ CEED_BASIS_COLLOCATED, quad_coeff->coeff_vector); -+ } -+ CeedOperatorSetField(oper, "dx", mesh_restr, mesh_basis, node_coords); -+ CeedOperatorSetField(oper, "weights", CEED_ELEMRESTRICTION_NONE, -+ mesh_basis, CEED_VECTOR_NONE); -+ } -+ else -+ { -+ // qdata -+ CeedOperatorSetField(oper, "qdata", qdata_restr, CEED_BASIS_COLLOCATED, qdata); - } -- // qdata -- CeedOperatorSetField(oper, "qdata", restr_i, CEED_BASIS_COLLOCATED, -- qdata); - // output -- switch (op.test_op) -+ switch (info.test_op) - { - case EvalMode::None: - CeedOperatorSetField(oper, "v", test_restr, -@@ -388,385 +469,154 @@ public: - CeedOperatorSetField(oper, "v", test_restr, test_basis, CEED_VECTOR_ACTIVE); - CeedOperatorSetField(oper, "gv", test_restr, test_basis, CEED_VECTOR_ACTIVE); - break; -+ case EvalMode::Div: -+ CeedOperatorSetField(oper, "dv", test_restr, test_basis, CEED_VECTOR_ACTIVE); -+ break; -+ case EvalMode::Curl: -+ CeedOperatorSetField(oper, "cv", test_restr, test_basis, CEED_VECTOR_ACTIVE); -+ break; - } -+ CeedOperatorCheckReady(oper); - -- CeedVectorCreate(ceed, trial_vdim*trial_fes.GetNDofs(), &u); -- CeedVectorCreate(ceed, test_vdim*test_fes.GetNDofs(), &v); -+ CeedVectorCreate(ceed, trial_vdim * trial_fes.GetNDofs(), &u); -+ CeedVectorCreate(ceed, test_vdim * test_fes.GetNDofs(), &v); - } - -- virtual ~PAIntegrator() -+ virtual ~Integrator() - { -- CeedQFunctionDestroy(&build_qfunc); -+ // All basis and restriction objects are destroyed by fes destructor - CeedQFunctionDestroy(&apply_qfunc); -- CeedQFunctionContextDestroy(&build_ctx); -+ CeedQFunctionContextDestroy(&apply_ctx); - CeedVectorDestroy(&node_coords); - CeedVectorDestroy(&qdata); - delete coeff; -- CeedOperatorDestroy(&build_oper); - } -- --private: -- /** This structure contains the data to assemble a partially assembled -- operator with libCEED. */ -- struct PAOperator -- { -- /** The number of quadrature data at each quadrature point. */ -- int qdatasize; -- /** The path to the header containing the functions for libCEED. */ -- std::string header; -- /** The name of the Qfunction to build the quadrature data. */ -- std::string build_func; -- /** The Qfunction to build the quadrature data. */ -- CeedQFunctionUser build_qf; -- /** The name of the Qfunction to apply the operator. */ -- std::string apply_func; -- /** The Qfunction to apply the operator. */ -- CeedQFunctionUser apply_qf; -- /** The evaluation mode to apply to the trial function (CEED_EVAL_INTERP, -- CEED_EVAL_GRAD, etc.) */ -- EvalMode trial_op; -- /** The evaluation mode to apply to the test function ( CEED_EVAL_INTERP, -- CEED_EVAL_GRAD, etc.)*/ -- EvalMode test_op; -- }; - #endif - }; - --/** This class represent a matrix-free operator using libCEED. */ --class MFIntegrator : public ceed::Operator -+/** This class represents a matrix-free or partially assembled discrete linear -+ operator using libCEED. */ -+class Interpolator : public Operator - { - #ifdef MFEM_USE_CEED - protected: -- CeedBasis trial_basis, test_basis, mesh_basis; -- CeedElemRestriction trial_restr, test_restr, mesh_restr, restr_i; -- CeedQFunction apply_qfunc; -- CeedVector node_coords, qdata; -- Coefficient *coeff; -- CeedQFunctionContext build_ctx; -+ CeedBasis basis_ctof; -+ CeedElemRestriction trial_restr, test_restr; -+ CeedQFunction apply_qfunc, apply_qfunc_t; - - public: -- MFIntegrator() -+ Interpolator() - : Operator(), -- trial_basis(nullptr), test_basis(nullptr), mesh_basis(nullptr), -- trial_restr(nullptr), test_restr(nullptr), mesh_restr(nullptr), -- restr_i(nullptr), -- apply_qfunc(nullptr), node_coords(nullptr), -- qdata(nullptr), coeff(nullptr), build_ctx(nullptr) { } -- -- /** @brief This method assembles the `MFIntegrator` with the given -- `CeedOperatorInfo` @a info, an `mfem::FiniteElementSpace` @a fes, an -- `mfem::IntegrationRule` @a ir, and `mfem::Coefficient` or -- `mfem::VectorCoefficient` @a Q. -- The `CeedOperatorInfo` type is expected to inherit from `OperatorInfo`, -- and contain a `Context` type relevant to the qFunctions. -- -- @param[in] info is the structure describing the CeedOperator to assemble. -- @param[in] fes is the finite element space. -- @param[in] ir is the integration rule for the operator. -- @param[in] Q is the coefficient from the `Integrator`. */ -- template -- void Assemble(CeedOperatorInfo &info, -- const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- CoeffType *Q) -- { -- Assemble(info, fes, ir, fes.GetNE(), nullptr, Q); -- } -- -- /** @brief This method assembles the `MFIntegrator` with the given -- `CeedOperatorInfo` @a info, an `mfem::FiniteElementSpace` @a fes, an -- `mfem::IntegrationRule` @a ir, and `mfem::Coefficient` or -- `mfem::VectorCoefficient` @a Q for the elements given by the indices -- @a indices. -- The `CeedOperatorInfo` type is expected to inherit from `OperatorInfo`, -- and contain a `Context` type relevant to the qFunctions. -+ basis_ctof(nullptr), -+ trial_restr(nullptr), test_restr(nullptr), -+ apply_qfunc(nullptr), apply_qfunc_t(nullptr) {} - -- @param[in] info is the structure describing the CeedOperator to assemble. -- @param[in] fes is the finite element space. -- @param[in] ir is the integration rule for the operator. -- @param[in] nelem The number of elements. -- @param[in] indices The indices of the elements of same type in the -- `FiniteElementSpace`. If `indices == nullptr`, assumes -- that the `FiniteElementSpace` is not mixed. -- @param[in] Q is the coefficient from the `Integrator`. */ -- template -- void Assemble(CeedOperatorInfo &info, -- const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- int nelem, -- const int* indices, -- CoeffType *Q) -- { -- Assemble(info, fes, fes, ir, nelem, indices, Q); -- } -- -- /** This method assembles the MFIntegrator for mixed forms. -+ /** This method assembles the `Interpolator`. - -- @param[in] info the `CeedOperatorInfo` describing the `CeedOperator`, -+ @param[in] info The `CeedOperatorInfo` describing the `CeedOperator`, - the `CeedOperatorInfo` type is expected to inherit from - `OperatorInfo` and contain a `Context` type relevant to -- the qFunctions. -- @param[in] trial_fes the trial `FiniteElementSpace` for the form, -- @param[in] test_fes the test `FiniteElementSpace` for the form, -- @param[in] ir the `IntegrationRule` for the numerical integration, -- @param[in] Q `Coefficient` or `VectorCoefficient`. */ -+ the QFunctions. -+ @param[in] trial_fes The trial `FiniteElementSpace` for the form. -+ @param[in] test_fes The test `FiniteElementSpace` for the form. -+ @param[in] ir Not supported by `Interpolator`. -+ @param[in] Q Not supported by `Interpolator`. -+ @param[in] use_bdr Not supported by `Interpolator`. -+ @param[in] use_mf Controls whether to construct a matrix-free or partially -+ assembled operator. */ - template - void Assemble(CeedOperatorInfo &info, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - const mfem::IntegrationRule &ir, -- CoeffType *Q) -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) - { -- Assemble(info, trial_fes, test_fes, ir, trial_fes.GetNE(), nullptr, Q); -+ Assemble(info, trial_fes, test_fes, ir, -+ use_bdr ? trial_fes.GetNBE() : trial_fes.GetNE(), -+ nullptr, Q, use_bdr, use_mf); - } - -- /** This method assembles the MFIntegrator for mixed forms. -+ /** This method assembles the `Interpolator` on mixed meshes. Its signature -+ matches that for `Integrator`. - -- @param[in] info the `CeedOperatorInfo` describing the `CeedOperator`, -+ @param[in] info The `CeedOperatorInfo` describing the `CeedOperator`, - the `CeedOperatorInfo` type is expected to inherit from - `OperatorInfo` and contain a `Context` type relevant to -- the qFunctions. -- @param[in] trial_fes the trial `FiniteElementSpace` for the form, -- @param[in] test_fes the test `FiniteElementSpace` for the form, -- @param[in] ir the `IntegrationRule` for the numerical integration, -- @param[in] nelem The number of elements, -+ the QFunctions. -+ @param[in] trial_fes The trial `FiniteElementSpace` for the form. -+ @param[in] test_fes The test `FiniteElementSpace` for the form. -+ @param[in] ir Not supported by `Interpolator`. -+ @param[in] nelem The number of elements. - @param[in] indices The indices of the elements of same type in the - `FiniteElementSpace`. If `indices == nullptr`, assumes -- that the `FiniteElementSpace` is not mixed, -- @param[in] Q `Coefficient` or `VectorCoefficient`. */ -+ that the `FiniteElementSpace` is not mixed. -+ @param[in] Q Not supported by `Interpolator`. -+ @param[in] use_bdr Not supported by `Interpolator`. -+ @param[in] use_mf Controls whether to construct a matrix-free or partially -+ assembled operator. */ - template - void Assemble(CeedOperatorInfo &info, - const mfem::FiniteElementSpace &trial_fes, - const mfem::FiniteElementSpace &test_fes, - const mfem::IntegrationRule &ir, - int nelem, -- const int* indices, -- CoeffType *Q) -+ const int *indices, -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) - { - Ceed ceed(internal::ceed); -- Mesh &mesh = *trial_fes.GetMesh(); -- MFEM_VERIFY(!(!indices && mesh.GetNumGeometries(mesh.Dimension()) > 1), -- "Use ceed::MixedIntegrator on mixed meshes."); -- InitCoefficient(Q, mesh, ir, nelem, indices, coeff, info.ctx); -- bool const_coeff = coeff->IsConstant(); -- std::string apply_func = const_coeff ? info.apply_func_mf_const -- : info.apply_func_mf_quad; -- CeedQFunctionUser apply_qf = const_coeff ? info.apply_qf_mf_const -- : info.apply_qf_mf_quad; -- MFOperator op {info.header, -- apply_func, apply_qf, -- info.trial_op, -- info.test_op -- }; -- -- CeedInt dim = mesh.SpaceDimension(); - CeedInt trial_vdim = trial_fes.GetVDim(); - CeedInt test_vdim = test_fes.GetVDim(); -- -- mesh.EnsureNodes(); -- if ( &trial_fes == &test_fes ) -- { -- InitBasisAndRestriction(trial_fes, ir, nelem, indices, ceed, -- &trial_basis, &trial_restr); -- test_basis = trial_basis; -- test_restr = trial_restr; -- } -- else -- { -- InitBasisAndRestriction(trial_fes, ir, nelem, indices, ceed, -- &trial_basis, &trial_restr); -- InitBasisAndRestriction(test_fes, ir, nelem, indices, ceed, -- &test_basis, &test_restr); -- } -- -- const mfem::FiniteElementSpace *mesh_fes = mesh.GetNodalFESpace(); -- MFEM_VERIFY(mesh_fes, "the Mesh has no nodal FE space"); -- InitBasisAndRestriction(*mesh_fes, ir, nelem, indices, ceed, &mesh_basis, -- &mesh_restr); -- -- CeedInt trial_nqpts, test_nqpts; -- CeedBasisGetNumQuadraturePoints(trial_basis, &trial_nqpts); -- CeedBasisGetNumQuadraturePoints(trial_basis, &test_nqpts); -- MFEM_VERIFY(trial_nqpts == test_nqpts, -- "Trial and test basis must have the same number of quadrature" -- " points."); -- CeedInt nqpts = trial_nqpts; -- -- InitVector(*mesh.GetNodes(), node_coords); -- -- // Context data to be passed to the Q-function. -- info.ctx.dim = mesh.Dimension(); -- info.ctx.space_dim = mesh.SpaceDimension(); -- info.ctx.vdim = trial_fes.GetVDim(); -- -- std::string qf_file = GetCeedPath() + op.header; -- std::string qf = qf_file + op.apply_func; -- CeedQFunctionCreateInterior(ceed, 1, op.apply_qf, qf.c_str(), -+ MFEM_VERIFY(!Q, "ceed:Interpolator does not support coefficients."); -+ MFEM_VERIFY(!use_bdr, -+ "ceed:Interpolator does not support boundary interpolators."); -+ MFEM_VERIFY(trial_vdim == 1 && test_vdim == 1, -+ "ceed:Interpolator does not support spaces with vdim > 1."); -+ -+ InitInterpolatorBasis(trial_fes, test_fes, indices, ceed, &basis_ctof); -+ InitInterpolatorRestrictions(trial_fes, test_fes, nelem, indices, ceed, -+ &trial_restr, &test_restr); -+ MFEM_VERIFY(info.trial_op == EvalMode::Interp, -+ "ceed:Interpolator only supports trial_op == Interp."); -+ MFEM_VERIFY(info.test_op == EvalMode::None, -+ "ceed:Interpolator only supports test_op == None."); -+ -+ // Create the QFunction that defines the action of the operator -+ // (only an identity as element dof multiplicity is handled outside of libCEED) -+ CeedQFunctionCreateIdentity(ceed, trial_vdim, CEED_EVAL_INTERP, CEED_EVAL_NONE, - &apply_qfunc); -+ CeedQFunctionCreateIdentity(ceed, trial_vdim, CEED_EVAL_NONE, CEED_EVAL_INTERP, -+ &apply_qfunc_t); - -- // Create the Q-function that builds the operator (i.e. computes its -- // quadrature data) and set its context data. -- if (VariableCoefficient *var_coeff = -- dynamic_cast(coeff)) -- { -- CeedQFunctionAddInput(apply_qfunc, "coeff", coeff->ncomp, -- var_coeff->emode); -- } -- // input -- switch (op.trial_op) -- { -- case EvalMode::None: -- CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim, -- CEED_EVAL_NONE); -- break; -- case EvalMode::Interp: -- CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim, -- CEED_EVAL_INTERP); -- break; -- case EvalMode::Grad: -- CeedQFunctionAddInput(apply_qfunc, "gu", trial_vdim*dim, -- CEED_EVAL_GRAD); -- break; -- case EvalMode::InterpAndGrad: -- CeedQFunctionAddInput(apply_qfunc, "u", trial_vdim, -- CEED_EVAL_INTERP); -- CeedQFunctionAddInput(apply_qfunc, "gu", trial_vdim*dim, -- CEED_EVAL_GRAD); -- break; -- } -- CeedQFunctionAddInput(apply_qfunc, "dx", dim * dim, CEED_EVAL_GRAD); -- CeedQFunctionAddInput(apply_qfunc, "weights", 1, CEED_EVAL_WEIGHT); -- // output -- switch (op.test_op) -- { -- case EvalMode::None: -- CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim, -- CEED_EVAL_NONE); -- break; -- case EvalMode::Interp: -- CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim, -- CEED_EVAL_INTERP); -- break; -- case EvalMode::Grad: -- CeedQFunctionAddOutput(apply_qfunc, "gv", test_vdim*dim, -- CEED_EVAL_GRAD); -- break; -- case EvalMode::InterpAndGrad: -- CeedQFunctionAddOutput(apply_qfunc, "v", test_vdim, -- CEED_EVAL_INTERP); -- CeedQFunctionAddOutput(apply_qfunc, "gv", test_vdim*dim, -- CEED_EVAL_GRAD); -- break; -- } -- -- CeedQFunctionContextCreate(ceed, &build_ctx); -- CeedQFunctionContextSetData(build_ctx, CEED_MEM_HOST, -- CEED_COPY_VALUES, -- sizeof(info.ctx), -- &info.ctx); -- CeedQFunctionSetContext(apply_qfunc, build_ctx); -- -- // Create the operator. -+ // Create the operator - CeedOperatorCreate(ceed, apply_qfunc, NULL, NULL, &oper); -- // coefficient -- if (GridCoefficient *gridCoeff = dynamic_cast(coeff)) -- { -- InitBasisAndRestriction(*gridCoeff->gf.FESpace(), ir, nelem, indices, -- ceed, &gridCoeff->basis, &gridCoeff->restr); -- CeedOperatorSetField(oper, "coeff", gridCoeff->restr, -- gridCoeff->basis, gridCoeff->coeffVector); -- } -- else if (QuadCoefficient *quadCoeff = -- dynamic_cast(coeff)) -- { -- const int ncomp = quadCoeff->ncomp; -- CeedInt strides[3] = {ncomp, 1, ncomp*nqpts}; -- InitStridedRestriction(*mesh.GetNodalFESpace(), -- nelem, nqpts, ncomp, strides, -- &quadCoeff->restr); -- CeedOperatorSetField(oper, "coeff", quadCoeff->restr, -- CEED_BASIS_COLLOCATED, quadCoeff->coeffVector); -- } -- // input -- switch (op.trial_op) -- { -- case EvalMode::None: -- CeedOperatorSetField(oper, "u", trial_restr, -- CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); -- break; -- case EvalMode::Interp: -- CeedOperatorSetField(oper, "u", trial_restr, trial_basis, -- CEED_VECTOR_ACTIVE); -- break; -- case EvalMode::Grad: -- CeedOperatorSetField(oper, "gu", trial_restr, trial_basis, -- CEED_VECTOR_ACTIVE); -- break; -- case EvalMode::InterpAndGrad: -- CeedOperatorSetField(oper, "u", trial_restr, trial_basis, -- CEED_VECTOR_ACTIVE); -- CeedOperatorSetField(oper, "gu", trial_restr, trial_basis, -- CEED_VECTOR_ACTIVE); -- break; -- } -- CeedOperatorSetField(oper, "dx", mesh_restr, -- mesh_basis, node_coords); -- CeedOperatorSetField(oper, "weights", CEED_ELEMRESTRICTION_NONE, -- mesh_basis, CEED_VECTOR_NONE); -- // output -- switch (op.test_op) -- { -- case EvalMode::None: -- CeedOperatorSetField(oper, "v", test_restr, -- CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); -- break; -- case EvalMode::Interp: -- CeedOperatorSetField(oper, "v", test_restr, test_basis, -- CEED_VECTOR_ACTIVE); -- break; -- case EvalMode::Grad: -- CeedOperatorSetField(oper, "gv", test_restr, test_basis, -- CEED_VECTOR_ACTIVE); -- break; -- case EvalMode::InterpAndGrad: -- CeedOperatorSetField(oper, "v", test_restr, test_basis, -- CEED_VECTOR_ACTIVE); -- CeedOperatorSetField(oper, "gv", test_restr, test_basis, -- CEED_VECTOR_ACTIVE); -- break; -- } -- -- CeedVectorCreate(ceed, trial_vdim*trial_fes.GetNDofs(), &u); -- CeedVectorCreate(ceed, test_vdim*test_fes.GetNDofs(), &v); -+ CeedOperatorSetField(oper, "input", trial_restr, basis_ctof, -+ CEED_VECTOR_ACTIVE); -+ CeedOperatorSetField(oper, "output", test_restr, CEED_BASIS_COLLOCATED, -+ CEED_VECTOR_ACTIVE); -+ CeedOperatorCheckReady(oper); -+ -+ // Create the transpose operator -+ CeedOperatorCreate(ceed, apply_qfunc_t, NULL, NULL, &oper_t); -+ CeedOperatorSetField(oper_t, "input", test_restr, CEED_BASIS_COLLOCATED, -+ CEED_VECTOR_ACTIVE); -+ CeedOperatorSetField(oper_t, "output", trial_restr, basis_ctof, -+ CEED_VECTOR_ACTIVE); -+ CeedOperatorCheckReady(oper_t); -+ -+ CeedVectorCreate(ceed, trial_vdim * trial_fes.GetNDofs(), &u); -+ CeedVectorCreate(ceed, test_vdim * test_fes.GetNDofs(), &v); - } - -- virtual ~MFIntegrator() -+ virtual ~Interpolator() - { -+ // All basis and restriction objects are destroyed by fes destructor - CeedQFunctionDestroy(&apply_qfunc); -- CeedQFunctionContextDestroy(&build_ctx); -- CeedVectorDestroy(&node_coords); -- CeedVectorDestroy(&qdata); -- delete coeff; -+ CeedQFunctionDestroy(&apply_qfunc_t); - } -- --private: -- /** This structure contains the data to assemble a matrix-free operator with -- libCEED. */ -- struct MFOperator -- { -- /** The path to the header containing the functions for libCEED. */ -- std::string header; -- /** The name of the Qfunction to apply the operator. */ -- std::string apply_func; -- /** The Qfunction to apply the operator. */ -- CeedQFunctionUser apply_qf; -- /** The evaluation mode to apply to the trial function (CEED_EVAL_INTERP, -- CEED_EVAL_GRAD, etc.) */ -- EvalMode trial_op; -- /** The evaluation mode to apply to the test function ( CEED_EVAL_INTERP, -- CEED_EVAL_GRAD, etc.) */ -- EvalMode test_op; -- }; - #endif - }; - -@@ -774,4 +624,4 @@ private: - - } // namespace mfem - --#endif // MFEM_LIBCEED_INTEG -+#endif // MFEM_LIBCEED_INTEGRATOR -diff --git a/fem/ceed/interface/interface.hpp b/fem/ceed/interface/interface.hpp -index 0a69121ad..8b877188c 100644 ---- a/fem/ceed/interface/interface.hpp -+++ b/fem/ceed/interface/interface.hpp -@@ -14,6 +14,8 @@ - - // Object wrapping a CeedOperator in a mfem::Operator. - #include "operator.hpp" -+// Operator supporting mixed finite element spaces. -+#include "mixed_operator.hpp" - // Functions to initialize CeedBasis objects. - #include "basis.hpp" - // Functions to initialize CeedRestriction objects. -@@ -22,8 +24,6 @@ - #include "coefficient.hpp" - // PA or MF Operator using libCEED. - #include "integrator.hpp" --// PA Operator supporting mixed finite element spaces. --#include "mixed_integrator.hpp" - // Utility functions - #include "util.hpp" - // Wrapper to include -diff --git a/fem/ceed/interface/mixed_integrator.hpp b/fem/ceed/interface/mixed_integrator.hpp -deleted file mode 100644 -index 8d344f4d9..000000000 ---- a/fem/ceed/interface/mixed_integrator.hpp -+++ /dev/null -@@ -1,126 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#ifndef MFEM_LIBCEED_MIXED_INTEGRATOR --#define MFEM_LIBCEED_MIXED_INTEGRATOR -- --#include "ceed.hpp" --#include "integrator.hpp" --#include -- --namespace mfem --{ -- --namespace ceed --{ -- --/** @brief This class wraps a `ceed::PAIntegrator` or `ceed::MFIntegrator` to -- support mixed finite element spaces. */ --template --class MixedIntegrator : public ceed::Operator --{ --#ifdef MFEM_USE_CEED -- using ElementKey = std::pair; //< Element::Type, Order > -- struct key_hash -- { -- std::size_t operator()(const ElementKey& k) const -- { -- return k.first + 2 * k.second; -- } -- }; -- using ElementsMap = std::unordered_map; -- std::vector sub_ops; -- --public: -- template -- void Assemble(const Integrator &integ, -- CeedOperatorInfo &info, -- const mfem::FiniteElementSpace &fes, -- CoeffType *Q) -- { -- ElementsMap count; -- ElementsMap element_indices; -- ElementsMap offsets; -- -- // Count the number of elements of each type -- for (int i = 0; i < fes.GetNE(); i++) -- { -- ElementKey key(fes.GetElementType(i), fes.GetElementOrder(i)); -- auto value = count.find(key); -- if (value == count.end()) -- { -- count[key] = new int(1); -- } -- else -- { -- (*value->second)++; -- } -- } -- -- // Initialization of the arrays -- for ( const auto& value : count ) -- { -- element_indices[value.first] = new int[*value.second]; -- offsets[value.first] = new int(0); -- } -- -- // Populates the indices arrays for each element type -- for (int i = 0; i < fes.GetNE(); i++) -- { -- ElementKey key(fes.GetElementType(i), fes.GetElementOrder(i)); -- int &offset = *(offsets[key]); -- int* indices_array = element_indices[key]; -- indices_array[offset] = i; -- offset++; -- } -- -- // Create composite CeedOperator -- CeedCompositeOperatorCreate(internal::ceed, &oper); -- -- // Create each sub-CeedOperator -- sub_ops.reserve(element_indices.size()); -- for (const auto& value : element_indices) -- { -- const int* indices = value.second; -- const int first_index = indices[0]; -- const mfem::FiniteElement &el = *fes.GetFE(first_index); -- auto &T = *fes.GetMesh()->GetElementTransformation(first_index); -- MFEM_ASSERT(!integ.GetIntegrationRule(), -- "Mixed mesh integrators should not have an" -- " IntegrationRule."); -- const IntegrationRule &ir = GetRule(integ, el, el, T); -- auto sub_op = new CeedInteg(); -- int nelem = *count[value.first]; -- sub_op->Assemble(info, fes, ir, nelem, indices, Q); -- sub_ops.push_back(sub_op); -- CeedCompositeOperatorAddSub(oper, sub_op->GetCeedOperator()); -- } -- -- const int ndofs = fes.GetVDim() * fes.GetNDofs(); -- CeedVectorCreate(internal::ceed, ndofs, &u); -- CeedVectorCreate(internal::ceed, ndofs, &v); -- } -- -- virtual ~MixedIntegrator() -- { -- for (auto sub_op : sub_ops) -- { -- delete sub_op; -- } -- } --#endif --}; -- --} // namespace ceed -- --} // namespace mfem -- --#endif // MFEM_LIBCEED_MIXED_INTEGRATOR -diff --git a/fem/ceed/interface/mixed_operator.hpp b/fem/ceed/interface/mixed_operator.hpp -new file mode 100644 -index 000000000..963e367be ---- /dev/null -+++ b/fem/ceed/interface/mixed_operator.hpp -@@ -0,0 +1,204 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_LIBCEED_MIXED_OPERATOR -+#define MFEM_LIBCEED_MIXED_OPERATOR -+ -+#include -+#include -+#include "../../fespace.hpp" -+#include "operator.hpp" -+#include "ceed.hpp" -+#ifdef MFEM_USE_CEED -+#include -+#endif -+ -+namespace mfem -+{ -+ -+namespace ceed -+{ -+ -+/** @brief This class wraps one or more `OpType` objects to support -+ finite element spaces on mixed meshes. */ -+template -+class MixedOperator : public Operator -+{ -+#ifdef MFEM_USE_CEED -+ using ElementKey = -+ std::array; // -+ struct key_hash -+ { -+ std::size_t operator()(const ElementKey &k) const -+ { -+ return CeedHashCombine( -+ CeedHashCombine(CeedHashInt(k[0]), -+ CeedHashInt(k[1])), -+ CeedHashInt(k[2])); -+ } -+ }; -+ using ElementsMap = std::unordered_map; -+ std::vector sub_ops; -+ -+public: -+ template -+ void Assemble(const IntegratorType &integ, -+ CeedOperatorInfo &info, -+ const mfem::FiniteElementSpace &fes, -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) -+ { -+ Assemble(integ, info, fes, fes, Q, use_bdr, use_mf); -+ } -+ -+ template -+ void Assemble(const IntegratorType &integ, -+ CeedOperatorInfo &info, -+ const mfem::FiniteElementSpace &trial_fes, -+ const mfem::FiniteElementSpace &test_fes, -+ CoeffType *Q, -+ const bool use_bdr = false, -+ const bool use_mf = false) -+ { -+ MFEM_VERIFY(trial_fes.GetMesh() == test_fes.GetMesh(), -+ "Trial and test basis must correspond to the same Mesh."); -+ mfem::Mesh &mesh = *trial_fes.GetMesh(); -+ const bool mixed = -+ mesh.GetNumGeometries(mesh.Dimension() - use_bdr) > 1 || -+ trial_fes.IsVariableOrder() || test_fes.IsVariableOrder(); -+ if (!mixed) -+ { -+ const mfem::FiniteElement &trial_fe = use_bdr ? *trial_fes.GetBE(0) : -+ *trial_fes.GetFE(0); -+ const mfem::FiniteElement &test_fe = use_bdr ? *test_fes.GetBE(0) : -+ *test_fes.GetFE(0); -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(0) : -+ *mesh.GetElementTransformation(0); -+ const mfem::IntegrationRule &ir = -+ integ.GetIntegrationRule() ? *integ.GetIntegrationRule() : -+ integ.GetRule(trial_fe, test_fe, T); -+ sub_ops.reserve(1); -+ auto *sub_op = new OpType(); -+ sub_op->Assemble(info, trial_fes, test_fes, ir, Q, use_bdr, use_mf); -+ sub_ops.push_back(sub_op); -+ -+ CeedOperatorReferenceCopy(sub_op->GetCeedOperator(), &oper); -+ if (sub_op->GetCeedOperatorTranspose()) -+ { -+ CeedOperatorReferenceCopy(sub_op->GetCeedOperatorTranspose(), &oper_t); -+ } -+ CeedVectorReferenceCopy(sub_op->GetCeedVectorU(), &u); -+ CeedVectorReferenceCopy(sub_op->GetCeedVectorV(), &v); -+ return; -+ } -+ -+ // Count the number of elements of each type -+ ElementsMap count; -+ ElementsMap element_indices; -+ ElementsMap offsets; -+ -+ const int ne = use_bdr ? mesh.GetNBE() : mesh.GetNE(); -+ for (int i = 0; i < ne; i++) -+ { -+ const mfem::FiniteElement &trial_fe = use_bdr ? *trial_fes.GetBE(i) : -+ *trial_fes.GetFE(i); -+ const mfem::FiniteElement &test_fe = use_bdr ? *test_fes.GetBE(i) : -+ *test_fes.GetFE(i); -+ mfem::Element::Type type = use_bdr ? mesh.GetBdrElementType(i) : -+ mesh.GetElementType(i); -+ ElementKey key = {type, trial_fe.GetOrder(), test_fe.GetOrder()}; -+ auto value = count.find(key); -+ if (value == count.end()) -+ { -+ count[key] = new int(1); -+ } -+ else -+ { -+ (*value->second)++; -+ } -+ } -+ -+ // Initialization of the arrays -+ for (const auto &value : count) -+ { -+ element_indices[value.first] = new int[*value.second]; -+ offsets[value.first] = new int(0); -+ } -+ -+ // Populates the indices arrays for each element type -+ for (int i = 0; i < ne; i++) -+ { -+ const mfem::FiniteElement &trial_fe = use_bdr ? *trial_fes.GetBE(i) : -+ *trial_fes.GetFE(i); -+ const mfem::FiniteElement &test_fe = use_bdr ? *test_fes.GetBE(i) : -+ *test_fes.GetFE(i); -+ mfem::Element::Type type = use_bdr ? mesh.GetBdrElementType(i) : -+ mesh.GetElementType(i); -+ ElementKey key = {type, trial_fe.GetOrder(), test_fe.GetOrder()}; -+ int &offset = *(offsets[key]); -+ int *indices_array = element_indices[key]; -+ indices_array[offset] = i; -+ offset++; -+ } -+ -+ // Create composite CeedOperator -+ CeedCompositeOperatorCreate(internal::ceed, &oper); -+ -+ // Create each sub-CeedOperator -+ sub_ops.reserve(element_indices.size()); -+ for (const auto &value : element_indices) -+ { -+ const int *indices = value.second; -+ const int first_index = indices[0]; -+ const mfem::FiniteElement &trial_fe = -+ use_bdr ? *trial_fes.GetBE(first_index) : *trial_fes.GetFE(first_index); -+ const mfem::FiniteElement &test_fe = -+ use_bdr ? *test_fes.GetBE(first_index) : *test_fes.GetFE(first_index); -+ auto &T = use_bdr ? *mesh.GetBdrElementTransformation(first_index) : -+ *mesh.GetElementTransformation(first_index); -+ MFEM_VERIFY(!integ.GetIntegrationRule(), -+ "Mixed mesh integrators should not have an IntegrationRule."); -+ const IntegrationRule &ir = integ.GetRule(trial_fe, test_fe, T); -+ auto *sub_op = new OpType(); -+ sub_op->Assemble(info, trial_fes, test_fes, ir, *count[value.first], indices, Q, -+ use_bdr, use_mf); -+ sub_ops.push_back(sub_op); -+ CeedCompositeOperatorAddSub(oper, sub_op->GetCeedOperator()); -+ if (sub_op->GetCeedOperatorTranspose()) -+ { -+ if (!oper_t) { CeedCompositeOperatorCreate(internal::ceed, &oper_t); } -+ CeedCompositeOperatorAddSub(oper_t, sub_op->GetCeedOperatorTranspose()); -+ } -+ } -+ CeedOperatorCheckReady(oper); -+ if (oper_t) { CeedOperatorCheckReady(oper_t); } -+ -+ CeedVectorCreate(internal::ceed, trial_fes.GetVDim() * trial_fes.GetNDofs(), -+ &u); -+ CeedVectorCreate(internal::ceed, test_fes.GetVDim() * test_fes.GetNDofs(), &v); -+ } -+ -+ virtual ~MixedOperator() -+ { -+ for (auto *sub_op : sub_ops) -+ { -+ delete sub_op; -+ } -+ } -+#endif -+}; -+ -+} // namespace ceed -+ -+} // namespace mfem -+ -+#endif // MFEM_LIBCEED_MIXED_OPERATOR -diff --git a/fem/ceed/interface/operator.cpp b/fem/ceed/interface/operator.cpp -index 745e474e5..7f21e7ce4 100644 ---- a/fem/ceed/interface/operator.cpp -+++ b/fem/ceed/interface/operator.cpp -@@ -11,11 +11,9 @@ - - #include "operator.hpp" - --#include "../../../config/config.hpp" - #include "../../../linalg/vector.hpp" - #include "../../fespace.hpp" - #include "util.hpp" --#include "ceed.hpp" - - namespace mfem - { -@@ -27,6 +25,7 @@ namespace ceed - Operator::Operator(CeedOperator op) - { - oper = op; -+ oper_t = nullptr; - CeedSize in_len, out_len; - int ierr = CeedOperatorGetActiveVectorLengths(oper, &in_len, &out_len); - PCeedChk(ierr); -@@ -39,9 +38,15 @@ Operator::Operator(CeedOperator op) - } - #endif - --void Operator::Mult(const mfem::Vector &x, mfem::Vector &y) const -+namespace - { -+ - #ifdef MFEM_USE_CEED -+void CeedAddMult(CeedOperator oper, CeedVector u, CeedVector v, -+ const mfem::Vector &x, mfem::Vector &y, double a) -+{ -+ MFEM_VERIFY(a == 0.0 || a == 1.0, -+ "General coefficient case is not yet supported!"); - const CeedScalar *x_ptr; - CeedScalar *y_ptr; - CeedMemType mem; -@@ -49,21 +54,37 @@ void Operator::Mult(const mfem::Vector &x, mfem::Vector &y) const - if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { - x_ptr = x.Read(); -- y_ptr = y.Write(); -+ y_ptr = (a == 0.0) ? y.Write() : y.ReadWrite(); - } - else - { - x_ptr = x.HostRead(); -- y_ptr = y.HostWrite(); -+ y_ptr = (a == 0.0) ? y.HostWrite() : y.HostReadWrite(); - mem = CEED_MEM_HOST; - } - CeedVectorSetArray(u, mem, CEED_USE_POINTER, const_cast(x_ptr)); - CeedVectorSetArray(v, mem, CEED_USE_POINTER, y_ptr); - -- CeedOperatorApply(oper, u, v, CEED_REQUEST_IMMEDIATE); -+ if (a == 0.0) -+ { -+ CeedOperatorApply(oper, u, v, CEED_REQUEST_IMMEDIATE); -+ } -+ else -+ { -+ CeedOperatorApplyAdd(oper, u, v, CEED_REQUEST_IMMEDIATE); -+ } - - CeedVectorTakeArray(u, mem, const_cast(&x_ptr)); - CeedVectorTakeArray(v, mem, &y_ptr); -+} -+#endif -+ -+} // namespace -+ -+void Operator::Mult(const mfem::Vector &x, mfem::Vector &y) const -+{ -+#ifdef MFEM_USE_CEED -+ CeedAddMult(oper, u, v, x, y, 0.0); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif -@@ -73,29 +94,30 @@ void Operator::AddMult(const mfem::Vector &x, mfem::Vector &y, - const double a) const - { - #ifdef MFEM_USE_CEED -- MFEM_VERIFY(a == 1.0, "General coefficient case is not yet supported!"); -- const CeedScalar *x_ptr; -- CeedScalar *y_ptr; -- CeedMemType mem; -- CeedGetPreferredMemType(mfem::internal::ceed, &mem); -- if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) -- { -- x_ptr = x.Read(); -- y_ptr = y.ReadWrite(); -- } -- else -- { -- x_ptr = x.HostRead(); -- y_ptr = y.HostReadWrite(); -- mem = CEED_MEM_HOST; -- } -- CeedVectorSetArray(u, mem, CEED_USE_POINTER, const_cast(x_ptr)); -- CeedVectorSetArray(v, mem, CEED_USE_POINTER, y_ptr); -+ CeedAddMult(oper, u, v, x, y, 1.0); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} - -- CeedOperatorApplyAdd(oper, u, v, CEED_REQUEST_IMMEDIATE); -+void Operator::MultTranspose(const mfem::Vector &x, mfem::Vector &y) const -+{ -+#ifdef MFEM_USE_CEED -+ MFEM_ASSERT(oper_t, -+ "No transpose operator defined for ceed::Operator::MultTranspose."); -+ CeedAddMult(oper_t, v, u, x, y, 0.0); -+#else -+ MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); -+#endif -+} - -- CeedVectorTakeArray(u, mem, const_cast(&x_ptr)); -- CeedVectorTakeArray(v, mem, &y_ptr); -+void Operator::AddMultTranspose(const mfem::Vector &x, mfem::Vector &y, -+ const double a) const -+{ -+#ifdef MFEM_USE_CEED -+ MFEM_ASSERT(oper_t, -+ "No transpose operator defined for ceed::Operator::AddMultTranspose."); -+ CeedAddMult(oper_t, v, u, x, y, 1.0); - #else - MFEM_ABORT("MFEM must be built with MFEM_USE_CEED=YES to use libCEED."); - #endif -diff --git a/fem/ceed/interface/operator.hpp b/fem/ceed/interface/operator.hpp -index 9e4a4faaf..2f131e64d 100644 ---- a/fem/ceed/interface/operator.hpp -+++ b/fem/ceed/interface/operator.hpp -@@ -26,35 +26,40 @@ class Operator : public mfem::Operator - { - protected: - #ifdef MFEM_USE_CEED -- CeedOperator oper; -+ CeedOperator oper, oper_t; - CeedVector u, v; - -- Operator() : oper(nullptr), u(nullptr), v(nullptr) { } -+ Operator() : oper(nullptr), oper_t(nullptr), u(nullptr), v(nullptr) {} - #endif - - public: - #ifdef MFEM_USE_CEED - /// This class takes ownership of op and will delete it - Operator(CeedOperator op); -+ -+ CeedOperator &GetCeedOperator() { return oper; } -+ CeedOperator &GetCeedOperatorTranspose() { return oper_t; } -+ CeedVector &GetCeedVectorU() { return u; } -+ CeedVector &GetCeedVectorV() { return v; } - #endif - - void Mult(const mfem::Vector &x, mfem::Vector &y) const override; - void AddMult(const mfem::Vector &x, mfem::Vector &y, - const double a = 1.0) const override; -+ void MultTranspose(const mfem::Vector &x, mfem::Vector &y) const override; -+ void AddMultTranspose(const mfem::Vector &x, mfem::Vector &y, -+ const double a = 1.0) const override; - void GetDiagonal(mfem::Vector &diag) const; - - virtual ~Operator() - { - #ifdef MFEM_USE_CEED - CeedOperatorDestroy(&oper); -+ CeedOperatorDestroy(&oper_t); - CeedVectorDestroy(&u); - CeedVectorDestroy(&v); - #endif - } -- --#ifdef MFEM_USE_CEED -- CeedOperator& GetCeedOperator() { return oper; } --#endif - }; - - } // namespace ceed -diff --git a/fem/ceed/interface/restriction.cpp b/fem/ceed/interface/restriction.cpp -index e7e8539bd..32a00223f 100644 ---- a/fem/ceed/interface/restriction.cpp -+++ b/fem/ceed/interface/restriction.cpp -@@ -9,8 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../../../fem/gridfunc.hpp" --#include "ceed.hpp" -+#include "restriction.hpp" -+ -+#include "util.hpp" - - namespace mfem - { -@@ -20,222 +21,368 @@ namespace ceed - - #ifdef MFEM_USE_CEED - --static void InitNativeRestr(const mfem::FiniteElementSpace &fes, -- Ceed ceed, CeedElemRestriction *restr) -+enum RestrType {Strided = 0, Lexico, Native, NativeRange}; -+ -+static void InitLexicoRestr(const mfem::FiniteElementSpace &fes, -+ bool use_bdr, -+ int nelem, -+ Ceed ceed, -+ CeedElemRestriction *restr) - { -- const mfem::FiniteElement *fe = fes.GetFE(0); -+ const mfem::FiniteElement *fe = use_bdr ? fes.GetBE(0) : -+ fes.GetFE(0); - const int P = fe->GetDof(); -- CeedInt compstride = fes.GetOrdering()==Ordering::byVDIM ? 1 : fes.GetNDofs(); -- const mfem::Table &el_dof = fes.GetElementToDofTable(); -- mfem::Array tp_el_dof(el_dof.Size_of_connections()); -- const mfem::TensorBasisElement * tfe = -+ const mfem::TensorBasisElement *tfe = - dynamic_cast(fe); -- const int stride = compstride == 1 ? fes.GetVDim() : 1; -- const mfem::Array& dof_map = tfe->GetDofMap(); -+ const mfem::Array &dof_map = tfe->GetDofMap(); -+ CeedInt compstride = -+ (fes.GetOrdering() == Ordering::byVDIM) ? 1 : fes.GetNDofs(); -+ const int stride = (compstride == 1) ? fes.GetVDim() : 1; -+ const mfem::Table &el_dof = use_bdr ? fes.GetBdrElementToDofTable() : -+ fes.GetElementToDofTable(); -+ const int *el_map = el_dof.GetJ(); -+ mfem::Array tp_el_dof(el_dof.Size_of_connections()); -+ mfem::Array tp_el_orients(el_dof.Size_of_connections()); -+ bool use_orients = false; - -- for (int i = 0; i < fes.GetNE(); i++) -+ for (int i = 0; i < nelem; i++) - { -- const int el_offset = P * i; -+ // No need to handle DofTransformation for tensor-product elements - for (int j = 0; j < P; j++) - { -- tp_el_dof[j+el_offset] = stride*el_dof.GetJ()[dof_map[j]+el_offset]; -+ const int sdid = dof_map[j]; // signed -+ const int did = (sdid >= 0) ? sdid : -1 - sdid; -+ const int sgid = el_map[did + P * i]; // signed -+ const int gid = (sgid >= 0) ? sgid : -1 - sgid; -+ tp_el_dof[j + P * i] = stride * gid; -+ tp_el_orients[j + P * i] = -+ (sgid >= 0 && sdid < 0) || (sgid < 0 && sdid >= 0); -+ use_orients = use_orients || tp_el_orients[j + P * i]; - } - } - -- CeedElemRestrictionCreate(ceed, fes.GetNE(), P, fes.GetVDim(), -- compstride, (fes.GetVDim())*(fes.GetNDofs()), -- CEED_MEM_HOST, CEED_COPY_VALUES, -- tp_el_dof.GetData(), restr); -+ if (use_orients) -+ { -+ CeedElemRestrictionCreateOriented(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), tp_el_orients.GetData(), -+ restr); -+ } -+ else -+ { -+ CeedElemRestrictionCreate(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), restr); -+ } - } - --static void InitLexicoRestr(const mfem::FiniteElementSpace &fes, -- Ceed ceed, CeedElemRestriction *restr) -+static void InitNativeRestr(const mfem::FiniteElementSpace &fes, -+ bool use_bdr, -+ int nelem, -+ Ceed ceed, -+ CeedElemRestriction *restr) - { -- const mfem::FiniteElement *fe = fes.GetFE(0); -+ const mfem::FiniteElement *fe = use_bdr ? fes.GetBE(0) : -+ fes.GetFE(0); - const int P = fe->GetDof(); -- CeedInt compstride = fes.GetOrdering()==Ordering::byVDIM ? 1 : fes.GetNDofs(); -- const mfem::Table &el_dof = fes.GetElementToDofTable(); -+ CeedInt compstride = -+ (fes.GetOrdering() == Ordering::byVDIM) ? 1 : fes.GetNDofs(); -+ const int stride = (compstride == 1) ? fes.GetVDim() : 1; -+ const mfem::Table &el_dof = use_bdr ? fes.GetBdrElementToDofTable() : -+ fes.GetElementToDofTable(); -+ const int *el_map = el_dof.GetJ(); - mfem::Array tp_el_dof(el_dof.Size_of_connections()); -- const int stride = compstride == 1 ? fes.GetVDim() : 1; -+ mfem::Array tp_el_orients(el_dof.Size_of_connections()); -+ bool use_orients = false; - -- for (int e = 0; e < fes.GetNE(); e++) -+ for (int i = 0; i < nelem; i++) - { -- for (int i = 0; i < P; i++) -+ // DofTransformation support uses InitNativeRestrWithIndices -+ for (int j = 0; j < P; j++) - { -- tp_el_dof[i + e*P] = stride*el_dof.GetJ()[i + e*P]; -+ const int sgid = el_map[j + P * i]; // signed -+ const int gid = (sgid >= 0) ? sgid : -1 - sgid; -+ tp_el_dof[j + P * i] = stride * gid; -+ tp_el_orients[j + P * i] = (sgid < 0); -+ use_orients = use_orients || tp_el_orients[j + P * i]; - } - } - -- CeedElemRestrictionCreate(ceed, fes.GetNE(), P, fes.GetVDim(), -- compstride, (fes.GetVDim())*(fes.GetNDofs()), -- CEED_MEM_HOST, CEED_COPY_VALUES, -- tp_el_dof.GetData(), restr); --} -- --static void InitRestrictionImpl(const mfem::FiniteElementSpace &fes, -- Ceed ceed, CeedElemRestriction *restr) --{ -- const mfem::FiniteElement *fe = fes.GetFE(0); -- const mfem::TensorBasisElement * tfe = -- dynamic_cast(fe); -- if ( tfe && tfe->GetDofMap().Size()>0 ) // Native ordering using dof_map -+ if (use_orients) - { -- InitNativeRestr(fes, ceed, restr); -+ CeedElemRestrictionCreateOriented(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), tp_el_orients.GetData(), -+ restr); - } -- else // Lexicographic ordering -+ else - { -- InitLexicoRestr(fes, ceed, restr); -+ CeedElemRestrictionCreate(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), restr); - } - } - --static void InitNativeRestrWithIndices( -- const mfem::FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- Ceed ceed, CeedElemRestriction *restr) -+static void InitLexicoRestrWithIndices(const mfem::FiniteElementSpace &fes, -+ bool use_bdr, -+ int nelem, -+ const int *indices, -+ Ceed ceed, -+ CeedElemRestriction *restr) - { -- const mfem::FiniteElement *fe = fes.GetFE(indices[0]); -+ const mfem::FiniteElement *fe = use_bdr ? fes.GetBE(indices[0]) : -+ fes.GetFE(indices[0]); - const int P = fe->GetDof(); -- CeedInt compstride = fes.GetOrdering()==Ordering::byVDIM ? 1 : fes.GetNDofs(); -- mfem::Array tp_el_dof(nelem*P); -- const mfem::TensorBasisElement * tfe = -+ const mfem::TensorBasisElement *tfe = - dynamic_cast(fe); -- Array dofs; -- const int stride = compstride == 1 ? fes.GetVDim() : 1; -- const mfem::Array& dof_map = tfe->GetDofMap(); -+ const mfem::Array &dof_map = tfe->GetDofMap(); -+ CeedInt compstride = -+ (fes.GetOrdering() == Ordering::byVDIM) ? 1 : fes.GetNDofs(); -+ const int stride = (compstride == 1) ? fes.GetVDim() : 1; -+ mfem::Array tp_el_dof(nelem * P), dofs; -+ mfem::Array tp_el_orients(nelem * P); -+ bool use_orients = false; - - for (int i = 0; i < nelem; i++) - { -+ // No need to handle DofTransformation for tensor-product elements - const int elem_index = indices[i]; -- fes.GetElementDofs(elem_index, dofs); -- const int el_offset = P * i; -- for (int j = 0; j < P; j++) -+ mfem::DofTransformation *dof_trans; -+ if (use_bdr) - { -- tp_el_dof[j + el_offset] = stride*dofs[dof_map[j]]; -+ dof_trans = fes.GetBdrElementDofs(elem_index, dofs); - } -- } -- -- CeedElemRestrictionCreate(ceed, nelem, P, fes.GetVDim(), -- compstride, (fes.GetVDim())*(fes.GetNDofs()), -- CEED_MEM_HOST, CEED_COPY_VALUES, -- tp_el_dof.GetData(), restr); --} -- --static void InitLexicoRestrWithIndices( -- const mfem::FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- Ceed ceed, CeedElemRestriction *restr) --{ -- const mfem::FiniteElement *fe = fes.GetFE(indices[0]); -- const int P = fe->GetDof(); -- CeedInt compstride = fes.GetOrdering()==Ordering::byVDIM ? 1 : fes.GetNDofs(); -- mfem::Array tp_el_dof(nelem*P); -- Array dofs; -- const int stride = compstride == 1 ? fes.GetVDim() : 1; -- -- for (int i = 0; i < nelem; i++) -- { -- const int elem_index = indices[i]; -- fes.GetElementDofs(elem_index, dofs); -- const int el_offset = P * i; -+ else -+ { -+ dof_trans = fes.GetElementDofs(elem_index, dofs); -+ } -+ MFEM_VERIFY(!dof_trans, -+ "Unexpected DofTransformation for lexicographic element " -+ "restriction."); - for (int j = 0; j < P; j++) - { -- tp_el_dof[j + el_offset] = stride*dofs[j]; -+ const int sdid = dof_map[j]; // signed -+ const int did = (sdid >= 0) ? sdid : -1 - sdid; -+ const int sgid = dofs[did]; // signed -+ const int gid = (sgid >= 0) ? sgid : -1 - sgid; -+ tp_el_dof[j + P * i] = stride * gid; -+ tp_el_orients[j + P * i] = -+ (sgid >= 0 && sdid < 0) || (sgid < 0 && sdid >= 0); -+ use_orients = use_orients || tp_el_orients[j + P * i]; - } - } - -- CeedElemRestrictionCreate(ceed, nelem, P, fes.GetVDim(), -- compstride, (fes.GetVDim())*(fes.GetNDofs()), -- CEED_MEM_HOST, CEED_COPY_VALUES, -- tp_el_dof.GetData(), restr); -+ if (use_orients) -+ { -+ CeedElemRestrictionCreateOriented(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), tp_el_orients.GetData(), -+ restr); -+ } -+ else -+ { -+ CeedElemRestrictionCreate(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), restr); -+ } - } - --static void InitRestrictionWithIndicesImpl( -- const mfem::FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- Ceed ceed, CeedElemRestriction *restr) -+static void InitNativeRestrWithIndices(const mfem::FiniteElementSpace &fes, -+ bool use_bdr, -+ bool is_interp_range, -+ int nelem, -+ const int *indices, -+ Ceed ceed, -+ CeedElemRestriction *restr) - { -- const mfem::FiniteElement *fe = fes.GetFE(indices[0]); -- const mfem::TensorBasisElement * tfe = -- dynamic_cast(fe); -- if ( tfe && tfe->GetDofMap().Size()>0 ) // Native ordering using dof_map -+ const int i0 = indices ? indices[0] : 0; -+ const mfem::FiniteElement *fe = use_bdr ? fes.GetBE(i0) : fes.GetFE(i0); -+ const int P = fe->GetDof(); -+ CeedInt compstride = -+ (fes.GetOrdering() == Ordering::byVDIM) ? 1 : fes.GetNDofs(); -+ const int stride = (compstride == 1) ? fes.GetVDim() : 1; -+ mfem::Array tp_el_dof(nelem * P), dofs; -+ mfem::Array tp_el_orients; -+ mfem::Array tp_el_curl_orients; -+ mfem::Vector el_trans_j; -+ mfem::DofTransformation *dof_trans = use_bdr ? fes.GetBdrElementDofs(i0, dofs) : -+ fes.GetElementDofs(i0, dofs); -+ if (!dof_trans || dof_trans->IsEmpty()) - { -- InitNativeRestrWithIndices(fes, nelem, indices, ceed, restr); -+ tp_el_orients.SetSize(nelem * P); - } -- else // Lexicographic ordering -+ else - { -- InitLexicoRestrWithIndices(fes, nelem, indices, ceed, restr); -+ tp_el_curl_orients.SetSize(nelem * P * 3, 0.0); -+ el_trans_j.SetSize(P); - } --} - --static void InitCoeffRestrictionWithIndicesImpl( -- const mfem::FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- int nquads, -- int ncomp, -- Ceed ceed, -- CeedElemRestriction *restr) --{ -- mfem::Array tp_el_dof(nelem*nquads); -- const int stride_quad = ncomp; -- const int stride_elem = ncomp*nquads; -- // TODO generalize to support different #quads - for (int i = 0; i < nelem; i++) - { -- const int elem_index = indices[i]; -- const int el_offset = elem_index * stride_elem; -- for (int j = 0; j < nquads; j++) -+ const int elem_index = indices ? indices[i] : i; -+ if (use_bdr) -+ { -+ dof_trans = fes.GetBdrElementDofs(elem_index, dofs); -+ } -+ else - { -- tp_el_dof[j + nquads * i] = j * stride_quad + el_offset; -+ dof_trans = fes.GetElementDofs(elem_index, dofs); -+ } -+ if (!dof_trans || dof_trans->IsEmpty()) -+ { -+ for (int j = 0; j < P; j++) -+ { -+ const int sgid = dofs[j]; // signed -+ const int gid = (sgid >= 0) ? sgid : -1 - sgid; -+ tp_el_dof[j + P * i] = stride * gid; -+ tp_el_orients[j + P * i] = (sgid < 0); -+ } -+ } -+ else -+ { -+ for (int j = 0; j < P; j++) -+ { -+ const int sgid = dofs[j]; // signed -+ const int gid = (sgid >= 0) ? sgid : -1 - sgid; -+ tp_el_dof[j + P * i] = stride * gid; -+ -+ // Fill column j of element tridiagonal matrix tp_el_curl_orients -+ el_trans_j = 0.0; -+ el_trans_j(j) = 1.0; -+ if (is_interp_range) -+ { -+ dof_trans->InvTransformDual(el_trans_j); -+ } -+ else -+ { -+ dof_trans->InvTransformPrimal(el_trans_j); -+ } -+ el_trans_j *= (sgid < 0) ? -1.0 : 1.0; -+ tp_el_curl_orients[3 * (j + 0 + P * i) + 1] = el_trans_j(j + 0); -+ if (j > 0) -+ { -+ tp_el_curl_orients[3 * (j - 1 + P * i) + 2] = el_trans_j(j - 1); -+ } -+ if (j < P - 1) -+ { -+ tp_el_curl_orients[3 * (j + 1 + P * i) + 0] = el_trans_j(j + 1); -+ } -+#ifdef MFEM_DEBUG -+ int nnz = 0; -+ for (int k = 0; k < P; k++) -+ { -+ if (k < j - 1 && k > j + 1 && el_trans_j(k) != 0.0) { nnz++; } -+ } -+ MFEM_ASSERT(nnz == 0, -+ "Element transformation matrix is not tridiagonal at column " -+ << j << " (nnz = " << nnz << ")!"); -+#endif -+ } - } - } -- CeedElemRestrictionCreate(ceed, nelem, nquads, ncomp, 1, -- ncomp*fes.GetNE()*nquads, -- CEED_MEM_HOST, CEED_COPY_VALUES, -- tp_el_dof.GetData(), restr); --} - --void InitStridedRestriction(const mfem::FiniteElementSpace &fes, -- CeedInt nelem, CeedInt nqpts, CeedInt qdatasize, -- const CeedInt *strides, -- CeedElemRestriction *restr) --{ -- RestrKey restr_key(&fes, nelem, nqpts, qdatasize, restr_type::Strided); -- auto restr_itr = mfem::internal::ceed_restr_map.find(restr_key); -- if (restr_itr == mfem::internal::ceed_restr_map.end()) -+ if (tp_el_curl_orients.Size()) - { -- CeedElemRestrictionCreateStrided(mfem::internal::ceed, nelem, nqpts, qdatasize, -- nelem*nqpts*qdatasize, -- strides, -- restr); -- // Will be automatically destroyed when @a fes gets destroyed. -- mfem::internal::ceed_restr_map[restr_key] = *restr; -+ CeedElemRestrictionCreateCurlOriented(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), tp_el_curl_orients.GetData(), -+ restr); -+ } -+ else if (tp_el_orients.Size()) -+ { -+ CeedElemRestrictionCreateOriented(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), tp_el_orients.GetData(), -+ restr); - } - else - { -- *restr = restr_itr->second; -+ CeedElemRestrictionCreate(ceed, nelem, P, fes.GetVDim(), -+ compstride, fes.GetVDim() * fes.GetNDofs(), -+ CEED_MEM_HOST, CEED_COPY_VALUES, -+ tp_el_dof.GetData(), restr); - } - } - - void InitRestriction(const FiniteElementSpace &fes, -+ bool use_bdr, -+ int nelem, -+ const int *indices, - Ceed ceed, - CeedElemRestriction *restr) - { -- // Check for FES -> basis, restriction in hash tables -- const mfem::FiniteElement *fe = fes.GetFE(0); -+ // Check for fes -> restriction in hash table -+ // {-1, -1, -1} is unique from CEED_STRIDES_BACKEND for strided restrictions -+ const mfem::FiniteElement *fe; -+ if (indices) -+ { -+ fe = use_bdr ? fes.GetBE(indices[0]) : fes.GetFE(indices[0]); -+ } -+ else -+ { -+ fe = use_bdr ? fes.GetBE(0) : fes.GetFE(0); -+ } - const int P = fe->GetDof(); -- const int nelem = fes.GetNE(); - const int ncomp = fes.GetVDim(); -- RestrKey restr_key(&fes, nelem, P, ncomp, restr_type::Standard); -+ const mfem::TensorBasisElement *tfe = -+ dynamic_cast(fe); -+ const bool vector = fe->GetRangeType() == mfem::FiniteElement::VECTOR; -+ const RestrType type = (tfe && tfe->GetDofMap().Size() > 0 && !vector) ? -+ RestrType::Lexico : RestrType::Native; -+ RestrKey restr_key(&fes, {nelem, P, ncomp}, {-1, -1, -1}, type); - auto restr_itr = mfem::internal::ceed_restr_map.find(restr_key); - - // Init or retrieve key values - if (restr_itr == mfem::internal::ceed_restr_map.end()) - { -- InitRestrictionImpl(fes, ceed, restr); -+ if (indices) -+ { -+ if (type == RestrType::Lexico) -+ { -+ // Lexicographic ordering using dof_map -+ InitLexicoRestrWithIndices(fes, use_bdr, nelem, indices, -+ ceed, restr); -+ } -+ else -+ { -+ // Native ordering -+ InitNativeRestrWithIndices(fes, use_bdr, false, nelem, indices, -+ ceed, restr); -+ } -+ } -+ else -+ { -+ mfem::Array dofs; -+ mfem::DofTransformation *dof_trans = use_bdr ? fes.GetBdrElementDofs(0, dofs) : -+ fes.GetElementDofs(0, dofs); -+ if (type == RestrType::Lexico) -+ { -+ // Lexicographic ordering using dof_map -+ MFEM_VERIFY(!dof_trans, -+ "Unexpected DofTransformation for lexicographic element " -+ "restriction."); -+ InitLexicoRestr(fes, use_bdr, nelem, ceed, restr); -+ } -+ else if (!dof_trans || dof_trans->IsEmpty()) -+ { -+ // Native ordering without dof_trans -+ InitNativeRestr(fes, use_bdr, nelem, ceed, restr); -+ } -+ else -+ { -+ // Native ordering with dof_trans -+ InitNativeRestrWithIndices(fes, use_bdr, false, nelem, nullptr, -+ ceed, restr); -+ } -+ } - mfem::internal::ceed_restr_map[restr_key] = *restr; - } - else -@@ -244,48 +391,116 @@ void InitRestriction(const FiniteElementSpace &fes, - } - } - --void InitRestrictionWithIndices(const FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- Ceed ceed, -- CeedElemRestriction *restr) -+void InitInterpolatorRestrictions(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ int nelem, -+ const int *indices, -+ Ceed ceed, -+ CeedElemRestriction *trial_restr, -+ CeedElemRestriction *test_restr) - { -- // Check for FES -> basis, restriction in hash tables -- const mfem::FiniteElement *fe = fes.GetFE(indices[0]); -- const int P = fe->GetDof(); -- const int ncomp = fes.GetVDim(); -- RestrKey restr_key(&fes, nelem, P, ncomp, restr_type::Standard); -- auto restr_itr = mfem::internal::ceed_restr_map.find(restr_key); -- -- // Init or retrieve key values -- if (restr_itr == mfem::internal::ceed_restr_map.end()) -+ // Check for fes -> restriction in hash table -+ // {-1, -1, -1} is unique from CEED_STRIDES_BACKEND for strided restrictions -+ const mfem::FiniteElement *trial_fe, *test_fe; -+ if (indices) - { -- InitRestrictionWithIndicesImpl(fes, nelem, indices, ceed, restr); -- mfem::internal::ceed_restr_map[restr_key] = *restr; -+ trial_fe = trial_fes.GetFE(indices[0]); -+ test_fe = test_fes.GetFE(indices[0]); - } - else - { -- *restr = restr_itr->second; -+ trial_fe = trial_fes.GetFE(0); -+ test_fe = test_fes.GetFE(0); -+ } -+ for (int s = 0; s < 2; s++) -+ { -+ // The restriction for the test space is slightly different as the output -+ // is a primal vector instead of a dual vector, and lexicographic ordering -+ // is never used (no use of tensor-product basis) -+ CeedElemRestriction *restr = (s == 0) ? trial_restr : test_restr; -+ const FiniteElementSpace &fes = (s == 0) ? trial_fes : test_fes; -+ const mfem::FiniteElement *fe = (s == 0) ? trial_fe : test_fe; -+ const int P = fe->GetDof(); -+ const int ncomp = fes.GetVDim(); -+ mfem::Array dofs; -+ mfem::DofTransformation *dof_trans = -+ indices ? fes.GetElementDofs(indices[0], dofs) : fes.GetElementDofs(0, dofs); -+ const RestrType type = (dof_trans && s > 0) ? RestrType::NativeRange : -+ RestrType::Native; -+ RestrKey restr_key(&fes, {nelem, P, ncomp}, {-1, -1, -1}, type); -+ auto restr_itr = mfem::internal::ceed_restr_map.find(restr_key); -+ -+ // Init or retrieve key values -+ if (restr_itr == mfem::internal::ceed_restr_map.end()) -+ { -+ if (indices) -+ { -+ if (type == RestrType::Lexico) -+ { -+ // Lexicographic ordering using dof_map -+ MFEM_VERIFY(!dof_trans, -+ "Unexpected DofTransformation for lexicographic element " -+ "restriction."); -+ InitLexicoRestrWithIndices(fes, false, nelem, indices, -+ ceed, restr); -+ } -+ else -+ { -+ // Native ordering -+ InitNativeRestrWithIndices(fes, false, (s > 0), nelem, indices, -+ ceed, restr); -+ } -+ } -+ else -+ { -+ if (type == RestrType::Lexico) -+ { -+ // Lexicographic ordering using dof_map -+ MFEM_VERIFY(!dof_trans, -+ "Unexpected DofTransformation for lexicographic element " -+ "restriction."); -+ InitLexicoRestr(fes, false, nelem, ceed, restr); -+ } -+ else if (!dof_trans || dof_trans->IsEmpty()) -+ { -+ // Native ordering without dof_trans -+ InitNativeRestr(fes, false, nelem, ceed, restr); -+ } -+ else -+ { -+ // Native ordering with dof_trans -+ InitNativeRestrWithIndices(fes, false, (s > 0), nelem, nullptr, -+ ceed, restr); -+ } -+ } -+ mfem::internal::ceed_restr_map[restr_key] = *restr; -+ } -+ else -+ { -+ *restr = restr_itr->second; -+ } - } - } - --void InitCoeffRestrictionWithIndices(const FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- int nquads, -- int ncomp, -- Ceed ceed, -- CeedElemRestriction *restr) -+void InitStridedRestriction(const mfem::FiniteElementSpace &fes, -+ CeedInt nelem, -+ CeedInt nqpts, -+ CeedInt qdatasize, -+ const CeedInt strides[3], -+ Ceed ceed, -+ CeedElemRestriction *restr) - { -- // Check for FES -> basis, restriction in hash tables -- RestrKey restr_key(&fes, nelem, nquads, ncomp, restr_type::Coeff); -+ // Check for fes -> restriction in hash table -+ RestrKey restr_key(&fes, {nelem, nqpts, qdatasize}, -+ {strides[0], strides[1], strides[2]}, RestrType::Strided); - auto restr_itr = mfem::internal::ceed_restr_map.find(restr_key); - - // Init or retrieve key values - if (restr_itr == mfem::internal::ceed_restr_map.end()) - { -- InitCoeffRestrictionWithIndicesImpl(fes, nelem, indices, nquads, ncomp, -- ceed, restr); -+ CeedElemRestrictionCreateStrided(ceed, nelem, nqpts, qdatasize, -+ nelem * nqpts * qdatasize, strides, -+ restr); - mfem::internal::ceed_restr_map[restr_key] = *restr; - } - else -diff --git a/fem/ceed/interface/restriction.hpp b/fem/ceed/interface/restriction.hpp -index 221716b39..612754dc7 100644 ---- a/fem/ceed/interface/restriction.hpp -+++ b/fem/ceed/interface/restriction.hpp -@@ -12,6 +12,7 @@ - #ifndef MFEM_LIBCEED_RESTR - #define MFEM_LIBCEED_RESTR - -+#include "../../fespace.hpp" - #include "ceed.hpp" - - namespace mfem -@@ -21,65 +22,89 @@ namespace ceed - { - - #ifdef MFEM_USE_CEED --/** @brief Initialize a CeedElemRestriction for non-mixed meshes. - -- @param[in] fes Input finite element space. -- @param[in] ceed Input Ceed object. -- @param[out] restr The address of the initialized CeedElemRestriction object. --*/ -+/** @brief Initialize a CeedElemRestriction based on an -+ mfem::FiniteElementSpace @a fes and an optional list of @a nelem elements -+ of indices @a indices. -+ -+ @param[in] fes The finite element space. -+ @param[in] use_bdr Create the basis and restriction for boundary elements. -+ @param[in] nelem The number of elements. -+ @param[in] indices The indices of the elements of same type in the -+ `FiniteElementSpace`. If `indices == nullptr`, assumes -+ that the `FiniteElementSpace` is not mixed. -+ @param[in] ceed The Ceed object. -+ @param[out] restr The `CeedElemRestriction` to initialize. */ - void InitRestriction(const FiniteElementSpace &fes, -+ bool use_bdr, -+ int nelem, -+ const int *indices, - Ceed ceed, - CeedElemRestriction *restr); - --/** @brief Initialize a CeedElemRestriction for mixed meshes. -+inline void InitRestriction(const FiniteElementSpace &fes, -+ bool use_bdr, -+ Ceed ceed, -+ CeedElemRestriction *restr) -+{ -+ InitRestriction(fes, use_bdr, use_bdr ? fes.GetNBE() : fes.GetNE(), -+ nullptr, ceed, restr); -+} - -- @param[in] fes The finite element space. -- @param[in] ceed The Ceed object. -+/** @brief Initialize a pair of CeedElemRestriction objects based on a -+ mfem::FiniteElementSpace @a trial_fes and @a test_fes, and an optional list -+ of @a nelem elements of indices @a indices. -+ -+ @param[in] trial_fes The trial finite element space. -+ @param[in] test_fes The test finite element space. - @param[in] nelem The number of elements. - @param[in] indices The indices of the elements of same type in the -- `FiniteElementSpace`. -- @param[out] restr The `CeedElemRestriction` to initialize. */ --void InitRestrictionWithIndices(const FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- Ceed ceed, -- CeedElemRestriction *restr); -+ `FiniteElementSpace`. If `indices == nullptr`, assumes -+ that the `FiniteElementSpace` is not mixed. -+ @param[in] ceed The Ceed object. -+ @param[out] trial_restr The `CeedElemRestriction` to initialize for the -+ trial space. -+ @param[out] test_restr The `CeedElemRestriction` to initialize for the -+ test space. */ -+void InitInterpolatorRestrictions(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ int nelem, -+ const int *indices, -+ Ceed ceed, -+ CeedElemRestriction *trial_restr, -+ CeedElemRestriction *test_restr); -+ -+inline void InitInterpolatorRestrictions(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ Ceed ceed, -+ CeedElemRestriction *trial_restr, -+ CeedElemRestriction *test_restr) -+{ -+ InitInterpolatorRestrictions(trial_fes, test_fes, trial_fes.GetNE(), -+ nullptr, ceed, trial_restr, test_restr); -+} - --/** @brief Initialize a strided CeedElemRestriction -+/** @brief Initialize a strided CeedElemRestriction. - - @param[in] fes Input finite element space. - @param[in] nelem is the number of elements. - @param[in] nqpts is the total number of quadrature points. - @param[in] qdatasize is the number of data per quadrature point. - @param[in] strides Array for strides between [nodes, components, elements]. -- Data for node i, component j, element k can be found in the L-vector at -- index i*strides[0] + j*strides[1] + k*strides[2]. CEED_STRIDES_BACKEND may -- be used with vectors created by a Ceed backend. -+ Data for node i, component j, element k can be found in -+ the L-vector at index i*strides[0] + j*strides[1] + -+ k*strides[2]. CEED_STRIDES_BACKEND may be used with -+ vectors created by a Ceed backend. -+ @param[in] ceed The Ceed object. - @param[out] restr The `CeedElemRestriction` to initialize. */ - void InitStridedRestriction(const mfem::FiniteElementSpace &fes, -- CeedInt nelem, CeedInt nqpts, CeedInt qdatasize, -- const CeedInt *strides, -+ CeedInt nelem, -+ CeedInt nqpts, -+ CeedInt qdatasize, -+ const CeedInt strides[3], -+ Ceed ceed, - CeedElemRestriction *restr); - --/** @brief Initialize a CeedElemRestriction for a mfem::Coefficient on a mixed -- mesh. -- -- @param[in] fes The finite element space. -- @param[in] nelem is the number of elements. -- @param[in] indices The indices of the elements of same type in the -- `FiniteElementSpace`. -- @param[in] nquads is the total number of quadrature points -- @param[in] ncomp is the number of data per quadrature point -- @param[in] ceed The Ceed object. -- @param[out] restr The `CeedElemRestriction` to initialize. */ --void InitCoeffRestrictionWithIndices(const FiniteElementSpace &fes, -- int nelem, -- const int* indices, -- int nquads, -- int ncomp, -- Ceed ceed, -- CeedElemRestriction *restr); -- - #endif - - } // namespace ceed -diff --git a/fem/ceed/interface/util.cpp b/fem/ceed/interface/util.cpp -index b65fd2197..4eecc7841 100644 ---- a/fem/ceed/interface/util.cpp -+++ b/fem/ceed/interface/util.cpp -@@ -9,14 +9,11 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - -+#include "util.hpp" -+ - #include "../../../general/device.hpp" --#include "../../../fem/gridfunc.hpp" - #include "../../../linalg/dtensor.hpp" -- --#include "basis.hpp" --#include "restriction.hpp" --#include "ceed.hpp" -- -+#include "../../gridfunc.hpp" - #include - #include - #if !defined(_WIN32) || !defined(_MSC_VER) -@@ -44,7 +41,7 @@ void RemoveBasisAndRestriction(const mfem::FiniteElementSpace *fes) - auto itb = mfem::internal::ceed_basis_map.begin(); - while (itb != mfem::internal::ceed_basis_map.end()) - { -- if (std::get<0>(itb->first)==fes) -+ if (std::get<0>(itb->first) == fes) - { - CeedBasisDestroy(&itb->second); - itb = mfem::internal::ceed_basis_map.erase(itb); -@@ -57,7 +54,7 @@ void RemoveBasisAndRestriction(const mfem::FiniteElementSpace *fes) - auto itr = mfem::internal::ceed_restr_map.begin(); - while (itr != mfem::internal::ceed_restr_map.end()) - { -- if (std::get<0>(itr->first)==fes) -+ if (std::get<0>(itr->first) == fes) - { - CeedElemRestrictionDestroy(&itr->second); - itr = mfem::internal::ceed_restr_map.erase(itr); -@@ -78,78 +75,41 @@ void InitVector(const mfem::Vector &v, CeedVector &cv) - CeedScalar *cv_ptr; - CeedMemType mem; - CeedGetPreferredMemType(mfem::internal::ceed, &mem); -- if ( Device::Allows(Backend::DEVICE_MASK) && mem==CEED_MEM_DEVICE ) -+ if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { -- cv_ptr = const_cast(v.Read()); -+ cv_ptr = const_cast(v.Read()); - } - else - { -- cv_ptr = const_cast(v.HostRead()); -+ cv_ptr = const_cast(v.HostRead()); - mem = CEED_MEM_HOST; - } - CeedVectorSetArray(cv, mem, CEED_USE_POINTER, cv_ptr); - } - --void InitBasisAndRestriction(const FiniteElementSpace &fes, -- const IntegrationRule &irm, -- Ceed ceed, CeedBasis *basis, -- CeedElemRestriction *restr) --{ -- InitBasis(fes, irm, ceed, basis); -- InitRestriction(fes, ceed, restr); --} -- --void InitBasisAndRestrictionWithIndices(const FiniteElementSpace &fes, -- const IntegrationRule &irm, -- int nelem, -- const int* indices, -- Ceed ceed, CeedBasis *basis, -- CeedElemRestriction *restr) --{ -- InitBasisWithIndices(fes, irm, nelem, indices, ceed, basis); -- InitRestrictionWithIndices(fes, nelem, indices, ceed, restr); --} -- --void InitBasisAndRestriction(const FiniteElementSpace &fes, -- const IntegrationRule &irm, -- int nelem, -- const int* indices, -- Ceed ceed, CeedBasis *basis, -- CeedElemRestriction *restr) --{ -- if (indices) -- { -- InitBasisAndRestrictionWithIndices(fes,irm,nelem,indices,ceed,basis,restr); -- } -- else -- { -- InitBasisAndRestriction(fes,irm,ceed,basis,restr); -- } --} -- - // Assumes a tensor-product operator with one active field - int CeedOperatorGetActiveField(CeedOperator oper, CeedOperatorField *field) - { - int ierr; - Ceed ceed; -- ierr = CeedOperatorGetCeed(oper, &ceed); CeedChk(ierr); -+ ierr = CeedOperatorGetCeed(oper, &ceed); PCeedChk(ierr); - - CeedQFunction qf; - bool isComposite; -- ierr = CeedOperatorIsComposite(oper, &isComposite); CeedChk(ierr); -+ ierr = CeedOperatorIsComposite(oper, &isComposite); PCeedChk(ierr); - CeedOperator *subops; - if (isComposite) - { - #if CEED_VERSION_GE(0, 10, 2) -- ierr = CeedCompositeOperatorGetSubList(oper, &subops); CeedChk(ierr); -+ ierr = CeedCompositeOperatorGetSubList(oper, &subops); PCeedChk(ierr); - #else -- ierr = CeedOperatorGetSubList(oper, &subops); CeedChk(ierr); -+ ierr = CeedOperatorGetSubList(oper, &subops); PCeedChk(ierr); - #endif -- ierr = CeedOperatorGetQFunction(subops[0], &qf); CeedChk(ierr); -+ ierr = CeedOperatorGetQFunction(subops[0], &qf); PCeedChk(ierr); - } - else - { -- ierr = CeedOperatorGetQFunction(oper, &qf); CeedChk(ierr); -+ ierr = CeedOperatorGetQFunction(oper, &qf); PCeedChk(ierr); - } - CeedInt numinputfields, numoutputfields; - ierr = CeedQFunctionGetNumArgs(qf, &numinputfields, &numoutputfields); -@@ -157,12 +117,12 @@ int CeedOperatorGetActiveField(CeedOperator oper, CeedOperatorField *field) - if (isComposite) - { - ierr = CeedOperatorGetFields(subops[0], &numinputfields, &inputfields, -- &numoutputfields, NULL); CeedChk(ierr); -+ &numoutputfields, NULL); PCeedChk(ierr); - } - else - { - ierr = CeedOperatorGetFields(oper, &numinputfields, &inputfields, -- &numoutputfields, NULL); CeedChk(ierr); -+ &numoutputfields, NULL); PCeedChk(ierr); - } - - CeedVector if_vector; -@@ -170,7 +130,7 @@ int CeedOperatorGetActiveField(CeedOperator oper, CeedOperatorField *field) - int found_index = -1; - for (int i = 0; i < numinputfields; ++i) - { -- ierr = CeedOperatorFieldGetVector(inputfields[i], &if_vector); CeedChk(ierr); -+ ierr = CeedOperatorFieldGetVector(inputfields[i], &if_vector); PCeedChk(ierr); - if (if_vector == CEED_VECTOR_ACTIVE) - { - if (found) -@@ -190,66 +150,6 @@ int CeedOperatorGetActiveField(CeedOperator oper, CeedOperatorField *field) - return 0; - } - --template <> --const IntegrationRule & GetRule( -- const MassIntegrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &trans) --{ -- return MassIntegrator::GetRule(trial_fe, test_fe, trans); --} -- --template <> --const IntegrationRule & GetRule( -- const VectorMassIntegrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &trans) --{ -- return MassIntegrator::GetRule(trial_fe, test_fe, trans); --} -- --template <> --const IntegrationRule & GetRule( -- const ConvectionIntegrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &trans) --{ -- return ConvectionIntegrator::GetRule(trial_fe, trans); --} -- --template <> --const IntegrationRule & GetRule( -- const VectorConvectionNLFIntegrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &trans) --{ -- return VectorConvectionNLFIntegrator::GetRule(trial_fe, trans); --} -- --template <> --const IntegrationRule & GetRule( -- const DiffusionIntegrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &trans) --{ -- return DiffusionIntegrator::GetRule(trial_fe, test_fe); --} -- --template <> --const IntegrationRule & GetRule( -- const VectorDiffusionIntegrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &trans) --{ -- return DiffusionIntegrator::GetRule(trial_fe, test_fe); --} -- - std::string ceed_path; - - const std::string &GetCeedPath() -diff --git a/fem/ceed/interface/util.hpp b/fem/ceed/interface/util.hpp -index 17ae4adae..823cd09e3 100644 ---- a/fem/ceed/interface/util.hpp -+++ b/fem/ceed/interface/util.hpp -@@ -12,15 +12,15 @@ - #ifndef MFEM_LIBCEED_UTIL - #define MFEM_LIBCEED_UTIL - --#include "../../../config/config.hpp" -+#include - #include - #include - #include -- -+#include "../../../general/error.hpp" - #include "ceed.hpp" - #ifdef MFEM_USE_CEED - #include --#include // for CeedOperatorField -+#include - #endif - - namespace mfem -@@ -40,7 +40,7 @@ namespace ceed - { - - /** @brief Remove from ceed_basis_map and ceed_restr_map the entries associated -- with the given @a fes. */ -+ with the given @a fes when @a fes gets destroyed. */ - void RemoveBasisAndRestriction(const mfem::FiniteElementSpace *fes); - - #ifdef MFEM_USE_CEED -@@ -48,7 +48,7 @@ void RemoveBasisAndRestriction(const mfem::FiniteElementSpace *fes); - #define PCeedChk(err) do { \ - if ((err)) \ - { \ -- const char * errmsg; \ -+ const char *errmsg; \ - CeedGetErrorMessage(internal::ceed, &errmsg); \ - MFEM_ABORT(errmsg); \ - } \ -@@ -57,91 +57,54 @@ void RemoveBasisAndRestriction(const mfem::FiniteElementSpace *fes); - /// Initialize a CeedVector from an mfem::Vector - void InitVector(const mfem::Vector &v, CeedVector &cv); - --/** @brief Initialize a CeedBasis and a CeedElemRestriction based on an -- mfem::FiniteElementSpace @a fes, and an mfem::IntegrationRule @a ir. -- -- @param[in] fes The finite element space. -- @param[in] ir The integration rule. -- @param[in] ceed The Ceed object. -- @param[out] basis The `CeedBasis` to initialize. -- @param[out] restr The `CeedElemRestriction` to initialize. -- -- @warning Only for non-mixed finite element spaces. */ --void InitBasisAndRestriction(const mfem::FiniteElementSpace &fes, -- const mfem::IntegrationRule &ir, -- Ceed ceed, CeedBasis *basis, -- CeedElemRestriction *restr); -- --/** @brief Initialize a CeedBasis and a CeedElemRestriction based on an -- mfem::FiniteElementSpace @a fes, and an mfem::IntegrationRule @a ir, -- and a list of @a nelem elements of indices @a indices. -- -- @param[in] fes The finite element space. -- @param[in] ir The integration rule. -- @param[in] nelem The number of elements. -- @param[in] indices The indices of the elements of same type in the -- `FiniteElementSpace`. If `indices == nullptr`, assumes -- that the `FiniteElementSpace` is not mixed. -- @param[in] ceed The Ceed object. -- @param[out] basis The `CeedBasis` to initialize. -- @param[out] restr The `CeedElemRestriction` to initialize. */ --void InitBasisAndRestriction(const FiniteElementSpace &fes, -- const IntegrationRule &ir, -- int nelem, -- const int* indices, -- Ceed ceed, CeedBasis *basis, -- CeedElemRestriction *restr); -- - int CeedOperatorGetActiveField(CeedOperator oper, CeedOperatorField *field); - -- --template --const IntegrationRule & GetRule( -- const Integrator &integ, -- const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans); -- --/// Return the path to the libCEED q-function headers. -+/// Return the path to the libCEED QFunction headers. - const std::string &GetCeedPath(); - - // Hash table for CeedBasis --using BasisKey = std::tuple; -+using BasisKey = -+ std::tuple>; - struct BasisHash - { -- std::size_t operator()(const BasisKey& k) const -+ std::size_t operator()(const BasisKey &k) const - { - return CeedHashCombine( - CeedHashCombine( -- CeedHashInt(reinterpret_cast(std::get<0>(k))), -- CeedHashInt(reinterpret_cast(std::get<1>(k)))), -+ CeedHashCombine( -+ CeedHashInt(reinterpret_cast(std::get<0>(k))), -+ CeedHashInt(reinterpret_cast(std::get<1>(k)))), -+ CeedHashInt(reinterpret_cast(std::get<2>(k)))), - CeedHashCombine( -- CeedHashCombine(CeedHashInt(std::get<2>(k)), -- CeedHashInt(std::get<3>(k))), -- CeedHashInt(std::get<4>(k)))); -+ CeedHashCombine(CeedHashInt(std::get<3>(k)[0]), -+ CeedHashInt(std::get<3>(k)[1])), -+ CeedHashInt(std::get<3>(k)[2]))); - } - }; - using BasisMap = std::unordered_map; - --enum restr_type {Standard, Strided, Coeff}; -- - // Hash table for CeedElemRestriction - using RestrKey = -- std::tuple; -+ std::tuple, -+ std::array, int>; - struct RestrHash - { -- std::size_t operator()(const RestrKey& k) const -+ std::size_t operator()(const RestrKey &k) const - { - return CeedHashCombine( -+ CeedHashInt(reinterpret_cast(std::get<0>(k))), - CeedHashCombine( - CeedHashCombine( -- CeedHashInt(reinterpret_cast(std::get<0>(k))), -- CeedHashInt(std::get<1>(k))), -- CeedHashCombine(CeedHashInt(std::get<2>(k)), -- CeedHashInt(std::get<3>(k)))), -- CeedHashInt(std::get<4>(k))); -+ CeedHashCombine( -+ CeedHashCombine(CeedHashInt(std::get<1>(k)[0]), -+ CeedHashInt(std::get<1>(k)[1])), -+ CeedHashInt(std::get<1>(k)[2])), -+ CeedHashCombine( -+ CeedHashCombine(CeedHashInt(std::get<2>(k)[0]), -+ CeedHashInt(std::get<2>(k)[1])), -+ CeedHashInt(std::get<2>(k)[2]))), -+ CeedHashInt(std::get<3>(k)))); - } - }; - using RestrMap = -@@ -156,7 +119,7 @@ namespace internal - - #ifdef MFEM_USE_CEED - /** @warning These maps have a tendency to create bugs when adding new "types" -- of CeedBasis and CeedElemRestriction. */ -+ of CeedBasis and CeedElemRestriction. Definitions in general/device.cpp. */ - extern ceed::BasisMap ceed_basis_map; - extern ceed::RestrMap ceed_restr_map; - #endif -diff --git a/fem/ceed/solvers/algebraic.cpp b/fem/ceed/solvers/algebraic.cpp -index 280a19960..72eca5c2e 100644 ---- a/fem/ceed/solvers/algebraic.cpp -+++ b/fem/ceed/solvers/algebraic.cpp -@@ -11,14 +11,16 @@ - - #include "algebraic.hpp" - -+#include "../../../general/forall.hpp" - #include "../../bilinearform.hpp" - #include "../../fespace.hpp" - #include "../../pfespace.hpp" --#include "../../../general/forall.hpp" --#include "solvers-atpmg.hpp" --#include "full-assembly.hpp" - #include "../interface/restriction.hpp" --#include "../interface/ceed.hpp" -+#include "../interface/util.hpp" -+#include "solvers-atpmg.hpp" -+#ifdef MFEM_USE_CEED -+#include -+#endif - - namespace mfem - { -@@ -28,6 +30,13 @@ namespace ceed - - #ifdef MFEM_USE_CEED - -+int CeedInternalFree(void *p) -+{ -+ free(*(void **)p); -+ *(void **)p = NULL; -+ return 0; -+} -+ - /** Wraps a CeedOperator in an mfem::Operator, with essential boundary - conditions and a prolongation operator for parallel application. */ - class ConstrainedOperator : public mfem::Operator -@@ -91,29 +100,22 @@ const mfem::Operator *ConstrainedOperator::GetProlongation() const - return P; - } - --/// assumes a square operator (you could do rectangular, you'd have --/// to find separate active input and output fields/restrictions) --int CeedOperatorGetSize(CeedOperator oper, CeedInt * size) --{ -- CeedSize in_len, out_len; -- int ierr = CeedOperatorGetActiveVectorLengths(oper, &in_len, &out_len); -- CeedChk(ierr); -- *size = (CeedInt)in_len; -- MFEM_VERIFY(in_len == out_len, "not a square CeedOperator"); -- MFEM_VERIFY(in_len == *size, "size overflow"); -- return 0; --} -- - Solver *BuildSmootherFromCeed(ConstrainedOperator &op, bool chebyshev) - { - int ierr; -+ - CeedOperator ceed_op = op.GetCeedOperator(); - const Array &ess_tdofs = op.GetEssentialTrueDofs(); - const mfem::Operator *P = op.GetProlongation(); -+ - // Assemble the a local diagonal, in the sense of L-vector - CeedVector diagceed; -- CeedInt length; -- ierr = CeedOperatorGetSize(ceed_op, &length); PCeedChk(ierr); -+ CeedSize l_in, l_out; -+ ierr = CeedOperatorGetActiveVectorLengths(ceed_op, &l_in, &l_out); -+ PCeedChk(ierr); -+ MFEM_VERIFY(l_in == l_out, "Not a square CeedOperator."); -+ MFEM_VERIFY((CeedInt)l_in == l_in, "Size overflow."); -+ CeedInt length = l_in; - ierr = CeedVectorCreate(internal::ceed, length, &diagceed); PCeedChk(ierr); - CeedMemType mem; - ierr = CeedGetPreferredMemType(internal::ceed, &mem); PCeedChk(ierr); -@@ -239,20 +241,18 @@ CeedOperator CreateCeedCompositeOperatorFromBilinearForm(BilinearForm &form) - int ierr; - CeedOperator op; - ierr = CeedCompositeOperatorCreate(internal::ceed, &op); PCeedChk(ierr); -- -- MFEM_VERIFY(form.GetBBFI()->Size() == 0, -- "Not implemented for this integrator!"); -- MFEM_VERIFY(form.GetFBFI()->Size() == 0, -- "Not implemented for this integrator!"); -- MFEM_VERIFY(form.GetBFBFI()->Size() == 0, -- "Not implemented for this integrator!"); -- -- // Get the domain bilinear form integrators (DBFIs) -- Array *bffis = form.GetDBFI(); -- for (int i = 0; i < bffis->Size(); ++i) -+ for (BilinearFormIntegrator *integ : *form.GetDBFI()) -+ { -+ AddToCompositeOperator(integ, op); -+ } -+ for (BilinearFormIntegrator *integ : *form.GetBBFI()) - { -- AddToCompositeOperator((*bffis)[i], op); -+ AddToCompositeOperator(integ, op); - } -+ MFEM_VERIFY(form.GetFBFI()->Size() == 0, "AddInteriorFaceIntegrator is not " -+ "currently supported in CreateCeedCompositeOperatorFromBilinearForm"); -+ MFEM_VERIFY(form.GetBFBFI()->Size() == 0, "AddBdrFaceIntegrator is not " -+ "currently supported in CreateCeedCompositeOperatorFromBilinearForm"); - return op; - } - -@@ -266,7 +266,8 @@ CeedOperator CoarsenCeedCompositeOperator( - int ierr; - bool isComposite; - ierr = CeedOperatorIsComposite(op, &isComposite); PCeedChk(ierr); -- MFEM_ASSERT(isComposite, ""); -+ MFEM_ASSERT(isComposite, -+ "CoarsenCeedCompositeOperator requires a composite operator."); - - CeedOperator op_coarse; - ierr = CeedCompositeOperatorCreate(internal::ceed, -@@ -376,67 +377,68 @@ int AlgebraicInterpolation::Initialize( - - CeedSize height, width; - ierr = CeedElemRestrictionGetLVectorSize(erestrictu_coarse, &width); -- CeedChk(ierr); -+ PCeedChk(ierr); - ierr = CeedElemRestrictionGetLVectorSize(erestrictu_fine, &height); -- CeedChk(ierr); -+ PCeedChk(ierr); - - // interpolation qfunction - const int bp3_ncompu = 1; - CeedQFunction l_qf_restrict, l_qf_prolong; - ierr = CeedQFunctionCreateIdentity(ceed, bp3_ncompu, CEED_EVAL_NONE, -- CEED_EVAL_INTERP, &l_qf_restrict); CeedChk(ierr); -+ CEED_EVAL_INTERP, &l_qf_restrict); PCeedChk(ierr); - ierr = CeedQFunctionCreateIdentity(ceed, bp3_ncompu, CEED_EVAL_INTERP, -- CEED_EVAL_NONE, &l_qf_prolong); CeedChk(ierr); -+ CEED_EVAL_NONE, &l_qf_prolong); PCeedChk(ierr); - - qf_restrict = l_qf_restrict; - qf_prolong = l_qf_prolong; - - CeedVector c_fine_multiplicity; -- ierr = CeedVectorCreate(ceed, height, &c_fine_multiplicity); CeedChk(ierr); -- ierr = CeedVectorSetValue(c_fine_multiplicity, 0.0); CeedChk(ierr); -+ ierr = CeedVectorCreate(ceed, height, &c_fine_multiplicity); PCeedChk(ierr); -+ ierr = CeedVectorSetValue(c_fine_multiplicity, 0.0); PCeedChk(ierr); - - // Create the restriction operator - // Restriction - Fine to coarse - ierr = CeedOperatorCreate(ceed, qf_restrict, CEED_QFUNCTION_NONE, -- CEED_QFUNCTION_NONE, &op_restrict); CeedChk(ierr); -+ CEED_QFUNCTION_NONE, &op_restrict); PCeedChk(ierr); - ierr = CeedOperatorSetField(op_restrict, "input", erestrictu_fine, -- CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); CeedChk(ierr); -+ CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); PCeedChk(ierr); - ierr = CeedOperatorSetField(op_restrict, "output", erestrictu_coarse, -- basisctof, CEED_VECTOR_ACTIVE); CeedChk(ierr); -+ basisctof, CEED_VECTOR_ACTIVE); PCeedChk(ierr); - - // Interpolation - Coarse to fine - // Create the prolongation operator - ierr = CeedOperatorCreate(ceed, qf_prolong, CEED_QFUNCTION_NONE, -- CEED_QFUNCTION_NONE, &op_interp); CeedChk(ierr); -+ CEED_QFUNCTION_NONE, &op_interp); PCeedChk(ierr); - ierr = CeedOperatorSetField(op_interp, "input", erestrictu_coarse, -- basisctof, CEED_VECTOR_ACTIVE); CeedChk(ierr); -+ basisctof, CEED_VECTOR_ACTIVE); PCeedChk(ierr); - ierr = CeedOperatorSetField(op_interp, "output", erestrictu_fine, -- CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); CeedChk(ierr); -+ CEED_BASIS_COLLOCATED, CEED_VECTOR_ACTIVE); PCeedChk(ierr); - - ierr = CeedElemRestrictionGetMultiplicity(erestrictu_fine, -- c_fine_multiplicity); CeedChk(ierr); -- ierr = CeedVectorCreate(ceed, height, &fine_multiplicity_r); CeedChk(ierr); -+ c_fine_multiplicity); PCeedChk(ierr); -+ ierr = CeedVectorCreate(ceed, height, &fine_multiplicity_r); PCeedChk(ierr); - - CeedScalar* fine_r_data; - const CeedScalar* fine_data; - ierr = CeedVectorGetArrayWrite(fine_multiplicity_r, CEED_MEM_HOST, -- &fine_r_data); CeedChk(ierr); -+ &fine_r_data); PCeedChk(ierr); - ierr = CeedVectorGetArrayRead(c_fine_multiplicity, CEED_MEM_HOST, -- &fine_data); CeedChk(ierr); -+ &fine_data); PCeedChk(ierr); - for (CeedSize i = 0; i < height; ++i) - { - fine_r_data[i] = 1.0 / fine_data[i]; - } - -- ierr = CeedVectorRestoreArray(fine_multiplicity_r, &fine_r_data); CeedChk(ierr); -+ ierr = CeedVectorRestoreArray(fine_multiplicity_r, &fine_r_data); -+ PCeedChk(ierr); - ierr = CeedVectorRestoreArrayRead(c_fine_multiplicity, &fine_data); -- CeedChk(ierr); -- ierr = CeedVectorDestroy(&c_fine_multiplicity); CeedChk(ierr); -+ PCeedChk(ierr); -+ ierr = CeedVectorDestroy(&c_fine_multiplicity); PCeedChk(ierr); - -- ierr = CeedVectorCreate(ceed, height, &fine_work); CeedChk(ierr); -+ ierr = CeedVectorCreate(ceed, height, &fine_work); PCeedChk(ierr); - -- ierr = CeedVectorCreate(ceed, height, &v_); CeedChk(ierr); -- ierr = CeedVectorCreate(ceed, width, &u_); CeedChk(ierr); -+ ierr = CeedVectorCreate(ceed, height, &v_); PCeedChk(ierr); -+ ierr = CeedVectorCreate(ceed, width, &u_); PCeedChk(ierr); - - return 0; - } -@@ -445,12 +447,12 @@ int AlgebraicInterpolation::Finalize() - { - int ierr; - -- ierr = CeedQFunctionDestroy(&qf_restrict); CeedChk(ierr); -- ierr = CeedQFunctionDestroy(&qf_prolong); CeedChk(ierr); -- ierr = CeedOperatorDestroy(&op_interp); CeedChk(ierr); -- ierr = CeedOperatorDestroy(&op_restrict); CeedChk(ierr); -- ierr = CeedVectorDestroy(&fine_multiplicity_r); CeedChk(ierr); -- ierr = CeedVectorDestroy(&fine_work); CeedChk(ierr); -+ ierr = CeedQFunctionDestroy(&qf_restrict); PCeedChk(ierr); -+ ierr = CeedQFunctionDestroy(&qf_prolong); PCeedChk(ierr); -+ ierr = CeedOperatorDestroy(&op_interp); PCeedChk(ierr); -+ ierr = CeedOperatorDestroy(&op_restrict); PCeedChk(ierr); -+ ierr = CeedVectorDestroy(&fine_multiplicity_r); PCeedChk(ierr); -+ ierr = CeedVectorDestroy(&fine_work); PCeedChk(ierr); - - return 0; - } -@@ -468,8 +470,8 @@ AlgebraicInterpolation::AlgebraicInterpolation( - &ho_nldofs); PCeedChk(ierr); - height = (int)ho_nldofs; - width = (int)lo_nldofs; -- MFEM_VERIFY(ho_nldofs == height, "height overflow"); -- MFEM_VERIFY(lo_nldofs == width, "width overflow"); -+ MFEM_VERIFY(ho_nldofs == height, "Height overflow."); -+ MFEM_VERIFY(lo_nldofs == width, "Width overflow."); - owns_basis_ = false; - ierr = Initialize(ceed, basisctof, erestrictu_coarse, erestrictu_fine); - PCeedChk(ierr); -@@ -488,7 +490,6 @@ AlgebraicInterpolation::~AlgebraicInterpolation() - } - - /// a = a (pointwise*) b --/// @todo: using MPI_FORALL in this Ceed-like function is ugly - int CeedVectorPointwiseMult(CeedVector a, const CeedVector b) - { - int ierr; -@@ -496,8 +497,8 @@ int CeedVectorPointwiseMult(CeedVector a, const CeedVector b) - CeedVectorGetCeed(a, &ceed); - - CeedSize length, length2; -- ierr = CeedVectorGetLength(a, &length); CeedChk(ierr); -- ierr = CeedVectorGetLength(b, &length2); CeedChk(ierr); -+ ierr = CeedVectorGetLength(a, &length); PCeedChk(ierr); -+ ierr = CeedVectorGetLength(b, &length2); PCeedChk(ierr); - if (length != length2) - { - return CeedError(ceed, 1, "Vector sizes don't match"); -@@ -514,14 +515,16 @@ int CeedVectorPointwiseMult(CeedVector a, const CeedVector b) - } - CeedScalar *a_data; - const CeedScalar *b_data; -- ierr = CeedVectorGetArray(a, mem, &a_data); CeedChk(ierr); -- ierr = CeedVectorGetArrayRead(b, mem, &b_data); CeedChk(ierr); -- MFEM_VERIFY(int(length) == length, "length overflow"); -+ ierr = CeedVectorGetArray(a, mem, &a_data); PCeedChk(ierr); -+ ierr = CeedVectorGetArrayRead(b, mem, &b_data); PCeedChk(ierr); -+ MFEM_VERIFY(int(length) == length, "Length overflow."); - mfem::forall(length, [=] MFEM_HOST_DEVICE (int i) -- {a_data[i] *= b_data[i];}); -+ { -+ a_data[i] *= b_data[i]; -+ }); - -- ierr = CeedVectorRestoreArray(a, &a_data); CeedChk(ierr); -- ierr = CeedVectorRestoreArrayRead(b, &b_data); CeedChk(ierr); -+ ierr = CeedVectorRestoreArray(a, &a_data); PCeedChk(ierr); -+ ierr = CeedVectorRestoreArrayRead(b, &b_data); PCeedChk(ierr); - - return 0; - } -@@ -590,7 +593,7 @@ void AlgebraicInterpolation::MultTranspose(const mfem::Vector& x, - ierr = CeedVectorGetArrayRead(fine_multiplicity_r, mem, - &multiplicitydata); PCeedChk(ierr); - ierr = CeedVectorGetArrayWrite(fine_work, mem, &workdata); PCeedChk(ierr); -- MFEM_VERIFY((int)length == length, "length overflow"); -+ MFEM_VERIFY((int)length == length, "Length overflow."); - mfem::forall(length, [=] MFEM_HOST_DEVICE (int i) - {workdata[i] = in_ptr[i] * multiplicitydata[i];}); - ierr = CeedVectorRestoreArrayRead(fine_multiplicity_r, -@@ -636,7 +639,7 @@ AlgebraicSpaceHierarchy::AlgebraicSpaceHierarchy(FiniteElementSpace &fes) - current_order = order; - - Ceed ceed = internal::ceed; -- InitRestriction(fes, ceed, &fine_er); -+ InitRestriction(fes, false, ceed, &fine_er); - CeedElemRestriction er = fine_er; - - int dim = fes.GetMesh()->Dimension(); -@@ -715,7 +718,7 @@ AlgebraicCoarseSpace::AlgebraicCoarseSpace( - ierr = CeedElemRestrictionGetLVectorSize(ceed_elem_restriction, &ndofs_); - PCeedChk(ierr); - ndofs = ndofs_; -- MFEM_VERIFY(ndofs == ndofs_, "ndofs overflow"); -+ MFEM_VERIFY(ndofs == ndofs_, "Overflow in ndofs."); - - mesh = fine_fes.GetMesh(); - } -@@ -741,9 +744,8 @@ ParAlgebraicCoarseSpace::ParAlgebraicCoarseSpace( - { - CeedSize lsize; - CeedElemRestrictionGetLVectorSize(ceed_elem_restriction, &lsize); -+ MFEM_VERIFY((int)lsize == lsize, "Size overflow."); - const Table &group_ldof_fine = gc_fine->GroupLDofTable(); -- -- MFEM_VERIFY((int)lsize == lsize, "size overflow"); - ldof_group.SetSize(lsize); - ldof_group = 0; - -@@ -814,7 +816,8 @@ HypreParMatrix *ParAlgebraicCoarseSpace::GetProlongationHypreParMatrix() - if (P_mat) { return P_mat; } - - ParMesh *pmesh = dynamic_cast(mesh); -- MFEM_VERIFY(pmesh != NULL, ""); -+ MFEM_VERIFY(pmesh != NULL, -+ "ParAlgebraicCoarseSpace requires a ParMesh mesh object."); - Array dof_offsets, tdof_offsets, tdof_nb_offsets; - Array *offsets[2] = {&dof_offsets, &tdof_offsets}; - int ltsize = R_mat->Height(); -@@ -943,7 +946,7 @@ AlgebraicSolver::AlgebraicSolver(BilinearForm &form, - form.GetAssemblyLevel() == AssemblyLevel::NONE, - "AlgebraicSolver requires partial assembly or fully matrix-free."); - MFEM_VERIFY(UsesTensorBasis(*form.FESpace()), -- "AlgebraicSolver requires tensor product basis functions."); -+ "AlgebraicSolver requires tensor-product basis functions."); - #ifdef MFEM_USE_CEED - fespaces = new AlgebraicSpaceHierarchy(*form.FESpace()); - multigrid = new AlgebraicMultigrid(*fespaces, form, ess_tdofs); -@@ -974,6 +977,88 @@ void AlgebraicSolver::SetOperator(const mfem::Operator& op) - #endif - } - -+#ifdef MFEM_USE_CEED -+SparseMatrix *CeedOperatorFullAssemble(BilinearForm &form, bool set) -+{ -+ Array mat_i; -+ for (BilinearFormIntegrator *integ : *form.GetDBFI()) -+ { -+ if (!integ->SupportsCeed()) { continue; } -+ SparseMatrix *mat_integ; -+ int ierr = CeedOperatorFullAssemble(integ->GetCeedOp().GetCeedOperator(), -+ &mat_integ, set); -+ PCeedChk(ierr); -+ mat_i.Append(mat_integ); -+ } -+ for (BilinearFormIntegrator *integ : *form.GetBBFI()) -+ { -+ if (!integ->SupportsCeed()) { continue; } -+ SparseMatrix *mat_integ; -+ int ierr = CeedOperatorFullAssemble(integ->GetCeedOp().GetCeedOperator(), -+ &mat_integ, set); -+ PCeedChk(ierr); -+ mat_i.Append(mat_integ); -+ } -+ MFEM_VERIFY(form.GetFBFI()->Size() == 0, "AddInteriorFaceIntegrator is not " -+ "currently supported in CeedOperatorFullAssemble"); -+ MFEM_VERIFY(form.GetBFBFI()->Size() == 0, "AddBdrFaceIntegrator is not " -+ "currently supported in CeedOperatorFullAssemble"); -+ -+ SparseMatrix *mat = Add(mat_i); -+ for (SparseMatrix *mat_integ : mat_i) -+ { -+ delete mat_integ; -+ } -+ return mat; -+} -+ -+int CeedOperatorFullAssemble(CeedOperator op, SparseMatrix **mat, bool set) -+{ -+ int ierr; -+ Ceed ceed; -+ ierr = CeedOperatorGetCeed(op, &ceed); PCeedChk(ierr); -+ -+ CeedSize l_in, l_out; -+ ierr = CeedOperatorGetActiveVectorLengths(op, &l_in, &l_out); PCeedChk(ierr); -+ MFEM_VERIFY((int)l_in == l_in && (int)l_out == l_out, "Size overflow."); -+ *mat = new SparseMatrix(l_out, l_in); -+ -+ CeedSize nnz; -+ CeedInt *rows, *cols; -+ ierr = CeedOperatorLinearAssembleSymbolic(op, &nnz, &rows, &cols); -+ PCeedChk(ierr); -+ -+ CeedVector vals; -+ ierr = CeedVectorCreate(ceed, nnz, &vals); PCeedChk(ierr); -+ ierr = CeedOperatorLinearAssemble(op, vals); PCeedChk(ierr); -+ -+ const CeedScalar *val_array; -+ ierr = CeedVectorGetArrayRead(vals, CEED_MEM_HOST, &val_array); PCeedChk(ierr); -+ for (CeedSize k = 0; k < nnz; ++k) -+ { -+ if (!set) -+ { -+ (*mat)->Add(rows[k], cols[k], val_array[k]); -+ } -+ else -+ { -+ (*mat)->Set(rows[k], cols[k], val_array[k]); -+ } -+ } -+ ierr = CeedVectorRestoreArrayRead(vals, &val_array); PCeedChk(ierr); -+ -+ ierr = CeedVectorDestroy(&vals); PCeedChk(ierr); -+ ierr = CeedInternalFree(&rows); PCeedChk(ierr); -+ ierr = CeedInternalFree(&cols); PCeedChk(ierr); -+ -+ // Enforce structurally symmetric for later elimination -+ const int skip_zeros = 0; -+ (*mat)->Finalize(skip_zeros); -+ -+ return 0; -+} -+#endif -+ - } // namespace ceed - - } // namespace mfem -diff --git a/fem/ceed/solvers/algebraic.hpp b/fem/ceed/solvers/algebraic.hpp -index 8ede8324e..b8a37b7ec 100644 ---- a/fem/ceed/solvers/algebraic.hpp -+++ b/fem/ceed/solvers/algebraic.hpp -@@ -12,9 +12,9 @@ - #ifndef MFEM_CEED_ALGEBRAIC_HPP - #define MFEM_CEED_ALGEBRAIC_HPP - -+#include "../../../linalg/sparsemat.hpp" - #include "../../fespacehierarchy.hpp" - #include "../../multigrid.hpp" --#include "../interface/operator.hpp" - #include "../interface/ceed.hpp" - - namespace mfem -@@ -190,7 +190,7 @@ private: - #endif - - public: -- /** @brief Constructs algebraic multigrid hierarchy and solver. -+ /** @brief Constructs algebraic multigrid hierarchy and solver - - This only works if the Ceed device backend is enabled. - -@@ -204,6 +204,26 @@ public: - void SetOperator(const mfem::Operator& op); - }; - -+#ifdef MFEM_USE_CEED -+/** @brief Assemble the CeedOperators from a BilinearForm as an -+ mfem::SparseMatrix -+ -+ In parallel, this assembles independently on each processor, that is, it -+ assembles at the L-vector level. The assembly procedure is always performed -+ on the host, but this works also for operators stored on device by copying -+ memory. */ -+SparseMatrix *CeedOperatorFullAssemble(BilinearForm &form, bool set = false); -+ -+/** @brief Assembles a CeedOperator as an mfem::SparseMatrix -+ -+ In parallel, this assembles independently on each processor, that is, it -+ assembles at the L-vector level. The assembly procedure is always performed -+ on the host, but this works also for operators stored on device by copying -+ memory. */ -+int CeedOperatorFullAssemble(CeedOperator op, SparseMatrix **mat, -+ bool set = false); -+#endif -+ - } // namespace ceed - - } // namespace mfem -diff --git a/fem/ceed/solvers/full-assembly.cpp b/fem/ceed/solvers/full-assembly.cpp -deleted file mode 100644 -index ccf9b145a..000000000 ---- a/fem/ceed/solvers/full-assembly.cpp -+++ /dev/null -@@ -1,341 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#include "full-assembly.hpp" -- --#include "../../../linalg/sparsemat.hpp" --#include "../interface/util.hpp" --#include "../interface/ceed.hpp" -- --#ifdef MFEM_USE_CEED -- --namespace mfem --{ -- --namespace ceed --{ -- --int CeedHackReallocArray(size_t n, size_t unit, void *p) --{ -- *(void **)p = realloc(*(void **)p, n*unit); -- if (n && unit && !*(void **)p) -- return CeedError(NULL, 1, "realloc failed to allocate %zd members of size " -- "%zd\n", n, unit); -- return 0; --} -- --#define CeedHackRealloc(n, p) CeedHackReallocArray((n), sizeof(**(p)), p) -- --int CeedHackFree(void *p) --{ -- free(*(void **)p); -- *(void **)p = NULL; -- return 0; --} -- --int CeedSingleOperatorFullAssemble(CeedOperator op, SparseMatrix *out) --{ -- int ierr; -- Ceed ceed; -- ierr = CeedOperatorGetCeed(op, &ceed); CeedChk(ierr); -- -- // Assemble QFunction -- CeedQFunction qf; -- ierr = CeedOperatorGetQFunction(op, &qf); CeedChk(ierr); -- CeedInt numinputfields, numoutputfields; -- CeedChk(ierr); -- CeedVector assembledqf; -- CeedElemRestriction rstr_q; -- ierr = CeedOperatorLinearAssembleQFunction( -- op, &assembledqf, &rstr_q, CEED_REQUEST_IMMEDIATE); CeedChk(ierr); -- -- CeedSize qflength; -- ierr = CeedVectorGetLength(assembledqf, &qflength); CeedChk(ierr); -- -- CeedOperatorField *input_fields; -- CeedOperatorField *output_fields; -- ierr = CeedOperatorGetFields(op, &numinputfields, &input_fields, -- &numoutputfields, &output_fields); -- CeedChk(ierr); -- -- // Determine active input basis -- CeedQFunctionField *qffields; -- ierr = CeedQFunctionGetFields(qf, &numinputfields, &qffields, -- &numoutputfields, NULL); -- CeedChk(ierr); -- CeedInt numemodein = 0, ncomp, dim = 1; -- CeedEvalMode *emodein = NULL; -- CeedBasis basisin = NULL; -- CeedElemRestriction rstrin = NULL; -- for (CeedInt i=0; iHeight() == nnodes, "Sizes don't match!"); -- MFEM_ASSERT(out->Width() == nnodes, "Sizes don't match!"); -- const CeedScalar *interpin, *gradin; -- ierr = CeedBasisGetInterp(basisin, &interpin); CeedChk(ierr); -- ierr = CeedBasisGetGrad(basisin, &gradin); CeedChk(ierr); -- -- const CeedScalar * assembledqfarray; -- ierr = CeedVectorGetArrayRead(assembledqf, CEED_MEM_HOST, &assembledqfarray); -- CeedChk(ierr); -- -- CeedInt layout[3]; -- ierr = CeedElemRestrictionGetELayout(rstr_q, &layout); CeedChk(ierr); -- ierr = CeedElemRestrictionDestroy(&rstr_q); CeedChk(ierr); -- -- // enforce structurally symmetric for later elimination -- const int skip_zeros = 0; -- MFEM_ASSERT(numemodein == numemodeout, -- "Ceed full assembly not implemented for this case."); -- for (int e = 0; e < nelem; ++e) -- { -- // get Array for use in SparseMatrix::AddSubMatrix() -- Array rows(elemsize); -- for (int i = 0; i < elemsize; ++i) -- { -- rows[i] = elem_dof_a[e * elemsize + i]; -- } -- -- // form element matrix itself -- DenseMatrix Bmat(nqpts * numemodein, elemsize); -- Bmat = 0.0; -- // Store block-diagonal D matrix as collection of small dense blocks -- DenseTensor Dmat(numemodeout, numemodein, nqpts); -- Dmat = 0.0; -- DenseMatrix elem_mat(elemsize, elemsize); -- elem_mat = 0.0; -- for (int q = 0; q < nqpts; ++q) -- { -- for (int n = 0; n < elemsize; ++n) -- { -- CeedInt din = -1; -- for (int ein = 0; ein < numemodein; ++ein) -- { -- if (emodein[ein] == CEED_EVAL_INTERP) -- { -- Bmat(numemodein * q + ein, n) += interpin[q * elemsize + n]; -- } -- else if (emodein[ein] == CEED_EVAL_GRAD) -- { -- din += 1; -- Bmat(numemodein * q + ein, n) += gradin[(din*nqpts+q) * elemsize + n]; -- } -- else -- { -- MFEM_ASSERT(false, "Not implemented!"); -- } -- } -- } -- for (int ei = 0; ei < numemodein; ++ei) -- { -- for (int ej = 0; ej < numemodein; ++ej) -- { -- const int comp = ei * numemodein + ej; -- const int index = q*layout[0] + comp*layout[1] + e*layout[2]; -- Dmat(ei, ej, q) += assembledqfarray[index]; -- } -- } -- } -- DenseMatrix BTD(elemsize, nqpts*numemodein); -- // Compute B^T*D -- BTD = 0.0; -- for (int j=0; jAddSubMatrix(rows, rows, elem_mat, skip_zeros); -- } -- -- ierr = CeedVectorRestoreArrayRead(elem_dof, &elem_dof_a); CeedChk(ierr); -- ierr = CeedVectorDestroy(&elem_dof); CeedChk(ierr); -- ierr = CeedVectorRestoreArrayRead(assembledqf, &assembledqfarray); -- CeedChk(ierr); -- ierr = CeedVectorDestroy(&assembledqf); CeedChk(ierr); -- ierr = CeedHackFree(&emodein); CeedChk(ierr); -- ierr = CeedHackFree(&emodeout); CeedChk(ierr); -- -- return 0; --} -- --int CeedOperatorFullAssemble(CeedOperator op, SparseMatrix **mat) --{ -- int ierr; -- -- CeedSize in_len, out_len; -- ierr = CeedOperatorGetActiveVectorLengths(op, &in_len, &out_len); -- CeedChk(ierr); -- const int nnodes = in_len; -- MFEM_VERIFY(in_len == out_len, "not a square CeedOperator"); -- MFEM_VERIFY(in_len == nnodes, "size overflow"); -- -- SparseMatrix *out = new SparseMatrix(nnodes, nnodes); -- -- bool isComposite; -- ierr = CeedOperatorIsComposite(op, &isComposite); CeedChk(ierr); -- if (isComposite) -- { -- CeedInt numsub; -- CeedOperator *subops; --#if CEED_VERSION_GE(0, 10, 2) -- CeedCompositeOperatorGetNumSub(op, &numsub); -- ierr = CeedCompositeOperatorGetSubList(op, &subops); CeedChk(ierr); --#else -- CeedOperatorGetNumSub(op, &numsub); -- ierr = CeedOperatorGetSubList(op, &subops); CeedChk(ierr); --#endif -- for (int i = 0; i < numsub; ++i) -- { -- ierr = CeedSingleOperatorFullAssemble(subops[i], out); CeedChk(ierr); -- } -- } -- else -- { -- ierr = CeedSingleOperatorFullAssemble(op, out); CeedChk(ierr); -- } -- // enforce structurally symmetric for later elimination -- const int skip_zeros = 0; -- out->Finalize(skip_zeros); -- *mat = out; -- -- return 0; --} -- --} // namespace ceed -- --} // namespace mfem -- --#endif -diff --git a/fem/ceed/solvers/solvers-atpmg.cpp b/fem/ceed/solvers/solvers-atpmg.cpp -index 8f4be00b2..052f21927 100644 ---- a/fem/ceed/solvers/solvers-atpmg.cpp -+++ b/fem/ceed/solvers/solvers-atpmg.cpp -@@ -11,15 +11,13 @@ - - #include "solvers-atpmg.hpp" - --#include "../interface/ceed.hpp" -+#include - #include "../interface/util.hpp" -- - #ifdef MFEM_USE_CEED - #include -+#endif - --#include --// todo: should probably use Ceed memory wrappers instead of calloc/free? --#include -+#ifdef MFEM_USE_CEED - - namespace mfem - { -@@ -86,17 +84,17 @@ int CeedATPMGElemRestriction(int order, - { - int ierr; - Ceed ceed; -- ierr = CeedElemRestrictionGetCeed(er_in, &ceed); CeedChk(ierr); -+ ierr = CeedElemRestrictionGetCeed(er_in, &ceed); PCeedChk(ierr); - - CeedInt numelem, numcomp, elemsize; - CeedSize numnodes; -- ierr = CeedElemRestrictionGetNumElements(er_in, &numelem); CeedChk(ierr); -- ierr = CeedElemRestrictionGetLVectorSize(er_in, &numnodes); CeedChk(ierr); -- ierr = CeedElemRestrictionGetElementSize(er_in, &elemsize); CeedChk(ierr); -- ierr = CeedElemRestrictionGetNumComponents(er_in, &numcomp); CeedChk(ierr); -+ ierr = CeedElemRestrictionGetNumElements(er_in, &numelem); PCeedChk(ierr); -+ ierr = CeedElemRestrictionGetLVectorSize(er_in, &numnodes); PCeedChk(ierr); -+ ierr = CeedElemRestrictionGetElementSize(er_in, &elemsize); PCeedChk(ierr); -+ ierr = CeedElemRestrictionGetNumComponents(er_in, &numcomp); PCeedChk(ierr); - if (numcomp != 1) - { -- // todo: multi-component will require more thought -+ // TODO: multi-component will require more thought - return CeedError(ceed, 1, "Algebraic element restriction not " - "implemented for multiple components."); - } -@@ -107,31 +105,31 @@ int CeedATPMGElemRestriction(int order, - - CeedVector in_lvec, in_evec; - ierr = CeedElemRestrictionCreateVector(er_in, &in_lvec, &in_evec); -- CeedChk(ierr); -+ PCeedChk(ierr); - - // Create the elem_dof array from the given high-order ElemRestriction - // by using it to map the L-vector indices to an E-vector - CeedScalar * lvec_data; - ierr = CeedVectorGetArrayWrite(in_lvec, CEED_MEM_HOST, &lvec_data); -- CeedChk(ierr); -+ PCeedChk(ierr); - for (CeedSize i = 0; i < numnodes; ++i) - { - lvec_data[i] = (CeedScalar) i; - } -- ierr = CeedVectorRestoreArray(in_lvec, &lvec_data); CeedChk(ierr); -+ ierr = CeedVectorRestoreArray(in_lvec, &lvec_data); PCeedChk(ierr); - CeedInt in_layout[3]; -- ierr = CeedElemRestrictionGetELayout(er_in, &in_layout); CeedChk(ierr); -+ ierr = CeedElemRestrictionGetELayout(er_in, &in_layout); PCeedChk(ierr); - if (in_layout[0] == 0 && in_layout[1] == 0 && in_layout[2] == 0) - { - return CeedError(ceed, 1, "Cannot interpret e-vector ordering of given" - "CeedElemRestriction!"); - } - ierr = CeedElemRestrictionApply(er_in, CEED_NOTRANSPOSE, in_lvec, in_evec, -- CEED_REQUEST_IMMEDIATE); CeedChk(ierr); -- ierr = CeedVectorDestroy(&in_lvec); CeedChk(ierr); -+ CEED_REQUEST_IMMEDIATE); PCeedChk(ierr); -+ ierr = CeedVectorDestroy(&in_lvec); PCeedChk(ierr); - const CeedScalar * in_elem_dof; - ierr = CeedVectorGetArrayRead(in_evec, CEED_MEM_HOST, &in_elem_dof); -- CeedChk(ierr); -+ PCeedChk(ierr); - - // Create a map (dof_map) that maps high-order ldof indices to - // low-order ldof indices, with -1 indicating no correspondence -@@ -469,13 +467,13 @@ int CeedATPMGElemRestriction(int order, - "CeedATPMGElemRestriction does not yet support this dimension."); - } - -- ierr = CeedVectorRestoreArrayRead(in_evec, &in_elem_dof); CeedChk(ierr); -- ierr = CeedVectorDestroy(&in_evec); CeedChk(ierr); -+ ierr = CeedVectorRestoreArrayRead(in_evec, &in_elem_dof); PCeedChk(ierr); -+ ierr = CeedVectorDestroy(&in_evec); PCeedChk(ierr); - - ierr = CeedElemRestrictionCreate(ceed, numelem, coarse_elemsize, numcomp, - 0, running_out_ldof_count, - CEED_MEM_HOST, CEED_COPY_VALUES, out_elem_dof, -- er_out); CeedChk(ierr); -+ er_out); PCeedChk(ierr); - - delete [] out_elem_dof; - -@@ -491,7 +489,7 @@ int CeedBasisATPMGCoarseToFine(Ceed ceed, int P1d, int dim, int order_reduction, - // calling the following Ceed function) - int ierr; - ierr = CeedBasisCreateTensorH1Lagrange(ceed, dim, 1, P1d - order_reduction, P1d, -- CEED_GAUSS_LOBATTO, basisc2f); CeedChk(ierr); -+ CEED_GAUSS_LOBATTO, basisc2f); PCeedChk(ierr); - return 0; - } - -@@ -501,13 +499,13 @@ int CeedBasisATPMGCoarseToFine(CeedBasis basisin, - { - int ierr; - Ceed ceed; -- ierr = CeedBasisGetCeed(basisin, &ceed); CeedChk(ierr); -+ ierr = CeedBasisGetCeed(basisin, &ceed); PCeedChk(ierr); - - CeedInt dim, P1d; -- ierr = CeedBasisGetDimension(basisin, &dim); CeedChk(ierr); -- ierr = CeedBasisGetNumNodes1D(basisin, &P1d); CeedChk(ierr); -+ ierr = CeedBasisGetDimension(basisin, &dim); PCeedChk(ierr); -+ ierr = CeedBasisGetNumNodes1D(basisin, &P1d); PCeedChk(ierr); - ierr = CeedBasisATPMGCoarseToFine(ceed, P1d, dim, order_reduction, -- basisc2f); CeedChk(ierr); -+ basisc2f); PCeedChk(ierr); - return 0; - } - -@@ -518,38 +516,38 @@ int CeedBasisATPMGCoarsen(CeedBasis basisin, - { - int ierr; - Ceed ceed; -- ierr = CeedBasisGetCeed(basisin, &ceed); CeedChk(ierr); -+ ierr = CeedBasisGetCeed(basisin, &ceed); PCeedChk(ierr); - - CeedInt dim, ncomp, P1d, Q1d; -- ierr = CeedBasisGetDimension(basisin, &dim); CeedChk(ierr); -- ierr = CeedBasisGetNumComponents(basisin, &ncomp); CeedChk(ierr); -- ierr = CeedBasisGetNumNodes1D(basisin, &P1d); CeedChk(ierr); -- ierr = CeedBasisGetNumQuadraturePoints1D(basisin, &Q1d); CeedChk(ierr); -+ ierr = CeedBasisGetDimension(basisin, &dim); PCeedChk(ierr); -+ ierr = CeedBasisGetNumComponents(basisin, &ncomp); PCeedChk(ierr); -+ ierr = CeedBasisGetNumNodes1D(basisin, &P1d); PCeedChk(ierr); -+ ierr = CeedBasisGetNumQuadraturePoints1D(basisin, &Q1d); PCeedChk(ierr); - - CeedInt coarse_P1d = P1d - order_reduction; - - const CeedScalar *interp1d; -- ierr = CeedBasisGetInterp1D(basisin, &interp1d); CeedChk(ierr); -+ ierr = CeedBasisGetInterp1D(basisin, &interp1d); PCeedChk(ierr); - const CeedScalar * grad1d; -- ierr = CeedBasisGetGrad1D(basisin, &grad1d); CeedChk(ierr); -+ ierr = CeedBasisGetGrad1D(basisin, &grad1d); PCeedChk(ierr); - - CeedScalar * coarse_interp1d = new CeedScalar[coarse_P1d * Q1d]; - CeedScalar * coarse_grad1d = new CeedScalar[coarse_P1d * Q1d]; - CeedScalar * fine_nodal_points = new CeedScalar[P1d]; - - // these things are in [-1, 1], not [0, 1], which matters -- // (todo: how can we determine this or something related, algebraically?) -+ // (TODO: how can we determine this or something related, algebraically?) - /* one way you might be able to tell is to just run this algorithm - with coarse_P1d = 2 (i.e., linear) and look for symmetry in the coarse - basis matrix? */ -- ierr = CeedLobattoQuadrature(P1d, fine_nodal_points, NULL); CeedChk(ierr); -+ ierr = CeedLobattoQuadrature(P1d, fine_nodal_points, NULL); PCeedChk(ierr); - for (int i = 0; i < P1d; ++i) - { - fine_nodal_points[i] = 0.5 * fine_nodal_points[i] + 0.5; // cheating - } - - const CeedScalar *interp_ctof; -- ierr = CeedBasisGetInterp1D(basisc2f, &interp_ctof); CeedChk(ierr); -+ ierr = CeedBasisGetInterp1D(basisc2f, &interp_ctof); PCeedChk(ierr); - - for (int i = 0; i < Q1d; ++i) - { -@@ -568,12 +566,12 @@ int CeedBasisATPMGCoarsen(CeedBasis basisin, - } - - const CeedScalar * qref1d; -- ierr = CeedBasisGetQRef(basisin, &qref1d); CeedChk(ierr); -+ ierr = CeedBasisGetQRef(basisin, &qref1d); PCeedChk(ierr); - const CeedScalar * qweight1d; -- ierr = CeedBasisGetQWeights(basisin, &qweight1d); CeedChk(ierr); -+ ierr = CeedBasisGetQWeights(basisin, &qweight1d); PCeedChk(ierr); - ierr = CeedBasisCreateTensorH1(ceed, dim, ncomp, - coarse_P1d, Q1d, coarse_interp1d, coarse_grad1d, -- qref1d, qweight1d, basisout); CeedChk(ierr); -+ qref1d, qweight1d, basisout); PCeedChk(ierr); - - delete [] fine_nodal_points; - delete [] coarse_interp1d; -@@ -593,19 +591,19 @@ int CeedATPMGOperator(CeedOperator oper, int order_reduction, - - int ierr; - Ceed ceed; -- ierr = CeedOperatorGetCeed(oper, &ceed); CeedChk(ierr); -+ ierr = CeedOperatorGetCeed(oper, &ceed); PCeedChk(ierr); - - CeedQFunction qf; -- ierr = CeedOperatorGetQFunction(oper, &qf); CeedChk(ierr); -+ ierr = CeedOperatorGetQFunction(oper, &qf); PCeedChk(ierr); - CeedInt numinputfields, numoutputfields; - CeedQFunctionField *inputqfields, *outputqfields; - ierr = CeedQFunctionGetFields(qf, &numinputfields, &inputqfields, - &numoutputfields, &outputqfields); -- CeedChk(ierr); -+ PCeedChk(ierr); - CeedOperatorField *inputfields, *outputfields; - ierr = CeedOperatorGetFields(oper, &numinputfields, &inputfields, - &numoutputfields, &outputfields); -- CeedChk(ierr); -+ PCeedChk(ierr); - - CeedElemRestriction * er_input = new CeedElemRestriction[numinputfields]; - CeedElemRestriction * er_output = new CeedElemRestriction[numoutputfields]; -@@ -619,10 +617,11 @@ int CeedATPMGOperator(CeedOperator oper, int order_reduction, - for (int i = 0; i < numinputfields; ++i) - { - ierr = CeedOperatorFieldGetElemRestriction(inputfields[i], -- &er_input[i]); CeedChk(ierr); -- ierr = CeedOperatorFieldGetVector(inputfields[i], &if_vector[i]); CeedChk(ierr); -+ &er_input[i]); PCeedChk(ierr); -+ ierr = CeedOperatorFieldGetVector(inputfields[i], &if_vector[i]); -+ PCeedChk(ierr); - ierr = CeedOperatorFieldGetBasis(inputfields[i], &basis_input[i]); -- CeedChk(ierr); -+ PCeedChk(ierr); - if (if_vector[i] == CEED_VECTOR_ACTIVE) - { - if (active_input_basis < 0) -@@ -638,11 +637,11 @@ int CeedATPMGOperator(CeedOperator oper, int order_reduction, - for (int i = 0; i < numoutputfields; ++i) - { - ierr = CeedOperatorFieldGetElemRestriction(outputfields[i], -- &er_output[i]); CeedChk(ierr); -+ &er_output[i]); PCeedChk(ierr); - ierr = CeedOperatorFieldGetVector(outputfields[i], &of_vector[i]); -- CeedChk(ierr); -+ PCeedChk(ierr); - ierr = CeedOperatorFieldGetBasis(outputfields[i], &basis_output[i]); -- CeedChk(ierr); -+ PCeedChk(ierr); - if (of_vector[i] == CEED_VECTOR_ACTIVE) - { - // should already be coarsened -@@ -659,36 +658,36 @@ int CeedATPMGOperator(CeedOperator oper, int order_reduction, - - CeedOperator coper; - ierr = CeedOperatorCreate(ceed, qf, CEED_QFUNCTION_NONE, CEED_QFUNCTION_NONE, -- &coper); CeedChk(ierr); -+ &coper); PCeedChk(ierr); - - for (int i = 0; i < numinputfields; ++i) - { - char * fieldname; -- ierr = CeedQFunctionFieldGetName(inputqfields[i], &fieldname); CeedChk(ierr); -+ ierr = CeedQFunctionFieldGetName(inputqfields[i], &fieldname); PCeedChk(ierr); - if (if_vector[i] == CEED_VECTOR_ACTIVE) - { - ierr = CeedOperatorSetField(coper, fieldname, coarse_er, cbasis, -- if_vector[i]); CeedChk(ierr); -+ if_vector[i]); PCeedChk(ierr); - } - else - { - ierr = CeedOperatorSetField(coper, fieldname, er_input[i], basis_input[i], -- if_vector[i]); CeedChk(ierr); -+ if_vector[i]); PCeedChk(ierr); - } - } - for (int i = 0; i < numoutputfields; ++i) - { - char * fieldname; -- ierr = CeedQFunctionFieldGetName(outputqfields[i], &fieldname); CeedChk(ierr); -+ ierr = CeedQFunctionFieldGetName(outputqfields[i], &fieldname); PCeedChk(ierr); - if (of_vector[i] == CEED_VECTOR_ACTIVE) - { - ierr = CeedOperatorSetField(coper, fieldname, coarse_er, cbasis, -- of_vector[i]); CeedChk(ierr); -+ of_vector[i]); PCeedChk(ierr); - } - else - { - ierr = CeedOperatorSetField(coper, fieldname, er_output[i], basis_output[i], -- of_vector[i]); CeedChk(ierr); -+ of_vector[i]); PCeedChk(ierr); - } - } - delete [] er_input; -@@ -711,21 +710,21 @@ int CeedATPMGOperator(CeedOperator oper, int order_reduction, - int ierr; - - CeedQFunction qf; -- ierr = CeedOperatorGetQFunction(oper, &qf); CeedChk(ierr); -+ ierr = CeedOperatorGetQFunction(oper, &qf); PCeedChk(ierr); - CeedInt numinputfields, numoutputfields; - CeedOperatorField *inputfields; - ierr = CeedOperatorGetFields(oper, &numinputfields, &inputfields, - &numoutputfields, NULL); -- CeedChk(ierr); -+ PCeedChk(ierr); - - CeedBasis basis; -- ierr = CeedOperatorGetActiveBasis(oper, &basis); CeedChk(ierr); -+ ierr = CeedOperatorGetActiveBasis(oper, &basis); PCeedChk(ierr); - ierr = CeedBasisATPMGCoarseToFine(basis, basis_ctof_out, order_reduction); -- CeedChk(ierr); -+ PCeedChk(ierr); - ierr = CeedBasisATPMGCoarsen(basis, *basis_ctof_out, coarse_basis_out, -- order_reduction); CeedChk(ierr); -+ order_reduction); PCeedChk(ierr); - ierr = CeedATPMGOperator(oper, order_reduction, coarse_er, *coarse_basis_out, -- *basis_ctof_out, out); CeedChk(ierr); -+ *basis_ctof_out, out); PCeedChk(ierr); - return 0; - } - -@@ -734,11 +733,11 @@ int CeedOperatorGetOrder(CeedOperator oper, CeedInt * order) - int ierr; - - CeedOperatorField active_field; -- ierr = CeedOperatorGetActiveField(oper, &active_field); CeedChk(ierr); -+ ierr = CeedOperatorGetActiveField(oper, &active_field); PCeedChk(ierr); - CeedBasis basis; -- ierr = CeedOperatorFieldGetBasis(active_field, &basis); CeedChk(ierr); -+ ierr = CeedOperatorFieldGetBasis(active_field, &basis); PCeedChk(ierr); - int P1d; -- ierr = CeedBasisGetNumNodes1D(basis, &P1d); CeedChk(ierr); -+ ierr = CeedBasisGetNumNodes1D(basis, &P1d); PCeedChk(ierr); - *order = P1d - 1; - - return 0; -@@ -753,13 +752,13 @@ int CeedATPMGBundle(CeedOperator oper, int order_reduction, - { - int ierr; - CeedInt order; -- ierr = CeedOperatorGetOrder(oper, &order); CeedChk(ierr); -+ ierr = CeedOperatorGetOrder(oper, &order); PCeedChk(ierr); - CeedElemRestriction ho_er; -- ierr = CeedOperatorGetActiveElemRestriction(oper, &ho_er); CeedChk(ierr); -+ ierr = CeedOperatorGetActiveElemRestriction(oper, &ho_er); PCeedChk(ierr); - ierr = CeedATPMGElemRestriction(order, order_reduction, ho_er, er_out, dof_map); -- CeedChk(ierr); -+ PCeedChk(ierr); - ierr = CeedATPMGOperator(oper, order_reduction, *er_out, coarse_basis_out, -- basis_ctof_out, coarse_oper); CeedChk(ierr); -+ basis_ctof_out, coarse_oper); PCeedChk(ierr); - return 0; - } - -diff --git a/fem/ceed/solvers/solvers-atpmg.hpp b/fem/ceed/solvers/solvers-atpmg.hpp -index 8d85b1840..62a1640fe 100644 ---- a/fem/ceed/solvers/solvers-atpmg.hpp -+++ b/fem/ceed/solvers/solvers-atpmg.hpp -@@ -25,7 +25,7 @@ namespace ceed - /** @brief Take given (high-order) CeedElemRestriction and make a new - CeedElemRestriction, which corresponds to a lower-order problem. - -- Assumes a Gauss-Lobatto basis and tensor product elements, and assumes that -+ Assumes a Gauss-Lobatto basis and tensor-product elements, and assumes that - the nodes in er_in are ordered in a tensor-product way. - - This is a setup routine that operates on the host. -diff --git a/fem/fe/fe_base.cpp b/fem/fe/fe_base.cpp -index 6f665bfa3..ded2ffc9c 100644 ---- a/fem/fe/fe_base.cpp -+++ b/fem/fe/fe_base.cpp -@@ -401,7 +401,7 @@ const DofToQuad &FiniteElement::GetDofToQuad(const IntegrationRule &ir, - } - } - } -- else -+ else if (range_type == VECTOR) - { - d2q->B.SetSize(nqpt*dim*dof); - d2q->Bt.SetSize(dof*nqpt*dim); -@@ -419,6 +419,10 @@ const DofToQuad &FiniteElement::GetDofToQuad(const IntegrationRule &ir, - } - } - } -+ else -+ { -+ // Skip B and Bt for unknown range type -+ } - switch (deriv_type) - { - case GRAD: -@@ -472,7 +476,7 @@ const DofToQuad &FiniteElement::GetDofToQuad(const IntegrationRule &ir, - { - for (int j = 0; j < dof; j++) - { -- d2q->G[i+nqpt*(d+dim*j)] = d2q->Gt[j+dof*(i+nqpt*d)] = curlshape(j, d); -+ d2q->G[i+nqpt*(d+cdim*j)] = d2q->Gt[j+dof*(i+nqpt*d)] = curlshape(j, d); - } - } - } -@@ -480,7 +484,8 @@ const DofToQuad &FiniteElement::GetDofToQuad(const IntegrationRule &ir, - } - case NONE: - default: -- MFEM_ABORT("invalid finite element derivative type"); -+ // Skip G and Gt for unknown derivative type -+ break; - } - dof2quad_array.Append(d2q); - return *d2q; -diff --git a/fem/fe/fe_base.hpp b/fem/fe/fe_base.hpp -index b533525f0..6582140df 100644 ---- a/fem/fe/fe_base.hpp -+++ b/fem/fe/fe_base.hpp -@@ -1288,9 +1288,9 @@ public: - const DofToQuad &GetDofToQuad(const IntegrationRule &ir, - DofToQuad::Mode mode) const override - { -- MFEM_VERIFY(mode != DofToQuad::FULL, "invalid mode requested"); -- return GetTensorDofToQuad(*this, ir, mode, basis1d, true, -- dof2quad_array); -+ return (mode == DofToQuad::FULL) ? -+ FiniteElement::GetDofToQuad(ir, mode) : -+ GetTensorDofToQuad(*this, ir, mode, basis1d, true, dof2quad_array); - } - - const DofToQuad &GetDofToQuadOpen(const IntegrationRule &ir, -diff --git a/fem/fespace.cpp b/fem/fespace.cpp -index cb82c6008..59b4b7e31 100644 ---- a/fem/fespace.cpp -+++ b/fem/fespace.cpp -@@ -1599,7 +1599,7 @@ void FiniteElementSpace::RefinementOperator - - subY.SetSize(lP.Height()); - -- DofTransformation *doftrans = fespace->GetElementDofs(k, dofs); -+ const DofTransformation *doftrans = fespace->GetElementDofs(k, dofs); - old_elem_dof->GetRow(emb.parent, old_dofs); - - if (!doftrans) -@@ -1620,9 +1620,9 @@ void FiniteElementSpace::RefinementOperator - old_elem_fos->GetRow(emb.parent, old_Fo); - old_DoFTrans[geom]->SetFaceOrientations(old_Fo); - -- DofTransformation *new_doftrans = NULL; -- VDofTransformation *vdoftrans = -- dynamic_cast(doftrans); -+ const DofTransformation *new_doftrans = NULL; -+ const VDofTransformation *vdoftrans = -+ dynamic_cast(doftrans); - if (vdoftrans) - { - new_doftrans = doftrans; -@@ -1675,7 +1675,7 @@ void FiniteElementSpace::RefinementOperator - const Geometry::Type geom = mesh_ref->GetElementBaseGeometry(k); - const DenseMatrix &lP = localP[geom](emb.matrix); - -- DofTransformation * doftrans = fespace->GetElementDofs(k, f_dofs); -+ const DofTransformation * doftrans = fespace->GetElementDofs(k, f_dofs); - old_elem_dof->GetRow(emb.parent, c_dofs); - - if (!doftrans) -@@ -1710,9 +1710,9 @@ void FiniteElementSpace::RefinementOperator - old_elem_fos->GetRow(emb.parent, old_Fo); - old_DoFTrans[geom]->SetFaceOrientations(old_Fo); - -- DofTransformation *new_doftrans = NULL; -- VDofTransformation *vdoftrans = -- dynamic_cast(doftrans); -+ const DofTransformation *new_doftrans = NULL; -+ const VDofTransformation *vdoftrans = -+ dynamic_cast(doftrans); - if (vdoftrans) - { - new_doftrans = doftrans; -diff --git a/fem/fespace.hpp b/fem/fespace.hpp -index 00b290c09..588de9199 100644 ---- a/fem/fespace.hpp -+++ b/fem/fespace.hpp -@@ -271,7 +271,7 @@ protected: - int own_ext; - mutable Array face_to_be; // NURBS FE space only - -- Array DoFTrans; -+ Array DoFTrans; - mutable VDofTransformation VDoFTrans; - - /** Matrix representing the prolongation from the global conforming dofs to -@@ -1268,11 +1268,9 @@ public: - /// @brief Return true if the mesh contains only one topology and the elements are tensor elements. - inline bool UsesTensorBasis(const FiniteElementSpace& fes) - { -- Mesh & mesh = *fes.GetMesh(); -- const bool mixed = mesh.GetNumGeometries(mesh.Dimension()) > 1; -- // Potential issue: empty local mesh --> no element 0. -- return !mixed && -- dynamic_cast(fes.GetFE(0))!=nullptr; -+ Mesh &mesh = *fes.GetMesh(); -+ return mesh.GetNE() > 0 && mesh.GetNumGeometries(mesh.Dimension()) == 1 && -+ dynamic_cast(fes.GetFE(0)) != nullptr; - } - - } -diff --git a/fem/hybridization.cpp b/fem/hybridization.cpp -index f9d4699c2..06934064c 100644 ---- a/fem/hybridization.cpp -+++ b/fem/hybridization.cpp -@@ -120,10 +120,10 @@ void Hybridization::ConstructC() - vdofs[s1+j] = o2 + j; - } - c_fes->GetFaceVDofs(i, c_vdofs); -- c_bfi->AssembleFaceMatrix(*c_fes->GetFaceElement(i), -- *fes->GetFE(FTr->Elem1No), -- *fes->GetFE(FTr->Elem2No), -- *FTr, elmat); -+ c_bfi->AssembleFaceMatrix2(*c_fes->GetFaceElement(i), -+ *fes->GetFE(FTr->Elem1No), -+ *fes->GetFE(FTr->Elem2No), -+ *FTr, elmat); - // zero-out small elements in elmat - elmat.Threshold(1e-12 * elmat.MaxMaxNorm()); - Ct->AddSubMatrix(vdofs, c_vdofs, elmat, skip_zeros); -@@ -165,7 +165,7 @@ void Hybridization::ConstructC() - vdofs[j] = o1 + j; - } - fe = fes->GetFE(FTr->Elem1No); -- c_bfi->AssembleFaceMatrix(*face_fe, *fe, *fe, *FTr, elmat); -+ c_bfi->AssembleFaceMatrix2(*face_fe, *fe, *fe, *FTr, elmat); - // zero-out small elements in elmat - elmat.Threshold(1e-12 * elmat.MaxMaxNorm()); - Ct->AddSubMatrix(vdofs, c_vdofs, elmat, skip_zeros); -diff --git a/fem/integ/bilininteg_br2.cpp b/fem/integ/bilininteg_br2.cpp -index 159947029..846d72c7c 100644 ---- a/fem/integ/bilininteg_br2.cpp -+++ b/fem/integ/bilininteg_br2.cpp -@@ -152,20 +152,7 @@ void DGDiffusionBR2Integrator::AssembleFaceMatrix( - elmat.SetSize(ndofs); - elmat = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int order; -- if (ndof2) -- { -- order = 2*std::max(el1.GetOrder(), el2.GetOrder()); -- } -- else -- { -- order = 2*el1.GetOrder(); -- } -- ir = &IntRules.Get(Trans.FaceGeom, order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el1, el2, Trans); - - for (int p = 0; p < ir->GetNPoints(); p++) - { -diff --git a/fem/integ/bilininteg_convection_mf.cpp b/fem/integ/bilininteg_convection_mf.cpp -index bbaf82788..c7078d407 100644 ---- a/fem/integ/bilininteg_convection_mf.cpp -+++ b/fem/integ/bilininteg_convection_mf.cpp -@@ -19,31 +19,42 @@ namespace mfem - - void ConvectionIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation &Trans = *fes.GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedMFConvectionIntegrator(*this, fes, Q, alpha); -- } -- else -- { -- ceedOp = new ceed::MFConvectionIntegrator(fes, *ir, Q, alpha); -- } -+ ceedOp = new ceed::MFConvectionIntegrator(*this, fes, Q, alpha); - return; - } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *fes.GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - MFEM_ABORT("Error: ConvectionIntegrator::AssembleMF only implemented with" - " libCEED"); - } - -+void ConvectionIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::MFConvectionIntegrator(*this, fes, Q, alpha, true); -+ return; -+ } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *fes.GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: ConvectionIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); -+} -+ - void ConvectionIntegrator::AssembleDiagonalMF(Vector &diag) - { - if (DeviceCanUseCeed()) -diff --git a/fem/integ/bilininteg_convection_pa.cpp b/fem/integ/bilininteg_convection_pa.cpp -index 25928f002..74156c726 100644 ---- a/fem/integ/bilininteg_convection_pa.cpp -+++ b/fem/integ/bilininteg_convection_pa.cpp -@@ -116,26 +116,19 @@ void ConvectionIntegrator::AssemblePA(const FiniteElementSpace &fes) - { - const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? - Device::GetDeviceMemoryType() : pa_mt; -- // Assumes tensor-product elements - Mesh *mesh = fes.GetMesh(); -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation &Trans = *fes.GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); -+ if (mesh->GetNE() == 0) { return; } - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPAConvectionIntegrator(*this, fes, Q, alpha); -- } -- else -- { -- ceedOp = new ceed::PAConvectionIntegrator(fes, *ir, Q, alpha); -- } -+ ceedOp = new ceed::PAConvectionIntegrator(*this, fes, Q, alpha); - return; - } -+ -+ // Assumes tensor-product elements -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &T = *fes.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - const int dims = el.GetDim(); - const int symmDims = dims; - nq = ir->GetNPoints(); -@@ -166,6 +159,25 @@ void ConvectionIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - } - -+void ConvectionIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PAConvectionIntegrator(*this, fes, Q, alpha, true); -+ return; -+ } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *fes.GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: ConvectionIntegrator::AssemblePABoundary only implemented with" -+ " libCEED"); -+} -+ - void ConvectionIntegrator::AssembleDiagonalPA(Vector &diag) - { - if (DeviceCanUseCeed()) -diff --git a/fem/integ/bilininteg_curlcurl_mf.cpp b/fem/integ/bilininteg_curlcurl_mf.cpp -new file mode 100644 -index 000000000..54c88055b ---- /dev/null -+++ b/fem/integ/bilininteg_curlcurl_mf.cpp -@@ -0,0 +1,89 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/curlcurl/curlcurl.hpp" -+ -+using namespace std; -+ -+namespace mfem -+{ -+ -+void CurlCurlIntegrator::AssembleMF(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::MFCurlCurlIntegrator(*this, fes, MQ); } -+ else if (DQ) { ceedOp = new ceed::MFCurlCurlIntegrator(*this, fes, DQ); } -+ else { ceedOp = new ceed::MFCurlCurlIntegrator(*this, fes, Q); } -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); -+ MFEM_ABORT("Error: CurlCurlIntegrator::AssembleMF only implemented with" -+ " libCEED"); -+} -+ -+void CurlCurlIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::MFCurlCurlIntegrator(*this, fes, MQ, true); } -+ else if (DQ) { ceedOp = new ceed::MFCurlCurlIntegrator(*this, fes, DQ, true); } -+ else { ceedOp = new ceed::MFCurlCurlIntegrator(*this, fes, Q, true); } -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); -+ MFEM_ABORT("Error: CurlCurlIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); -+} -+ -+void CurlCurlIntegrator::AssembleDiagonalMF(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ MFEM_ABORT("Error: CurlCurlIntegrator::AssembleDiagonalMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+void CurlCurlIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: CurlCurlIntegrator::AddMultMF only implemented with" -+ " libCEED"); -+ } -+} -+ -+} -diff --git a/fem/integ/bilininteg_curlcurl_pa.cpp b/fem/integ/bilininteg_curlcurl_pa.cpp -index 3d12d978a..7b17fc94d 100644 ---- a/fem/integ/bilininteg_curlcurl_pa.cpp -+++ b/fem/integ/bilininteg_curlcurl_pa.cpp -@@ -13,6 +13,7 @@ - #include "../bilininteg.hpp" - #include "../gridfunc.hpp" - #include "../qfunction.hpp" -+#include "../ceed/integrators/curlcurl/curlcurl.hpp" - #include "bilininteg_hcurl_kernels.hpp" - - namespace mfem -@@ -20,32 +21,35 @@ namespace mfem - - void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes) - { -- // Assumes tensor-product elements - Mesh *mesh = fes.GetMesh(); -- const FiniteElement *fel = fes.GetFE(0); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::PACurlCurlIntegrator(*this, fes, MQ); } -+ else if (DQ) { ceedOp = new ceed::PACurlCurlIntegrator(*this, fes, DQ); } -+ else { ceedOp = new ceed::PACurlCurlIntegrator(*this, fes, Q); } -+ return; -+ } - -+ // Assumes tensor-product elements -+ const FiniteElement *fel = fes.GetFE(0); - const VectorTensorFiniteElement *el = - dynamic_cast(fel); - MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el, -- *mesh->GetElementTransformation(0)); -- -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); - const int dims = el->GetDim(); - MFEM_VERIFY(dims == 2 || dims == 3, ""); -- - nq = ir->GetNPoints(); - dim = mesh->Dimension(); - MFEM_VERIFY(dim == 2 || dim == 3, ""); -- - ne = fes.GetNE(); - geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); - mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); - mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); - dofs1D = mapsC->ndof; - quad1D = mapsC->nqpt; -- - MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); - - QuadratureSpace qs(*mesh, *ir); -@@ -78,131 +82,153 @@ void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - } - --void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag) -+void CurlCurlIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) - { -- if (dim == 3) -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::PACurlCurlIntegrator(*this, fes, MQ, true); } -+ else if (DQ) { ceedOp = new ceed::PACurlCurlIntegrator(*this, fes, DQ, true); } -+ else { ceedOp = new ceed::PACurlCurlIntegrator(*this, fes, Q, true); } -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); -+ MFEM_ABORT("Error: CurlCurlIntegrator::AssemblePABoundary only implemented with" -+ " libCEED"); -+} -+ -+void CurlCurlIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else - { -- if (Device::Allows(Backend::DEVICE_MASK)) -+ if (dim == 3) - { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -+ if (Device::Allows(Backend::DEVICE_MASK)) - { -- case 0x23: -- return internal::SmemPACurlCurlAssembleDiagonal3D<2,3>( -- dofs1D, -- quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- case 0x34: -- return internal::SmemPACurlCurlAssembleDiagonal3D<3,4>( -- dofs1D, -- quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- case 0x45: -- return internal::SmemPACurlCurlAssembleDiagonal3D<4,5>( -- dofs1D, -- quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- case 0x56: -- return internal::SmemPACurlCurlAssembleDiagonal3D<5,6>( -- dofs1D, -- quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- default: -- return internal::SmemPACurlCurlAssembleDiagonal3D( -- dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<2,3>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ case 0x34: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<3,4>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ case 0x45: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<4,5>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ case 0x56: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<5,6>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ default: -+ return internal::SmemPACurlCurlAssembleDiagonal3D( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ } - } -+ else -+ { -+ internal::PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ } -+ } -+ else if (dim == 2) -+ { -+ internal::PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne, -+ mapsO->B, mapsC->G, pa_data, diag); - } - else - { -- internal::PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -+ MFEM_ABORT("Unsupported dimension!"); - } - } -- else if (dim == 2) -- { -- internal::PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne, -- mapsO->B, mapsC->G, pa_data, diag); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } - } - - void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- if (dim == 3) -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else - { -- if (Device::Allows(Backend::DEVICE_MASK)) -+ if (dim == 3) - { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPACurlCurlApply3D<2,3>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ case 0x34: -+ return internal::SmemPACurlCurlApply3D<3,4>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ case 0x45: -+ return internal::SmemPACurlCurlApply3D<4,5>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ case 0x56: -+ return internal::SmemPACurlCurlApply3D<5,6>( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ default: -+ return internal::SmemPACurlCurlApply3D( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ } -+ } -+ else - { -- case 0x23: -- return internal::SmemPACurlCurlApply3D<2,3>( -- dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- case 0x34: -- return internal::SmemPACurlCurlApply3D<3,4>( -- dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- case 0x45: -- return internal::SmemPACurlCurlApply3D<4,5>( -- dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- case 0x56: -- return internal::SmemPACurlCurlApply3D<5,6>( -- dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- default: -- return internal::SmemPACurlCurlApply3D( -- dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -+ internal::PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, mapsC->G, mapsC->Gt, -+ pa_data, x, y); - } - } -+ else if (dim == 2) -+ { -+ internal::PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ } - else - { -- internal::PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, -- mapsO->Bt, mapsC->Bt, mapsC->G, mapsC->Gt, -- pa_data, x, y); -+ MFEM_ABORT("Unsupported dimension!"); - } - } -- else if (dim == 2) -- { -- internal::PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } - } - - } // namespace mfem -diff --git a/fem/integ/bilininteg_dgtrace_pa.cpp b/fem/integ/bilininteg_dgtrace_pa.cpp -index f4b8d837c..284191c13 100644 ---- a/fem/integ/bilininteg_dgtrace_pa.cpp -+++ b/fem/integ/bilininteg_dgtrace_pa.cpp -@@ -123,9 +123,7 @@ void DGTraceIntegrator::SetupPA(const FiniteElementSpace &fes, FaceType type) - *fes.GetTraceElement(0, fes.GetMesh()->GetFaceGeometry(0)); - FaceElementTransformations &T0 = - *fes.GetMesh()->GetFaceElementTransformations(0); -- const IntegrationRule *ir = IntRule? -- IntRule: -- &GetRule(el.GetGeomType(), el.GetOrder(), T0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*fes.GetFE(0), T0); - const int symmDims = 4; - nq = ir->GetNPoints(); - dim = mesh->Dimension(); -diff --git a/fem/integ/bilininteg_diffusion_mf.cpp b/fem/integ/bilininteg_diffusion_mf.cpp -index 0896b8bf9..0a39a442c 100644 ---- a/fem/integ/bilininteg_diffusion_mf.cpp -+++ b/fem/integ/bilininteg_diffusion_mf.cpp -@@ -18,33 +18,46 @@ namespace mfem - - void DiffusionIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- MFEM_VERIFY(!VQ && !MQ, -- "Only scalar coefficient supported for DiffusionIntegrator" -- " with libCEED"); -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedMFDiffusionIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::MFDiffusionIntegrator(fes, *ir, Q); -- } -+ if (MQ) { ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, MQ); } -+ else if (VQ) { ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, VQ); } -+ else { ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, Q); } - return; - } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - MFEM_ABORT("Error: DiffusionIntegrator::AssembleMF only implemented with" - " libCEED"); - } - -+void DiffusionIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, MQ, true); } -+ else if (VQ) { ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, VQ, true); } -+ else { ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, Q, true); } -+ return; -+ } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: DiffusionIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); -+} -+ - void DiffusionIntegrator::AssembleDiagonalMF(Vector &diag) - { - if (DeviceCanUseCeed()) -diff --git a/fem/integ/bilininteg_diffusion_pa.cpp b/fem/integ/bilininteg_diffusion_pa.cpp -index a966c8520..7cf050870 100644 ---- a/fem/integ/bilininteg_diffusion_pa.cpp -+++ b/fem/integ/bilininteg_diffusion_pa.cpp -@@ -22,29 +22,21 @@ void DiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - { - const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? - Device::GetDeviceMemoryType() : pa_mt; -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- MFEM_VERIFY(!VQ && !MQ, -- "Only scalar coefficient supported for DiffusionIntegrator" -- " with libCEED"); -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPADiffusionIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PADiffusionIntegrator(fes, *ir, Q); -- } -+ if (MQ) { ceedOp = new ceed::PADiffusionIntegrator(*this, fes, MQ); } -+ else if (VQ) { ceedOp = new ceed::PADiffusionIntegrator(*this, fes, VQ); } -+ else { ceedOp = new ceed::PADiffusionIntegrator(*this, fes, Q); } - return; - } -+ -+ // Assuming the same element type -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - const int dims = el.GetDim(); - const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 - const int nq = ir->GetNPoints(); -@@ -73,6 +65,27 @@ void DiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - ir->GetWeights(), geom->J, coeff, pa_data); - } - -+void DiffusionIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::PADiffusionIntegrator(*this, fes, MQ, true); } -+ else if (VQ) { ceedOp = new ceed::PADiffusionIntegrator(*this, fes, VQ, true); } -+ else { ceedOp = new ceed::PADiffusionIntegrator(*this, fes, Q, true); } -+ return; -+ } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: DiffusionIntegrator::AssemblePABoundary only implemented with" -+ " libCEED"); -+} -+ - void DiffusionIntegrator::AssembleDiagonalPA(Vector &diag) - { - if (DeviceCanUseCeed()) -diff --git a/fem/integ/bilininteg_divdiv_mf.cpp b/fem/integ/bilininteg_divdiv_mf.cpp -new file mode 100644 -index 000000000..7d8ea409e ---- /dev/null -+++ b/fem/integ/bilininteg_divdiv_mf.cpp -@@ -0,0 +1,85 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/divdiv/divdiv.hpp" -+ -+using namespace std; -+ -+namespace mfem -+{ -+ -+void DivDivIntegrator::AssembleMF(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::MFDivDivIntegrator(*this, fes, Q); -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); -+ MFEM_ABORT("Error: DivDivIntegrator::AssembleMF only implemented with" -+ " libCEED"); -+} -+ -+void DivDivIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::MFDivDivIntegrator(*this, fes, Q, true); -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); -+ MFEM_ABORT("Error: DivDivIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); -+} -+ -+void DivDivIntegrator::AssembleDiagonalMF(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ MFEM_ABORT("Error: DivDivIntegrator::AssembleDiagonalMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+void DivDivIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: DivDivIntegrator::AddMultMF only implemented with" -+ " libCEED"); -+ } -+} -+ -+} -diff --git a/fem/integ/bilininteg_divdiv_pa.cpp b/fem/integ/bilininteg_divdiv_pa.cpp -index 8abf233a7..ec85f6c22 100644 ---- a/fem/integ/bilininteg_divdiv_pa.cpp -+++ b/fem/integ/bilininteg_divdiv_pa.cpp -@@ -13,6 +13,7 @@ - #include "../bilininteg.hpp" - #include "../gridfunc.hpp" - #include "../qfunction.hpp" -+#include "../ceed/integrators/divdiv/divdiv.hpp" - #include "bilininteg_hdiv_kernels.hpp" - - namespace mfem -@@ -20,33 +21,34 @@ namespace mfem - - void DivDivIntegrator::AssemblePA(const FiniteElementSpace &fes) - { -- // Assumes tensor-product elements - Mesh *mesh = fes.GetMesh(); -- const FiniteElement *fel = fes.GetFE(0); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PADivDivIntegrator(*this, fes, Q); -+ return; -+ } - -+ // Assumes tensor-product elements -+ const FiniteElement *fel = fes.GetFE(0); - const VectorTensorFiniteElement *el = - dynamic_cast(fel); - MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule -- (*el, *el, *mesh->GetElementTransformation(0)); -- -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); - const int dims = el->GetDim(); - MFEM_VERIFY(dims == 2 || dims == 3, ""); -- - const int nq = ir->GetNPoints(); - dim = mesh->Dimension(); - MFEM_VERIFY(dim == 2 || dim == 3, ""); -- - ne = fes.GetNE(); - geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); - mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); - mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); - dofs1D = mapsC->ndof; - quad1D = mapsC->nqpt; -- - MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- - pa_data.SetSize(nq * ne, Device::GetMemoryType()); - - QuadratureSpace qs(*mesh, *ir); -@@ -68,31 +70,72 @@ void DivDivIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - } - --void DivDivIntegrator::AssembleDiagonalPA(Vector& diag) -+void DivDivIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) - { -- if (dim == 3) -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) - { -- internal::PADivDivAssembleDiagonal3D(dofs1D, quad1D, ne, -- mapsO->B, mapsC->G, pa_data, diag); -+ delete ceedOp; -+ ceedOp = new ceed::PADivDivIntegrator(*this, fes, Q, true); -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el, T); -+ MFEM_ABORT("Error: DivDivIntegrator::AssemblePABoundary only implemented with" -+ " libCEED"); -+} -+ -+void DivDivIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); - } - else - { -- internal::PADivDivAssembleDiagonal2D(dofs1D, quad1D, ne, -- mapsO->B, mapsC->G, pa_data, diag); -+ if (dim == 3) -+ { -+ internal::PADivDivAssembleDiagonal3D(dofs1D, quad1D, ne, -+ mapsO->B, mapsC->G, pa_data, diag); -+ } -+ else if (dim == 2) -+ { -+ internal::PADivDivAssembleDiagonal2D(dofs1D, quad1D, ne, -+ mapsO->B, mapsC->G, pa_data, diag); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } - } - } - - void DivDivIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- if (dim == 3) -- internal::PADivDivApply3D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -- mapsO->Bt, mapsC->Gt, pa_data, x, y); -- else if (dim == 2) -- internal::PADivDivApply2D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -- mapsO->Bt, mapsC->Gt, pa_data, x, y); -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } - else - { -- MFEM_ABORT("Unsupported dimension!"); -+ if (dim == 3) -+ { -+ internal::PADivDivApply3D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -+ mapsO->Bt, mapsC->Gt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ internal::PADivDivApply2D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -+ mapsO->Bt, mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } - } - } - -diff --git a/fem/integ/bilininteg_gradient_pa.cpp b/fem/integ/bilininteg_gradient_pa.cpp -index 20ef4684d..cb37e981a 100644 ---- a/fem/integ/bilininteg_gradient_pa.cpp -+++ b/fem/integ/bilininteg_gradient_pa.cpp -@@ -167,9 +167,8 @@ void GradientIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - Mesh *mesh = trial_fes.GetMesh(); - const FiniteElement &trial_fe = *trial_fes.GetFE(0); - const FiniteElement &test_fe = *test_fes.GetFE(0); -- ElementTransformation *trans = mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -- *trans); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, T); - const int dims = trial_fe.GetDim(); - const int dimsToStore = dims * dims; - nq = ir->GetNPoints(); -diff --git a/fem/integ/bilininteg_interp_curl_pa.cpp b/fem/integ/bilininteg_interp_curl_pa.cpp -new file mode 100644 -index 000000000..cc9502ce0 ---- /dev/null -+++ b/fem/integ/bilininteg_interp_curl_pa.cpp -@@ -0,0 +1,65 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../ceed/integrators/interp/interp.hpp" -+ -+namespace mfem -+{ -+ -+void CurlInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PADiscreteInterpolator(*this, trial_fes, test_fes); -+ return; -+ } -+ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ // const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ // const FiniteElement *test_fel = test_fes.GetFE(0); -+ MFEM_ABORT("Error: CurlInterpolator::AssemblePA only implemented with libCEED"); -+} -+ -+void CurlInterpolator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: CurlInterpolator::AddMultPA only implemented with" -+ " libCEED"); -+ } -+} -+ -+void CurlInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMultTranspose(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: CurlInterpolator::AddMultTransposePA only implemented" -+ "with libCEED"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_interp_pa.cpp b/fem/integ/bilininteg_interp_grad_pa.cpp -similarity index 55% -rename from fem/integ/bilininteg_interp_pa.cpp -rename to fem/integ/bilininteg_interp_grad_pa.cpp -index 3cac18c65..4ec50e083 100644 ---- a/fem/integ/bilininteg_interp_pa.cpp -+++ b/fem/integ/bilininteg_interp_grad_pa.cpp -@@ -13,10 +13,68 @@ - #include "../bilininteg.hpp" - #include "../gridfunc.hpp" - #include "../qfunction.hpp" -+#include "../ceed/integrators/interp/interp.hpp" - - namespace mfem - { - -+void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PADiscreteInterpolator(*this, trial_fes, test_fes); -+ return; -+ } -+ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const NodalTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!"); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!"); -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), -+ "Orders do not match!"); -+ ne = trial_fes.GetNE(); -+ -+ const int order = trial_el->GetOrder(); -+ dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType()); -+ mfem::QuadratureFunctions1D qf1d; -+ mfem::IntegrationRule closed_ir; -+ closed_ir.SetSize(order + 1); -+ qf1d.GaussLobatto(order + 1, &closed_ir); -+ mfem::IntegrationRule open_ir; -+ open_ir.SetSize(order); -+ qf1d.GaussLegendre(order, &open_ir); -+ -+ maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -+ o_dofs1D = maps_O_C->nqpt; -+ if (trial_el->GetBasisType() == BasisType::GaussLobatto) -+ { -+ B_id = true; -+ c_dofs1D = maps_O_C->ndof; -+ } -+ else -+ { -+ B_id = false; -+ maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -+ c_dofs1D = maps_C_C->nqpt; -+ } -+} -+ - // Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient - // to get a dof in H(curl) (range). You can think of the range as the "test" space - // and the domain as the "trial" space, but there's no integration. -@@ -1017,920 +1075,85 @@ static void PAHcurlApplyGradientTranspose3DBId( - }); - } - --void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) -+void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const - { -- // Assumes tensor-product elements, with a vector test space and H^1 trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const NodalTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!"); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!"); -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), -- "Orders do not match!"); -- ne = trial_fes.GetNE(); -- -- const int order = trial_el->GetOrder(); -- dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType()); -- mfem::QuadratureFunctions1D qf1d; -- mfem::IntegrationRule closed_ir; -- closed_ir.SetSize(order + 1); -- qf1d.GaussLobatto(order + 1, &closed_ir); -- mfem::IntegrationRule open_ir; -- open_ir.SetSize(order); -- qf1d.GaussLegendre(order, &open_ir); -- -- maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -- o_dofs1D = maps_O_C->nqpt; -- if (trial_el->GetBasisType() == BasisType::GaussLobatto) -+ if (DeviceCanUseCeed()) - { -- B_id = true; -- c_dofs1D = maps_O_C->ndof; -+ ceedOp->AddMult(x, y); - } - else - { -- B_id = false; -- maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -- c_dofs1D = maps_C_C->nqpt; -- } --} -- --void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- if (B_id) -+ if (dim == 3) - { -- PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne, -+ if (B_id) -+ { -+ PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); -+ } -+ else -+ { -+ PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, - maps_O_C->G, x, y); -+ } - } -- else -- { -- PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->G, x, y); -- } -- } -- else if (dim == 2) -- { -- if (B_id) -+ else if (dim == 2) - { -- PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -+ if (B_id) -+ { -+ PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); -+ } -+ else -+ { -+ PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G, -+ x, y); -+ } - } - else - { -- PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G, -- x, y); -+ MFEM_ABORT("Bad dimension!"); - } - } -- else -- { -- mfem_error("Bad dimension!"); -- } - } - - void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const - { -- if (dim == 3) -- { -- if (B_id) -- { -- PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -- } -- else -- { -- PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->G, x, y); -- } -- } -- else if (dim == 2) -+ if (DeviceCanUseCeed()) - { -- if (B_id) -- { -- PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -- } -- else -- { -- PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->G, x, y); -- } -+ ceedOp->AddMultTranspose(x, y); - } - else - { -- mfem_error("Bad dimension!"); -- } --} -- --static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE); -- auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE); -- -- auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[2][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y) -- -- // contract in y -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e); -- } -- } -- } -- } -- -- // contract in x -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bo(ex, dx) * w[j][dx][ey]; -- } -- const int local_index = ey*o_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x) -- -- // contract in y -- for (int ey = 0; ey < o_dofs1D; ++ey) -+ if (dim == 3) - { -- for (int dx = 0; dx < c_dofs1D; ++dx) -+ if (B_id) - { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e); -- } -- } -+ PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); - } -- } -- -- // contract in x -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -+ else - { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bc(ex, dx) * w[j][dx][ey]; -- } -- const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -+ PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->G, x, y); - } - } -- }); --} -- --static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE); -- -- auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- //constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[2][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y) -- -- // contract in x -- for (int ey = 0; ey < c_dofs1D; ++ey) -+ else if (dim == 2) - { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -- } -- for (int ex = 0; ex < o_dofs1D; ++ex) -+ if (B_id) - { -- const int local_index = ey*o_dofs1D + ex; -- const double xd = x(local_index, e); -- -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e); -- } -- } -+ PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); - } -- } -- -- // contract in y -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -+ else - { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- s += w[j][dx][ey] * Bc(ey, dy); -- } -- y(dx, dy, j, e) += s; -- } -+ PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->G, x, y); - } - } -- -- // dofs that point parallel to y-axis (open in y, closed in x) -- -- // contract in x -- for (int ey = 0; ey < o_dofs1D; ++ey) -+ else - { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -- } -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- const double xd = x(local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e); -- } -- } -- } -+ MFEM_ABORT("Bad dimension!"); - } -- -- // contract in y -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- s += w[j][dx][ey] * Bo(ey, dy); -- } -- y(dx, dy, j, e) += s; -- } -- } -- } -- }); --} -- --static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -- auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- -- auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -- NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<3; ++j) -- { -- w2[j][dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -- } -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bo(ex, dx) * w2[j][dx][ey][ez]; -- } -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<3; ++j) -- { -- w2[j][dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez]; -- } -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bc(ex, dx) * w2[j][dx][ey][ez]; -- } -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- } -- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- -- // contract in z -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<3; ++j) -- { -- w2[j][dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -- } -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bc(ex, dx) * w2[j][dx][ey][ez]; -- } -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- } -- }); --} -- --static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -- -- auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -- NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int j=0; j<3; ++j) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] = 0.0; -- } -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- const double xv = x(local_index, e) * vk(j, local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] += xv * Bo(ex, dx); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -- } -- } -- } -- } -- } -- -- // contract in z -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- s += w1[j][dx][dy][ez] * Bc(ez, dz); -- } -- y(dx, dy, dz, j, e) += s; -- } -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int j=0; j<3; ++j) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] = 0.0; -- } -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- const double xv = x(local_index, e) * vk(j, local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] += xv * Bc(ex, dx); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy); -- } -- } -- } -- } -- } -- -- // contract in z -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- s += w1[j][dx][dy][ez] * Bc(ez, dz); -- } -- y(dx, dy, dz, j, e) += s; -- } -- } -- } -- } -- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- -- // contract in x -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int j=0; j<3; ++j) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] = 0.0; -- } -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- const double xv = x(local_index, e) * vk(j, local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] += xv * Bc(ex, dx); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -- } -- } -- } -- } -- } -- -- // contract in z -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- s += w1[j][dx][dy][ez] * Bo(ez, dz); -- } -- y(dx, dy, dz, j, e) += s; -- } -- } -- } -- } -- }); --} -- --void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with a vector test space and H^1 trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const NodalTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -- -- ne = trial_fes.GetNE(); -- -- const int order = trial_el->GetOrder(); -- dofquad_fe = new H1_SegmentElement(order); -- mfem::QuadratureFunctions1D qf1d; -- mfem::IntegrationRule closed_ir; -- closed_ir.SetSize(order + 1); -- qf1d.GaussLobatto(order + 1, &closed_ir); -- mfem::IntegrationRule open_ir; -- open_ir.SetSize(order); -- qf1d.GaussLegendre(order, &open_ir); -- -- maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -- maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -- -- o_dofs1D = maps_O_C->nqpt; -- c_dofs1D = maps_C_C->nqpt; -- MFEM_VERIFY(maps_O_C->ndof == c_dofs1D && -- maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs"); -- -- const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D -- : 2 * c_dofs1D * o_dofs1D; -- -- const IntegrationRule & Nodes = test_el->GetNodes(); -- -- pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType()); -- auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne); -- -- const Array &dofmap = test_el->GetDofMap(); -- -- if (dim == 3) -- { -- // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with -- // the last 3 having negative signs. Here the signs are all positive, as -- // signs are applied in ElementRestriction. -- -- const double tk[9] = { 1.,0.,0., 0.,1.,0., 0.,0.,1. }; -- -- for (int c=0; c<3; ++c) -- { -- for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -- -- for (int e=0; eGetElementTransformation(e); -- tr->SetIntPoint(&Nodes.IntPoint(id)); -- tr->Jacobian().Mult(tk + dof2tk*dim, v); -- -- for (int j=0; j<3; ++j) -- { -- op(j,d,e) = v[j]; -- } -- } -- } -- } -- } -- else // 2D case -- { -- const double tk[4] = { 1.,0., 0.,1. }; -- for (int c=0; c<2; ++c) -- { -- for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -- -- for (int e=0; eGetElementTransformation(e); -- tr->SetIntPoint(&Nodes.IntPoint(id)); -- tr->Jacobian().Mult(tk + dof2tk*dim, v); -- -- for (int j=0; j<2; ++j) -- { -- op(j,d,e) = v[j]; -- } -- } -- } -- } -- } --} -- --void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -- pa_data, x, y); -- } -- else if (dim == 2) -- { -- PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -- pa_data, x, y); -- } -- else -- { -- mfem_error("Bad dimension!"); -- } --} -- --void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->B, pa_data, x, y); -- } -- else if (dim == 2) -- { -- PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->B, pa_data, x, y); -- } -- else -- { -- mfem_error("Bad dimension!"); - } - } - -diff --git a/fem/integ/bilininteg_interp_id_pa.cpp b/fem/integ/bilininteg_interp_id_pa.cpp -new file mode 100644 -index 000000000..efabe7c80 ---- /dev/null -+++ b/fem/integ/bilininteg_interp_id_pa.cpp -@@ -0,0 +1,843 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../ceed/integrators/interp/interp.hpp" -+ -+namespace mfem -+{ -+ -+void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PADiscreteInterpolator(*this, trial_fes, test_fes); -+ return; -+ } -+ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const NodalTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -+ -+ ne = trial_fes.GetNE(); -+ -+ const int order = trial_el->GetOrder(); -+ dofquad_fe = new H1_SegmentElement(order); -+ mfem::QuadratureFunctions1D qf1d; -+ mfem::IntegrationRule closed_ir; -+ closed_ir.SetSize(order + 1); -+ qf1d.GaussLobatto(order + 1, &closed_ir); -+ mfem::IntegrationRule open_ir; -+ open_ir.SetSize(order); -+ qf1d.GaussLegendre(order, &open_ir); -+ -+ maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -+ maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -+ -+ o_dofs1D = maps_O_C->nqpt; -+ c_dofs1D = maps_C_C->nqpt; -+ MFEM_VERIFY(maps_O_C->ndof == c_dofs1D && -+ maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs"); -+ -+ const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D -+ : 2 * c_dofs1D * o_dofs1D; -+ -+ const IntegrationRule & Nodes = test_el->GetNodes(); -+ -+ pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType()); -+ auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne); -+ -+ const Array &dofmap = test_el->GetDofMap(); -+ -+ if (dim == 3) -+ { -+ // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with -+ // the last 3 having negative signs. Here the signs are all positive, as -+ // signs are applied in ElementRestriction. -+ -+ const double tk[9] = { 1.,0.,0., 0.,1.,0., 0.,0.,1. }; -+ -+ for (int c=0; c<3; ++c) -+ { -+ for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -+ -+ for (int e=0; eGetElementTransformation(e); -+ tr->SetIntPoint(&Nodes.IntPoint(id)); -+ tr->Jacobian().Mult(tk + dof2tk*dim, v); -+ -+ for (int j=0; j<3; ++j) -+ { -+ op(j,d,e) = v[j]; -+ } -+ } -+ } -+ } -+ } -+ else // 2D case -+ { -+ const double tk[4] = { 1.,0., 0.,1. }; -+ for (int c=0; c<2; ++c) -+ { -+ for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -+ -+ for (int e=0; eGetElementTransformation(e); -+ tr->SetIntPoint(&Nodes.IntPoint(id)); -+ tr->Jacobian().Mult(tk + dof2tk*dim, v); -+ -+ for (int j=0; j<2; ++j) -+ { -+ op(j,d,e) = v[j]; -+ } -+ } -+ } -+ } -+ } -+} -+ -+static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE); -+ auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE); -+ -+ auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[2][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y) -+ -+ // contract in y -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e); -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bo(ex, dx) * w[j][dx][ey]; -+ } -+ const int local_index = ey*o_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x) -+ -+ // contract in y -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e); -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bc(ex, dx) * w[j][dx][ey]; -+ } -+ const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE); -+ -+ auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ //constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[2][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y) -+ -+ // contract in x -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -+ } -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ const int local_index = ey*o_dofs1D + ex; -+ const double xd = x(local_index, e); -+ -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ s += w[j][dx][ey] * Bc(ey, dy); -+ } -+ y(dx, dy, j, e) += s; -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x) -+ -+ // contract in x -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -+ } -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ const double xd = x(local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ s += w[j][dx][ey] * Bo(ey, dy); -+ } -+ y(dx, dy, j, e) += s; -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -+ auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ -+ auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -+ NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bo(ex, dx) * w2[j][dx][ey][ez]; -+ } -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez]; -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bc(ex, dx) * w2[j][dx][ey][ez]; -+ } -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ -+ // contract in z -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bc(ex, dx) * w2[j][dx][ey][ez]; -+ } -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -+ -+ auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -+ NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ } -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ const double xv = x(local_index, e) * vk(j, local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] += xv * Bo(ex, dx); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in z -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ s += w1[j][dx][dy][ez] * Bc(ez, dz); -+ } -+ y(dx, dy, dz, j, e) += s; -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ } -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ const double xv = x(local_index, e) * vk(j, local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] += xv * Bc(ex, dx); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in z -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ s += w1[j][dx][dy][ez] * Bc(ez, dz); -+ } -+ y(dx, dy, dz, j, e) += s; -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ -+ // contract in x -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ } -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ const double xv = x(local_index, e) * vk(j, local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] += xv * Bc(ex, dx); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in z -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ s += w1[j][dx][dy][ez] * Bo(ez, dz); -+ } -+ y(dx, dy, dz, j, e) += s; -+ } -+ } -+ } -+ } -+ }); -+} -+ -+void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ if (dim == 3) -+ { -+ PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -+ pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -+ pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Bad dimension!"); -+ } -+ } -+} -+ -+void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMultTranspose(x, y); -+ } -+ else -+ { -+ if (dim == 3) -+ { -+ PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->B, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->B, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Bad dimension!"); -+ } -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_mass_mf.cpp b/fem/integ/bilininteg_mass_mf.cpp -index 34a118b6d..41ab07b94 100644 ---- a/fem/integ/bilininteg_mass_mf.cpp -+++ b/fem/integ/bilininteg_mass_mf.cpp -@@ -19,42 +19,40 @@ namespace mfem - - void MassIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation *T = mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedMFMassIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::MFMassIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::MFMassIntegrator(*this, fes, Q); - return; - } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - MFEM_ABORT("Error: MassIntegrator::AssembleMF only implemented with" - " libCEED"); - } - --void MassIntegrator::AddMultMF(const Vector &x, Vector &y) const -+void MassIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) - { -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } - if (DeviceCanUseCeed()) - { -- ceedOp->AddMult(x, y); -- } -- else -- { -- MFEM_ABORT("Error: MassIntegrator::AddMultMF only implemented with" -- " libCEED"); -+ delete ceedOp; -+ ceedOp = new ceed::MFMassIntegrator(*this, fes, Q, true); -+ return; - } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: MassIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); - } - - void MassIntegrator::AssembleDiagonalMF(Vector &diag) -@@ -70,4 +68,17 @@ void MassIntegrator::AssembleDiagonalMF(Vector &diag) - } - } - -+void MassIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: MassIntegrator::AddMultMF only implemented with" -+ " libCEED"); -+ } -+} -+ - } // namespace mfem -diff --git a/fem/integ/bilininteg_mass_pa.cpp b/fem/integ/bilininteg_mass_pa.cpp -index ffdec1edb..0e4cd8f41 100644 ---- a/fem/integ/bilininteg_mass_pa.cpp -+++ b/fem/integ/bilininteg_mass_pa.cpp -@@ -23,28 +23,19 @@ void MassIntegrator::AssemblePA(const FiniteElementSpace &fes) - { - const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? - Device::GetDeviceMemoryType() : pa_mt; -- -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation *T0 = mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPAMassIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::PAMassIntegrator(*this, fes, Q); - return; - } -+ -+ // Assuming the same element type -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &T =* mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - int map_type = el.GetMapType(); - dim = mesh->Dimension(); - ne = fes.GetMesh()->GetNE(); -@@ -116,14 +107,19 @@ void MassIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) - { - const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? - Device::GetDeviceMemoryType() : pa_mt; -- -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PAMassIntegrator(*this, fes, Q, true); -+ return; -+ } -+ -+ // Assuming the same element type - const FiniteElement &el = *fes.GetBE(0); - ElementTransformation *T0 = mesh->GetBdrElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0); -- -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, *T0); - int map_type = el.GetMapType(); - dim = el.GetDim(); // Dimension of the boundary element, *not* the mesh - ne = fes.GetMesh()->GetNBE(); -diff --git a/fem/integ/bilininteg_mixedcurl_mf.cpp b/fem/integ/bilininteg_mixedcurl_mf.cpp -new file mode 100644 -index 000000000..b22cc297e ---- /dev/null -+++ b/fem/integ/bilininteg_mixedcurl_mf.cpp -@@ -0,0 +1,108 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/mixedveccurl/mixedveccurl.hpp" -+ -+namespace mfem -+{ -+ -+void MixedVectorCurlIntegrator::AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::MFMixedVectorCurlIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::MFMixedVectorCurlIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::MFMixedVectorCurlIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorCurlIntegrator::AssembleMF only implemented with" -+ " libCEED"); -+} -+ -+void MixedVectorCurlIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: MixedVectorCurlIntegrator::AddMultMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+void MixedVectorWeakCurlIntegrator::AssembleMF( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::MFMixedVectorWeakCurlIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::MFMixedVectorWeakCurlIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::MFMixedVectorWeakCurlIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorWeakCurlIntegrator::AssembleMF only" -+ " implemented with libCEED"); -+} -+ -+void MixedVectorWeakCurlIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: MixedVectorWeakCurlIntegrator::AddMultMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_mixedcurl_pa.cpp b/fem/integ/bilininteg_mixedcurl_pa.cpp -index 3d70bc4c9..dd7a9375f 100644 ---- a/fem/integ/bilininteg_mixedcurl_pa.cpp -+++ b/fem/integ/bilininteg_mixedcurl_pa.cpp -@@ -13,6 +13,7 @@ - #include "../bilininteg.hpp" - #include "../gridfunc.hpp" - #include "../qfunction.hpp" -+#include "../ceed/integrators/mixedveccurl/mixedveccurl.hpp" - #include "bilininteg_hcurl_kernels.hpp" - #include "bilininteg_hcurlhdiv_kernels.hpp" - -@@ -36,9 +37,8 @@ void MixedScalarCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - MFEM_ABORT("Unknown kernel."); - } - -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest, -- *mesh->GetElementTransformation(0)); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*fel, *eltest, T); - - const int dims = el->GetDim(); - MFEM_VERIFY(dims == 2, ""); -@@ -111,8 +111,30 @@ void MixedScalarCurlIntegrator::AddMultTransposePA(const Vector &x, - void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes) - { -- // Assumes tensor-product elements, with vector test and trial spaces. - Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::PAMixedVectorCurlIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::PAMixedVectorCurlIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMixedVectorCurlIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assumes tensor-product elements, with vector test and trial spaces. - const FiniteElement *trial_fel = trial_fes.GetFE(0); - const FiniteElement *test_fel = test_fes.GetFE(0); - -@@ -124,9 +146,10 @@ void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - dynamic_cast(test_fel); - MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); - -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*trial_el, *test_el, -+ T); -+ - const int dims = trial_el->GetDim(); - MFEM_VERIFY(dims == 3, ""); - -@@ -194,75 +217,90 @@ void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - - void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL && dim == 3) -+ if (DeviceCanUseCeed()) - { -- const int ndata = coeffDim == 1 ? 1 : 9; -- -- if (Device::Allows(Backend::DEVICE_MASK)) -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL && dim == 3) - { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -+ const int ndata = coeffDim == 1 ? 1 : 9; -+ -+ if (Device::Allows(Backend::DEVICE_MASK)) - { -- case 0x23: -- return internal::SmemPAHcurlL2Apply3D<2,3>( -- dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, mapsC->G, -- pa_data, x, y); -- case 0x34: -- return internal::SmemPAHcurlL2Apply3D<3,4>( -- dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, mapsC->G, -- pa_data, x, y); -- case 0x45: -- return internal::SmemPAHcurlL2Apply3D<4,5>( -- dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, mapsC->G, -- pa_data, x, y); -- case 0x56: -- return internal::SmemPAHcurlL2Apply3D<5,6>( -- dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, mapsC->G, -- pa_data, x, y); -- default: -- return internal::SmemPAHcurlL2Apply3D( -- dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, mapsC->G, -- pa_data, x, y); -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlL2Apply3D<2,3>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ case 0x34: -+ return internal::SmemPAHcurlL2Apply3D<3,4>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ case 0x45: -+ return internal::SmemPAHcurlL2Apply3D<4,5>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ case 0x56: -+ return internal::SmemPAHcurlL2Apply3D<5,6>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ default: -+ return internal::SmemPAHcurlL2Apply3D( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ } -+ } -+ else -+ { -+ internal::PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y); - } - } -+ else if (testType == mfem::FiniteElement::DIV && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ internal::PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -+ mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G, -+ pa_data, x, y); -+ } - else - { -- internal::PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B, -- mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y); -+ MFEM_ABORT("Unsupported dimension or space!"); - } - } -- else if (testType == mfem::FiniteElement::DIV && -- trialType == mfem::FiniteElement::CURL && dim == 3) -- { -- internal::PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -- mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G, -- pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension or space!"); -- } - } - - void MixedVectorCurlIntegrator::AddMultTransposePA(const Vector &x, - Vector &y) const - { -- if (testType == mfem::FiniteElement::DIV && -- trialType == mfem::FiniteElement::CURL && dim == 3) -+ if (DeviceCanUseCeed()) - { -- internal::PAHcurlHdivApply3DTranspose(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -- mapsC->B, mapsOtest->Bt, mapsCtest->Bt, -- mapsC->Gt, pa_data, x, y); -+ MFEM_ABORT("AddMultTransposePA not yet implemented with libCEED for" -+ " MixedVectorCurlIntegrator."); - } - else - { -- MFEM_ABORT("Unsupported dimension or space!"); -+ if (testType == mfem::FiniteElement::DIV && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ internal::PAHcurlHdivApply3DTranspose(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -+ mapsC->B, mapsOtest->Bt, mapsCtest->Bt, -+ mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension or space!"); -+ } - } - } - -@@ -270,8 +308,30 @@ void MixedVectorWeakCurlIntegrator::AssemblePA( - const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes) - { -- // Assumes tensor-product elements, with vector test and trial spaces. - Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::PAMixedVectorWeakCurlIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::PAMixedVectorWeakCurlIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMixedVectorWeakCurlIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assumes tensor-product elements, with vector test and trial spaces. - const FiniteElement *trial_fel = trial_fes.GetFE(0); - const FiniteElement *test_fel = test_fes.GetFE(0); - -@@ -283,9 +343,10 @@ void MixedVectorWeakCurlIntegrator::AssemblePA( - dynamic_cast(test_fel); - MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); - -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*trial_el, *test_el, -+ T); -+ - const int dims = trial_el->GetDim(); - MFEM_VERIFY(dims == 3, ""); - -@@ -349,75 +410,90 @@ void MixedVectorWeakCurlIntegrator::AssemblePA( - - void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL && dim == 3) -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else - { -- const int ndata = coeffDim == 1 ? 1 : 9; -- if (Device::Allows(Backend::DEVICE_MASK)) -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL && dim == 3) - { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -+ const int ndata = coeffDim == 1 ? 1 : 9; -+ if (Device::Allows(Backend::DEVICE_MASK)) - { -- case 0x23: -- return internal::SmemPAHcurlL2Apply3DTranspose<2,3>( -- dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x34: -- return internal::SmemPAHcurlL2Apply3DTranspose<3,4>( -- dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x45: -- return internal::SmemPAHcurlL2Apply3DTranspose<4,5>( -- dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x56: -- return internal::SmemPAHcurlL2Apply3DTranspose<5,6>( -- dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- default: -- return internal::SmemPAHcurlL2Apply3DTranspose( -- dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlL2Apply3DTranspose<2,3>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ case 0x34: -+ return internal::SmemPAHcurlL2Apply3DTranspose<3,4>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ case 0x45: -+ return internal::SmemPAHcurlL2Apply3DTranspose<4,5>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ case 0x56: -+ return internal::SmemPAHcurlL2Apply3DTranspose<5,6>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ default: -+ return internal::SmemPAHcurlL2Apply3DTranspose( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ } -+ } -+ else -+ { -+ internal::PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B, -+ mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->Gt, pa_data, x, y); - } - } -+ else if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::DIV && dim == 3) -+ { -+ internal::PAHcurlHdivApply3DTranspose(dofs1D, dofs1D, quad1D, ne, mapsO->B, -+ mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->Gt, pa_data, x, y); -+ } - else - { -- internal::PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, -- pa_data, x, y); -+ MFEM_ABORT("Unsupported dimension or space!"); - } - } -- else if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::DIV && dim == 3) -- { -- internal::PAHcurlHdivApply3DTranspose(dofs1D, dofs1D, quad1D, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->Gt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension or space!"); -- } - } - - void MixedVectorWeakCurlIntegrator::AddMultTransposePA(const Vector &x, - Vector &y) const - { -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::DIV && dim == 3) -+ if (DeviceCanUseCeed()) - { -- internal::PAHcurlHdivApply3D(dofs1D, dofs1D, quad1D, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->G, -- pa_data, x, y); -+ MFEM_ABORT("AddMultTransposePA not yet implemented with libCEED for" -+ " MixedVectorWeakCurlIntegrator."); - } - else - { -- MFEM_ABORT("Unsupported dimension or space!"); -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::DIV && dim == 3) -+ { -+ internal::PAHcurlHdivApply3D(dofs1D, dofs1D, quad1D, ne, mapsO->B, -+ mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->G, -+ pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension or space!"); -+ } - } - } - -diff --git a/fem/integ/bilininteg_mixedvecgrad_mf.cpp b/fem/integ/bilininteg_mixedvecgrad_mf.cpp -new file mode 100644 -index 000000000..10b3b9686 ---- /dev/null -+++ b/fem/integ/bilininteg_mixedvecgrad_mf.cpp -@@ -0,0 +1,174 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/mixedvecgrad/mixedvecgrad.hpp" -+ -+namespace mfem -+{ -+ -+void MixedVectorGradientIntegrator::AssembleMF( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::MFMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::MFMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::MFMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorGradientIntegrator::AssembleMF only" -+ " implemented with libCEED"); -+} -+ -+void MixedVectorGradientIntegrator::AssembleMFBoundary( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::MFMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, MQ, true); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::MFMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, DQ, true); -+ } -+ else -+ { -+ ceedOp = new ceed::MFMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, Q, true); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorGradientIntegrator::AssembleMFBoundary only" -+ " implemented with libCEED"); -+} -+ -+void MixedVectorGradientIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: MixedVectorGradientIntegrator::AddMultMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+void MixedVectorWeakDivergenceIntegrator::AssembleMF( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::MFMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::MFMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::MFMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorWeakDivergenceIntegrator::AssembleMF only" -+ " implemented with libCEED"); -+} -+ -+void MixedVectorWeakDivergenceIntegrator::AssembleMFBoundary( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::MFMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, MQ, true); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::MFMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, DQ, true); -+ } -+ else -+ { -+ ceedOp = new ceed::MFMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, Q, true); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorWeakDivergenceIntegrator::AssembleMFBoundary only" -+ " implemented with libCEED"); -+} -+ -+void MixedVectorWeakDivergenceIntegrator::AddMultMF(const Vector &x, -+ Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: MixedVectorWeakDivergenceIntegrator::AddMultMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_mixedvecgrad_pa.cpp b/fem/integ/bilininteg_mixedvecgrad_pa.cpp -index f9e6d3ee8..5acf3367b 100644 ---- a/fem/integ/bilininteg_mixedvecgrad_pa.cpp -+++ b/fem/integ/bilininteg_mixedvecgrad_pa.cpp -@@ -13,11 +13,128 @@ - #include "../bilininteg.hpp" - #include "../gridfunc.hpp" - #include "../qfunction.hpp" -+#include "../ceed/integrators/mixedvecgrad/mixedvecgrad.hpp" - #include "bilininteg_diffusion_kernels.hpp" - - namespace mfem - { - -+void MixedVectorGradientIntegrator::AssemblePA( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::PAMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::PAMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; -+ } -+ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const NodalTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*trial_el, *test_el, -+ T); -+ -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -+ -+ ne = trial_fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -+ -+ // Use the same setup functions as VectorFEMassIntegrator. -+ if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 3) -+ { -+ internal::PADiffusionSetup3D(quad1D, 1, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 2) -+ { -+ internal::PADiffusionSetup2D<2>(quad1D, 1, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void MixedVectorGradientIntegrator::AssemblePABoundary( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::PAMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, MQ, true); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::PAMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, DQ, true); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMixedVectorGradientIntegrator(*this, trial_fes, -+ test_fes, Q, true); -+ } -+ return; -+ } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorGradientIntegrator::AssemblePABoundary only" -+ " implemented with libCEED"); -+} -+ - // Apply to x corresponding to DOFs in H^1 (trial), whose gradients are - // integrated against H(curl) test functions corresponding to y. - static void PAHcurlH1Apply2D(const int D1D, -@@ -656,101 +773,133 @@ static void PAHcurlH1ApplyTranspose3D(const int D1D, - }); // end of element loop - } - --void MixedVectorGradientIntegrator::AssemblePA( -- const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) -+void MixedVectorGradientIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- // Assumes tensor-product elements, with a vector test space and H^1 trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const NodalTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -- -- ne = trial_fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType()); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -- -- // Use the same setup functions as VectorFEMassIntegrator. -- if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 3) -+ if (DeviceCanUseCeed()) - { -- internal::PADiffusionSetup3D(quad1D, 1, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 2) -- { -- internal::PADiffusionSetup2D<2>(quad1D, 1, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -+ ceedOp->AddMult(x, y); - } - else - { -- MFEM_ABORT("Unknown kernel."); -+ if (dim == 3) -+ { -+ PAHcurlH1Apply3D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlH1Apply2D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } - } - } - --void MixedVectorGradientIntegrator::AddMultPA(const Vector &x, Vector &y) const -+void MixedVectorGradientIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const - { -- if (dim == 3) -+ if (DeviceCanUseCeed()) - { -- PAHcurlH1Apply3D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ MFEM_ABORT("AddMultTransposePA not yet implemented with libCEED for" -+ " MixedVectorGradientIntegrator."); - } -- else if (dim == 2) -+ else - { -- PAHcurlH1Apply2D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ if (dim == 3) -+ { -+ PAHcurlH1ApplyTranspose3D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -+ mapsC->Bt, mapsC->Gt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlH1ApplyTranspose2D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -+ mapsC->Bt, mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } - } -- else -+} -+ -+void MixedVectorWeakDivergenceIntegrator::AssemblePA( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) - { -- MFEM_ABORT("Unsupported dimension!"); -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::PAMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, MQ); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::PAMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, DQ); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, Q); -+ } -+ return; - } -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorWeakDivergenceIntegrator::AssemblePA only" -+ " implemented with libCEED"); - } - --void MixedVectorGradientIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const -+void MixedVectorWeakDivergenceIntegrator::AssemblePABoundary( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) - { -- if (dim == 3) -+ Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) - { -- PAHcurlH1ApplyTranspose3D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -- mapsC->Bt, mapsC->Gt, pa_data, x, y); -+ delete ceedOp; -+ if (MQ) -+ { -+ ceedOp = new ceed::PAMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, MQ, true); -+ } -+ else if (DQ) -+ { -+ ceedOp = new ceed::PAMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, DQ, true); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMixedVectorWeakDivergenceIntegrator(*this, trial_fes, -+ test_fes, Q, true); -+ } -+ return; - } -- else if (dim == 2) -+ -+ // Assuming the same element type -+ MFEM_ABORT("Error: MixedVectorWeakDivergenceIntegrator::AssemblePABoundary only" -+ " implemented with libCEED"); -+} -+ -+void MixedVectorWeakDivergenceIntegrator::AddMultPA(const Vector &x, -+ Vector &y) const -+{ -+ if (DeviceCanUseCeed()) - { -- PAHcurlH1ApplyTranspose2D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -- mapsC->Bt, mapsC->Gt, pa_data, x, y); -+ ceedOp->AddMult(x, y); - } - else - { -- MFEM_ABORT("Unsupported dimension!"); -+ MFEM_ABORT("Error: MixedVectorWeakDivergenceIntegrator::AddMultMF only" -+ " implemented with libCEED"); - } - } - -diff --git a/fem/integ/bilininteg_vecdiffusion_mf.cpp b/fem/integ/bilininteg_vecdiffusion_mf.cpp -index 7cad61496..56139d2ef 100644 ---- a/fem/integ/bilininteg_vecdiffusion_mf.cpp -+++ b/fem/integ/bilininteg_vecdiffusion_mf.cpp -@@ -19,45 +19,45 @@ namespace mfem - - void VectorDiffusionIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- // Assumes tensor-product elements - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- const IntegrationRule *ir -- = IntRule ? IntRule : &DiffusionIntegrator::GetRule(el, el); - if (DeviceCanUseCeed()) - { -- delete ceedOp; - MFEM_VERIFY(!VQ && !MQ, -- "Only scalar coefficient supported for DiffusionIntegrator" -- " with libCEED"); -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedMFDiffusionIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::MFDiffusionIntegrator(fes, *ir, Q); -- } -+ "Only scalar coefficient is supported for matrix-free assembly for VectorDiffusionIntegrator"); -+ delete ceedOp; -+ ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, Q); - return; - } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - MFEM_ABORT("Error: VectorDiffusionIntegrator::AssembleMF only implemented" - " with libCEED"); - } - --void VectorDiffusionIntegrator::AddMultMF(const Vector &x, Vector &y) const -+void VectorDiffusionIntegrator::AssembleMFBoundary( -+ const FiniteElementSpace &fes) - { -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } - if (DeviceCanUseCeed()) - { -- ceedOp->AddMult(x, y); -- } -- else -- { -- MFEM_ABORT("Error: VectorDiffusionIntegrator::AddMultMF only implemented" -- " with libCEED"); -+ MFEM_VERIFY(!VQ && !MQ, -+ "Only scalar coefficient is supported for matrix-free assembly for VectorDiffusionIntegrator"); -+ delete ceedOp; -+ ceedOp = new ceed::MFDiffusionIntegrator(*this, fes, Q, true); -+ return; - } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorDiffusionIntegrator::AssembleMFBoundary only implemented" -+ " with libCEED"); - } - - void VectorDiffusionIntegrator::AssembleDiagonalMF(Vector &diag) -@@ -73,4 +73,17 @@ void VectorDiffusionIntegrator::AssembleDiagonalMF(Vector &diag) - } - } - -+void VectorDiffusionIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: VectorDiffusionIntegrator::AddMultMF only implemented" -+ " with libCEED"); -+ } -+} -+ - } // namespace mfem -diff --git a/fem/integ/bilininteg_vecdiffusion_pa.cpp b/fem/integ/bilininteg_vecdiffusion_pa.cpp -index 84e4d5b2a..3fe58e1c1 100644 ---- a/fem/integ/bilininteg_vecdiffusion_pa.cpp -+++ b/fem/integ/bilininteg_vecdiffusion_pa.cpp -@@ -114,26 +114,21 @@ static void PAVectorDiffusionSetup3D(const int Q1D, - - void VectorDiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - { -- // Assumes tensor-product elements - Mesh *mesh = fes.GetMesh(); -- const FiniteElement &el = *fes.GetFE(0); -- const IntegrationRule *ir -- = IntRule ? IntRule : &DiffusionIntegrator::GetRule(el, el); -+ if (mesh->GetNE() == 0) { return; } - if (DeviceCanUseCeed()) - { -+ MFEM_VERIFY(!VQ && !MQ, -+ "Only scalar coefficient is supported for partial assembly for VectorDiffusionIntegrator"); - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPADiffusionIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PADiffusionIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::PADiffusionIntegrator(*this, fes, Q); - return; - } -+ -+ // Assumes tensor-product elements -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - const int dims = el.GetDim(); - const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 - const int nq = ir->GetNPoints(); -@@ -209,6 +204,28 @@ void VectorDiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - } - -+void VectorDiffusionIntegrator::AssemblePABoundary( -+ const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ MFEM_VERIFY(!VQ && !MQ, -+ "Only scalar coefficient is supported for partial assembly for VectorDiffusionIntegrator"); -+ delete ceedOp; -+ ceedOp = new ceed::PADiffusionIntegrator(*this, fes, Q, true); -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorDiffusionIntegrator::AssemblePABoundary only implemented" -+ " with libCEED"); -+} -+ - template - static void PAVectorDiffusionDiagonal2D(const int NE, - const Array &b, -diff --git a/fem/integ/bilininteg_vecdiv_pa.cpp b/fem/integ/bilininteg_vecdiv_pa.cpp -index 63f7a3308..cf58df9ea 100644 ---- a/fem/integ/bilininteg_vecdiv_pa.cpp -+++ b/fem/integ/bilininteg_vecdiv_pa.cpp -@@ -105,9 +105,8 @@ void VectorDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - Mesh *mesh = trial_fes.GetMesh(); - const FiniteElement &trial_fe = *trial_fes.GetFE(0); - const FiniteElement &test_fe = *test_fes.GetFE(0); -- ElementTransformation *trans = mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, -- *trans); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, T); - const int dims = trial_fe.GetDim(); - const int dimsToStore = dims * dims; - nq = ir->GetNPoints(); -diff --git a/fem/integ/bilininteg_vecmass_mf.cpp b/fem/integ/bilininteg_vecmass_mf.cpp -index cc2eb0174..59d7209db 100644 ---- a/fem/integ/bilininteg_vecmass_mf.cpp -+++ b/fem/integ/bilininteg_vecmass_mf.cpp -@@ -19,43 +19,40 @@ namespace mfem - - void VectorMassIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation *T = mesh->GetElementTransformation(0); -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(el, el, *T); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedMFMassIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::MFMassIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::MFMassIntegrator(*this, fes, Q); - return; - } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - MFEM_ABORT("Error: VectorMassIntegrator::AssembleMF only implemented with" - " libCEED"); - } - --void VectorMassIntegrator::AddMultMF(const Vector &x, Vector &y) const -+void VectorMassIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) - { -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } - if (DeviceCanUseCeed()) - { -- ceedOp->AddMult(x, y); -- } -- else -- { -- MFEM_ABORT("Error: VectorMassIntegrator::AddMultMF only implemented with" -- " libCEED"); -+ delete ceedOp; -+ ceedOp = new ceed::MFMassIntegrator(*this, fes, Q, true); -+ return; - } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorMassIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); - } - - void VectorMassIntegrator::AssembleDiagonalMF(Vector &diag) -@@ -71,4 +68,17 @@ void VectorMassIntegrator::AssembleDiagonalMF(Vector &diag) - } - } - -+void VectorMassIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: VectorMassIntegrator::AddMultMF only implemented with" -+ " libCEED"); -+ } -+} -+ - } // namespace mfem -diff --git a/fem/integ/bilininteg_vecmass_pa.cpp b/fem/integ/bilininteg_vecmass_pa.cpp -index b1c20b4c4..3be9e2283 100644 ---- a/fem/integ/bilininteg_vecmass_pa.cpp -+++ b/fem/integ/bilininteg_vecmass_pa.cpp -@@ -19,33 +19,23 @@ namespace mfem - - void VectorMassIntegrator::AssemblePA(const FiniteElementSpace &fes) - { -- // Assuming the same element type - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation *T = mesh->GetElementTransformation(0); -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(el, el, *T); - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPAMassIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::PAMassIntegrator(*this, fes, Q); - return; - } -+ -+ // Assuming the same element type -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - dim = mesh->Dimension(); - ne = fes.GetMesh()->GetNE(); - nq = ir->GetNPoints(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::COORDINATES | -- GeometricFactors::JACOBIANS); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); - maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); - dofs1D = maps->ndof; - quad1D = maps->nqpt; -@@ -106,6 +96,25 @@ void VectorMassIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - } - -+void VectorMassIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ ceedOp = new ceed::PAMassIntegrator(*this, fes, Q, true); -+ return; -+ } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorMassIntegrator::AssemblePABoundary only implemented with" -+ " libCEED"); -+} -+ - template - static void PAVectorMassAssembleDiagonal2D(const int NE, - const Array &B_, -diff --git a/fem/integ/bilininteg_vectorfediv_pa.cpp b/fem/integ/bilininteg_vectorfediv_pa.cpp -index 2915a253b..4a2c2a055 100644 ---- a/fem/integ/bilininteg_vectorfediv_pa.cpp -+++ b/fem/integ/bilininteg_vectorfediv_pa.cpp -@@ -38,9 +38,9 @@ VectorFEDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - dynamic_cast(test_fel); - MFEM_VERIFY(test_el != NULL, "Only NodalTensorFiniteElement is supported!"); - -- const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule( -- *trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*trial_el, *test_el, -+ T); - - const int dims = trial_el->GetDim(); - MFEM_VERIFY(dims == 2 || dims == 3, ""); -diff --git a/fem/integ/bilininteg_vectorfemass_mf.cpp b/fem/integ/bilininteg_vectorfemass_mf.cpp -new file mode 100644 -index 000000000..91d2b6b5a ---- /dev/null -+++ b/fem/integ/bilininteg_vectorfemass_mf.cpp -@@ -0,0 +1,89 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/vecfemass/vecfemass.hpp" -+ -+using namespace std; -+ -+namespace mfem -+{ -+ -+void VectorFEMassIntegrator::AssembleMF(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::MFVectorFEMassIntegrator(*this, fes, MQ); } -+ else if (DQ) { ceedOp = new ceed::MFVectorFEMassIntegrator(*this, fes, DQ); } -+ else { ceedOp = new ceed::MFVectorFEMassIntegrator(*this, fes, Q); } -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorFEMassIntegrator::AssembleMF only implemented with" -+ " libCEED"); -+} -+ -+void VectorFEMassIntegrator::AssembleMFBoundary(const FiniteElementSpace &fes) -+{ -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::MFVectorFEMassIntegrator(*this, fes, MQ, true); } -+ else if (DQ) { ceedOp = new ceed::MFVectorFEMassIntegrator(*this, fes, DQ, true); } -+ else { ceedOp = new ceed::MFVectorFEMassIntegrator(*this, fes, Q, true); } -+ return; -+ } -+ -+ // Assumes tensor-product elements -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorFEMassIntegrator::AssembleMFBoundary only implemented with" -+ " libCEED"); -+} -+ -+void VectorFEMassIntegrator::AssembleDiagonalMF(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ MFEM_ABORT("Error: VectorFEMassIntegrator::AssembleDiagonalMF only" -+ " implemented with libCEED"); -+ } -+} -+ -+void VectorFEMassIntegrator::AddMultMF(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Error: VectorFEMassIntegrator::AddMultMF only implemented with" -+ " libCEED"); -+ } -+} -+ -+} -diff --git a/fem/integ/bilininteg_vectorfemass_pa.cpp b/fem/integ/bilininteg_vectorfemass_pa.cpp -index c07e9f816..a49a9daa7 100644 ---- a/fem/integ/bilininteg_vectorfemass_pa.cpp -+++ b/fem/integ/bilininteg_vectorfemass_pa.cpp -@@ -12,6 +12,7 @@ - #include "../bilininteg.hpp" - #include "../gridfunc.hpp" - #include "../qfunction.hpp" -+#include "../ceed/integrators/vecfemass/vecfemass.hpp" - #include "bilininteg_diffusion_kernels.hpp" - #include "bilininteg_hcurl_kernels.hpp" - #include "bilininteg_hdiv_kernels.hpp" -@@ -23,30 +24,37 @@ namespace mfem - void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes) - { -- // Assumes tensor-product elements - Mesh *mesh = trial_fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ if (DeviceCanUseCeed()) -+ { -+ MFEM_VERIFY(&trial_fes == &test_fes, -+ "VectorFEMassIntegrator with mixed FE spaces is not supported by libCEED!"); -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::PAVectorFEMassIntegrator(*this, trial_fes, MQ); } -+ else if (DQ) { ceedOp = new ceed::PAVectorFEMassIntegrator(*this, trial_fes, DQ); } -+ else { ceedOp = new ceed::PAVectorFEMassIntegrator(*this, trial_fes, Q); } -+ return; -+ } - -+ // Assumes tensor-product elements - const FiniteElement *trial_fel = trial_fes.GetFE(0); - const VectorTensorFiniteElement *trial_el = - dynamic_cast(trial_fel); - MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- - const FiniteElement *test_fel = test_fes.GetFE(0); - const VectorTensorFiniteElement *test_el = - dynamic_cast(test_fel); - MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*trial_el, *test_el, -+ T); - const int dims = trial_el->GetDim(); - MFEM_VERIFY(dims == 2 || dims == 3, ""); -- - const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 - nq = ir->GetNPoints(); - dim = mesh->Dimension(); - MFEM_VERIFY(dim == 2 || dim == 3, ""); -- - ne = trial_fes.GetNE(); - MFEM_VERIFY(ne == test_fes.GetNE(), - "Different meshes for test and trial spaces"); -@@ -55,13 +63,10 @@ void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); - dofs1D = mapsC->ndof; - quad1D = mapsC->nqpt; -- - mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); - mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); - dofs1Dtest = mapsCtest->ndof; -- - MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- - trial_fetype = trial_el->GetDerivType(); - test_fetype = test_el->GetDerivType(); - -@@ -72,6 +77,7 @@ void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - - QuadratureSpace qs(*mesh, *ir); - CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC); -+ - if (Q) { coeff.Project(*Q); } - else if (MQ) { coeff.ProjectTranspose(*MQ); } - else if (DQ) { coeff.Project(*DQ); } -@@ -138,172 +144,206 @@ void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - } - } - --void VectorFEMassIntegrator::AssembleDiagonalPA(Vector& diag) -+void VectorFEMassIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) - { -- if (dim == 3) -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ if (DeviceCanUseCeed()) - { -- if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -+ delete ceedOp; -+ if (MQ) { ceedOp = new ceed::PAVectorFEMassIntegrator(*this, fes, MQ, true); } -+ else if (DQ) { ceedOp = new ceed::PAVectorFEMassIntegrator(*this, fes, DQ, true); } -+ else { ceedOp = new ceed::PAVectorFEMassIntegrator(*this, fes, Q, true); } -+ return; -+ } -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetBE(0); -+ // ElementTransformation &T = *mesh->GetBdrElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorFEMassIntegrator::AssemblePABoundary only implemented with" -+ " libCEED"); -+} -+ -+void VectorFEMassIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ if (dim == 3) - { -- if (Device::Allows(Backend::DEVICE_MASK)) -+ if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) - { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<2,3>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ case 0x34: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<3,4>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ case 0x45: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<4,5>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ case 0x56: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<5,6>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ default: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ } -+ else - { -- case 0x23: -- return internal::SmemPAHcurlMassAssembleDiagonal3D<2,3>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- case 0x34: -- return internal::SmemPAHcurlMassAssembleDiagonal3D<3,4>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- case 0x45: -- return internal::SmemPAHcurlMassAssembleDiagonal3D<4,5>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- case 0x56: -- return internal::SmemPAHcurlMassAssembleDiagonal3D<5,6>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- default: -- return internal::SmemPAHcurlMassAssembleDiagonal3D( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -+ internal::PAHcurlMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); - } - } -+ else if (trial_fetype == mfem::FiniteElement::DIV && -+ test_fetype == trial_fetype) -+ { -+ internal::PAHdivMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } - else - { -- internal::PAHcurlMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -+ MFEM_ABORT("Unknown kernel."); - } - } -- else if (trial_fetype == mfem::FiniteElement::DIV && -- test_fetype == trial_fetype) -+ else // 2D - { -- internal::PAHdivMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -- } -- else // 2D -- { -- if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -- { -- internal::PAHcurlMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else if (trial_fetype == mfem::FiniteElement::DIV && -- test_fetype == trial_fetype) -- { -- internal::PAHdivMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -+ if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -+ { -+ internal::PAHcurlMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ else if (trial_fetype == mfem::FiniteElement::DIV && -+ test_fetype == trial_fetype) -+ { -+ internal::PAHdivMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } - } - } - } - - void VectorFEMassIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -- const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -- const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -- const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -- -- if (dim == 3) -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else - { -- if (trial_curl && test_curl) -+ const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -+ const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -+ const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -+ const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -+ -+ if (dim == 3) - { -- if (Device::Allows(Backend::DEVICE_MASK)) -+ if (trial_curl && test_curl) - { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlMassApply3D<2,3>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ case 0x34: -+ return internal::SmemPAHcurlMassApply3D<3,4>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ case 0x45: -+ return internal::SmemPAHcurlMassApply3D<4,5>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ case 0x56: -+ return internal::SmemPAHcurlMassApply3D<5,6>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ default: -+ return internal::SmemPAHcurlMassApply3D( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ } -+ } -+ else - { -- case 0x23: -- return internal::SmemPAHcurlMassApply3D<2,3>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- case 0x34: -- return internal::SmemPAHcurlMassApply3D<3,4>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- case 0x45: -- return internal::SmemPAHcurlMassApply3D<4,5>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- case 0x56: -- return internal::SmemPAHcurlMassApply3D<5,6>( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- default: -- return internal::SmemPAHcurlMassApply3D( -- dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -+ internal::PAHcurlMassApply3D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); - } - } -+ else if (trial_div && test_div) -+ { -+ internal::PAHdivMassApply(3, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else if (trial_curl && test_div) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ true, false, mapsO->B, mapsC->B, mapsOtest->Bt, -+ mapsCtest->Bt, pa_data, x, y); -+ } -+ else if (trial_div && test_curl) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ false, false, mapsO->B, mapsC->B, mapsOtest->Bt, -+ mapsCtest->Bt, pa_data, x, y); -+ } - else - { -- internal::PAHcurlMassApply3D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ MFEM_ABORT("Unknown kernel."); - } - } -- else if (trial_div && test_div) -- { -- internal::PAHdivMassApply(3, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -- } -- else if (trial_curl && test_div) -- { -- const bool scalarCoeff = !(DQ || MQ); -- internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- true, false, mapsO->B, mapsC->B, mapsOtest->Bt, -- mapsCtest->Bt, pa_data, x, y); -- } -- else if (trial_div && test_curl) -- { -- const bool scalarCoeff = !(DQ || MQ); -- internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- false, false, mapsO->B, mapsC->B, mapsOtest->Bt, -- mapsCtest->Bt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -- } -- else // 2D -- { -- if (trial_curl && test_curl) -+ else // 2D - { -- internal::PAHcurlMassApply2D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ if (trial_curl && test_curl) -+ { -+ internal::PAHcurlMassApply2D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else if (trial_div && test_div) -+ { -+ internal::PAHdivMassApply(2, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, - mapsO->Bt, mapsC->Bt, pa_data, x, y); -- } -- else if (trial_div && test_div) -- { -- internal::PAHdivMassApply(2, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -- mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- } -- else if ((trial_curl && test_div) || (trial_div && test_curl)) -- { -- const bool scalarCoeff = !(DQ || MQ); -- internal::PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- trial_curl, false, mapsO->B, mapsC->B, -- mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -+ } -+ else if ((trial_curl && test_div) || (trial_div && test_curl)) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ trial_curl, false, mapsO->B, mapsC->B, -+ mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } - } - } - } -@@ -311,35 +351,43 @@ void VectorFEMassIntegrator::AddMultPA(const Vector &x, Vector &y) const - void VectorFEMassIntegrator::AddMultTransposePA(const Vector &x, - Vector &y) const - { -- const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -- const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -- const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -- const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -- -- bool symmetricSpaces = true; -- if (dim == 3 && ((trial_div && test_curl) || (trial_curl && test_div))) -- { -- const bool scalarCoeff = !(DQ || MQ); -- internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- trial_div, true, mapsO->B, mapsC->B, -- mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -- symmetricSpaces = false; -- } -- else if (dim == 2 && ((trial_curl && test_div) || (trial_div && test_curl))) -+ if (DeviceCanUseCeed()) - { -- const bool scalarCoeff = !(DQ || MQ); -- internal::PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- !trial_curl, true, mapsO->B, mapsC->B, -- mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -- symmetricSpaces = false; -+ MFEM_ABORT("AddMultTransposePA not yet implemented with libCEED for" -+ " VectorFEMassIntegrator."); - } -- if (symmetricSpaces) -+ else - { -- if (MQ && dynamic_cast(MQ) == NULL) -+ const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -+ const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -+ const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -+ const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -+ -+ bool symmetricSpaces = true; -+ if (dim == 3 && ((trial_div && test_curl) || (trial_curl && test_div))) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ trial_div, true, mapsO->B, mapsC->B, -+ mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -+ symmetricSpaces = false; -+ } -+ else if (dim == 2 && ((trial_curl && test_div) || (trial_div && test_curl))) - { -- MFEM_ABORT("VectorFEMassIntegrator transpose not implemented for asymmetric MatrixCoefficient"); -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ !trial_curl, true, mapsO->B, mapsC->B, -+ mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -+ symmetricSpaces = false; -+ } -+ if (symmetricSpaces) -+ { -+ if (MQ && dynamic_cast(MQ) == NULL) -+ { -+ MFEM_ABORT("VectorFEMassIntegrator transpose not implemented for asymmetric MatrixCoefficient"); -+ } -+ AddMultPA(x, y); - } -- AddMultPA(x, y); - } - } - -diff --git a/fem/integ/lininteg_boundary.cpp b/fem/integ/lininteg_boundary.cpp -index 9b785335c..92f1ff8a5 100644 ---- a/fem/integ/lininteg_boundary.cpp -+++ b/fem/integ/lininteg_boundary.cpp -@@ -214,30 +214,28 @@ void BoundaryLFIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -- const FiniteElement &fe = *fes.GetBE(0); -- const int qorder = oa * fe.GetOrder() + ob; -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule &ir = IntRule ? *IntRule : IntRules.Get(gtype, qorder); - Mesh &mesh = *fes.GetMesh(); -+ const FiniteElement &fe = *fes.GetBE(0); -+ ElementTransformation &T = *mesh.GetBdrElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T, oa, ob); - -- FaceQuadratureSpace qs(mesh, ir, FaceType::Boundary); -+ FaceQuadratureSpace qs(mesh, *ir, FaceType::Boundary); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -- BLFEvalAssemble(fes, ir, markers, coeff, false, b); -+ BLFEvalAssemble(fes, *ir, markers, coeff, false, b); - } - - void BoundaryNormalLFIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -- const FiniteElement &fe = *fes.GetBE(0); -- const int qorder = oa * fe.GetOrder() + ob; -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule &ir = IntRule ? *IntRule : IntRules.Get(gtype, qorder); - Mesh &mesh = *fes.GetMesh(); -+ const FiniteElement &fe = *fes.GetBE(0); -+ ElementTransformation &T = *mesh.GetBdrElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T, oa, ob); - -- FaceQuadratureSpace qs(mesh, ir, FaceType::Boundary); -+ FaceQuadratureSpace qs(mesh, *ir, FaceType::Boundary); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -- BLFEvalAssemble(fes, ir, markers, coeff, true, b); -+ BLFEvalAssemble(fes, *ir, markers, coeff, true, b); - } - - } // namespace mfem -diff --git a/fem/integ/lininteg_boundary_flux.cpp b/fem/integ/lininteg_boundary_flux.cpp -index b9f047817..a6e422b94 100644 ---- a/fem/integ/lininteg_boundary_flux.cpp -+++ b/fem/integ/lininteg_boundary_flux.cpp -@@ -166,15 +166,14 @@ void VectorFEBoundaryFluxLFIntegrator::AssembleDevice( - const Array &markers, - Vector &b) - { -- const FiniteElement &fe = *fes.GetBE(0); -- const int qorder = oa * fe.GetOrder() + ob; -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule &ir = IntRule ? *IntRule : IntRules.Get(gtype, qorder); - Mesh &mesh = *fes.GetMesh(); -+ const FiniteElement &fe = *fes.GetBE(0); -+ ElementTransformation &T = *mesh.GetBdrElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T, oa, ob); - -- FaceQuadratureSpace qs(mesh, ir, FaceType::Boundary); -+ FaceQuadratureSpace qs(mesh, *ir, FaceType::Boundary); - CoefficientVector coeff(F, qs, CoefficientStorage::COMPRESSED); -- BFLFEvalAssemble(fes, ir, markers, coeff, b); -+ BFLFEvalAssemble(fes, *ir, markers, coeff, b); - } - - } // namespace mfem -diff --git a/fem/integ/lininteg_domain.cpp b/fem/integ/lininteg_domain.cpp -index 6ff7b090d..438da4df2 100644 ---- a/fem/integ/lininteg_domain.cpp -+++ b/fem/integ/lininteg_domain.cpp -@@ -242,10 +242,10 @@ void DomainLFIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -+ Mesh &mesh = *fes.GetMesh(); - const FiniteElement &fe = *fes.GetFE(0); -- const int qorder = oa * fe.GetOrder() + ob; -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule *ir = IntRule ? IntRule : &IntRules.Get(gtype, qorder); -+ ElementTransformation &T = *mesh.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T, oa, ob); - - QuadratureSpace qs(*fes.GetMesh(), *ir); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -@@ -256,10 +256,10 @@ void VectorDomainLFIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -+ Mesh &mesh = *fes.GetMesh(); - const FiniteElement &fe = *fes.GetFE(0); -- const int qorder = 2 * fe.GetOrder(); -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule *ir = IntRule ? IntRule : &IntRules.Get(gtype, qorder); -+ ElementTransformation &T = *mesh.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T); - - QuadratureSpace qs(*fes.GetMesh(), *ir); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -diff --git a/fem/integ/lininteg_domain_grad.cpp b/fem/integ/lininteg_domain_grad.cpp -index 5cca01a1d..735ea56c4 100644 ---- a/fem/integ/lininteg_domain_grad.cpp -+++ b/fem/integ/lininteg_domain_grad.cpp -@@ -321,11 +321,10 @@ void DomainLFGradIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -- -+ Mesh &mesh = *fes.GetMesh(); - const FiniteElement &fe = *fes.GetFE(0); -- const int qorder = 2 * fe.GetOrder(); -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule *ir = IntRule ? IntRule : &IntRules.Get(gtype, qorder); -+ ElementTransformation &T = *mesh.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T); - - QuadratureSpace qs(*fes.GetMesh(), *ir); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -@@ -336,10 +335,10 @@ void VectorDomainLFGradIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -+ Mesh &mesh = *fes.GetMesh(); - const FiniteElement &fe = *fes.GetFE(0); -- const int qorder = 2 * fe.GetOrder(); -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule *ir = IntRule ? IntRule : &IntRules.Get(gtype, qorder); -+ ElementTransformation &T = *mesh.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T); - - QuadratureSpace qs(*fes.GetMesh(), *ir); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -diff --git a/fem/integ/lininteg_domain_vectorfe.cpp b/fem/integ/lininteg_domain_vectorfe.cpp -index 16d9e866c..0765d58bc 100644 ---- a/fem/integ/lininteg_domain_vectorfe.cpp -+++ b/fem/integ/lininteg_domain_vectorfe.cpp -@@ -325,10 +325,10 @@ void VectorFEDomainLFIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) - { -+ Mesh &mesh = *fes.GetMesh(); - const FiniteElement &fe = *fes.GetFE(0); -- const int qorder = 2 * fe.GetOrder(); -- const Geometry::Type gtype = fe.GetGeomType(); -- const IntegrationRule *ir = IntRule ? IntRule : &IntRules.Get(gtype, qorder); -+ ElementTransformation &T = *mesh.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(fe, T); - - QuadratureSpace qs(*fes.GetMesh(), *ir); - CoefficientVector coeff(QF, qs, CoefficientStorage::COMPRESSED); -diff --git a/fem/integ/nonlininteg_vecconvection_mf.cpp b/fem/integ/nonlininteg_vecconvection_mf.cpp -index 4005d6836..370fd7991 100644 ---- a/fem/integ/nonlininteg_vecconvection_mf.cpp -+++ b/fem/integ/nonlininteg_vecconvection_mf.cpp -@@ -19,27 +19,22 @@ namespace mfem - void VectorConvectionNLFIntegrator::AssembleMF(const FiniteElementSpace &fes) - { - MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES, -- "PA Only supports Ordering::byNODES!"); -+ "MF only supports Ordering::byNODES!"); - Mesh *mesh = fes.GetMesh(); -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation &T = *mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ if (mesh->GetNE() == 0) { return; } - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedMFVectorConvectionNLIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::MFVectorConvectionNLFIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::MFVectorConvectionNLIntegrator(*this, fes, Q); - return; - } -- MFEM_ABORT("Not yet implemented."); -+ -+ // Assuming the same element type -+ // const FiniteElement &el = *fes.GetFE(0); -+ // ElementTransformation &T = *mesh->GetElementTransformation(0); -+ // const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ MFEM_ABORT("Error: VectorConvectionNLFIntegrator::AssembleMF only" -+ " implemented with libCEED"); - } - - void VectorConvectionNLFIntegrator::AddMultMF(const Vector &x, Vector &y) const -diff --git a/fem/integ/nonlininteg_vecconvection_pa.cpp b/fem/integ/nonlininteg_vecconvection_pa.cpp -index 7bed31800..d8ca8f899 100644 ---- a/fem/integ/nonlininteg_vecconvection_pa.cpp -+++ b/fem/integ/nonlininteg_vecconvection_pa.cpp -@@ -21,24 +21,18 @@ void VectorConvectionNLFIntegrator::AssemblePA(const FiniteElementSpace &fes) - MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES, - "PA Only supports Ordering::byNODES!"); - Mesh *mesh = fes.GetMesh(); -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation &T = *mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ if (mesh->GetNE() == 0) { return; } - if (DeviceCanUseCeed()) - { - delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPAVectorConvectionNLIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PAVectorConvectionNLFIntegrator(fes, *ir, Q); -- } -+ ceedOp = new ceed::PAVectorConvectionNLIntegrator(*this, fes, Q); - return; - } -+ -+ // Assumes tensor-product elements -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &T = *mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); - dim = mesh->Dimension(); - ne = fes.GetMesh()->GetNE(); - nq = ir->GetNPoints(); -diff --git a/fem/lininteg.cpp b/fem/lininteg.cpp -index c9b6b4699..25e7c2823 100644 ---- a/fem/lininteg.cpp -+++ b/fem/lininteg.cpp -@@ -15,6 +15,22 @@ - namespace mfem - { - -+const IntegrationRule &LinearFormIntegrator::GetRule( -+ const FiniteElement &el, -+ ElementTransformation &Tr, -+ int oa, int ob) const -+{ -+ return IntRules.Get(el.GetGeomType(), oa * el.GetOrder() + ob); -+} -+ -+const IntegrationRule &LinearFormIntegrator::GetRule( -+ const FiniteElement &el, -+ FaceElementTransformations &Tr, -+ int oa, int ob) const -+{ -+ return IntRules.Get(Tr.GetGeometryType(), oa * el.GetOrder() + ob); -+} -+ - void LinearFormIntegrator::AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b) -@@ -41,17 +57,11 @@ void DomainLFIntegrator::AssembleRHSElementVect(const FiniteElement &el, - { - int dof = el.GetDof(); - -- shape.SetSize(dof); // vector of size dof -+ shape.SetSize(dof); // vector of size dof - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // ir = &IntRules.Get(el.GetGeomType(), -- // oa * el.GetOrder() + ob + Tr.OrderW()); -- ir = &IntRules.Get(el.GetGeomType(), oa * el.GetOrder() + ob); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -86,12 +96,7 @@ void DomainLFGradIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2 * el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -128,16 +133,11 @@ void BoundaryLFIntegrator::AssembleRHSElementVect( - { - int dof = el.GetDof(); - -- shape.SetSize(dof); // vector of size dof -+ shape.SetSize(dof); // vector of size dof - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = oa * el.GetOrder() + ob; // <---------- -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -157,16 +157,11 @@ void BoundaryLFIntegrator::AssembleRHSElementVect( - { - int dof = el.GetDof(); - -- shape.SetSize(dof); // vector of size dof -+ shape.SetSize(dof); // vector of size dof - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = oa * el.GetOrder() + ob; // <------ user control -- ir = &IntRules.Get(Tr.FaceGeom, intorder); // of integration order -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -197,12 +192,7 @@ void BoundaryNormalLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = oa * el.GetOrder() + ob; // <---------- -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -241,12 +231,7 @@ void BoundaryTangentialLFIntegrator::AssembleRHSElementVect( - mfem_error("These methods make sense only in 2D problems."); - } - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = oa * el.GetOrder() + ob; // <---------- -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -273,17 +258,12 @@ void VectorDomainLFIntegrator::AssembleRHSElementVect( - - double val,cf; - -- shape.SetSize(dof); // vector of size dof -+ shape.SetSize(dof); // vector of size dof - - elvect.SetSize(dof * vdim); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2*el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -337,12 +317,7 @@ void VectorDomainLFGradIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof*(vdim/sdim)); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2 * el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - Vector pelvect(dof); - Vector part_x(dim); -@@ -384,12 +359,7 @@ void VectorBoundaryLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof * vdim); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2*el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -419,12 +389,7 @@ void VectorBoundaryLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof * vdim); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2*el.GetOrder(); -- ir = &IntRules.Get(Tr.GetGeometryType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -463,13 +428,8 @@ void VectorFEDomainLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // int intorder = 2*el.GetOrder() - 1; // ok for O(h^{k+1}) conv. in L2 -- int intorder = 2*el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ // Previously: 2 * el.GetOrder() - 1; // ok for O(h^{k+1}) conv. in L2 -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -512,12 +472,7 @@ void VectorFEDomainLFCurlIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2*el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -554,16 +509,11 @@ void VectorFEDomainLFDivIntegrator::AssembleRHSElementVect( - { - int dof = el.GetDof(); - -- divshape.SetSize(dof); // vector of size dof -+ divshape.SetSize(dof); // vector of size dof - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = 2 * el.GetOrder(); -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -596,11 +546,7 @@ void VectorBoundaryFluxLFIntegrator::AssembleRHSElementVect( - nor.SetSize (dim); - elvect.SetSize (dim*dof); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- ir = &IntRules.Get(el.GetGeomType(), el.GetOrder() + 1); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, 1, 1); - - elvect = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) -@@ -618,7 +564,6 @@ void VectorBoundaryFluxLFIntegrator::AssembleRHSElementVect( - } - } - -- - void VectorFEBoundaryFluxLFIntegrator::AssembleRHSElementVect( - const FiniteElement &el, ElementTransformation &Tr, Vector &elvect) - { -@@ -628,12 +573,7 @@ void VectorFEBoundaryFluxLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = oa * el.GetOrder() + ob; // <---------- -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -667,12 +607,7 @@ void VectorFEBoundaryTangentLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(dof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- int intorder = oa * el.GetOrder() + ob; // <---------- -- ir = &IntRules.Get(el.GetGeomType(), intorder); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr, oa, ob); - - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -707,6 +642,20 @@ void VectorFEBoundaryTangentLFIntegrator::AssembleRHSElementVect( - } - } - -+const IntegrationRule &BoundaryFlowIntegrator::GetRule( -+ const FiniteElement &el, -+ FaceElementTransformations &Tr, -+ int oa, int ob) const -+{ -+ // Assuming order(u) == order(mesh) -+ int order = 2 * el.GetOrder() + Tr.Elem1->OrderW(); -+ if (el.Space() == FunctionSpace::Pk) -+ { -+ order++; -+ } -+ return IntRules.Get(Tr.GetGeometryType(), order); -+} -+ - void BoundaryFlowIntegrator::AssembleRHSElementVect( - const FiniteElement &el, ElementTransformation &Tr, Vector &elvect) - { -@@ -719,24 +668,14 @@ void BoundaryFlowIntegrator::AssembleRHSElementVect( - void BoundaryFlowIntegrator::AssembleRHSElementVect( - const FiniteElement &el, FaceElementTransformations &Tr, Vector &elvect) - { -- int dim, ndof, order; -+ int dim, ndof; - double un, w, vu_data[3], nor_data[3]; - - dim = el.GetDim(); - ndof = el.GetDof(); - Vector vu(vu_data, dim), nor(nor_data, dim); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // Assuming order(u)==order(mesh) -- order = Tr.Elem1->OrderW() + 2*el.GetOrder(); -- if (el.Space() == FunctionSpace::Pk) -- { -- order++; -- } -- ir = &IntRules.Get(Tr.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - shape.SetSize(ndof); - elvect.SetSize(ndof); -@@ -805,13 +744,7 @@ void DGDirichletLFIntegrator::AssembleRHSElementVect( - elvect.SetSize(ndof); - elvect = 0.0; - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- // a simple choice for the integration order; is this OK? -- int order = 2*el.GetOrder(); -- ir = &IntRules.Get(Tr.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int p = 0; p < ir->GetNPoints(); p++) - { -@@ -902,12 +835,7 @@ void DGElasticityDirichletLFIntegrator::AssembleRHSElementVect( - dshape_du.SetSize(ndofs); - u_dir.SetSize(dim); - -- const IntegrationRule *ir = IntRule; -- if (ir == NULL) -- { -- const int order = 2*el.GetOrder(); // <----- -- ir = &IntRules.Get(Tr.GetGeometryType(), order); -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Tr); - - for (int pi = 0; pi < ir->GetNPoints(); ++pi) - { -@@ -1000,12 +928,10 @@ void DGElasticityDirichletLFIntegrator::AssembleRHSElementVect( - } - } - -- -- --void WhiteGaussianNoiseDomainLFIntegrator::AssembleRHSElementVect --(const FiniteElement &el, -- ElementTransformation &Tr, -- Vector &elvect) -+void WhiteGaussianNoiseDomainLFIntegrator::AssembleRHSElementVect( -+ const FiniteElement &el, -+ ElementTransformation &Tr, -+ Vector &elvect) - { - int n = el.GetDof(); - elvect.SetSize(n); -@@ -1040,13 +966,18 @@ void WhiteGaussianNoiseDomainLFIntegrator::AssembleRHSElementVect - } - } - -+const IntegrationRule &VectorQuadratureLFIntegrator::GetRule( -+ const FiniteElement &el, -+ ElementTransformation &Tr, -+ int oa, int ob) const -+{ -+ return vqfc.GetQuadFunction().GetSpace()->GetIntRule(Tr.ElementNo); -+} - - void VectorQuadratureLFIntegrator::AssembleRHSElementVect( - const FiniteElement &fe, ElementTransformation &Tr, Vector &elvect) - { -- const IntegrationRule *ir = -- &vqfc.GetQuadFunction().GetSpace()->GetIntRule(Tr.ElementNo); -- -+ const IntegrationRule *ir = &GetRule(fe, Tr); - const int nqp = ir->GetNPoints(); - const int vdim = vqfc.GetVDim(); - const int ndofs = fe.GetDof(); -@@ -1071,14 +1002,19 @@ void VectorQuadratureLFIntegrator::AssembleRHSElementVect( - } - } - -+const IntegrationRule &QuadratureLFIntegrator::GetRule( -+ const FiniteElement &el, -+ ElementTransformation &Tr, -+ int oa, int ob) const -+{ -+ return qfc.GetQuadFunction().GetSpace()->GetIntRule(Tr.ElementNo); -+} - - void QuadratureLFIntegrator::AssembleRHSElementVect(const FiniteElement &fe, - ElementTransformation &Tr, - Vector &elvect) - { -- const IntegrationRule *ir = -- &qfc.GetQuadFunction().GetSpace()->GetIntRule(Tr.ElementNo); -- -+ const IntegrationRule *ir = &GetRule(fe, Tr); - const int nqp = ir->GetNPoints(); - const int ndofs = fe.GetDof(); - Vector shape(ndofs); -diff --git a/fem/lininteg.hpp b/fem/lininteg.hpp -index 02fde00c9..5bcdfdb83 100644 ---- a/fem/lininteg.hpp -+++ b/fem/lininteg.hpp -@@ -29,10 +29,20 @@ protected: - LinearFormIntegrator(const IntegrationRule *ir = NULL) { IntRule = ir; } - - public: -- - /// Method probing for assembly on device - virtual bool SupportsDevice() const { return false; } - -+ virtual void SetIntRule(const IntegrationRule *ir) { IntRule = ir; } -+ -+ const IntegrationRule *GetIntRule() { return IntRule; } -+ -+ virtual const IntegrationRule &GetRule(const FiniteElement &el, -+ ElementTransformation &Tr, -+ int oa = 2, int ob = 0) const; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el, -+ FaceElementTransformations &Tr, -+ int oa = 2, int ob = 0) const; -+ - /// Method defining assembly on device - virtual void AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, -@@ -51,13 +61,9 @@ public: - FaceElementTransformations &Tr, - Vector &elvect); - -- virtual void SetIntRule(const IntegrationRule *ir) { IntRule = ir; } -- const IntegrationRule* GetIntRule() { return IntRule; } -- -- virtual ~LinearFormIntegrator() { } -+ virtual ~LinearFormIntegrator() {} - }; - -- - /// Abstract class for integrators that support delta coefficients - class DeltaLFIntegrator : public LinearFormIntegrator - { -@@ -70,7 +76,7 @@ protected: - DeltaLFIntegrator(Coefficient &q, const IntegrationRule *ir = NULL) - : LinearFormIntegrator(ir), - delta(dynamic_cast(&q)), -- vec_delta(NULL) { } -+ vec_delta(NULL) {} - - /** @brief This constructor should be used by derived classes that use a - VectorDeltaCoefficient. */ -@@ -78,7 +84,7 @@ protected: - const IntegrationRule *ir = NULL) - : LinearFormIntegrator(ir), - delta(NULL), -- vec_delta(dynamic_cast(&vq)) { } -+ vec_delta(dynamic_cast(&vq)) {} - - public: - /// Returns true if the derived class instance uses a delta coefficient. -@@ -103,23 +109,23 @@ public: - Vector &elvect) = 0; - }; - -- - /// Class for domain integration L(v) := (f, v) - class DomainLFIntegrator : public DeltaLFIntegrator - { - Vector shape; - Coefficient &Q; - int oa, ob; -+ - public: - /// Constructs a domain integrator with a given Coefficient -+ /// the old default was a = 1, b = 1 -+ /// for simple elliptic problems a = 2, b = -2 is OK - DomainLFIntegrator(Coefficient &QF, int a = 2, int b = 0) -- // the old default was a = 1, b = 1 -- // for simple elliptic problems a = 2, b = -2 is OK -- : DeltaLFIntegrator(QF), Q(QF), oa(a), ob(b) { } -+ : DeltaLFIntegrator(QF), Q(QF), oa(a), ob(b) {} - - /// Constructs a domain integrator with a given Coefficient - DomainLFIntegrator(Coefficient &QF, const IntegrationRule *ir) -- : DeltaLFIntegrator(QF, ir), Q(QF), oa(1), ob(1) { } -+ : DeltaLFIntegrator(QF, ir), Q(QF), oa(1), ob(1) {} - - virtual bool SupportsDevice() const { return true; } - -@@ -152,7 +158,7 @@ private: - public: - /// Constructs the domain integrator (Q, grad v) - DomainLFGradIntegrator(VectorCoefficient &QF) -- : DeltaLFIntegrator(QF), Q(QF) { } -+ : DeltaLFIntegrator(QF), Q(QF) {} - - virtual bool SupportsDevice() const { return true; } - -@@ -174,18 +180,18 @@ public: - using LinearFormIntegrator::AssembleRHSElementVect; - }; - -- - /// Class for boundary integration L(v) := (g, v) - class BoundaryLFIntegrator : public LinearFormIntegrator - { - Vector shape; - Coefficient &Q; - int oa, ob; -+ - public: - /** @brief Constructs a boundary integrator with a given Coefficient @a QG. - Integration order will be @a a * basis_order + @a b. */ - BoundaryLFIntegrator(Coefficient &QG, int a = 1, int b = 1) -- : Q(QG), oa(a), ob(b) { } -+ : Q(QG), oa(a), ob(b) {} - - virtual bool SupportsDevice() const { return true; } - -@@ -212,10 +218,11 @@ class BoundaryNormalLFIntegrator : public LinearFormIntegrator - Vector shape; - VectorCoefficient &Q; - int oa, ob; -+ - public: - /// Constructs a boundary integrator with a given Coefficient QG - BoundaryNormalLFIntegrator(VectorCoefficient &QG, int a = 1, int b = 1) -- : Q(QG), oa(a), ob(b) { } -+ : Q(QG), oa(a), ob(b) {} - - virtual bool SupportsDevice() const { return true; } - -@@ -237,10 +244,11 @@ class BoundaryTangentialLFIntegrator : public LinearFormIntegrator - Vector shape; - VectorCoefficient &Q; - int oa, ob; -+ - public: - /// Constructs a boundary integrator with a given Coefficient QG - BoundaryTangentialLFIntegrator(VectorCoefficient &QG, int a = 1, int b = 1) -- : Q(QG), oa(a), ob(b) { } -+ : Q(QG), oa(a), ob(b) {} - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -260,7 +268,7 @@ private: - public: - /// Constructs a domain integrator with a given VectorCoefficient - VectorDomainLFIntegrator(VectorCoefficient &QF) -- : DeltaLFIntegrator(QF), Q(QF) { } -+ : DeltaLFIntegrator(QF), Q(QF) {} - - virtual bool SupportsDevice() const { return true; } - -@@ -294,7 +302,7 @@ private: - public: - /// Constructs the domain integrator (Q, grad v) - VectorDomainLFGradIntegrator(VectorCoefficient &QF) -- : DeltaLFIntegrator(QF), Q(QF) { } -+ : DeltaLFIntegrator(QF), Q(QF) {} - - virtual bool SupportsDevice() const override { return true; } - -@@ -326,7 +334,7 @@ private: - - public: - /// Constructs a boundary integrator with a given VectorCoefficient QG -- VectorBoundaryLFIntegrator(VectorCoefficient &QG) : Q(QG) { } -+ VectorBoundaryLFIntegrator(VectorCoefficient &QG) : Q(QG) {} - - /** Given a particular boundary Finite Element and a transformation (Tr) - computes the element boundary vector, elvect. */ -@@ -352,7 +360,9 @@ private: - - public: - VectorFEDomainLFIntegrator(VectorCoefficient &F) -- : DeltaLFIntegrator(F), QF(F) { } -+ : DeltaLFIntegrator(F), QF(F) {} -+ -+ virtual bool SupportsDevice() const { return true; } - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -362,8 +372,6 @@ public: - ElementTransformation &Trans, - Vector &elvect); - -- virtual bool SupportsDevice() const { return true; } -- - virtual void AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b); -@@ -382,7 +390,7 @@ private: - public: - /// Constructs the domain integrator (Q, curl v) - VectorFEDomainLFCurlIntegrator(VectorCoefficient &F) -- : DeltaLFIntegrator(F), QF(&F) { } -+ : DeltaLFIntegrator(F), QF(&F) {} - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -401,10 +409,11 @@ class VectorFEDomainLFDivIntegrator : public DeltaLFIntegrator - private: - Vector divshape; - Coefficient &Q; -+ - public: - /// Constructs the domain integrator (Q, div v) - VectorFEDomainLFDivIntegrator(Coefficient &QF) -- : DeltaLFIntegrator(QF), Q(QF) { } -+ : DeltaLFIntegrator(QF), Q(QF) {} - - /** Given a particular Finite Element and a transformation (Tr) - computes the element right hand side element vector, elvect. */ -@@ -432,7 +441,7 @@ private: - public: - VectorBoundaryFluxLFIntegrator(Coefficient &f, double s = 1.0, - const IntegrationRule *ir = NULL) -- : LinearFormIntegrator(ir), Sign(s), F(&f) { } -+ : LinearFormIntegrator(ir), Sign(s), F(&f) {} - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -453,21 +462,21 @@ private: - - public: - VectorFEBoundaryFluxLFIntegrator(int a = 1, int b = -1) -- : F(NULL), oa(a), ob(b) { } -+ : F(NULL), oa(a), ob(b) {} - VectorFEBoundaryFluxLFIntegrator(Coefficient &f, int a = 2, int b = 0) -- : F(&f), oa(a), ob(b) { } -+ : F(&f), oa(a), ob(b) {} -+ -+ virtual bool SupportsDevice() const { return true; } - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, - Vector &elvect); - -- using LinearFormIntegrator::AssembleRHSElementVect; -- -- virtual bool SupportsDevice() const { return true; } -- - virtual void AssembleDevice(const FiniteElementSpace &fes, - const Array &markers, - Vector &b); -+ -+ using LinearFormIntegrator::AssembleRHSElementVect; - }; - - /// Class for boundary integration \f$ L(v) = (n \times f, v) \f$ -@@ -480,7 +489,7 @@ private: - public: - VectorFEBoundaryTangentLFIntegrator(VectorCoefficient &QG, - int a = 2, int b = 0) -- : f(QG), oa(a), ob(b) { } -+ : f(QG), oa(a), ob(b) {} - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -489,7 +498,6 @@ public: - using LinearFormIntegrator::AssembleRHSElementVect; - }; - -- - /** Class for boundary integration of the linear form: - (alpha/2) < (u.n) f, w > - beta < |u.n| f, w >, - where f and u are given scalar and vector coefficients, respectively, -@@ -512,6 +520,11 @@ public: - double a, double b) - { f = &f_; u = &u_; alpha = a; beta = b; } - -+ using LinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el, -+ FaceElementTransformations &Tr, -+ int oa = 2, int ob = 0) const; -+ - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, - Vector &elvect); -@@ -522,7 +535,6 @@ public: - using LinearFormIntegrator::AssembleRHSElementVect; - }; - -- - /** Boundary linear integrator for imposing non-zero Dirichlet boundary - conditions, to be used in conjunction with DGDiffusionIntegrator. - Specifically, given the Dirichlet data u_D, the linear form assembles the -@@ -546,13 +558,13 @@ protected: - - public: - DGDirichletLFIntegrator(Coefficient &u, const double s, const double k) -- : uD(&u), Q(NULL), MQ(NULL), sigma(s), kappa(k) { } -+ : uD(&u), Q(NULL), MQ(NULL), sigma(s), kappa(k) {} - DGDirichletLFIntegrator(Coefficient &u, Coefficient &q, - const double s, const double k) -- : uD(&u), Q(&q), MQ(NULL), sigma(s), kappa(k) { } -+ : uD(&u), Q(&q), MQ(NULL), sigma(s), kappa(k) {} - DGDirichletLFIntegrator(Coefficient &u, MatrixCoefficient &q, - const double s, const double k) -- : uD(&u), Q(NULL), MQ(&q), sigma(s), kappa(k) { } -+ : uD(&u), Q(NULL), MQ(&q), sigma(s), kappa(k) {} - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -564,7 +576,6 @@ public: - using LinearFormIntegrator::AssembleRHSElementVect; - }; - -- - /** Boundary linear form integrator for imposing non-zero Dirichlet boundary - conditions, in a DG elasticity formulation. Specifically, the linear form is - given by -@@ -597,7 +608,7 @@ public: - DGElasticityDirichletLFIntegrator(VectorCoefficient &uD_, - Coefficient &lambda_, Coefficient &mu_, - double alpha_, double kappa_) -- : uD(uD_), lambda(&lambda_), mu(&mu_), alpha(alpha_), kappa(kappa_) { } -+ : uD(uD_), lambda(&lambda_), mu(&mu_), alpha(alpha_), kappa(kappa_) {} - - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, -@@ -609,7 +620,6 @@ public: - using LinearFormIntegrator::AssembleRHSElementVect; - }; - -- - /** Class for spatial white Gaussian noise integration. - - The target problem is the linear SPDE a(u,v) = F(v) with F(v) := , -@@ -637,8 +647,8 @@ class WhiteGaussianNoiseDomainLFIntegrator : public LinearFormIntegrator - std::normal_distribution dist; - - bool save_factors = false; --public: - -+public: - #ifdef MFEM_USE_MPI - /** @brief Sets the @a seed_ of the random number generator. A fixed seed - allows for a reproducible sequence of white noise vectors. */ -@@ -669,13 +679,13 @@ public: - if (seed_ > 0) { SetSeed(seed_); } - } - #endif -+ - /// @brief Sets/resets the @a seed of the random number generator. - void SetSeed(int seed) - { - generator.seed(seed); - } - -- using LinearFormIntegrator::AssembleRHSElementVect; - virtual void AssembleRHSElementVect(const FiniteElement &el, - ElementTransformation &Tr, - Vector &elvect); -@@ -715,8 +725,9 @@ public: - } - L.DeleteAll(); - } --}; - -+ using LinearFormIntegrator::AssembleRHSElementVect; -+}; - - /** Class for domain integration of L(v) := (f, v), where - f=(f1,...,fn) and v=(v1,...,vn). that makes use of -@@ -738,18 +749,23 @@ public: - } - } - -- using LinearFormIntegrator::AssembleRHSElementVect; -- virtual void AssembleRHSElementVect(const FiniteElement &fe, -- ElementTransformation &Tr, -- Vector &elvect); -- - virtual void SetIntRule(const IntegrationRule *ir) - { - MFEM_WARNING("Integration rule not used in this class. " - "The QuadratureFunction integration rules are used instead"); - } --}; - -+ using LinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el, -+ ElementTransformation &Tr, -+ int oa = 2, int ob = 0) const; -+ -+ virtual void AssembleRHSElementVect(const FiniteElement &fe, -+ ElementTransformation &Tr, -+ Vector &elvect); -+ -+ using LinearFormIntegrator::AssembleRHSElementVect; -+}; - - /** Class for domain integration L(v) := (f, v) that makes use - of QuadratureFunctionCoefficient. */ -@@ -770,19 +786,24 @@ public: - } - } - -- using LinearFormIntegrator::AssembleRHSElementVect; -- virtual void AssembleRHSElementVect(const FiniteElement &fe, -- ElementTransformation &Tr, -- Vector &elvect); -- - virtual void SetIntRule(const IntegrationRule *ir) - { - MFEM_WARNING("Integration rule not used in this class. " - "The QuadratureFunction integration rules are used instead"); - } -+ -+ using LinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el, -+ ElementTransformation &Tr, -+ int oa = 2, int ob = 0) const; -+ -+ virtual void AssembleRHSElementVect(const FiniteElement &fe, -+ ElementTransformation &Tr, -+ Vector &elvect); -+ -+ using LinearFormIntegrator::AssembleRHSElementVect; - }; - - } - -- - #endif -diff --git a/fem/nonlininteg.cpp b/fem/nonlininteg.cpp -index 5ee1febea..a704ee207 100644 ---- a/fem/nonlininteg.cpp -+++ b/fem/nonlininteg.cpp -@@ -15,26 +15,44 @@ - namespace mfem - { - -+const IntegrationRule &NonlinearFormIntegrator::GetRule( -+ const FiniteElement&, const FiniteElement&, -+ ElementTransformation&) const -+{ -+ MFEM_ABORT("NonlinearFormIntegrator::GetRule(...)\n" -+ " is not implemented for this class."); -+ return IntRules.Get(0, 0); -+} -+ -+const IntegrationRule &NonlinearFormIntegrator::GetRule( -+ const FiniteElement&, const FiniteElement&, -+ FaceElementTransformations&) const -+{ -+ MFEM_ABORT("NonlinearFormIntegrator::GetRule(...)\n" -+ " is not implemented for this class."); -+ return IntRules.Get(0, 0); -+} -+ - void NonlinearFormIntegrator::AssemblePA(const FiniteElementSpace&) - { - MFEM_ABORT("NonlinearFormIntegrator::AssemblePA(...)\n" - " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleGradPA(const Vector &x, -- const FiniteElementSpace &fes) -+void NonlinearFormIntegrator::AssembleGradPA(const Vector&, -+ const FiniteElementSpace&) - { - MFEM_ABORT("NonlinearFormIntegrator::AssembleGradPA(...)\n" - " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleGradDiagonalPA(Vector &diag) const -+void NonlinearFormIntegrator::AssembleGradDiagonalPA(Vector&) const - { - MFEM_ABORT("NonlinearFormIntegrator::AssembleGradDiagonalPA(...)\n" - " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AddMultPA(const Vector &, Vector &) const -+void NonlinearFormIntegrator::AddMultPA(const Vector&, Vector&) const - { - MFEM_ABORT("NonlinearFormIntegrator::AddMultPA(...)\n" - " is not implemented for this class."); -@@ -46,119 +64,141 @@ void NonlinearFormIntegrator::AddMultGradPA(const Vector&, Vector&) const - " is not implemented for this class."); - } - --double NonlinearFormIntegrator::GetLocalStateEnergyPA(const Vector &x) const -+double NonlinearFormIntegrator::GetLocalStateEnergyPA(const Vector&) const - { - MFEM_ABORT("NonlinearFormIntegrator::GetLocalStateEnergyPA(...)\n" - " is not implemented for this class."); - return 0.0; - } - --void NonlinearFormIntegrator::AssembleMF(const FiniteElementSpace &fes) -+void NonlinearFormIntegrator::AssembleMF(const FiniteElementSpace&) - { - MFEM_ABORT("NonlinearFormIntegrator::AssembleMF(...)\n" - " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AddMultMF(const Vector &, Vector &) const -+void NonlinearFormIntegrator::AddMultMF(const Vector&, Vector&) const - { - MFEM_ABORT("NonlinearFormIntegrator::AddMultMF(...)\n" - " is not implemented for this class."); - } - --double NonlinearFormIntegrator::GetElementEnergy( -- const FiniteElement &el, ElementTransformation &Tr, const Vector &elfun) -+double NonlinearFormIntegrator::GetElementEnergy(const FiniteElement&, -+ ElementTransformation&, -+ const Vector&) - { -- MFEM_ABORT("NonlinearFormIntegrator::GetElementEnergy" -- " is not overloaded!"); -+ MFEM_ABORT("NonlinearFormIntegrator::GetElementEnergy(...)" -+ " is not implemented for this class."); - return 0.0; - } - --void NonlinearFormIntegrator::AssembleElementVector( -- const FiniteElement &el, ElementTransformation &Tr, -- const Vector &elfun, Vector &elvect) -+void NonlinearFormIntegrator::AssembleElementVector(const FiniteElement&, -+ ElementTransformation&, -+ const Vector&, -+ Vector&) - { -- MFEM_ABORT("NonlinearFormIntegrator::AssembleElementVector" -- " is not overloaded!"); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleElementVector(...)\n" -+ " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleFaceVector( -- const FiniteElement &el1, const FiniteElement &el2, -- FaceElementTransformations &Tr, const Vector &elfun, Vector &elvect) -+void NonlinearFormIntegrator::AssembleFaceVector(const FiniteElement&, -+ const FiniteElement&, -+ FaceElementTransformations&, -+ const Vector&, -+ Vector&) - { -- MFEM_ABORT("NonlinearFormIntegrator::AssembleFaceVector" -- " is not overloaded!"); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleFaceVector(...)\n" -+ " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleElementGrad( -- const FiniteElement &el, ElementTransformation &Tr, const Vector &elfun, -- DenseMatrix &elmat) -+void NonlinearFormIntegrator::AssembleElementGrad(const FiniteElement&, -+ ElementTransformation&, -+ const Vector&, -+ DenseMatrix&) - { -- MFEM_ABORT("NonlinearFormIntegrator::AssembleElementGrad" -- " is not overloaded!"); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleElementGrad(...)\n" -+ " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleFaceGrad( -- const FiniteElement &el1, const FiniteElement &el2, -- FaceElementTransformations &Tr, const Vector &elfun, -- DenseMatrix &elmat) -+void NonlinearFormIntegrator::AssembleFaceGrad(const FiniteElement&, -+ const FiniteElement&, -+ FaceElementTransformations&, -+ const Vector&, -+ DenseMatrix&) - { -- MFEM_ABORT("NonlinearFormIntegrator::AssembleFaceGrad" -- " is not overloaded!"); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleFaceGrad(...)\n" -+ " is not implemented for this class."); - } - -+const IntegrationRule &BlockNonlinearFormIntegrator::GetRule( -+ const FiniteElement&, const FiniteElement&test_fe, -+ ElementTransformation&) const -+{ -+ MFEM_ABORT("BlockNonlinearFormIntegrator::GetRule(...)\n" -+ " is not implemented for this class."); -+ return IntRules.Get(0, 0); -+} - --void BlockNonlinearFormIntegrator::AssembleElementVector( -- const Array &el, -- ElementTransformation &Tr, -- const Array &elfun, -- const Array &elvec) -+const IntegrationRule &BlockNonlinearFormIntegrator::GetRule( -+ const FiniteElement&, const FiniteElement&, -+ FaceElementTransformations&) const - { -- MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleElementVector" -- " is not overloaded!"); -+ MFEM_ABORT("BlockNonlinearFormIntegrator::GetRule(...)\n" -+ " is not implemented for this class."); -+ return IntRules.Get(0, 0); - } - --void BlockNonlinearFormIntegrator::AssembleFaceVector( -- const Array &el1, -- const Array &el2, -- FaceElementTransformations &Tr, -- const Array &elfun, -- const Array &elvect) -+double BlockNonlinearFormIntegrator::GetElementEnergy( -+ const Array&, -+ ElementTransformation&, -+ const Array&) - { -- MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleFaceVector" -- " is not overloaded!"); -+ MFEM_ABORT("BlockNonlinearFormIntegrator::GetElementEnergy(...)\n" -+ " is not implemented for this class."); -+ return 0.0; - } - --void BlockNonlinearFormIntegrator::AssembleElementGrad( -- const Array &el, -- ElementTransformation &Tr, -- const Array &elfun, -- const Array2D &elmats) -+void BlockNonlinearFormIntegrator::AssembleElementVector( -+ const Array&, -+ ElementTransformation&, -+ const Array&, -+ const Array&) - { -- MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleElementGrad" -- " is not overloaded!"); -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleElementVector(...)\n" -+ " is not implemented for this class."); - } - --void BlockNonlinearFormIntegrator::AssembleFaceGrad( -- const Array&el1, -- const Array&el2, -- FaceElementTransformations &Tr, -- const Array &elfun, -- const Array2D &elmats) -+void BlockNonlinearFormIntegrator::AssembleFaceVector( -+ const Array&, -+ const Array&, -+ FaceElementTransformations&, -+ const Array&, -+ const Array&) - { -- MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleFaceGrad" -- " is not overloaded!"); -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleFaceVector(...)\n" -+ " is not implemented for this class."); - } - --double BlockNonlinearFormIntegrator::GetElementEnergy( -- const Array&el, -- ElementTransformation &Tr, -- const Array&elfun) -+void BlockNonlinearFormIntegrator::AssembleElementGrad( -+ const Array&, -+ ElementTransformation&, -+ const Array&, -+ const Array2D&) - { -- MFEM_ABORT("BlockNonlinearFormIntegrator::GetElementEnergy" -- " is not overloaded!"); -- return 0.0; -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleElementGrad(...)\n" -+ " is not implemented for this class."); - } - -+void BlockNonlinearFormIntegrator::AssembleFaceGrad( -+ const Array&, -+ const Array&, -+ FaceElementTransformations&, -+ const Array&, -+ const Array2D&) -+{ -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleFaceGrad(...)\n" -+ " is not implemented for this class."); -+} - - double InverseHarmonicModel::EvalW(const DenseMatrix &J) const - { -@@ -260,7 +300,6 @@ void InverseHarmonicModel::AssembleH( - } - } - -- - inline void NeoHookeanModel::EvalCoeffs() const - { - mu = c_mu->Eval(*Ttr, Ttr->GetIntPoint()); -@@ -376,6 +415,13 @@ void NeoHookeanModel::AssembleH(const DenseMatrix &J, const DenseMatrix &DS, - } - } - -+const IntegrationRule &HyperelasticNLFIntegrator::GetRule( -+ const FiniteElement &trial_fe, const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = trial_fe.GetOrder() + test_fe.GetOrder() + 3; -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} - - double HyperelasticNLFIntegrator::GetElementEnergy(const FiniteElement &el, - ElementTransformation &Ttr, -@@ -390,11 +436,7 @@ double HyperelasticNLFIntegrator::GetElementEnergy(const FiniteElement &el, - Jpt.SetSize(dim); - PMatI.UseExternalData(elfun.GetData(), dof, dim); - -- const IntegrationRule *ir = IntRule; -- if (!ir) -- { -- ir = &(IntRules.Get(el.GetGeomType(), 2*el.GetOrder() + 3)); // <--- -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Ttr); - - energy = 0.0; - model->SetTransformation(Ttr); -@@ -429,11 +471,7 @@ void HyperelasticNLFIntegrator::AssembleElementVector( - elvect.SetSize(dof*dim); - PMatO.UseExternalData(elvect.GetData(), dof, dim); - -- const IntegrationRule *ir = IntRule; -- if (!ir) -- { -- ir = &(IntRules.Get(el.GetGeomType(), 2*el.GetOrder() + 3)); // <--- -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Ttr); - - elvect = 0.0; - model->SetTransformation(Ttr); -@@ -468,11 +506,7 @@ void HyperelasticNLFIntegrator::AssembleElementGrad(const FiniteElement &el, - PMatI.UseExternalData(elfun.GetData(), dof, dim); - elmat.SetSize(dof*dim); - -- const IntegrationRule *ir = IntRule; -- if (!ir) -- { -- ir = &(IntRules.Get(el.GetGeomType(), 2*el.GetOrder() + 3)); // <--- -- } -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Ttr); - - elmat = 0.0; - model->SetTransformation(Ttr); -@@ -490,6 +524,13 @@ void HyperelasticNLFIntegrator::AssembleElementGrad(const FiniteElement &el, - } - } - -+const IntegrationRule &IncompressibleNeoHookeanIntegrator::GetRule( -+ const FiniteElement &trial_fe, const FiniteElement &test_fe, -+ ElementTransformation &Trans) const -+{ -+ int order = trial_fe.GetOrder() + test_fe.GetOrder() + 3; -+ return IntRules.Get(trial_fe.GetGeomType(), order); -+} - - double IncompressibleNeoHookeanIntegrator::GetElementEnergy( - const Array&el, -@@ -511,15 +552,14 @@ double IncompressibleNeoHookeanIntegrator::GetElementEnergy( - J.SetSize(dim); - PMatI_u.UseExternalData(elfun[0]->GetData(), dof_u, dim); - -- int intorder = 2*el[0]->GetOrder() + 3; // <--- -- const IntegrationRule &ir = IntRules.Get(el[0]->GetGeomType(), intorder); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el[0], Tr); - - double energy = 0.0; - double mu = 0.0; - -- for (int i = 0; i < ir.GetNPoints(); ++i) -+ for (int i = 0; i < ir->GetNPoints(); ++i) - { -- const IntegrationPoint &ip = ir.IntPoint(i); -+ const IntegrationPoint &ip = ir->IntPoint(i); - Tr.SetIntPoint(&ip); - CalcInverse(Tr.Jacobian(), J0i); - -@@ -572,15 +612,14 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementVector( - Sh_p.SetSize(dof_p); - elvec[1]->SetSize(dof_p); - -- int intorder = 2*el[0]->GetOrder() + 3; // <--- -- const IntegrationRule &ir = IntRules.Get(el[0]->GetGeomType(), intorder); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el[0], Tr); - - *elvec[0] = 0.0; - *elvec[1] = 0.0; - -- for (int i = 0; i < ir.GetNPoints(); ++i) -+ for (int i = 0; i < ir->GetNPoints(); ++i) - { -- const IntegrationPoint &ip = ir.IntPoint(i); -+ const IntegrationPoint &ip = ir->IntPoint(i); - Tr.SetIntPoint(&ip); - CalcInverse(Tr.Jacobian(), J0i); - -@@ -605,7 +644,6 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementVector( - - elvec[1]->Add(ip.weight * Tr.Weight() * (dJ - 1.0), Sh_p); - } -- - } - - void IncompressibleNeoHookeanIntegrator::AssembleElementGrad( -@@ -639,12 +677,11 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementGrad( - PMatI_u.UseExternalData(elfun[0]->GetData(), dof_u, dim); - Sh_p.SetSize(dof_p); - -- int intorder = 2*el[0]->GetOrder() + 3; // <--- -- const IntegrationRule &ir = IntRules.Get(el[0]->GetGeomType(), intorder); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(*el[0], Tr); - -- for (int i = 0; i < ir.GetNPoints(); ++i) -+ for (int i = 0; i < ir->GetNPoints(); ++i) - { -- const IntegrationPoint &ip = ir.IntPoint(i); -+ const IntegrationPoint &ip = ir->IntPoint(i); - Tr.SetIntPoint(&ip); - CalcInverse(Tr.Jacobian(), J0i); - -@@ -721,16 +758,15 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementGrad( - } - } - } -- - } - -- --const IntegrationRule& --VectorConvectionNLFIntegrator::GetRule(const FiniteElement &fe, -- ElementTransformation &T) -+const IntegrationRule &VectorConvectionNLFIntegrator::GetRule( -+ const FiniteElement &trial_fe, const FiniteElement &test_fe, -+ ElementTransformation &Trans) const - { -- const int order = 2 * fe.GetOrder() + T.OrderGrad(&fe); -- return IntRules.Get(fe.GetGeomType(), order); -+ int order = Trans.OrderGrad(&trial_fe) + trial_fe.GetOrder() + -+ test_fe.GetOrder(); -+ return IntRules.Get(trial_fe.GetGeomType(), order); - } - - void VectorConvectionNLFIntegrator::AssembleElementVector( -@@ -751,7 +787,9 @@ void VectorConvectionNLFIntegrator::AssembleElementVector( - ELV.UseExternalData(elvect.GetData(), nd, dim); - - Vector vec1(dim), vec2(dim); -+ - const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, T); -+ - ELV = 0.0; - for (int i = 0; i < ir->GetNPoints(); i++) - { -@@ -841,7 +879,6 @@ void VectorConvectionNLFIntegrator::AssembleElementGrad( - } - } - -- - void ConvectiveVectorConvectionNLFIntegrator::AssembleElementGrad( - const FiniteElement &el, - ElementTransformation &trans, -@@ -890,7 +927,6 @@ void ConvectiveVectorConvectionNLFIntegrator::AssembleElementGrad( - } - } - -- - void SkewSymmetricVectorConvectionNLFIntegrator::AssembleElementGrad( - const FiniteElement &el, - ElementTransformation &trans, -diff --git a/fem/nonlininteg.hpp b/fem/nonlininteg.hpp -index 38b133244..4c2ee2470 100644 ---- a/fem/nonlininteg.hpp -+++ b/fem/nonlininteg.hpp -@@ -17,6 +17,7 @@ - #include "coefficient.hpp" - #include "fespace.hpp" - #include "ceed/interface/operator.hpp" -+#include "ceed/interface/util.hpp" - - namespace mfem - { -@@ -37,23 +38,43 @@ protected: - : IntRule(ir), ceedOp(NULL) {} - - public: -+ /// Set the memory type used for GeometricFactors and other large allocations -+ /// in PA extensions. -+ void SetPAMemoryType(MemoryType mt) { pa_mt = mt; } -+ -+ /// Indicates whether this integrator can use a Ceed backend. -+ virtual bool SupportsCeed() const { return false; } -+ -+ /// Access the underlying ceed::Operator for libCEED backends, after the -+ /// integrator has been assembled. -+ ceed::Operator &GetCeedOp() { return *ceedOp; } -+ - /** @brief Prescribe a fixed IntegrationRule to use (when @a ir != NULL) or - let the integrator choose (when @a ir == NULL). */ - virtual void SetIntRule(const IntegrationRule *ir) { IntRule = ir; } -- -- /// Prescribe a fixed IntegrationRule to use. - void SetIntegrationRule(const IntegrationRule &ir) { SetIntRule(&ir); } - -- /// Set the memory type used for GeometricFactors and other large allocations -- /// in PA extensions. -- void SetPAMemoryType(MemoryType mt) { pa_mt = mt; } -- - /// Get the integration rule of the integrator (possibly NULL). - const IntegrationRule *GetIntegrationRule() const { return IntRule; } - -+ /// Get the integration rule of the integrator as a function of the finite -+ /// element and geometry orders. -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ ElementTransformation &Trans) const; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Trans) const; -+ const IntegrationRule &GetRule(const FiniteElement &el, -+ ElementTransformation &Trans) const -+ { return GetRule(el, el, Trans); } -+ const IntegrationRule &GetRule(const FiniteElement &el, -+ FaceElementTransformations &Trans) const -+ { return GetRule(el, el, Trans); } -+ - /// Method defining partial assembly. - /** The result of the partial assembly is stored internally so that it can be -- used later in the methods AddMultPA(). */ -+ used later in the methods AddMultPA() and AddMultTransposePA(). */ - virtual void AssemblePA(const FiniteElementSpace &fes); - - /** @brief Prepare the integrator for partial assembly (PA) gradient -@@ -132,11 +153,6 @@ public: - FaceElementTransformations &Tr, - const Vector &elfun, DenseMatrix &elmat); - -- /// Indicates whether this integrator can use a Ceed backend. -- virtual bool SupportsCeed() const { return false; } -- -- ceed::Operator &GetCeedOp() { return *ceedOp; } -- - virtual ~NonlinearFormIntegrator() - { - delete ceedOp; -@@ -149,7 +165,36 @@ public: - for block state vectors. */ - class BlockNonlinearFormIntegrator - { -+protected: -+ const IntegrationRule *IntRule; -+ -+ BlockNonlinearFormIntegrator(const IntegrationRule *ir = NULL) -+ : IntRule(ir) {} -+ - public: -+ /** @brief Prescribe a fixed IntegrationRule to use (when @a ir != NULL) or -+ let the integrator choose (when @a ir == NULL). */ -+ virtual void SetIntRule(const IntegrationRule *ir) { IntRule = ir; } -+ void SetIntegrationRule(const IntegrationRule &ir) { SetIntRule(&ir); } -+ -+ /// Get the integration rule of the integrator (possibly NULL). -+ const IntegrationRule *GetIntegrationRule() const { return IntRule; } -+ -+ /// Get the integration rule of the integrator as a function of the finite -+ /// element and geometry orders. -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ ElementTransformation &Tr) const; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Tr) const; -+ const IntegrationRule &GetRule(const FiniteElement &el, -+ ElementTransformation &Trans) const -+ { return GetRule(el, el, Trans); } -+ const IntegrationRule &GetRule(const FiniteElement &el, -+ FaceElementTransformations &Trans) const -+ { return GetRule(el, el, Trans); } -+ - /// Compute the local energy - virtual double GetElementEnergy(const Array&el, - ElementTransformation &Tr, -@@ -182,8 +227,7 @@ public: - virtual ~BlockNonlinearFormIntegrator() {} - }; - -- --/// Abstract class for hyperelastic models -+/// Abstract base class for hyperelastic models - class HyperelasticModel - { - protected: -@@ -228,7 +272,6 @@ public: - const double weight, DenseMatrix &A) const = 0; - }; - -- - /** Inverse-harmonic hyperelastic model with a strain energy density function - given by the formula: W(J) = (1/2) det(J) Tr((J J^t)^{-1}) where J is the - deformation gradient. */ -@@ -247,7 +290,6 @@ public: - const double weight, DenseMatrix &A) const; - }; - -- - /** Neo-Hookean hyperelastic model with a strain energy density function given - by the formula: \f$(\mu/2)(\bar{I}_1 - dim) + (K/2)(det(J)/g - 1)^2\f$ where - J is the deformation gradient and \f$\bar{I}_1 = (det(J))^{-2/dim} Tr(J -@@ -281,7 +323,6 @@ public: - const double weight, DenseMatrix &A) const; - }; - -- - /** Hyperelastic integrator for any given HyperelasticModel. - - Represents @f$ \int W(Jpt) dx @f$ over a target zone, where W is the -@@ -310,10 +351,11 @@ public: - /** @param[in] m HyperelasticModel that will be integrated. */ - HyperelasticNLFIntegrator(HyperelasticModel *m) : model(m) {} - -- /** @brief Computes the integral of W(Jacobian(Trt)) over a target zone -- @param[in] el Type of FiniteElement. -- @param[in] Ttr Represents ref->target coordinates transformation. -- @param[in] elfun Physical coordinates of the zone. */ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ ElementTransformation &Trans) const; -+ - virtual double GetElementEnergy(const FiniteElement &el, - ElementTransformation &Ttr, - const Vector &elfun); -@@ -342,24 +384,26 @@ private: - public: - IncompressibleNeoHookeanIntegrator(Coefficient &mu_) : c_mu(&mu_) {} - -+ using BlockNonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ ElementTransformation &Trans) const; -+ - virtual double GetElementEnergy(const Array&el, - ElementTransformation &Tr, - const Array &elfun); - -- /// Perform the local action of the NonlinearFormIntegrator - virtual void AssembleElementVector(const Array &el, - ElementTransformation &Tr, - const Array &elfun, - const Array &elvec); - -- /// Assemble the local gradient matrix - virtual void AssembleElementGrad(const Array &el, - ElementTransformation &Tr, - const Array &elfun, - const Array2D &elmats); - }; - -- - class VectorConvectionNLFIntegrator : public NonlinearFormIntegrator - { - private: -@@ -378,8 +422,12 @@ public: - - VectorConvectionNLFIntegrator() = default; - -- static const IntegrationRule &GetRule(const FiniteElement &fe, -- ElementTransformation &T); -+ virtual bool SupportsCeed() const { return DeviceCanUseCeed(); } -+ -+ using NonlinearFormIntegrator::GetRule; -+ virtual const IntegrationRule &GetRule(const FiniteElement &el1, -+ const FiniteElement &el2, -+ ElementTransformation &Trans) const; - - virtual void AssembleElementVector(const FiniteElement &el, - ElementTransformation &trans, -@@ -400,7 +448,6 @@ public: - virtual void AddMultMF(const Vector &x, Vector &y) const; - }; - -- - /** This class is used to assemble the convective form of the nonlinear term - arising in the Navier-Stokes equations \f$(u \cdot \nabla v, w )\f$ */ - class ConvectiveVectorConvectionNLFIntegrator : -@@ -422,7 +469,6 @@ public: - DenseMatrix &elmat); - }; - -- - /** This class is used to assemble the skew-symmetric form of the nonlinear term - arising in the Navier-Stokes equations - \f$.5*(u \cdot \nabla v, w ) - .5*(u \cdot \nabla w, v )\f$ */ -diff --git a/fem/transfer.cpp b/fem/transfer.cpp -index 7f95ca9fe..a7dd0731c 100644 ---- a/fem/transfer.cpp -+++ b/fem/transfer.cpp -@@ -930,17 +930,13 @@ TransferOperator::TransferOperator(const FiniteElementSpace& lFESpace_, - P.SetOperatorOwner(false); - opr = P.Ptr(); - } -- else if (lFESpace_.GetMesh()->GetNE() > 0 -- && hFESpace_.GetMesh()->GetNE() > 0 -- && lFESpace_.GetVDim() == 1 -- && hFESpace_.GetVDim() == 1 -- && dynamic_cast(lFESpace_.GetFE(0)) -- && dynamic_cast(hFESpace_.GetFE(0)) -- && !isvar_order -- && (hFESpace_.FEColl()->GetContType() == -- mfem::FiniteElementCollection::CONTINUOUS || -- hFESpace_.FEColl()->GetContType() == -- mfem::FiniteElementCollection::DISCONTINUOUS)) -+ else if (UsesTensorBasis(lFESpace_) && UsesTensorBasis(hFESpace_) && -+ lFESpace_.GetVDim() == 1 && hFESpace_.GetVDim() == 1 && -+ !isvar_order && -+ (hFESpace_.FEColl()->GetContType() == -+ mfem::FiniteElementCollection::CONTINUOUS || -+ hFESpace_.FEColl()->GetContType() == -+ mfem::FiniteElementCollection::DISCONTINUOUS)) - { - opr = new TensorProductPRefinementTransferOperator(lFESpace_, hFESpace_); - } -diff --git a/general/array.cpp b/general/array.cpp -index 12c3e3c06..e1a81e2d1 100644 ---- a/general/array.cpp -+++ b/general/array.cpp -@@ -175,6 +175,7 @@ void Array2D::Print(std::ostream &os, int width_) - } - } - -+template class Array; - template class Array; - template class Array; - template class Array; -diff --git a/general/device.cpp b/general/device.cpp -index ccee71cd7..1ea480245 100644 ---- a/general/device.cpp -+++ b/general/device.cpp -@@ -481,14 +481,14 @@ static void OccaDeviceSetup(const int dev) - #endif - } - --static void CeedDeviceSetup(const char* ceed_spec) -+static void CeedDeviceSetup(const char *ceed_spec) - { - #ifdef MFEM_USE_CEED - CeedInit(ceed_spec, &internal::ceed); - const char *ceed_backend; - CeedGetResource(internal::ceed, &ceed_backend); -- if (strcmp(ceed_spec, ceed_backend) && strcmp(ceed_spec, "/cpu/self") && -- strcmp(ceed_spec, "/gpu/hip")) -+ size_t ceed_spec_len = strlen(ceed_spec); -+ if (strncmp(ceed_spec, ceed_backend, ceed_spec_len)) - { - mfem::out << std::endl << "WARNING!!!\n" - "libCEED is not using the requested backend!!!\n" -diff --git a/makefile b/makefile -index a606f6dfe..ee5cd6b0d 100644 ---- a/makefile -+++ b/makefile -@@ -271,7 +271,6 @@ MFEM_REQ_LIB_DEPS = ENZYME SUPERLU MUMPS METIS FMS CONDUIT SIDRE LAPACK SUNDIALS - GSLIB OCCA CEED RAJA UMPIRE MKL_CPARDISO AMGX CALIPER PARELAG BENCHMARK\ - MOONOLITH ALGOIM - -- - PETSC_ERROR_MSG = $(if $(PETSC_FOUND),,. PETSC config not found: $(PETSC_VARS)) - SLEPC_ERROR_MSG = $(if $(SLEPC_FOUND),,. SLEPC config not found: $(SLEPC_VARS)) - -@@ -409,7 +408,11 @@ endif - DIRS = general linalg linalg/simd mesh mesh/submesh fem fem/ceed/interface \ - fem/ceed/integrators/mass fem/ceed/integrators/convection \ - fem/ceed/integrators/diffusion fem/ceed/integrators/nlconvection \ -- fem/ceed/solvers fem/fe fem/lor fem/qinterp fem/integ fem/tmop -+ fem/ceed/integrators/vecfemass fem/ceed/integrators/divdiv \ -+ fem/ceed/integrators/curlcurl fem/ceed/integrators/mixedvecgrad \ -+ fem/ceed/integrators/mixedveccurl fem/ceed/integrators/interp \ -+ fem/ceed/integrators/util fem/ceed/solvers \ -+ fem/fe fem/lor fem/qinterp fem/integ fem/tmop - - ifeq ($(MFEM_USE_MOONOLITH),YES) - MFEM_CXXFLAGS += $(MOONOLITH_CXX_FLAGS) -@@ -423,7 +426,7 @@ RELSRC_FILES = $(patsubst $(SRC)%,%,$(SOURCE_FILES)) - OBJECT_FILES = $(patsubst $(SRC)%,$(BLD)%,$(SOURCE_FILES:.cpp=.o)) - OKL_DIRS = fem - --.PHONY: lib all clean distclean install config status info deps serial parallel \ -+.PHONY: lib all clean distclean install config status info deps serial parallel \ - debug pdebug cuda hip pcuda cudebug pcudebug hpc style check test unittest \ - deprecation-warnings - -@@ -603,6 +606,14 @@ install: $(if $(static),$(BLD)libmfem.a) $(if $(shared),$(BLD)libmfem.$(SO_EXT)) - $(INSTALL) -m 640 $(SRC)fem/ceed/integrators/diffusion/*.h $(PREFIX_INC)/mfem/fem/ceed/integrators/diffusion - mkdir -p $(PREFIX_INC)/mfem/fem/ceed/integrators/nlconvection - $(INSTALL) -m 640 $(SRC)fem/ceed/integrators/nlconvection/*.h $(PREFIX_INC)/mfem/fem/ceed/integrators/nlconvection -+ mkdir -p $(PREFIX_INC)/mfem/fem/ceed/integrators/vecfemass -+ $(INSTALL) -m 640 $(SRC)fem/ceed/integrators/vecfemass/*.h $(PREFIX_INC)/mfem/fem/ceed/integrators/vecfemass -+ mkdir -p $(PREFIX_INC)/mfem/fem/ceed/integrators/divdiv -+ $(INSTALL) -m 640 $(SRC)fem/ceed/integrators/divdiv/*.h $(PREFIX_INC)/mfem/fem/ceed/integrators/divdiv -+ mkdir -p $(PREFIX_INC)/mfem/fem/ceed/integrators/curlcurl -+ $(INSTALL) -m 640 $(SRC)fem/ceed/integrators/curlcurl/*.h $(PREFIX_INC)/mfem/fem/ceed/integrators/curlcurl -+ mkdir -p $(PREFIX_INC)/mfem/fem/ceed/integrators/util -+ $(INSTALL) -m 640 $(SRC)fem/ceed/integrators/util/*.h $(PREFIX_INC)/mfem/fem/ceed/integrators/util - # install config.mk in $(PREFIX_SHARE) - mkdir -p $(PREFIX_SHARE) - $(MAKE) -C $(BLD)config config-mk CONFIG_MK=config-install.mk -@@ -726,17 +737,16 @@ status info: - ASTYLE_BIN = astyle - ASTYLE = $(ASTYLE_BIN) --options=$(SRC)config/mfem.astylerc - ASTYLE_VER = "Artistic Style Version 3.1" --FORMAT_FILES = $(foreach dir,$(DIRS) $(EM_DIRS) config,$(dir)/*.?pp) --FORMAT_FILES += tests/unit/*.?pp --UNIT_TESTS_SUBDIRS = general linalg mesh fem miniapps ceed --MINIAPPS_SUBDIRS = dpg/util hooke/operators hooke/preconditioners hooke/materials hooke/kernels --FORMAT_FILES += $(foreach dir,$(UNIT_TESTS_SUBDIRS),tests/unit/$(dir)/*.?pp) --FORMAT_FILES += $(foreach dir,$(MINIAPPS_SUBDIRS),miniapps/$(dir)/*.?pp) --FORMAT_EXCLUDE = general/tinyxml2.cpp tests/unit/catch.hpp -+FORMAT_FILES = $(foreach dir,$(DIRS) $(EM_DIRS) config,$(dir)/*.[ch]pp $(dir)/*.[ch]) -+FORMAT_FILES += tests/unit/*.[ch]pp -+FORMAT_FILES += $(foreach dir,$(wildcard tests/unit/*),$(dir)/*.[ch]pp $(dir)/*.[ch]) -+FORMAT_FILES += $(foreach dir,$(wildcard miniapps/*/*),$(dir)/*.[ch]pp $(dir)/*.[ch]) -+FORMAT_EXCLUDE = general/tinyxml2.cpp tests/unit/catch.hpp fem/picojson.h general/tinyxml2.h - FORMAT_LIST = $(filter-out $(FORMAT_EXCLUDE),$(wildcard $(FORMAT_FILES))) - --COUT_CERR_FILES = $(foreach dir,$(DIRS),$(dir)/*.[ch]pp) --COUT_CERR_EXCLUDE = '^general/error\.cpp' '^general/globals\.[ch]pp' -+COUT_CERR_FILES = $(foreach dir,$(DIRS),$(dir)/*.[ch]pp $(dir)/*.[ch]) -+COUT_CERR_EXCLUDE = general/error.cpp general/globals.cpp general/globals.hpp -+COUT_CERR_LIST = $(filter-out $(COUT_CERR_EXCLUDE),$(wildcard $(COUT_CERR_FILES))) - - DEPRECATION_WARNING := \ - "This feature is planned for removal in the next release."\ -@@ -772,12 +782,12 @@ style: - "Please make sure the changes are committed");\ - echo "Checking for use of std::cout...";\ - $(call mfem_check_command,\ -- grep cout $(COUT_CERR_FILES) | grep -v $(COUT_CERR_EXCLUDE:%=-e %),\ -+ grep cout $(COUT_CERR_LIST),\ - "No use of std::cout found", "Use mfem::out instead of std::cout");\ - echo "Checking for use of std::cerr...";\ - $(call mfem_check_command,\ -- grep cerr $(COUT_CERR_FILES) |\ -- grep -v $(COUT_CERR_EXCLUDE:%=-e %) -e cerrno,\ -+ grep cerr $(COUT_CERR_LIST) |\ -+ grep -v -e cerrno,\ - "No use of std::cerr found", "Use mfem::err instead of std::cerr");\ - exit $$err_code - -diff --git a/miniapps/shifted/sbm_solver.hpp b/miniapps/shifted/sbm_solver.hpp -index db16738d5..b8830bf31 100644 ---- a/miniapps/shifted/sbm_solver.hpp -+++ b/miniapps/shifted/sbm_solver.hpp -@@ -121,7 +121,6 @@ public: - par_shared_face_count(0), - cut_marker(cut_marker_) { } - -- using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -@@ -241,7 +240,6 @@ public: - par_shared_face_count(0), - cut_marker(cut_marker_) { } - -- using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, - FaceElementTransformations &Trans, -diff --git a/tests/unit/ceed/test_ceed.cpp b/tests/unit/ceed/test_ceed.cpp -index 971b68e24..46f5e6bab 100644 ---- a/tests/unit/ceed/test_ceed.cpp -+++ b/tests/unit/ceed/test_ceed.cpp -@@ -21,7 +21,15 @@ namespace ceed_test - - #ifdef MFEM_USE_CEED - --enum class CeedCoeffType { Const, Grid, Quad, VecConst, VecGrid, VecQuad }; -+enum class CeedCoeffType { Const, -+ Grid, -+ Quad, -+ VecConst, -+ VecGrid, -+ VecQuad, -+ MatConst, -+ MatQuad -+ }; - - double coeff_function(const Vector &x) - { -@@ -41,11 +49,24 @@ void velocity_function(const Vector &x, Vector &v) - } - } - -+// Matrix-valued velocity coefficient -+void matrix_velocity_function(const Vector &x, DenseMatrix &m) -+{ -+ int dim = x.Size(); -+ Vector v(dim); -+ velocity_function(x, v); -+ m.SetSize(dim); -+ m = 0.5; -+ for (int i = 0; i < dim; i++) -+ { -+ m(i, i) = 1.0 + v(i); -+ } -+} -+ - // Vector valued quantity to convect - void quantity(const Vector &x, Vector &u) - { - int dim = x.Size(); -- - switch (dim) - { - case 1: u(0) = x[0]*x[0]; break; -@@ -59,7 +80,6 @@ void quantity(const Vector &x, Vector &u) - void convected_quantity(const Vector &x, Vector &u) - { - double a, b, c; -- - int dim = x.Size(); - switch (dim) - { -@@ -82,7 +102,7 @@ void convected_quantity(const Vector &x, Vector &u) - } - } - --std::string getString(AssemblyLevel assembly) -+std::string GetString(AssemblyLevel assembly) - { - switch (assembly) - { -@@ -106,7 +126,7 @@ std::string getString(AssemblyLevel assembly) - return ""; - } - --std::string getString(CeedCoeffType coeff_type) -+std::string GetString(CeedCoeffType coeff_type) - { - switch (coeff_type) - { -@@ -128,6 +148,12 @@ std::string getString(CeedCoeffType coeff_type) - case CeedCoeffType::VecQuad: - return "VecQuad"; - break; -+ case CeedCoeffType::MatConst: -+ return "MatConst"; -+ break; -+ case CeedCoeffType::MatQuad: -+ return "MatQuad"; -+ break; - } - MFEM_ABORT("Unknown CeedCoeffType."); - return ""; -@@ -138,10 +164,16 @@ enum class Problem { Mass, - Diffusion, - VectorMass, - VectorDiffusion, -- MassDiffusion -+ MassDiffusion, -+ HDivMass, -+ HCurlMass, -+ DivDiv, -+ CurlCurl, -+ MixedVectorGradient, -+ MixedVectorCurl - }; - --std::string getString(Problem pb) -+std::string GetString(Problem pb) - { - switch (pb) - { -@@ -163,6 +195,24 @@ std::string getString(Problem pb) - case Problem::MassDiffusion: - return "MassDiffusion"; - break; -+ case Problem::HDivMass: -+ return "HDivMass"; -+ break; -+ case Problem::HCurlMass: -+ return "HCurlMass"; -+ break; -+ case Problem::DivDiv: -+ return "DivDiv"; -+ break; -+ case Problem::CurlCurl: -+ return "CurlCurl"; -+ break; -+ case Problem::MixedVectorGradient: -+ return "MixedVectorGradient"; -+ break; -+ case Problem::MixedVectorCurl: -+ return "MixedVectorCurl"; -+ break; - } - MFEM_ABORT("Unknown Problem."); - return ""; -@@ -170,7 +220,7 @@ std::string getString(Problem pb) - - enum class NLProblem {Convection}; - --std::string getString(NLProblem pb) -+std::string GetString(NLProblem pb) - { - switch (pb) - { -@@ -178,14 +228,15 @@ std::string getString(NLProblem pb) - return "Convection"; - break; - } -- MFEM_ABORT("Unknown Problem."); -+ MFEM_ABORT("Unknown NLProblem."); - return ""; - } - - void InitCoeff(Mesh &mesh, FiniteElementCollection &fec, const int dim, - const CeedCoeffType coeff_type, GridFunction *&gf, -- FiniteElementSpace *& coeff_fes, -- Coefficient *&coeff, VectorCoefficient *&vcoeff) -+ FiniteElementSpace *&coeff_fes, -+ Coefficient *&coeff, VectorCoefficient *&vcoeff, -+ MatrixCoefficient *&mcoeff) - { - switch (coeff_type) - { -@@ -209,7 +260,7 @@ void InitCoeff(Mesh &mesh, FiniteElementCollection &fec, const int dim, - Vector val(dim); - for (int i = 0; i < dim; i++) - { -- val(i) = 1.0; -+ val(i) = 1.0 + i; - } - vcoeff = new VectorConstantCoefficient(val); - break; -@@ -226,21 +277,38 @@ void InitCoeff(Mesh &mesh, FiniteElementCollection &fec, const int dim, - case CeedCoeffType::VecQuad: - vcoeff = new VectorFunctionCoefficient(dim, velocity_function); - break; -+ case CeedCoeffType::MatConst: -+ { -+ DenseMatrix val(dim); -+ val = 0.5; -+ for (int i = 0; i < dim; i++) -+ { -+ val(i, i) = 1.0 + i; -+ } -+ mcoeff = new MatrixConstantCoefficient(val); -+ break; -+ } -+ case CeedCoeffType::MatQuad: -+ mcoeff = new MatrixFunctionCoefficient(dim, matrix_velocity_function); -+ break; - } - } - --void test_ceed_operator(const char* input, int order, -+void test_ceed_operator(const char *input, int order, - const CeedCoeffType coeff_type, const Problem pb, -- const AssemblyLevel assembly) -+ const AssemblyLevel assembly, bool mixed_p, bool bdr_integ) - { -- std::string section = "assembly: " + getString(assembly) + "\n" + -- "coeff_type: " + getString(coeff_type) + "\n" + -- "pb: " + getString(pb) + "\n" + -+ std::string section = "assembly: " + GetString(assembly) + "\n" + -+ "coeff_type: " + GetString(coeff_type) + "\n" + -+ "pb: " + GetString(pb) + "\n" + - "order: " + std::to_string(order) + "\n" + -+ (mixed_p ? "mixed_p: true\n" : "") + -+ (bdr_integ ? "bdr_integ: true\n" : "") + - "mesh: " + input; - INFO(section); - Mesh mesh(input, 1, 1); - mesh.EnsureNodes(); -+ if (mixed_p) { mesh.EnsureNCMesh(); } - int dim = mesh.Dimension(); - H1_FECollection fec(order, dim); - -@@ -249,43 +317,65 @@ void test_ceed_operator(const char* input, int order, - FiniteElementSpace *coeff_fes = nullptr; - Coefficient *coeff = nullptr; - VectorCoefficient *vcoeff = nullptr; -- InitCoeff(mesh, fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff); -+ MatrixCoefficient *mcoeff = nullptr; -+ InitCoeff(mesh, fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff, mcoeff); -+ MFEM_VERIFY(!mcoeff, -+ "Unexpected matrix-valued coefficient in test_ceed_operator."); - - // Build the BilinearForm - bool vecOp = pb == Problem::VectorMass || pb == Problem::VectorDiffusion; - const int vdim = vecOp ? dim : 1; - FiniteElementSpace fes(&mesh, &fec, vdim); -+ if (mixed_p) -+ { -+ fes.SetElementOrder(0, order+1); -+ fes.SetElementOrder(fes.GetNE() - 1, order+1); -+ fes.Update(false); -+ } - -- BilinearForm k_test(&fes); - BilinearForm k_ref(&fes); -+ BilinearForm k_test(&fes); -+ auto AddIntegrator = [&bdr_integ](BilinearForm &k, BilinearFormIntegrator *blfi) -+ { -+ if (bdr_integ) -+ { -+ k.AddBoundaryIntegrator(blfi); -+ } -+ else -+ { -+ k.AddDomainIntegrator(blfi); -+ } -+ }; - switch (pb) - { - case Problem::Mass: -- k_ref.AddDomainIntegrator(new MassIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new MassIntegrator(*coeff)); -+ AddIntegrator(k_ref, new MassIntegrator(*coeff)); -+ AddIntegrator(k_test, new MassIntegrator(*coeff)); - break; - case Problem::Convection: -- k_ref.AddDomainIntegrator(new ConvectionIntegrator(*vcoeff,-1)); -- k_test.AddDomainIntegrator(new ConvectionIntegrator(*vcoeff,-1)); -+ AddIntegrator(k_ref, new ConvectionIntegrator(*vcoeff, -1)); -+ AddIntegrator(k_test, new ConvectionIntegrator(*vcoeff, -1)); - break; - case Problem::Diffusion: -- k_ref.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -+ AddIntegrator(k_ref, new DiffusionIntegrator(*coeff)); -+ AddIntegrator(k_test, new DiffusionIntegrator(*coeff)); - break; - case Problem::VectorMass: -- k_ref.AddDomainIntegrator(new VectorMassIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new VectorMassIntegrator(*coeff)); -+ AddIntegrator(k_ref, new VectorMassIntegrator(*coeff)); -+ AddIntegrator(k_test, new VectorMassIntegrator(*coeff)); - break; - case Problem::VectorDiffusion: -- k_ref.AddDomainIntegrator(new VectorDiffusionIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new VectorDiffusionIntegrator(*coeff)); -+ AddIntegrator(k_ref, new VectorDiffusionIntegrator(*coeff)); -+ AddIntegrator(k_test, new VectorDiffusionIntegrator(*coeff)); - break; - case Problem::MassDiffusion: -- k_ref.AddDomainIntegrator(new MassIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new MassIntegrator(*coeff)); -- k_ref.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -+ AddIntegrator(k_ref, new MassIntegrator(*coeff)); -+ AddIntegrator(k_test, new MassIntegrator(*coeff)); -+ AddIntegrator(k_ref, new DiffusionIntegrator(*coeff)); -+ AddIntegrator(k_test, new DiffusionIntegrator(*coeff)); - break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); - } - - k_ref.Assemble(); -@@ -294,121 +384,499 @@ void test_ceed_operator(const char* input, int order, - k_test.SetAssemblyLevel(assembly); - k_test.Assemble(); - -- // Compare ceed with mfem. -+ // Compare ceed with mfem - GridFunction x(&fes), y_ref(&fes), y_test(&fes); -+ Vector d_ref(fes.GetTrueVSize()), d_test(fes.GetTrueVSize()); - - x.Randomize(1); - -- k_ref.Mult(x,y_ref); -- k_test.Mult(x,y_test); -+ k_ref.Mult(x, y_ref); -+ k_test.Mult(x, y_test); - - y_test -= y_ref; - -- REQUIRE(y_test.Norml2() < 1.e-12); -+ REQUIRE(y_test.Norml2() < 1.e-12 * std::max(y_ref.Norml2(), 1.0)); -+ -+ if (mesh.Nonconforming()) -+ { -+ k_ref.ConformingAssemble(); -+ } -+ k_ref.AssembleDiagonal(d_ref); -+ k_test.AssembleDiagonal(d_test); -+ -+ d_test -= d_ref; -+ -+ // // TODO: Debug -+ // if (mesh.Nonconforming() && -+ // d_test.Norml2() > 0.1 * d_ref.Norml2()) -+ // { -+ // out << "\nDIAGONAL ASSEMBLY DELTA\n\n"; -+ // d_test.Print(); -+ // out << "\nDIAGONAL ASSEMBLY REF\n\n"; -+ // d_ref.Print(); -+ // // Vector temp(d_test); -+ // // temp += d_ref; -+ // // out << "\nDIAGONAL ASSEMBLY TEST\n\n"; -+ // // temp.Print(); -+ // } -+ -+ REQUIRE(d_test.Norml2() < -+ (mesh.Nonconforming() ? 1.0 : 1.e-12) * std::max(d_ref.Norml2(), 1.0)); - delete gf; - delete coeff_fes; - delete coeff; - delete vcoeff; -+ delete mcoeff; - } - --void test_mixed_p_ceed_operator(const char* input, int order, -- const CeedCoeffType coeff_type, const Problem pb, -- const AssemblyLevel assembly) -+void test_ceed_vectorfe_operator(const char *input, int order, -+ const CeedCoeffType coeff_type, const Problem pb, -+ const AssemblyLevel assembly, bool bdr_integ) - { -- std::string section = "assembly: " + getString(assembly) + "\n" + -- "coeff_type: " + getString(coeff_type) + "\n" + -- "pb: " + getString(pb) + "\n" + -+ std::string section = "assembly: " + GetString(assembly) + "\n" + -+ "coeff_type: " + GetString(coeff_type) + "\n" + -+ "pb: " + GetString(pb) + "\n" + - "order: " + std::to_string(order) + "\n" + -+ (bdr_integ ? "bdr_integ: true\n" : "") + - "mesh: " + input; - INFO(section); - Mesh mesh(input, 1, 1); - mesh.EnsureNodes(); -- mesh.EnsureNCMesh(); - int dim = mesh.Dimension(); -- MFEM_VERIFY(dim == 2, "p-adaptivity only supported in serial 2D."); -- H1_FECollection fec(order, dim); -+ FiniteElementCollection *fec = nullptr; -+ if ((pb == Problem::HDivMass || pb == Problem::DivDiv) && bdr_integ) -+ { -+ // Boundary RT elements in 2D and 3D are actually L2 -+ return; -+ } -+ if (pb == Problem::CurlCurl && dim - bdr_integ < 2) -+ { -+ // No 1D ND curl shape -+ return; -+ } -+ switch (pb) -+ { -+ case Problem::Mass: -+ case Problem::Diffusion: -+ fec = new H1_FECollection(order, dim); -+ break; -+ case Problem::HDivMass: -+ case Problem::DivDiv: -+ fec = new RT_FECollection(order-1, dim); -+ break; -+ case Problem::HCurlMass: -+ case Problem::CurlCurl: -+ fec = new ND_FECollection(order, dim); -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } - - // Coefficient Initialization - GridFunction *gf = nullptr; - FiniteElementSpace *coeff_fes = nullptr; - Coefficient *coeff = nullptr; - VectorCoefficient *vcoeff = nullptr; -- InitCoeff(mesh, fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff); -+ MatrixCoefficient *mcoeff = nullptr; -+ InitCoeff(mesh, *fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff, mcoeff); -+ if (!coeff && (pb == Problem::Mass || pb == Problem::DivDiv || -+ (pb == Problem::CurlCurl && dim - bdr_integ < 3))) -+ { -+ delete gf; -+ delete coeff_fes; -+ delete coeff; -+ delete vcoeff; -+ delete mcoeff; -+ delete fec; -+ return; -+ } - - // Build the BilinearForm -- bool vecOp = pb == Problem::VectorMass || pb == Problem::VectorDiffusion; -- const int vdim = vecOp ? dim : 1; -- FiniteElementSpace fes(&mesh, &fec, vdim); -- fes.SetElementOrder(0, order+1); -- fes.SetElementOrder(fes.GetNE() - 1, order+1); -- fes.Update(false); -+ FiniteElementSpace fes(&mesh, fec); - -- BilinearForm k_test(&fes); - BilinearForm k_ref(&fes); -+ BilinearForm k_test(&fes); -+ auto AddIntegrator = [&bdr_integ](BilinearForm &k, BilinearFormIntegrator *blfi) -+ { -+ if (bdr_integ) -+ { -+ k.AddBoundaryIntegrator(blfi); -+ } -+ else -+ { -+ k.AddDomainIntegrator(blfi); -+ } -+ }; - switch (pb) - { - case Problem::Mass: -- k_ref.AddDomainIntegrator(new MassIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new MassIntegrator(*coeff)); -- break; -- case Problem::Convection: -- k_ref.AddDomainIntegrator(new ConvectionIntegrator(*vcoeff,-1)); -- k_test.AddDomainIntegrator(new ConvectionIntegrator(*vcoeff,-1)); -+ AddIntegrator(k_ref, new MassIntegrator(*coeff)); -+ AddIntegrator(k_test, new MassIntegrator(*coeff)); - break; - case Problem::Diffusion: -- k_ref.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -+ if (coeff) -+ { -+ AddIntegrator(k_ref, new DiffusionIntegrator(*coeff)); -+ AddIntegrator(k_test, new DiffusionIntegrator(*coeff)); -+ } -+ else if (vcoeff) -+ { -+ AddIntegrator(k_ref, new DiffusionIntegrator(*vcoeff)); -+ AddIntegrator(k_test, new DiffusionIntegrator(*vcoeff)); -+ } -+ else if (mcoeff) -+ { -+ AddIntegrator(k_ref, new DiffusionIntegrator(*mcoeff)); -+ AddIntegrator(k_test, new DiffusionIntegrator(*mcoeff)); -+ } - break; -- case Problem::VectorMass: -- k_ref.AddDomainIntegrator(new VectorMassIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new VectorMassIntegrator(*coeff)); -+ case Problem::HDivMass: -+ case Problem::HCurlMass: -+ if (coeff) -+ { -+ AddIntegrator(k_ref, new VectorFEMassIntegrator(*coeff)); -+ AddIntegrator(k_test, new VectorFEMassIntegrator(*coeff)); -+ } -+ else if (vcoeff) -+ { -+ AddIntegrator(k_ref, new VectorFEMassIntegrator(*vcoeff)); -+ AddIntegrator(k_test, new VectorFEMassIntegrator(*vcoeff)); -+ } -+ else if (mcoeff) -+ { -+ AddIntegrator(k_ref, new VectorFEMassIntegrator(*mcoeff)); -+ AddIntegrator(k_test, new VectorFEMassIntegrator(*mcoeff)); -+ } - break; -- case Problem::VectorDiffusion: -- k_ref.AddDomainIntegrator(new VectorDiffusionIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new VectorDiffusionIntegrator(*coeff)); -+ case Problem::DivDiv: -+ AddIntegrator(k_ref, new DivDivIntegrator(*coeff)); -+ AddIntegrator(k_test, new DivDivIntegrator(*coeff)); - break; -- case Problem::MassDiffusion: -- k_ref.AddDomainIntegrator(new MassIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new MassIntegrator(*coeff)); -- k_ref.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -- k_test.AddDomainIntegrator(new DiffusionIntegrator(*coeff)); -+ case Problem::CurlCurl: -+ if (coeff) -+ { -+ AddIntegrator(k_ref, new CurlCurlIntegrator(*coeff)); -+ AddIntegrator(k_test, new CurlCurlIntegrator(*coeff)); -+ } -+ else if (vcoeff) -+ { -+ AddIntegrator(k_ref, new CurlCurlIntegrator(*vcoeff)); -+ AddIntegrator(k_test, new CurlCurlIntegrator(*vcoeff)); -+ } -+ else if (mcoeff) -+ { -+ AddIntegrator(k_ref, new CurlCurlIntegrator(*mcoeff)); -+ AddIntegrator(k_test, new CurlCurlIntegrator(*mcoeff)); -+ } - break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); - } - -+ // Timer for profiling -+ const int trials = 1; -+ const bool debug = false; -+ StopWatch chrono_setup_ref, chrono_setup_test; -+ StopWatch chrono_apply_ref, chrono_apply_test; -+ chrono_setup_ref.Clear(); -+ chrono_setup_ref.Start(); -+ - k_ref.Assemble(); - k_ref.Finalize(); - -+ chrono_setup_ref.Stop(); -+ chrono_setup_test.Clear(); -+ chrono_setup_test.Start(); -+ - k_test.SetAssemblyLevel(assembly); - k_test.Assemble(); - -- // Compare ceed with mfem. -+ chrono_setup_test.Stop(); -+ -+ // Compare ceed with mfem - GridFunction x(&fes), y_ref(&fes), y_test(&fes); -+ Vector d_ref(fes.GetTrueVSize()), d_test(fes.GetTrueVSize()); - - x.Randomize(1); - -- k_ref.Mult(x,y_ref); -- k_test.Mult(x,y_test); -+ chrono_apply_ref.Clear(); -+ chrono_apply_ref.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ k_ref.Mult(x, y_ref); -+ } -+ -+ chrono_apply_ref.Stop(); -+ chrono_apply_test.Clear(); -+ chrono_apply_test.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ k_test.Mult(x, y_test); -+ } -+ -+ chrono_apply_test.Stop(); - - y_test -= y_ref; - -- REQUIRE(y_test.Norml2() < 1.e-12); -+ REQUIRE(y_test.Norml2() < 1.e-12 * std::max(y_ref.Norml2(), 1.0)); -+ -+ if (mesh.Nonconforming()) -+ { -+ k_ref.ConformingAssemble(); -+ } -+ k_ref.AssembleDiagonal(d_ref); -+ k_test.AssembleDiagonal(d_test); -+ -+ d_test -= d_ref; -+ -+ // // TODO: Debug -+ // if (!UsesTensorBasis(fes) && order > 1 && -+ // (pb == Problem::HCurlMass || pb == Problem::CurlCurl) && -+ // d_test.Norml2() > 0.1 * d_ref.Norml2()) -+ // { -+ // out << "\nH(CURL) DIAGONAL ASSEMBLY DELTA\n\n"; -+ // d_test.Print(); -+ // out << "\nH(CURL) DIAGONAL ASSEMBLY REF\n\n"; -+ // d_ref.Print(); -+ // // Vector temp(d_test); -+ // // temp += d_ref; -+ // // out << "\nH(CURL) DIAGONAL ASSEMBLY TEST\n\n"; -+ // // temp.Print(); -+ // } -+ -+ REQUIRE(d_test.Norml2() < -+ (mesh.Nonconforming() || -+ (!UsesTensorBasis(fes) && order > 1 && -+ (pb == Problem::HCurlMass || pb == Problem::CurlCurl)) ? -+ 1.0 : 1.e-12) * std::max(d_ref.Norml2(), 1.0)); -+ -+ if (debug) -+ { -+ // Estimates only for !bdr_integ, non-mixed meshes -+ std::size_t mem_test = 0; -+ if (!bdr_integ && mesh.GetNumGeometries(dim) == 1) -+ { -+ const FiniteElement &fe = *fes.GetFE(0); -+ ElementTransformation &T = *mesh.GetElementTransformation(0); -+ const int Q = (*k_ref.GetDBFI())[0]->GetRule(fe, T).GetNPoints(); -+ const int P = fe.GetDof(); -+ switch (pb) -+ { -+ case Problem::Mass: -+ mem_test = Q * 1 * 8; -+ mem_test += P * 4; -+ case Problem::Diffusion: -+ mem_test = Q * (dim * (dim + 1)) / 2 * 8; -+ mem_test += P * 4; -+ break; -+ case Problem::HDivMass: -+ mem_test = Q * (dim * (dim + 1)) / 2 * 8; -+ mem_test += P * 4; -+ case Problem::DivDiv: -+ mem_test = Q * 1 * 8; -+ mem_test += P * 4; -+ break; -+ case Problem::HCurlMass: -+ mem_test = Q * (dim * (dim + 1)) / 2 * 8; -+ mem_test += P * 3 * 4; // Tri-diagonal curl orientations -+ case Problem::CurlCurl: -+ mem_test = Q * (dim - bdr_integ < 3 ? 1 : dim * (dim + 1) / 2) * 8; -+ mem_test += P * 3 * 4; -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } -+ mem_test *= mesh.GetNE(); // Estimate for QFunction memory -+ } -+ std::size_t mem_ref = k_ref.SpMat().NumNonZeroElems() * (8 + 4) + -+ k_ref.Height() * 4; -+ -+ out << "\n" << section << "\n"; -+ out << "benchmark (" << fes.GetTrueVSize() << " unknowns)\n" -+ << " setup: ref = " -+ << chrono_setup_ref.RealTime() * 1e3 << " ms\n" -+ << " test = " -+ << chrono_setup_test.RealTime() * 1e3 << " ms\n" -+ << " apply: ref = " -+ << chrono_apply_ref.RealTime() * 1e3 / trials << " ms\n" -+ << " test = " -+ << chrono_apply_test.RealTime() * 1e3 / trials << " ms\n" -+ << " mem usage: ref = " << mem_ref / 1e6 << " MB\n" -+ << " test = " << mem_test / 1e6 << " MB\n"; -+ } - delete gf; - delete coeff_fes; - delete coeff; - delete vcoeff; -+ delete mcoeff; -+ delete fec; - } - --void test_ceed_nloperator(const char* mesh_filename, int order, -+void test_ceed_mixed_operator(const char *input, int order, -+ const CeedCoeffType coeff_type, const Problem pb, -+ const AssemblyLevel assembly, bool bdr_integ) -+{ -+ std::string section = "assembly: " + GetString(assembly) + "\n" + -+ "coeff_type: " + GetString(coeff_type) + "\n" + -+ "pb: " + GetString(pb) + "\n" + -+ "order: " + std::to_string(order) + "\n" + -+ (bdr_integ ? "bdr_integ: true\n" : "") + -+ "mesh: " + input; -+ INFO(section); -+ Mesh mesh(input, 1, 1); -+ mesh.EnsureNodes(); -+ int dim = mesh.Dimension(); -+ FiniteElementCollection *trial_fec = nullptr, *test_fec = nullptr; -+ if (pb == Problem::MixedVectorGradient && dim - bdr_integ < 2) -+ { -+ // MixedVectorGradient is only supported in 2D or 3D -+ return; -+ } -+ if (pb == Problem::MixedVectorCurl && dim - bdr_integ < 3) -+ { -+ // MixedVectorCurl is only supported in 3D -+ return; -+ } -+ switch (pb) -+ { -+ case Problem::MixedVectorGradient: -+ trial_fec = new H1_FECollection(order, dim); -+ test_fec = new ND_FECollection(order, dim); -+ break; -+ case Problem::MixedVectorCurl: -+ trial_fec = new ND_FECollection(order, dim); -+ test_fec = new RT_FECollection(order - 1, dim); -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } -+ -+ // Coefficient Initialization -+ GridFunction *gf = nullptr; -+ FiniteElementSpace *coeff_fes = nullptr; -+ Coefficient *coeff = nullptr; -+ VectorCoefficient *vcoeff = nullptr; -+ MatrixCoefficient *mcoeff = nullptr; -+ InitCoeff(mesh, *trial_fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff, -+ mcoeff); -+ -+ // Build the BilinearForm -+ FiniteElementSpace trial_fes(&mesh, trial_fec); -+ FiniteElementSpace test_fes(&mesh, test_fec); -+ -+ MixedBilinearForm k_ref(&trial_fes, &test_fes); -+ MixedBilinearForm k_test(&trial_fes, &test_fes); -+ MixedBilinearForm k_test_t(&test_fes, &trial_fes); -+ auto AddIntegrator = [&bdr_integ](MixedBilinearForm &k, -+ BilinearFormIntegrator *blfi) -+ { -+ if (bdr_integ) -+ { -+ k.AddBoundaryIntegrator(blfi); -+ } -+ else -+ { -+ k.AddDomainIntegrator(blfi); -+ } -+ }; -+ switch (pb) -+ { -+ case Problem::MixedVectorGradient: -+ if (coeff) -+ { -+ AddIntegrator(k_ref, new MixedVectorGradientIntegrator(*coeff)); -+ AddIntegrator(k_test, new MixedVectorGradientIntegrator(*coeff)); -+ AddIntegrator(k_test_t, new MixedVectorWeakDivergenceIntegrator(*coeff)); -+ } -+ else if (vcoeff) -+ { -+ AddIntegrator(k_ref, new MixedVectorGradientIntegrator(*vcoeff)); -+ AddIntegrator(k_test, new MixedVectorGradientIntegrator(*vcoeff)); -+ AddIntegrator(k_test_t, new MixedVectorWeakDivergenceIntegrator(*vcoeff)); -+ } -+ else if (mcoeff) -+ { -+ AddIntegrator(k_ref, new MixedVectorGradientIntegrator(*mcoeff)); -+ AddIntegrator(k_test, new MixedVectorGradientIntegrator(*mcoeff)); -+ AddIntegrator(k_test_t, new MixedVectorWeakDivergenceIntegrator(*mcoeff)); -+ } -+ break; -+ case Problem::MixedVectorCurl: -+ if (coeff) -+ { -+ AddIntegrator(k_ref, new MixedVectorCurlIntegrator(*coeff)); -+ AddIntegrator(k_test, new MixedVectorCurlIntegrator(*coeff)); -+ AddIntegrator(k_test_t, new MixedVectorWeakCurlIntegrator(*coeff)); -+ } -+ else if (vcoeff) -+ { -+ AddIntegrator(k_ref, new MixedVectorCurlIntegrator(*vcoeff)); -+ AddIntegrator(k_test, new MixedVectorCurlIntegrator(*vcoeff)); -+ AddIntegrator(k_test_t, new MixedVectorWeakCurlIntegrator(*vcoeff)); -+ } -+ else if (mcoeff) -+ { -+ AddIntegrator(k_ref, new MixedVectorCurlIntegrator(*mcoeff)); -+ AddIntegrator(k_test, new MixedVectorCurlIntegrator(*mcoeff)); -+ AddIntegrator(k_test_t, new MixedVectorWeakCurlIntegrator(*mcoeff)); -+ } -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } -+ -+ k_ref.Assemble(); -+ k_ref.Finalize(); -+ -+ k_test.SetAssemblyLevel(assembly); -+ k_test.Assemble(); -+ -+ k_test_t.SetAssemblyLevel(assembly); -+ k_test_t.Assemble(); -+ -+ // Compare ceed with mfem -+ GridFunction x(&trial_fes), y_ref(&test_fes), y_test(&test_fes); -+ GridFunction x_t(&test_fes), y_t_ref(&trial_fes), y_t_test(&trial_fes); -+ -+ x.Randomize(1); -+ -+ k_ref.Mult(x, y_ref); -+ k_test.Mult(x, y_test); -+ -+ y_test -= y_ref; -+ -+ REQUIRE(y_test.Norml2() < 1.e-12 * std::max(y_ref.Norml2(), 1.0)); -+ -+ x_t.Randomize(1); -+ -+ k_ref.MultTranspose(x_t, y_t_ref); -+ k_test_t.Mult(x_t, y_t_test); -+ -+ y_t_test.Add((pb == Problem::MixedVectorCurl) ? -1.0 : 1.0, y_t_ref); -+ -+ REQUIRE(y_t_test.Norml2() < 1.e-12 * std::max(y_t_ref.Norml2(), 1.0)); -+ delete gf; -+ delete coeff_fes; -+ delete coeff; -+ delete vcoeff; -+ delete mcoeff; -+ delete trial_fec; -+ delete test_fec; -+} -+ -+void test_ceed_nloperator(const char *input, int order, - const CeedCoeffType coeff_type, - const NLProblem pb, const AssemblyLevel assembly) - { -- std::string section = "assembly: " + getString(assembly) + "\n" + -- "coeff_type: " + getString(coeff_type) + "\n" + -- "pb: " + getString(pb) + "\n" + -+ std::string section = "assembly: " + GetString(assembly) + "\n" + -+ "coeff_type: " + GetString(coeff_type) + "\n" + -+ "pb: " + GetString(pb) + "\n" + - "order: " + std::to_string(order) + "\n" + -- "mesh: " + mesh_filename; -+ "mesh: " + input; - INFO(section); -- Mesh mesh(mesh_filename, 1, 1); -+ Mesh mesh(input, 1, 1); - mesh.EnsureNodes(); - int dim = mesh.Dimension(); - H1_FECollection fec(order, dim); -@@ -418,15 +886,18 @@ void test_ceed_nloperator(const char* mesh_filename, int order, - FiniteElementSpace *coeff_fes = nullptr; - Coefficient *coeff = nullptr; - VectorCoefficient *vcoeff = nullptr; -- InitCoeff(mesh, fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff); -+ MatrixCoefficient *mcoeff = nullptr; -+ InitCoeff(mesh, fec, dim, coeff_type, gf, coeff_fes, coeff, vcoeff, mcoeff); -+ MFEM_VERIFY(!vcoeff && !mcoeff, -+ "Unexpected vector- or matrix-valued coefficient in test_ceed_nloperator."); - - // Build the NonlinearForm - bool vecOp = pb == NLProblem::Convection; - const int vdim = vecOp ? dim : 1; - FiniteElementSpace fes(&mesh, &fec, vdim); - -- NonlinearForm k_test(&fes); - NonlinearForm k_ref(&fes); -+ NonlinearForm k_test(&fes); - switch (pb) - { - case NLProblem::Convection: -@@ -435,42 +906,48 @@ void test_ceed_nloperator(const char* mesh_filename, int order, - break; - } - -+ k_ref.Setup(); - k_test.SetAssemblyLevel(assembly); - k_test.Setup(); -- k_ref.Setup(); - -- // Compare ceed with mfem. -+ // Compare ceed with mfem - GridFunction x(&fes), y_ref(&fes), y_test(&fes); - - x.Randomize(1); - -- k_ref.Mult(x,y_ref); -- k_test.Mult(x,y_test); -+ k_ref.Mult(x, y_ref); -+ k_test.Mult(x, y_test); - - y_test -= y_ref; - -- REQUIRE(y_test.Norml2() < 1.e-12); -+ REQUIRE(y_test.Norml2() < 1.e-12 * std::max(y_ref.Norml2(), 1.0)); - delete gf; - delete coeff_fes; - delete coeff; - delete vcoeff; -+ delete mcoeff; - } - - // This function specifically tests convection of a vector valued quantity and - // using a custom integration rule. The integration rule is chosen s.t. in - // combination with an appropriate order, it can represent the analytical - // polynomial functions correctly. --void test_ceed_convection(const char* mesh_filename, int order, -+void test_ceed_convection(const char *input, int order, - const AssemblyLevel assembly) - { -- Mesh mesh(mesh_filename, 1, 1); -+ std::string section = "assembly: " + GetString(assembly) + "\n" + -+ "order: " + std::to_string(order) + "\n" + -+ "mesh: " + input; -+ INFO(section); -+ Mesh mesh(input, 1, 1); - mesh.EnsureNodes(); - int dim = mesh.Dimension(); - H1_FECollection fec(order, dim); - - VectorFunctionCoefficient velocity_coeff(dim, velocity_function); - -- FiniteElementSpace fes(&mesh, &fec, dim); -+ FiniteElementSpace fes(&mesh, &fec, 1); -+ FiniteElementSpace vfes(&mesh, &fec, dim); - BilinearForm conv_op(&fes); - - IntegrationRules rules(0, Quadrature1D::GaussLobatto); -@@ -483,7 +960,7 @@ void test_ceed_convection(const char* mesh_filename, int order, - conv_op.SetAssemblyLevel(assembly); - conv_op.Assemble(); - -- GridFunction q(&fes), r(&fes), ex(&fes); -+ GridFunction q(&vfes), r(&vfes), ex(&vfes); - - VectorFunctionCoefficient quantity_coeff(dim, quantity); - q.ProjectCoefficient(quantity_coeff); -@@ -492,9 +969,15 @@ void test_ceed_convection(const char* mesh_filename, int order, - ex.ProjectCoefficient(convected_quantity_coeff); - - r = 0.0; -- conv_op.Mult(q, r); -+ for (int i = 0; i < dim; i++) -+ { -+ GridFunction qi, ri; -+ qi.MakeRef(&fes, q, i * fes.GetVSize()); -+ ri.MakeRef(&fes, r, i * fes.GetVSize()); -+ conv_op.Mult(qi, ri); -+ } - -- LinearForm f(&fes); -+ LinearForm f(&vfes); - VectorDomainLFIntegrator *vlf_integ = new VectorDomainLFIntegrator( - convected_quantity_coeff); - vlf_integ->SetIntRule(&ir); -@@ -503,7 +986,360 @@ void test_ceed_convection(const char* mesh_filename, int order, - - r -= f; - -- REQUIRE(r.Norml2() < 1e-12); -+ REQUIRE(r.Norml2() < 1.e-12 * std::max(f.Norml2(), 1.0)); -+} -+ -+void test_ceed_full_assembly(const char *input, int order, -+ const AssemblyLevel assembly) -+{ -+ std::string section = "assembly: " + GetString(assembly) + "\n" + -+ "order: " + std::to_string(order) + "\n" + -+ "mesh: " + input; -+ INFO(section); -+ Mesh mesh(input, 1, 1); -+ mesh.EnsureNodes(); -+ int dim = mesh.Dimension(); -+ H1_FECollection fec(order, dim); -+ -+ DenseMatrix val(dim); -+ val = 0.0; -+ for (int i = 0; i < dim; i++) -+ { -+ val(i, i) = 1.0 + i; -+ } -+ MatrixConstantCoefficient diff_coeff(val); -+ ConstantCoefficient mass_coeff(1.0); -+ -+ FiniteElementSpace fes(&mesh, &fec, 1); -+ BilinearForm k_test(&fes); -+ BilinearForm k_ref(&fes); -+ -+ k_ref.AddDomainIntegrator(new MassIntegrator(mass_coeff)); -+ k_test.AddDomainIntegrator(new MassIntegrator(mass_coeff)); -+ k_ref.AddBoundaryIntegrator(new MassIntegrator(mass_coeff)); -+ k_test.AddBoundaryIntegrator(new MassIntegrator(mass_coeff)); -+ k_ref.AddDomainIntegrator(new DiffusionIntegrator(diff_coeff)); -+ k_test.AddDomainIntegrator(new DiffusionIntegrator(diff_coeff)); -+ -+ k_ref.Assemble(); -+ k_ref.Finalize(); -+ -+ k_test.SetAssemblyLevel(assembly); -+ k_test.Assemble(); -+ -+ SparseMatrix *mat_ref = &k_ref.SpMat(); -+ SparseMatrix *mat_test = ceed::CeedOperatorFullAssemble(k_test); -+ SparseMatrix *mat_diff = Add(1.0, *mat_ref, -1.0, *mat_test); -+ -+ REQUIRE(mat_diff->MaxNorm() < 1.e-12 * std::max(mat_ref->MaxNorm(), 1.0)); -+ delete mat_diff; -+ delete mat_test; -+} -+ -+void test_ceed_linear_interpolator(const char *input, int order) -+{ -+ std::string section = "order: " + std::to_string(order) + "\n" + -+ "mesh: " + input; -+ INFO(section); -+ Mesh mesh(input, 1, 1); -+ mesh.EnsureNodes(); -+ int dim = mesh.Dimension(); -+ H1_FECollection h1_fec(order, dim); -+ ND_FECollection nd_fec(order, dim); -+ RT_FECollection rt_fec(order - 1, dim); -+ -+ // Build the DiscreteLinearOperator -+ FiniteElementSpace h1_fes(&mesh, &h1_fec); -+ FiniteElementSpace nd_fes(&mesh, &nd_fec); -+ FiniteElementSpace rt_fes(&mesh, &rt_fec); -+ -+ // Discrete gradient -+ DiscreteLinearOperator grad_ref(&h1_fes, &nd_fes); -+ DiscreteLinearOperator grad_test(&h1_fes, &nd_fes); -+ grad_ref.AddDomainInterpolator(new GradientInterpolator); -+ grad_test.AddDomainInterpolator(new GradientInterpolator); -+ -+ // Timer for profiling -+ const int trials = 1; -+ const bool debug = false; -+ StopWatch chrono_setup_grad_ref, chrono_setup_grad_test; -+ StopWatch chrono_apply_grad_ref, chrono_apply_grad_test; -+ StopWatch chrono_apply_id_ref, chrono_apply_id_test; -+ chrono_setup_grad_ref.Clear(); -+ chrono_setup_grad_ref.Start(); -+ -+ grad_ref.Assemble(); -+ grad_ref.Finalize(); -+ -+ chrono_setup_grad_ref.Stop(); -+ chrono_setup_grad_test.Clear(); -+ chrono_setup_grad_test.Start(); -+ -+ grad_test.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ grad_test.Assemble(); -+ -+ chrono_setup_grad_test.Stop(); -+ -+ // Compare ceed with mfem -+ { -+ GridFunction x(&h1_fes), y_ref(&nd_fes), y_test(&nd_fes); -+ GridFunction x_t(&nd_fes), y_t_ref(&h1_fes), y_t_test(&h1_fes); -+ -+ x.Randomize(1); -+ -+ chrono_apply_grad_ref.Clear(); -+ chrono_apply_grad_ref.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ grad_ref.Mult(x, y_ref); -+ } -+ -+ chrono_apply_grad_ref.Stop(); -+ chrono_apply_grad_test.Clear(); -+ chrono_apply_grad_test.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ grad_test.Mult(x, y_test); -+ } -+ -+ chrono_apply_grad_test.Stop(); -+ -+ y_test -= y_ref; -+ -+ REQUIRE(y_test.Norml2() < 1.e-12); -+ -+ x_t.Randomize(1); -+ -+ chrono_apply_grad_ref.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ grad_ref.MultTranspose(x_t, y_t_ref); -+ } -+ -+ chrono_apply_grad_ref.Stop(); -+ chrono_apply_grad_test.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ grad_test.MultTranspose(x_t, y_t_test); -+ } -+ -+ chrono_apply_grad_test.Stop(); -+ -+ y_t_test -= y_t_ref; -+ -+ REQUIRE(y_t_test.Norml2() < 1.e-12); -+ } -+ -+ // Discrete curl -+ if (dim == 3) -+ { -+ DiscreteLinearOperator curl_ref(&nd_fes, &rt_fes); -+ DiscreteLinearOperator curl_test(&nd_fes, &rt_fes); -+ curl_ref.AddDomainInterpolator(new CurlInterpolator); -+ curl_test.AddDomainInterpolator(new CurlInterpolator); -+ -+ curl_ref.Assemble(); -+ curl_ref.Finalize(); -+ -+ curl_test.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ curl_test.Assemble(); -+ -+ // Compare ceed with mfem -+ { -+ GridFunction x(&nd_fes), y_ref(&rt_fes), y_test(&rt_fes); -+ GridFunction x_t(&rt_fes), y_t_ref(&nd_fes), y_t_test(&nd_fes); -+ -+ x.Randomize(1); -+ -+ curl_ref.Mult(x, y_ref); -+ curl_test.Mult(x, y_test); -+ -+ y_test -= y_ref; -+ -+ REQUIRE(y_test.Norml2() < 1.e-10); -+ -+ x_t.Randomize(1); -+ -+ curl_ref.MultTranspose(x_t, y_t_ref); -+ curl_test.MultTranspose(x_t, y_t_test); -+ -+ y_t_test -= y_t_ref; -+ -+ REQUIRE(y_t_test.Norml2() < 1.e-10); -+ } -+ } -+ -+ // Prolongation and restriction -+ H1_FECollection fine_h1_fec(order + 1, dim); -+ ND_FECollection fine_nd_fec(order + 1, dim); -+ RT_FECollection fine_rt_fec(order, dim); -+ -+ FiniteElementSpace fine_h1_fes(&mesh, &fine_h1_fec); -+ FiniteElementSpace fine_nd_fes(&mesh, &fine_nd_fec); -+ FiniteElementSpace fine_rt_fes(&mesh, &fine_rt_fec); -+ -+ DiscreteLinearOperator id_h1_test(&h1_fes, &fine_h1_fes); -+ DiscreteLinearOperator id_nd_test(&nd_fes, &fine_nd_fes); -+ DiscreteLinearOperator id_rt_test(&rt_fes, &fine_rt_fes); -+ id_h1_test.AddDomainInterpolator(new IdentityInterpolator); -+ id_nd_test.AddDomainInterpolator(new IdentityInterpolator); -+ id_rt_test.AddDomainInterpolator(new IdentityInterpolator); -+ -+ id_h1_test.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ id_h1_test.Assemble(); -+ -+ id_nd_test.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ id_nd_test.Assemble(); -+ -+ id_rt_test.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ id_rt_test.Assemble(); -+ -+ TransferOperator id_h1_ref(h1_fes, fine_h1_fes); -+ TransferOperator id_nd_ref(nd_fes, fine_nd_fes); -+ TransferOperator id_rt_ref(rt_fes, fine_rt_fes); -+ -+ // Compare ceed with mfem -+ for (int t = 0; t < 3; t++) -+ { -+ GridFunction *x = nullptr, *y_t_ref = nullptr, *y_t_test = nullptr; -+ GridFunction *y_exact = nullptr, *y_ref = nullptr, *y_test = nullptr; -+ switch (t) -+ { -+ case 0: -+ x = new GridFunction(&h1_fes); -+ y_t_ref = new GridFunction(&h1_fes); -+ y_t_test = new GridFunction(&h1_fes); -+ y_exact = new GridFunction(&fine_h1_fes); -+ y_ref = new GridFunction(&fine_h1_fes); -+ y_test = new GridFunction(&fine_h1_fes); -+ break; -+ case 1: -+ x = new GridFunction(&nd_fes); -+ y_t_ref = new GridFunction(&nd_fes); -+ y_t_test = new GridFunction(&nd_fes); -+ y_exact = new GridFunction(&fine_nd_fes); -+ y_ref = new GridFunction(&fine_nd_fes); -+ y_test = new GridFunction(&fine_nd_fes); -+ break; -+ case 2: -+ x = new GridFunction(&rt_fes); -+ y_t_ref = new GridFunction(&rt_fes); -+ y_t_test = new GridFunction(&rt_fes); -+ y_exact = new GridFunction(&fine_rt_fes); -+ y_ref = new GridFunction(&fine_rt_fes); -+ y_test = new GridFunction(&fine_rt_fes); -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } -+ -+ if (t == 0) -+ { -+ FunctionCoefficient f_coeff(coeff_function); -+ x->ProjectCoefficient(f_coeff); -+ y_exact->ProjectCoefficient(f_coeff); -+ } -+ else -+ { -+ VectorFunctionCoefficient vf_coeff(dim, velocity_function); -+ x->ProjectCoefficient(vf_coeff); -+ y_exact->ProjectCoefficient(vf_coeff); -+ } -+ -+ chrono_apply_id_ref.Clear(); -+ chrono_apply_id_ref.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ switch (t) -+ { -+ case 0: -+ id_h1_ref.Mult(*x, *y_ref); -+ id_h1_ref.MultTranspose(*y_exact, *y_t_ref); -+ break; -+ case 1: -+ id_nd_ref.Mult(*x, *y_ref); -+ id_nd_ref.MultTranspose(*y_exact, *y_t_ref); -+ break; -+ case 2: -+ id_rt_ref.Mult(*x, *y_ref); -+ id_rt_ref.MultTranspose(*y_exact, *y_t_ref); -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } -+ } -+ -+ chrono_apply_id_ref.Stop(); -+ chrono_apply_id_test.Clear(); -+ chrono_apply_id_test.Start(); -+ -+ for (int trial = 0; trial < trials; trial++) -+ { -+ switch (t) -+ { -+ case 0: -+ id_h1_test.Mult(*x, *y_test); -+ id_h1_test.MultTranspose(*y_exact, *y_t_test); -+ break; -+ case 1: -+ id_nd_test.Mult(*x, *y_test); -+ id_nd_test.MultTranspose(*y_exact, *y_t_test); -+ break; -+ case 2: -+ id_rt_test.Mult(*x, *y_test); -+ id_rt_test.MultTranspose(*y_exact, *y_t_test); -+ break; -+ default: -+ MFEM_ABORT("Unexpected problem type."); -+ } -+ } -+ -+ chrono_apply_id_test.Stop(); -+ -+ *y_test -= *y_ref; -+ -+ REQUIRE(y_test->Norml2() < 1.e-10); -+ -+ *y_t_test -= *y_t_ref; -+ -+ REQUIRE(y_t_test->Norml2() < 1.e-10); -+ delete x; -+ delete y_t_ref; -+ delete y_t_test; -+ delete y_exact; -+ delete y_ref; -+ delete y_test; -+ } -+ -+ if (debug) -+ { -+ out << "\n" << section << "\n"; -+ out << "benchmark (unknowns: H1: " << h1_fes.GetTrueVSize() -+ << ", ND: " << nd_fes.GetTrueVSize() -+ << ", RT: " << rt_fes.GetTrueVSize() << ",\n" -+ << " fine H1: " << fine_h1_fes.GetTrueVSize() -+ << ", fine ND: " << fine_nd_fes.GetTrueVSize() -+ << ", fine RT: " << fine_rt_fes.GetTrueVSize() << ")\n" -+ << " discrete gradient interpolator\n" -+ << " setup: ref = " -+ << chrono_setup_grad_ref.RealTime() * 1e3 << " ms\n" -+ << " test = " -+ << chrono_setup_grad_test.RealTime() * 1e3 << " ms\n" -+ << " apply: ref = " -+ << chrono_apply_grad_ref.RealTime() * 1e3 / trials << " ms\n" -+ << " test = " -+ << chrono_apply_grad_test.RealTime() * 1e3 / trials << " ms\n" -+ << " identity interpolator\n" -+ << " apply: ref = " -+ << chrono_apply_id_ref.RealTime() * 1e3 / trials << " ms\n" -+ << " test = " -+ << chrono_apply_id_test.RealTime() * 1e3 / trials << " ms\n"; -+ } - } - - TEST_CASE("CEED mass & diffusion", "[CEED]") -@@ -513,17 +1349,20 @@ TEST_CASE("CEED mass & diffusion", "[CEED]") - CeedCoeffType::Quad); - auto pb = GENERATE(Problem::Mass,Problem::Diffusion,Problem::MassDiffusion, - Problem::VectorMass,Problem::VectorDiffusion); -- auto order = GENERATE(1); -+ auto order = GENERATE(1,2); -+ auto bdr_integ = GENERATE(false,true); - auto mesh = GENERATE("../../data/inline-quad.mesh", - "../../data/inline-hex.mesh", -- "../../data/periodic-square.mesh", -+ "../../data/inline-tri.mesh", -+ "../../data/inline-tet.mesh", - "../../data/star-q2.mesh", - "../../data/fichera-q2.mesh", - "../../data/amr-quad.mesh", - "../../data/fichera-amr.mesh", - "../../data/square-mixed.mesh", - "../../data/fichera-mixed.mesh"); -- test_ceed_operator(mesh, order, coeff_type, pb, assembly); -+ bool mixed_p = false; -+ test_ceed_operator(mesh, order, coeff_type, pb, assembly, mixed_p, bdr_integ); - } // test case - - TEST_CASE("CEED p-adaptivity", "[CEED]") -@@ -539,46 +1378,99 @@ TEST_CASE("CEED p-adaptivity", "[CEED]") - "../../data/star-q2.mesh", - "../../data/amr-quad.mesh", - "../../data/square-mixed.mesh"); -- test_mixed_p_ceed_operator(mesh, order, coeff_type, pb, assembly); -+ bool mixed_p = true; -+ bool bdr_integ = false; -+ test_ceed_operator(mesh, order, coeff_type, pb, assembly, mixed_p, bdr_integ); - } // test case - --TEST_CASE("CEED convection low", "[CEED],[Convection]") -+TEST_CASE("CEED vector and matrix coefficients and vector FE operators", -+ "[CEED], [VectorFE]") - { - auto assembly = GENERATE(AssemblyLevel::PARTIAL,AssemblyLevel::NONE); -- auto coeff_type = GENERATE(CeedCoeffType::VecConst,CeedCoeffType::VecGrid, -- CeedCoeffType::VecQuad); -+ auto coeff_type = GENERATE(CeedCoeffType::Const,CeedCoeffType::Quad, -+ CeedCoeffType::VecConst,CeedCoeffType::VecQuad, -+ CeedCoeffType::MatConst,CeedCoeffType::MatQuad); -+ auto pb = GENERATE(Problem::Mass,Problem::Diffusion, -+ Problem::HDivMass,Problem::DivDiv, -+ Problem::HCurlMass,Problem::CurlCurl); -+ auto order = GENERATE(1,3); -+ auto bdr_integ = GENERATE(false,true); - auto mesh = GENERATE("../../data/inline-quad.mesh", - "../../data/inline-hex.mesh", -+ "../../data/inline-tri.mesh", -+ "../../data/inline-tet.mesh", - "../../data/star-q2.mesh", - "../../data/fichera-q2.mesh", - "../../data/amr-quad.mesh", - "../../data/fichera-amr.mesh", - "../../data/square-mixed.mesh", - "../../data/fichera-mixed.mesh"); -- Problem pb = Problem::Convection; -+ test_ceed_vectorfe_operator(mesh, order, coeff_type, pb, assembly, bdr_integ); -+} // test case - -- // Test that the CEED and MFEM integrators give the same answer -- int low_order = 1; -- test_ceed_operator(mesh, low_order, coeff_type, pb, assembly); -+TEST_CASE("CEED mixed integrators", -+ "[CEED], [MixedVectorIntegrator], [VectorFE]") -+{ -+ auto assembly = GENERATE(AssemblyLevel::PARTIAL,AssemblyLevel::NONE); -+ auto coeff_type = GENERATE(CeedCoeffType::Const,CeedCoeffType::Quad, -+ CeedCoeffType::VecConst,CeedCoeffType::VecQuad, -+ CeedCoeffType::MatConst,CeedCoeffType::MatQuad); -+ auto pb = GENERATE(Problem::MixedVectorGradient,Problem::MixedVectorCurl); -+ auto order = GENERATE(2); -+ auto bdr_integ = GENERATE(false,true); -+ auto mesh = GENERATE("../../data/inline-quad.mesh", -+ "../../data/inline-hex.mesh", -+ "../../data/inline-tri.mesh", -+ "../../data/inline-tet.mesh", -+ "../../data/star-q2.mesh", -+ "../../data/fichera-q2.mesh", -+ "../../data/amr-quad.mesh", -+ "../../data/fichera-amr.mesh", -+ "../../data/square-mixed.mesh", -+ "../../data/fichera-mixed.mesh"); -+ test_ceed_mixed_operator(mesh, order, coeff_type, pb, assembly, bdr_integ); - } // test case - --TEST_CASE("CEED convection high", "[CEED],[Convection]") -+TEST_CASE("CEED convection low", "[CEED], [Convection]") - { - auto assembly = GENERATE(AssemblyLevel::PARTIAL,AssemblyLevel::NONE); -+ auto coeff_type = GENERATE(CeedCoeffType::VecConst,CeedCoeffType::VecGrid, -+ CeedCoeffType::VecQuad); - auto mesh = GENERATE("../../data/inline-quad.mesh", - "../../data/inline-hex.mesh", -+ "../../data/inline-tri.mesh", -+ "../../data/inline-tet.mesh", -+ "../../data/periodic-square.mesh", - "../../data/star-q2.mesh", - "../../data/fichera-q2.mesh", - "../../data/amr-quad.mesh", -- "../../data/fichera-amr.mesh"); -+ "../../data/fichera-amr.mesh", -+ "../../data/square-mixed.mesh", -+ "../../data/fichera-mixed.mesh"); -+ Problem pb = Problem::Convection; -+ int low_order = 1; -+ bool mixed_p = false; -+ bool bdr_integ = false; -+ test_ceed_operator(mesh, low_order, coeff_type, pb, assembly, mixed_p, -+ bdr_integ); -+} // test case - -+TEST_CASE("CEED convection high", "[CEED], [Convection]") -+{ - // Apply the CEED convection integrator applied to a vector quantity, check - // that we get the exact answer (with sufficiently high polynomial degree) -+ auto assembly = GENERATE(AssemblyLevel::PARTIAL,AssemblyLevel::NONE); -+ auto mesh = GENERATE("../../data/inline-quad.mesh", -+ "../../data/periodic-square.mesh", -+ "../../data/star-q2.mesh", -+ "../../data/fichera-q2.mesh", -+ "../../data/amr-quad.mesh", -+ "../../data/fichera-amr.mesh"); - int high_order = 4; - test_ceed_convection(mesh, high_order, assembly); - } // test case - --TEST_CASE("CEED non-linear convection", "[CEED],[NLConvection]") -+TEST_CASE("CEED nonlinear convection", "[CEED], [NLConvection]") - { - auto assembly = GENERATE(AssemblyLevel::PARTIAL,AssemblyLevel::NONE); - auto coeff_type = GENERATE(CeedCoeffType::Const,CeedCoeffType::Grid, -@@ -587,6 +1479,8 @@ TEST_CASE("CEED non-linear convection", "[CEED],[NLConvection]") - auto order = GENERATE(1); - auto mesh = GENERATE("../../data/inline-quad.mesh", - "../../data/inline-hex.mesh", -+ "../../data/inline-tri.mesh", -+ "../../data/inline-tet.mesh", - "../../data/periodic-square.mesh", - "../../data/star-q2.mesh", - "../../data/fichera.mesh", -@@ -595,6 +1489,35 @@ TEST_CASE("CEED non-linear convection", "[CEED],[NLConvection]") - test_ceed_nloperator(mesh, order, coeff_type, pb, assembly); - } // test case - -+TEST_CASE("CEED full assembly", "[CEED]") -+{ -+ auto assembly = GENERATE(AssemblyLevel::PARTIAL,AssemblyLevel::NONE); -+ auto mesh = GENERATE("../../data/inline-quad.mesh", -+ "../../data/inline-hex.mesh", -+ "../../data/star-q2.mesh", -+ "../../data/fichera-q2.mesh", -+ "../../data/amr-quad.mesh", -+ "../../data/fichera-amr.mesh", -+ "../../data/square-mixed.mesh", -+ "../../data/fichera-mixed.mesh"); -+ int order = 1; -+ test_ceed_full_assembly(mesh, order, assembly); -+} // test case -+ -+TEST_CASE("CEED linear interpolators", "[CEED]") -+{ -+ auto mesh = GENERATE("../../data/inline-quad.mesh", -+ "../../data/inline-hex.mesh", -+ "../../data/star-q2.mesh", -+ "../../data/fichera-q2.mesh", -+ "../../data/amr-quad.mesh", -+ "../../data/fichera-amr.mesh", -+ "../../data/square-mixed.mesh", -+ "../../data/fichera-mixed.mesh"); -+ int order = 2; -+ test_ceed_linear_interpolator(mesh, order); -+} // test case -+ - #endif - - } // namespace ceed_test -diff --git a/tests/unit/enzyme/compatibility.cpp b/tests/unit/enzyme/compatibility.cpp -index 8cbb658d1..118f11b12 100644 ---- a/tests/unit/enzyme/compatibility.cpp -+++ b/tests/unit/enzyme/compatibility.cpp -@@ -8,38 +8,40 @@ - template - void square(const VectorT& v, double& y) - { -- for (int i = 0; i < 4; i++) { -- y += v[i]*v[i]; -- } -+ for (int i = 0; i < 4; i++) -+ { -+ y += v[i]*v[i]; -+ } - } - - template - void dsquare(const VectorT& v, double& y, VectorT& dydv) - { -- double seed = 1.0; -- __enzyme_autodiff(square, &v, &dydv, &y, &seed); -+ double seed = 1.0; -+ __enzyme_autodiff(square, &v, &dydv, &y, &seed); - } - - template --void run_test() { -- VectorT v(4); -- v[0] = 2.0; -- v[1] = 3.0; -- v[2] = 1.0; -- v[3] = 7.0; -- -- double yy = 0; -- VectorT dydv(4); -- dydv[0] = 0; -- dydv[1] = 0; -- dydv[2] = 0; -- dydv[3] = 0; -- dsquare(v, yy, dydv); -- -- REQUIRE(dydv[0] == MFEM_Approx(4.0)); -- REQUIRE(dydv[1] == MFEM_Approx(6.0)); -- REQUIRE(dydv[2] == MFEM_Approx(2.0)); -- REQUIRE(dydv[3] == MFEM_Approx(14.0)); -+void run_test() -+{ -+ VectorT v(4); -+ v[0] = 2.0; -+ v[1] = 3.0; -+ v[2] = 1.0; -+ v[3] = 7.0; -+ -+ double yy = 0; -+ VectorT dydv(4); -+ dydv[0] = 0; -+ dydv[1] = 0; -+ dydv[2] = 0; -+ dydv[3] = 0; -+ dsquare(v, yy, dydv); -+ -+ REQUIRE(dydv[0] == MFEM_Approx(4.0)); -+ REQUIRE(dydv[1] == MFEM_Approx(6.0)); -+ REQUIRE(dydv[2] == MFEM_Approx(2.0)); -+ REQUIRE(dydv[3] == MFEM_Approx(14.0)); - } - - TEST_CASE("AD Vector implementation", "[Enzyme]") -diff --git a/tests/unit/fem/test_assemblediagonalpa.cpp b/tests/unit/fem/test_assemblediagonalpa.cpp -index 050561e1d..42cdc76ba 100644 ---- a/tests/unit/fem/test_assemblediagonalpa.cpp -+++ b/tests/unit/fem/test_assemblediagonalpa.cpp -@@ -447,8 +447,8 @@ TEST_CASE("Hcurl/Hdiv diagonal PA", - else - { - const FiniteElement *fel = fespace.GetFE(0); -- const IntegrationRule *intRule = &MassIntegrator::GetRule(*fel, *fel, -- *mesh.GetElementTransformation(0)); -+ ElementTransformation *T = mesh.GetElementTransformation(0); -+ const IntegrationRule *intRule = &MassIntegrator::GetRuleStatic(*fel, *fel, *T); - - if (spaceType == Hcurl) - { -diff --git a/tests/unit/fem/test_pa_coeff.cpp b/tests/unit/fem/test_pa_coeff.cpp -index b45738fad..270359f3c 100644 ---- a/tests/unit/fem/test_pa_coeff.cpp -+++ b/tests/unit/fem/test_pa_coeff.cpp -@@ -530,8 +530,8 @@ TEST_CASE("Hcurl/Hdiv PA Coefficient", - if (spaceType == Hcurl) - { - const FiniteElement *fel = fespace.GetFE(0); -- const IntegrationRule *intRule = &MassIntegrator::GetRule(*fel, *fel, -- *mesh.GetElementTransformation(0)); -+ ElementTransformation *T = mesh.GetElementTransformation(0); -+ const IntegrationRule *intRule = &MassIntegrator::GetRuleStatic(*fel, *fel, *T); - - if (coeffType >= 3 && dimension == 3) - { diff --git a/palace/deps/patch/mfem/patch_pa_prereq.diff b/palace/deps/patch/mfem/patch_pa_prereq.diff deleted file mode 100644 index 647d15c1d..000000000 --- a/palace/deps/patch/mfem/patch_pa_prereq.diff +++ /dev/null @@ -1,41184 +0,0 @@ -diff --git a/CHANGELOG b/CHANGELOG -index eb4f4e4bd..1026d6957 100644 ---- a/CHANGELOG -+++ b/CHANGELOG -@@ -22,6 +22,10 @@ Version 4.5.3 (development) - 338. Added the tmop-metric-magnitude tool for tracking how metrics change - under geometric perturbations. - -+- Reorganized files for bilinear form, linear form, and nonlinear form integrators -+ in the fem/integ/ subdirectory. -+ -+ - New and updated examples and miniapps - ------------------------------------- - - Added a miniapp pmesh-fitting in miniapps/meshing for interface and boundary -@@ -53,6 +57,7 @@ Integrations, testing and documentation - - Added an address sanitizer GitHub action for a serial build/test on Ubuntu, - based on Clang/LLVM (https://clang.llvm.org/docs/AddressSanitizer.html). - -+ - Version 4.5.2, released on March 23, 2023 - ========================================= - -diff --git a/CMakeLists.txt b/CMakeLists.txt -index 32112b549..9e46030ad 100644 ---- a/CMakeLists.txt -+++ b/CMakeLists.txt -@@ -218,10 +218,7 @@ if (MFEM_USE_HIP) - endif() - - # OpenMP --if (MFEM_USE_OPENMP OR MFEM_USE_LEGACY_OPENMP) -- if (NOT MFEM_THREAD_SAFE AND MFEM_USE_LEGACY_OPENMP) -- message(FATAL_ERROR " *** MFEM_USE_LEGACY_OPENMP requires MFEM_THREAD_SAFE=ON.") -- endif() -+if (MFEM_USE_OPENMP) - find_package(OpenMP REQUIRED) - set(OPENMP_LIBRARIES ${OpenMP_CXX_LIBRARIES}) - if(APPLE) -diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md -index ba372df94..a6111c3e0 100644 ---- a/CONTRIBUTING.md -+++ b/CONTRIBUTING.md -@@ -121,6 +121,7 @@ The MFEM source code has the following structure: - ├── fem - │ ├── ceed - │ ├── fe -+ │ ├── integ - │ ├── lor - │ ├── moonolith - │ ├── qinterp -diff --git a/INSTALL b/INSTALL -index cb092cc1b..e48618821 100644 ---- a/INSTALL -+++ b/INSTALL -@@ -309,10 +309,6 @@ MFEM_THREAD_SAFE = YES/NO - Use thread-safe implementation for some classes/methods. This comes at the - cost of extra memory allocation and de-allocation. - --MFEM_USE_LEGACY_OPENMP = YES/NO -- Enable (basic) experimental OpenMP support. Requires MFEM_THREAD_SAFE. -- This option is deprecated. -- - MFEM_USE_OPENMP = YES/NO - Enable the OpenMP backend. - -@@ -611,8 +607,8 @@ The specific libraries and their options are: - http://math-atlas.sourceforge.net (ATLAS) - Options: LAPACK_OPT (currently not used/needed), LAPACK_LIB. - --- OpenMP (optional), usually part of compiler, used when either MFEM_USE_OPENMP -- or MFEM_USE_LEGACY_OPENMP is set to YES. -+- OpenMP (optional), usually part of compiler, used when MFEM_USE_OPENMP is set -+ to YES. - Options: OPENMP_OPT, OPENMP_LIB. - - - High-resolution POSIX clocks: when using MFEM_TIMER_TYPE = 2, it may be -@@ -956,7 +952,6 @@ MFEM_USE_METIS - Set to ${MFEM_USE_MPI}, can be overwritten. - MFEM_USE_LIBUNWIND - MFEM_USE_LAPACK - MFEM_THREAD_SAFE --MFEM_USE_LEGACY_OPENMP - MFEM_USE_OPENMP - MFEM_USE_MEMALLOC - MFEM_TIMER_TYPE - Set automatically, can be overwritten. -diff --git a/config/cmake/MFEMConfig.cmake.in b/config/cmake/MFEMConfig.cmake.in -index 9d5eef52f..43b6d0671 100644 ---- a/config/cmake/MFEMConfig.cmake.in -+++ b/config/cmake/MFEMConfig.cmake.in -@@ -25,7 +25,6 @@ set(MFEM_USE_LIBUNWIND @MFEM_USE_LIBUNWIND@) - set(MFEM_USE_LAPACK @MFEM_USE_LAPACK@) - set(MFEM_THREAD_SAFE @MFEM_THREAD_SAFE@) - set(MFEM_USE_OPENMP @MFEM_USE_OPENMP@) --set(MFEM_USE_LEGACY_OPENMP @MFEM_USE_LEGACY_OPENMP@) - set(MFEM_USE_MEMALLOC @MFEM_USE_MEMALLOC@) - set(MFEM_TIMER_TYPE @MFEM_TIMER_TYPE@) - set(MFEM_USE_SUNDIALS @MFEM_USE_SUNDIALS@) -diff --git a/config/cmake/config.hpp.in b/config/cmake/config.hpp.in -index 7e820088a..ba75ff79e 100644 ---- a/config/cmake/config.hpp.in -+++ b/config/cmake/config.hpp.in -@@ -74,9 +74,6 @@ - // Enable the OpenMP backend. - #cmakedefine MFEM_USE_OPENMP - --// [Deprecated] Enable experimental OpenMP support. Requires MFEM_THREAD_SAFE. --#cmakedefine MFEM_USE_LEGACY_OPENMP -- - // Internal MFEM option: enable group/batch allocation for some small objects. - #cmakedefine MFEM_USE_MEMALLOC - -diff --git a/config/cmake/modules/MfemCmakeUtilities.cmake b/config/cmake/modules/MfemCmakeUtilities.cmake -index 204b7d87f..9a629330b 100644 ---- a/config/cmake/modules/MfemCmakeUtilities.cmake -+++ b/config/cmake/modules/MfemCmakeUtilities.cmake -@@ -843,16 +843,16 @@ function(mfem_export_mk_files) - # Convert Boolean vars to YES/NO without writing the values to cache - set(CONFIG_MK_BOOL_VARS MFEM_USE_MPI MFEM_USE_METIS MFEM_USE_METIS_5 - MFEM_DEBUG MFEM_USE_EXCEPTIONS MFEM_USE_ZLIB MFEM_USE_LIBUNWIND -- MFEM_USE_LAPACK MFEM_THREAD_SAFE MFEM_USE_LEGACY_OPENMP MFEM_USE_OPENMP -- MFEM_USE_MEMALLOC MFEM_USE_SUNDIALS MFEM_USE_SUITESPARSE -- MFEM_USE_SUPERLU MFEM_USE_SUPERLU5 MFEM_USE_MUMPS MFEM_USE_STRUMPACK -- MFEM_USE_GINKGO MFEM_USE_AMGX MFEM_USE_GNUTLS MFEM_USE_NETCDF -- MFEM_USE_PETSC MFEM_USE_SLEPC MFEM_USE_MPFR MFEM_USE_SIDRE MFEM_USE_FMS -- MFEM_USE_CONDUIT MFEM_USE_PUMI MFEM_USE_HIOP MFEM_USE_GSLIB MFEM_USE_CUDA -- MFEM_USE_HIP MFEM_USE_RAJA MFEM_USE_OCCA MFEM_USE_CEED MFEM_USE_CALIPER -- MFEM_USE_UMPIRE MFEM_USE_SIMD MFEM_USE_ADIOS2 MFEM_USE_MKL_CPARDISO -- MFEM_USE_ADFORWARD MFEM_USE_CODIPACK MFEM_USE_BENCHMARK MFEM_USE_PARELAG -- MFEM_USE_MOONOLITH MFEM_USE_ALGOIM MFEM_USE_ENZYME) -+ MFEM_USE_LAPACK MFEM_THREAD_SAFE MFEM_USE_OPENMP MFEM_USE_MEMALLOC -+ MFEM_USE_SUNDIALS MFEM_USE_SUITESPARSE MFEM_USE_SUPERLU MFEM_USE_SUPERLU5 -+ MFEM_USE_MUMPS MFEM_USE_STRUMPACK MFEM_USE_GINKGO MFEM_USE_AMGX -+ MFEM_USE_GNUTLS MFEM_USE_NETCDF MFEM_USE_PETSC MFEM_USE_SLEPC -+ MFEM_USE_MPFR MFEM_USE_SIDRE MFEM_USE_FMS MFEM_USE_CONDUIT MFEM_USE_PUMI -+ MFEM_USE_HIOP MFEM_USE_GSLIB MFEM_USE_CUDA MFEM_USE_HIP MFEM_USE_RAJA -+ MFEM_USE_OCCA MFEM_USE_CEED MFEM_USE_CALIPER MFEM_USE_UMPIRE -+ MFEM_USE_SIMD MFEM_USE_ADIOS2 MFEM_USE_MKL_CPARDISO MFEM_USE_ADFORWARD -+ MFEM_USE_CODIPACK MFEM_USE_BENCHMARK MFEM_USE_PARELAG MFEM_USE_MOONOLITH -+ MFEM_USE_ALGOIM MFEM_USE_ENZYME) - foreach(var ${CONFIG_MK_BOOL_VARS}) - if (${var}) - set(${var} YES) -diff --git a/config/config.hpp.in b/config/config.hpp.in -index 76145927b..d82bf192c 100644 ---- a/config/config.hpp.in -+++ b/config/config.hpp.in -@@ -74,9 +74,6 @@ - // Enable the OpenMP backend. - // #define MFEM_USE_OPENMP - --// [Deprecated] Enable experimental OpenMP support. Requires MFEM_THREAD_SAFE. --// #define MFEM_USE_LEGACY_OPENMP -- - // Internal MFEM option: enable group/batch allocation for some small objects. - // #define MFEM_USE_MEMALLOC - -diff --git a/config/config.mk.in b/config/config.mk.in -index baf5c2955..8858d01b8 100644 ---- a/config/config.mk.in -+++ b/config/config.mk.in -@@ -24,7 +24,6 @@ MFEM_USE_ZLIB = @MFEM_USE_ZLIB@ - MFEM_USE_LIBUNWIND = @MFEM_USE_LIBUNWIND@ - MFEM_USE_LAPACK = @MFEM_USE_LAPACK@ - MFEM_THREAD_SAFE = @MFEM_THREAD_SAFE@ --MFEM_USE_LEGACY_OPENMP = @MFEM_USE_LEGACY_OPENMP@ - MFEM_USE_OPENMP = @MFEM_USE_OPENMP@ - MFEM_USE_MEMALLOC = @MFEM_USE_MEMALLOC@ - MFEM_TIMER_TYPE = @MFEM_TIMER_TYPE@ -diff --git a/config/defaults.cmake b/config/defaults.cmake -index d5104092b..a72be813f 100644 ---- a/config/defaults.cmake -+++ b/config/defaults.cmake -@@ -28,7 +28,6 @@ option(MFEM_USE_LIBUNWIND "Enable backtrace for errors." OFF) - option(MFEM_USE_LAPACK "Enable LAPACK usage" OFF) - option(MFEM_THREAD_SAFE "Enable thread safety" OFF) - option(MFEM_USE_OPENMP "Enable the OpenMP backend" OFF) --option(MFEM_USE_LEGACY_OPENMP "Enable legacy OpenMP usage" OFF) - option(MFEM_USE_MEMALLOC "Enable the internal MEMALLOC option." ON) - option(MFEM_USE_SUNDIALS "Enable SUNDIALS usage" OFF) - option(MFEM_USE_SUITESPARSE "Enable SuiteSparse usage" OFF) -diff --git a/config/defaults.mk b/config/defaults.mk -index ca5dc3c45..364627756 100644 ---- a/config/defaults.mk -+++ b/config/defaults.mk -@@ -127,7 +127,6 @@ MFEM_USE_LIBUNWIND = NO - MFEM_USE_LAPACK = NO - MFEM_THREAD_SAFE = NO - MFEM_USE_OPENMP = NO --MFEM_USE_LEGACY_OPENMP = NO - MFEM_USE_MEMALLOC = YES - MFEM_TIMER_TYPE = $(if $(NOTMAC),2,4) - MFEM_USE_SUNDIALS = NO -diff --git a/fem/CMakeLists.txt b/fem/CMakeLists.txt -index 462ef72aa..6da0cfea3 100644 ---- a/fem/CMakeLists.txt -+++ b/fem/CMakeLists.txt -@@ -13,28 +13,39 @@ set(SRCS - bilinearform.cpp - bilinearform_ext.cpp - bilininteg.cpp -- bilininteg_br2.cpp -- bilininteg_convection_mf.cpp -- bilininteg_convection_pa.cpp -- bilininteg_convection_ea.cpp -- bilininteg_dgtrace_pa.cpp -- bilininteg_dgtrace_ea.cpp -- bilininteg_diffusion_mf.cpp -- bilininteg_diffusion_pa.cpp -- bilininteg_diffusion_ea.cpp -- bilininteg_divergence.cpp -- bilininteg_hcurl.cpp -- bilininteg_hdiv.cpp -- bilininteg_vectorfe.cpp -- bilininteg_gradient.cpp -- bilininteg_mass_mf.cpp -- bilininteg_mass_pa.cpp -- bilininteg_mass_ea.cpp -- bilininteg_transpose_ea.cpp -- bilininteg_vecdiffusion.cpp -- bilininteg_vecdiffusion_mf.cpp -- bilininteg_vecmass.cpp -- bilininteg_vecmass_mf.cpp -+ integ/bilininteg_br2.cpp -+ integ/bilininteg_convection_mf.cpp -+ integ/bilininteg_convection_pa.cpp -+ integ/bilininteg_convection_ea.cpp -+ integ/bilininteg_curlcurl_pa.cpp -+ integ/bilininteg_dgtrace_pa.cpp -+ integ/bilininteg_dgtrace_ea.cpp -+ integ/bilininteg_diffusion_mf.cpp -+ integ/bilininteg_diffusion_pa.cpp -+ integ/bilininteg_diffusion_ea.cpp -+ integ/bilininteg_divdiv_pa.cpp -+ integ/bilininteg_gradient_pa.cpp -+ integ/bilininteg_interp_pa.cpp -+ integ/bilininteg_mass_mf.cpp -+ integ/bilininteg_mass_pa.cpp -+ integ/bilininteg_mass_ea.cpp -+ integ/bilininteg_mixedcurl_pa.cpp -+ integ/bilininteg_mixedvecgrad_pa.cpp -+ integ/bilininteg_transpose_ea.cpp -+ integ/bilininteg_vecdiffusion_mf.cpp -+ integ/bilininteg_vecdiffusion_pa.cpp -+ integ/bilininteg_vecdiv_pa.cpp -+ integ/bilininteg_vecmass_mf.cpp -+ integ/bilininteg_vecmass_pa.cpp -+ integ/bilininteg_vectorfediv_pa.cpp -+ integ/bilininteg_vectorfemass_pa.cpp -+ integ/lininteg_boundary.cpp -+ integ/lininteg_boundary_flux.cpp -+ integ/lininteg_domain.cpp -+ integ/lininteg_domain_grad.cpp -+ integ/lininteg_domain_vectorfe.cpp -+ integ/nonlininteg_vecconvection_pa.cpp -+ integ/nonlininteg_vecconvection_mf.cpp - coefficient.cpp - complex_fem.cpp - convergence.cpp -@@ -74,11 +85,6 @@ set(SRCS - linearform.cpp - linearform_ext.cpp - lininteg.cpp -- lininteg_boundary.cpp -- lininteg_boundary_flux.cpp -- lininteg_domain.cpp -- lininteg_domain_grad.cpp -- lininteg_vectorfe_domain.cpp - lor/lor.cpp - lor/lor_ads.cpp - lor/lor_ams.cpp -@@ -91,8 +97,6 @@ set(SRCS - nonlinearform_ext.cpp - nonlininteg.cpp - fespacehierarchy.cpp -- nonlininteg_vectorconvection.cpp -- nonlininteg_vectorconvection_mf.cpp - qfunction.cpp - qinterp/det.cpp - qinterp/eval_by_nodes.cpp -@@ -143,7 +147,11 @@ set(HDRS - bilinearform.hpp - bilinearform_ext.hpp - bilininteg.hpp -- bilininteg_mass_pa.hpp -+ integ/bilininteg_diffusion_kernels.hpp -+ integ/bilininteg_hcurl_kernels.hpp -+ integ/bilininteg_hdiv_kernels.hpp -+ integ/bilininteg_hcurlhdiv_kernels.hpp -+ integ/bilininteg_mass_kernels.hpp - coefficient.hpp - complex_fem.hpp - convergence.hpp -diff --git a/fem/bilinearform.cpp b/fem/bilinearform.cpp -index fad9717aa..a549d03a7 100644 ---- a/fem/bilinearform.cpp -+++ b/fem/bilinearform.cpp -@@ -18,84 +18,31 @@ - namespace mfem - { - --void BilinearForm::AllocMat() --{ -- if (static_cond) { return; } -- -- if (precompute_sparsity == 0 || fes->GetVDim() > 1) -- { -- mat = new SparseMatrix(height); -- return; -- } -- -- const Table &elem_dof = fes->GetElementToDofTable(); -- Table dof_dof; -- -- if (interior_face_integs.Size() > 0) -- { -- // the sparsity pattern is defined from the map: face->element->dof -- Table face_dof, dof_face; -- { -- Table *face_elem = fes->GetMesh()->GetFaceToElementTable(); -- mfem::Mult(*face_elem, elem_dof, face_dof); -- delete face_elem; -- } -- Transpose(face_dof, dof_face, height); -- mfem::Mult(dof_face, face_dof, dof_dof); -- } -- else -- { -- // the sparsity pattern is defined from the map: element->dof -- Table dof_elem; -- Transpose(elem_dof, dof_elem, height); -- mfem::Mult(dof_elem, elem_dof, dof_dof); -- } -- -- dof_dof.SortRows(); -- -- int *I = dof_dof.GetI(); -- int *J = dof_dof.GetJ(); -- double *data = Memory(I[height]); -- -- mat = new SparseMatrix(I, J, data, height, height, true, true, true); -- *mat = 0.0; -- -- dof_dof.LoseData(); --} -- --BilinearForm::BilinearForm(FiniteElementSpace * f) -- : Matrix (f->GetVSize()) -+BilinearForm::BilinearForm(FiniteElementSpace *f) -+ : Matrix(f->GetVSize()) - { - fes = f; - sequence = f->GetSequence(); - mat = mat_e = NULL; - extern_bfs = 0; -- element_matrices = NULL; - static_cond = NULL; - hybridization = NULL; -- precompute_sparsity = 0; - diag_policy = DIAG_KEEP; -- - assembly = AssemblyLevel::LEGACY; -- batch = 1; - ext = NULL; - } - --BilinearForm::BilinearForm (FiniteElementSpace * f, BilinearForm * bf, int ps) -- : Matrix (f->GetVSize()) -+BilinearForm::BilinearForm(FiniteElementSpace *f, BilinearForm *bf) -+ : Matrix(f->GetVSize()) - { - fes = f; - sequence = f->GetSequence(); -- mat_e = NULL; -+ mat = mat_e = NULL; - extern_bfs = 1; -- element_matrices = NULL; - static_cond = NULL; - hybridization = NULL; -- precompute_sparsity = ps; - diag_policy = DIAG_KEEP; -- - assembly = AssemblyLevel::LEGACY; -- batch = 1; - ext = NULL; - - // Copy the pointers to the integrators -@@ -108,8 +55,6 @@ BilinearForm::BilinearForm (FiniteElementSpace * f, BilinearForm * bf, int ps) - - boundary_face_integs = bf->boundary_face_integs; - boundary_face_integs_marker = bf->boundary_face_integs_marker; -- -- AllocMat(); - } - - void BilinearForm::SetAssemblyLevel(AssemblyLevel assembly_level) -@@ -124,7 +69,7 @@ void BilinearForm::SetAssemblyLevel(AssemblyLevel assembly_level) - case AssemblyLevel::LEGACY: - break; - case AssemblyLevel::FULL: -- SetDiagonalPolicy( DIAG_ONE ); // Only diagonal policy supported on device -+ SetDiagonalPolicy(DIAG_ONE); // Only diagonal policy supported on device - ext = new FABilinearFormExtension(this); - break; - case AssemblyLevel::ELEMENT: -@@ -181,48 +126,71 @@ void BilinearForm::EnableHybridization(FiniteElementSpace *constr_space, - hybridization->Init(ess_tdof_list); - } - --void BilinearForm::UseSparsity(int *I, int *J, bool isSorted) -+double &BilinearForm::Elem(int i, int j) - { -- if (static_cond) { return; } -+ return mat->Elem(i,j); -+} -+ -+const double &BilinearForm::Elem(int i, int j) const -+{ -+ return mat->Elem(i,j); -+} - -- if (mat) -+void BilinearForm::Mult(const Vector &x, Vector &y) const -+{ -+ if (ext) - { -- if (mat->Finalized() && mat->GetI() == I && mat->GetJ() == J) -- { -- return; // mat is already using the given sparsity -- } -- delete mat; -+ ext->Mult(x, y); -+ } -+ else -+ { -+ mat->Mult(x, y); - } -- height = width = fes->GetVSize(); -- mat = new SparseMatrix(I, J, NULL, height, width, false, true, isSorted); - } - --void BilinearForm::UseSparsity(SparseMatrix &A) -+void BilinearForm::AddMult(const Vector &x, Vector &y, const double a) const - { -- MFEM_ASSERT(A.Height() == fes->GetVSize() && A.Width() == fes->GetVSize(), -- "invalid matrix A dimensions: " -- << A.Height() << " x " << A.Width()); -- MFEM_ASSERT(A.Finalized(), "matrix A must be Finalized"); -- -- UseSparsity(A.GetI(), A.GetJ(), A.ColumnsAreSorted()); -+ if (ext) -+ { -+ ext->AddMult(x, y, a); -+ } -+ else -+ { -+ mat->AddMult(x, y, a); -+ } - } - --double& BilinearForm::Elem (int i, int j) -+void BilinearForm::MultTranspose(const Vector &x, Vector &y) const - { -- return mat -> Elem(i,j); -+ if (ext) -+ { -+ ext->MultTranspose(x, y); -+ } -+ else -+ { -+ mat->MultTranspose(x, y); -+ } - } - --const double& BilinearForm::Elem (int i, int j) const -+void BilinearForm::AddMultTranspose(const Vector &x, Vector &y, -+ const double a) const - { -- return mat -> Elem(i,j); -+ if (ext) -+ { -+ ext->AddMultTranspose(x, y, a); -+ } -+ else -+ { -+ mat->AddMultTranspose(x, y, a); -+ } - } - --MatrixInverse * BilinearForm::Inverse() const -+MatrixInverse *BilinearForm::Inverse() const - { -- return mat -> Inverse(); -+ return mat->Inverse(); - } - --void BilinearForm::Finalize (int skip_zeros) -+void BilinearForm::Finalize(int skip_zeros) - { - if (assembly == AssemblyLevel::LEGACY) - { -@@ -246,22 +214,22 @@ void BilinearForm::AddDomainIntegrator(BilinearFormIntegrator *bfi, - domain_integs_marker.Append(&elem_marker); - } - --void BilinearForm::AddBoundaryIntegrator (BilinearFormIntegrator * bfi) -+void BilinearForm::AddBoundaryIntegrator(BilinearFormIntegrator *bfi) - { -- boundary_integs.Append (bfi); -+ boundary_integs.Append(bfi); - boundary_integs_marker.Append(NULL); // NULL marker means apply everywhere - } - --void BilinearForm::AddBoundaryIntegrator (BilinearFormIntegrator * bfi, -- Array &bdr_marker) -+void BilinearForm::AddBoundaryIntegrator(BilinearFormIntegrator *bfi, -+ Array &bdr_marker) - { -- boundary_integs.Append (bfi); -+ boundary_integs.Append(bfi); - boundary_integs_marker.Append(&bdr_marker); - } - --void BilinearForm::AddInteriorFaceIntegrator(BilinearFormIntegrator * bfi) -+void BilinearForm::AddInteriorFaceIntegrator(BilinearFormIntegrator *bfi) - { -- interior_face_integs.Append (bfi); -+ interior_face_integs.Append(bfi); - } - - void BilinearForm::AddBdrFaceIntegrator(BilinearFormIntegrator *bfi) -@@ -278,55 +246,6 @@ void BilinearForm::AddBdrFaceIntegrator(BilinearFormIntegrator *bfi, - boundary_face_integs_marker.Append(&bdr_marker); - } - --void BilinearForm::ComputeElementMatrix(int i, DenseMatrix &elmat) --{ -- if (element_matrices) -- { -- elmat.SetSize(element_matrices->SizeI(), element_matrices->SizeJ()); -- elmat = element_matrices->GetData(i); -- return; -- } -- -- if (domain_integs.Size()) -- { -- const FiniteElement &fe = *fes->GetFE(i); -- ElementTransformation *eltrans = fes->GetElementTransformation(i); -- domain_integs[0]->AssembleElementMatrix(fe, *eltrans, elmat); -- for (int k = 1; k < domain_integs.Size(); k++) -- { -- domain_integs[k]->AssembleElementMatrix(fe, *eltrans, elemmat); -- elmat += elemmat; -- } -- } -- else -- { -- fes->GetElementVDofs(i, vdofs); -- elmat.SetSize(vdofs.Size()); -- elmat = 0.0; -- } --} -- --void BilinearForm::ComputeBdrElementMatrix(int i, DenseMatrix &elmat) --{ -- if (boundary_integs.Size()) -- { -- const FiniteElement &be = *fes->GetBE(i); -- ElementTransformation *eltrans = fes->GetBdrElementTransformation(i); -- boundary_integs[0]->AssembleElementMatrix(be, *eltrans, elmat); -- for (int k = 1; k < boundary_integs.Size(); k++) -- { -- boundary_integs[k]->AssembleElementMatrix(be, *eltrans, elemmat); -- elmat += elemmat; -- } -- } -- else -- { -- fes->GetBdrElementVDofs(i, vdofs); -- elmat.SetSize(vdofs.Size()); -- elmat = 0.0; -- } --} -- - void BilinearForm::AssembleElementMatrix( - int i, const DenseMatrix &elmat, int skip_zeros) - { -@@ -345,7 +264,7 @@ void BilinearForm::AssembleElementMatrix( - { - if (mat == NULL) - { -- AllocMat(); -+ mat = new SparseMatrix(height); - } - mat->AddSubMatrix(vdofs_, vdofs_, elmat, skip_zeros); - if (hybridization) -@@ -373,7 +292,7 @@ void BilinearForm::AssembleBdrElementMatrix( - { - if (mat == NULL) - { -- AllocMat(); -+ mat = new SparseMatrix(height); - } - mat->AddSubMatrix(vdofs_, vdofs_, elmat, skip_zeros); - if (hybridization) -@@ -392,23 +311,14 @@ void BilinearForm::Assemble(int skip_zeros) - } - - ElementTransformation *eltrans; -- DofTransformation * doftrans; -- Mesh *mesh = fes -> GetMesh(); -- DenseMatrix elmat, *elmat_p; -- -- if (mat == NULL) -- { -- AllocMat(); -- } -+ DofTransformation *doftrans; -+ Mesh *mesh = fes->GetMesh(); -+ DenseMatrix elmat; - --#ifdef MFEM_USE_LEGACY_OPENMP -- int free_element_matrices = 0; -- if (!element_matrices) -+ if (mat == NULL && !static_cond) - { -- ComputeElementMatrices(); -- free_element_matrices = 1; -+ mat = new SparseMatrix(height); - } --#endif - - if (domain_integs.Size()) - { -@@ -423,61 +333,49 @@ void BilinearForm::Assemble(int skip_zeros) - } - } - -- for (int i = 0; i < fes -> GetNE(); i++) -+ for (int i = 0; i < fes->GetNE(); i++) - { - int elem_attr = fes->GetMesh()->GetAttribute(i); - doftrans = fes->GetElementVDofs(i, vdofs); -- if (element_matrices) -- { -- elmat_p = &(*element_matrices)(i); -- } -- else -+ elmat.SetSize(0); -+ for (int k = 0; k < domain_integs.Size(); k++) - { -- elmat.SetSize(0); -- for (int k = 0; k < domain_integs.Size(); k++) -+ if ( domain_integs_marker[k] == NULL || -+ (*(domain_integs_marker[k]))[elem_attr-1] == 1) - { -- if ( domain_integs_marker[k] == NULL || -- (*(domain_integs_marker[k]))[elem_attr-1] == 1) -+ const FiniteElement &fe = *fes->GetFE(i); -+ eltrans = fes->GetElementTransformation(i); -+ domain_integs[k]->AssembleElementMatrix(fe, *eltrans, elemmat); -+ if (elmat.Size() == 0) - { -- const FiniteElement &fe = *fes->GetFE(i); -- eltrans = fes->GetElementTransformation(i); -- domain_integs[k]->AssembleElementMatrix(fe, *eltrans, elemmat); -- if (elmat.Size() == 0) -- { -- elmat = elemmat; -- } -- else -- { -- elmat += elemmat; -- } -+ elmat = elemmat; -+ } -+ else -+ { -+ elmat += elemmat; - } - } -- if (elmat.Size() == 0) -- { -- continue; -- } -- else -- { -- elmat_p = &elmat; -- } -- if (doftrans) -- { -- doftrans->TransformDual(elmat); -- } -- elmat_p = &elmat; - } -- if (static_cond) -+ if (elmat.Size() == 0) - { -- static_cond->AssembleMatrix(i, *elmat_p); -+ continue; - } -- else -+ if (doftrans) - { -- mat->AddSubMatrix(vdofs, vdofs, *elmat_p, skip_zeros); -+ doftrans->TransformDual(elmat); -+ } -+ if (!static_cond) -+ { -+ mat->AddSubMatrix(vdofs, vdofs, elmat, skip_zeros); - if (hybridization) - { -- hybridization->AssembleMatrix(i, *elmat_p); -+ hybridization->AssembleMatrix(i, elmat); - } - } -+ else -+ { -+ static_cond->AssembleMatrix(i, elmat); -+ } - } - } - -@@ -504,14 +402,14 @@ void BilinearForm::Assemble(int skip_zeros) - } - } - -- for (int i = 0; i < fes -> GetNBE(); i++) -+ for (int i = 0; i < fes->GetNBE(); i++) - { - const int bdr_attr = mesh->GetBdrAttribute(i); - if (bdr_attr_marker[bdr_attr-1] == 0) { continue; } - - const FiniteElement &be = *fes->GetBE(i); -- doftrans = fes -> GetBdrElementVDofs (i, vdofs); -- eltrans = fes -> GetBdrElementTransformation (i); -+ doftrans = fes->GetBdrElementVDofs(i, vdofs); -+ eltrans = fes->GetBdrElementTransformation(i); - int k = 0; - for (; k < boundary_integs.Size(); k++) - { -@@ -534,18 +432,17 @@ void BilinearForm::Assemble(int skip_zeros) - { - doftrans->TransformDual(elmat); - } -- elmat_p = &elmat; - if (!static_cond) - { -- mat->AddSubMatrix(vdofs, vdofs, *elmat_p, skip_zeros); -+ mat->AddSubMatrix(vdofs, vdofs, elmat, skip_zeros); - if (hybridization) - { -- hybridization->AssembleBdrMatrix(i, *elmat_p); -+ hybridization->AssembleBdrMatrix(i, elmat); - } - } - else - { -- static_cond->AssembleBdrMatrix(i, *elmat_p); -+ static_cond->AssembleBdrMatrix(i, elmat); - } - } - } -@@ -558,19 +455,18 @@ void BilinearForm::Assemble(int skip_zeros) - int nfaces = mesh->GetNumFaces(); - for (int i = 0; i < nfaces; i++) - { -- tr = mesh -> GetInteriorFaceTransformations (i); -+ tr = mesh->GetInteriorFaceTransformations(i); - if (tr != NULL) - { -- fes -> GetElementVDofs (tr -> Elem1No, vdofs); -- fes -> GetElementVDofs (tr -> Elem2No, vdofs2); -- vdofs.Append (vdofs2); -+ fes->GetElementVDofs(tr->Elem1No, vdofs); -+ fes->GetElementVDofs(tr->Elem2No, vdofs2); -+ vdofs.Append(vdofs2); - for (int k = 0; k < interior_face_integs.Size(); k++) - { -- interior_face_integs[k]-> -- AssembleFaceMatrix(*fes->GetFE(tr->Elem1No), -- *fes->GetFE(tr->Elem2No), -- *tr, elemmat); -- mat -> AddSubMatrix (vdofs, vdofs, elemmat, skip_zeros); -+ interior_face_integs[k]->AssembleFaceMatrix(*fes->GetFE(tr->Elem1No), -+ *fes->GetFE(tr->Elem2No), -+ *tr, elemmat); -+ mat->AddSubMatrix(vdofs, vdofs, elemmat, skip_zeros); - } - } - } -@@ -602,16 +498,16 @@ void BilinearForm::Assemble(int skip_zeros) - } - } - -- for (int i = 0; i < fes -> GetNBE(); i++) -+ for (int i = 0; i < fes->GetNBE(); i++) - { - const int bdr_attr = mesh->GetBdrAttribute(i); - if (bdr_attr_marker[bdr_attr-1] == 0) { continue; } - -- tr = mesh -> GetBdrFaceTransformations (i); -+ tr = mesh->GetBdrFaceTransformations(i); - if (tr != NULL) - { -- fes -> GetElementVDofs (tr -> Elem1No, vdofs); -- fe1 = fes -> GetFE (tr -> Elem1No); -+ fes->GetElementVDofs(tr->Elem1No, vdofs); -+ fe1 = fes->GetFE(tr->Elem1No); - // The fe2 object is really a dummy and not used on the boundaries, - // but we can't dereference a NULL pointer, and we don't want to - // actually make a fake element. -@@ -622,20 +518,13 @@ void BilinearForm::Assemble(int skip_zeros) - (*boundary_face_integs_marker[k])[bdr_attr-1] == 0) - { continue; } - -- boundary_face_integs[k] -> AssembleFaceMatrix (*fe1, *fe2, *tr, -- elemmat); -- mat -> AddSubMatrix (vdofs, vdofs, elemmat, skip_zeros); -+ boundary_face_integs[k]->AssembleFaceMatrix(*fe1, *fe2, *tr, -+ elemmat); -+ mat->AddSubMatrix(vdofs, vdofs, elemmat, skip_zeros); - } - } - } - } -- --#ifdef MFEM_USE_LEGACY_OPENMP -- if (free_element_matrices) -- { -- FreeElementMatrices(); -- } --#endif - } - - void BilinearForm::ConformingAssemble() -@@ -644,8 +533,9 @@ void BilinearForm::ConformingAssemble() - // matrix which in turn will give rise to symmetric structure in the new - // matrix. This ensures that subsequent calls to EliminateRowCol will work - // correctly. -- Finalize(0); - MFEM_ASSERT(mat, "the BilinearForm is not assembled"); -+ const int remove_zeros = 0; -+ Finalize(remove_zeros); - - const SparseMatrix *P = fes->GetConformingProlongation(); - if (!P) { return; } // conforming mesh -@@ -693,7 +583,6 @@ void BilinearForm::AssembleDiagonal(Vector &diag) const - return; - } - // Here, we have extension, ext, and conforming prolongation, cP. -- - // For an AMR mesh, a convergent diagonal is assembled with |P^T| d_l, - // where |P^T| has the entry-wise absolute values of the conforming - // prolongation transpose operator. -@@ -708,12 +597,26 @@ void BilinearForm::FormLinearSystem(const Array &ess_tdof_list, Vector &x, - { - if (ext) - { -- ext->FormLinearSystem(ess_tdof_list, x, b, A, X, B, copy_interior); -+ Operator *oper; -+ ext->FormLinearSystem(ess_tdof_list, x, b, oper, X, B, copy_interior); -+ if (assembly == AssemblyLevel::FULL) -+ { -+ delete oper; -+ FormSystemMatrix(ess_tdof_list, A); -+ } -+ else -+ { -+ A.Reset(oper); -+ } - return; - } -- const SparseMatrix *P = fes->GetConformingProlongation(); -+ -+ // Finish the matrix assembly and perform BC elimination, storing the -+ // eliminated part of the matrix. - FormSystemMatrix(ess_tdof_list, A); - -+ const SparseMatrix *P = fes->GetConformingProlongation(); -+ - // Transform the system and perform the elimination in B, based on the - // essential BC values from x. Restrict the BC part of x in X, and set the - // non-BC part to zero. Since there is no good initial guess for the Lagrange -@@ -776,7 +679,23 @@ void BilinearForm::FormSystemMatrix(const Array &ess_tdof_list, - { - if (ext) - { -- ext->FormSystemMatrix(ess_tdof_list, A); -+ if (assembly == AssemblyLevel::FULL) -+ { -+ // Always does `DIAG_ONE` policy to be consistent with -+ // `Operator::FormConstrainedSystemOperator`. -+ MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ONE, -+ "Only DiagonalPolicy::DIAG_ONE supported with" -+ " FABilinearFormExtension."); -+ ConformingAssemble(); -+ mat->EliminateBC(ess_tdof_list, DiagonalPolicy::DIAG_ONE); -+ A.Reset(mat, false); -+ } -+ else -+ { -+ Operator *oper; -+ ext->FormSystemOperator(ess_tdof_list, oper); -+ A.Reset(oper); -+ } - return; - } - -@@ -797,8 +716,7 @@ void BilinearForm::FormSystemMatrix(const Array &ess_tdof_list, - { - if (!mat_e) - { -- const SparseMatrix *P = fes->GetConformingProlongation(); -- if (P) { ConformingAssemble(); } -+ ConformingAssemble(); - EliminateVDofs(ess_tdof_list, diag_policy); - const int remove_zeros = 0; - Finalize(remove_zeros); -@@ -872,48 +790,6 @@ void BilinearForm::RecoverFEMSolution(const Vector &X, - } - } - --void BilinearForm::ComputeElementMatrices() --{ -- if (element_matrices || domain_integs.Size() == 0 || fes->GetNE() == 0) -- { -- return; -- } -- -- int num_elements = fes->GetNE(); -- int num_dofs_per_el = fes->GetFE(0)->GetDof() * fes->GetVDim(); -- -- element_matrices = new DenseTensor(num_dofs_per_el, num_dofs_per_el, -- num_elements); -- -- DenseMatrix tmp; -- IsoparametricTransformation eltrans; -- --#ifdef MFEM_USE_LEGACY_OPENMP -- #pragma omp parallel for private(tmp,eltrans) --#endif -- for (int i = 0; i < num_elements; i++) -- { -- DenseMatrix elmat(element_matrices->GetData(i), -- num_dofs_per_el, num_dofs_per_el); -- const FiniteElement &fe = *fes->GetFE(i); --#ifdef MFEM_DEBUG -- if (num_dofs_per_el != fe.GetDof()*fes->GetVDim()) -- mfem_error("BilinearForm::ComputeElementMatrices:" -- " all elements must have same number of dofs"); --#endif -- fes->GetElementTransformation(i, &eltrans); -- -- domain_integs[0]->AssembleElementMatrix(fe, eltrans, elmat); -- for (int k = 1; k < domain_integs.Size(); k++) -- { -- // note: some integrators may not be thread-safe -- domain_integs[k]->AssembleElementMatrix(fe, eltrans, tmp); -- elmat += tmp; -- } -- elmat.ClearExternalData(); -- } --} -- - void BilinearForm::EliminateEssentialBC(const Array &bdr_attr_is_ess, - const Vector &sol, Vector &rhs, - DiagonalPolicy dpolicy) -@@ -949,8 +825,8 @@ void BilinearForm::EliminateEssentialBC(const Array &bdr_attr_is_ess, - } - } - --void BilinearForm::EliminateEssentialBCDiag (const Array &bdr_attr_is_ess, -- double value) -+void BilinearForm::EliminateEssentialBCDiag(const Array &bdr_attr_is_ess, -+ double value) - { - Array ess_dofs, conf_ess_dofs; - fes->GetEssentialVDofs(bdr_attr_is_ess, ess_dofs); -@@ -976,11 +852,11 @@ void BilinearForm::EliminateVDofs(const Array &vdofs_, - int vdof = vdofs_[i]; - if ( vdof >= 0 ) - { -- mat -> EliminateRowCol (vdof, sol(vdof), rhs, dpolicy); -+ mat->EliminateRowCol(vdof, sol(vdof), rhs, dpolicy); - } - else - { -- mat -> EliminateRowCol (-1-vdof, sol(-1-vdof), rhs, dpolicy); -+ mat->EliminateRowCol(-1-vdof, sol(-1-vdof), rhs, dpolicy); - } - } - } -@@ -999,11 +875,11 @@ void BilinearForm::EliminateVDofs(const Array &vdofs_, - int vdof = vdofs_[i]; - if ( vdof >= 0 ) - { -- mat -> EliminateRowCol (vdof, *mat_e, dpolicy); -+ mat->EliminateRowCol(vdof, *mat_e, dpolicy); - } - else - { -- mat -> EliminateRowCol (-1-vdof, *mat_e, dpolicy); -+ mat->EliminateRowCol(-1-vdof, *mat_e, dpolicy); - } - } - } -@@ -1019,31 +895,31 @@ void BilinearForm::EliminateEssentialBCFromDofs( - for (int i = 0; i < ess_dofs.Size(); i++) - if (ess_dofs[i] < 0) - { -- mat -> EliminateRowCol (i, sol(i), rhs, dpolicy); -+ mat->EliminateRowCol(i, sol(i), rhs, dpolicy); - } - } - --void BilinearForm::EliminateEssentialBCFromDofs (const Array &ess_dofs, -- DiagonalPolicy dpolicy) -+void BilinearForm::EliminateEssentialBCFromDofs(const Array &ess_dofs, -+ DiagonalPolicy dpolicy) - { - MFEM_ASSERT(ess_dofs.Size() == height, "incorrect dof Array size"); - - for (int i = 0; i < ess_dofs.Size(); i++) - if (ess_dofs[i] < 0) - { -- mat -> EliminateRowCol (i, dpolicy); -+ mat->EliminateRowCol(i, dpolicy); - } - } - --void BilinearForm::EliminateEssentialBCFromDofsDiag (const Array &ess_dofs, -- double value) -+void BilinearForm::EliminateEssentialBCFromDofsDiag(const Array &ess_dofs, -+ double value) - { - MFEM_ASSERT(ess_dofs.Size() == height, "incorrect dof Array size"); - - for (int i = 0; i < ess_dofs.Size(); i++) - if (ess_dofs[i] < 0) - { -- mat -> EliminateRowColDiag (i, value); -+ mat->EliminateRowColDiag(i, value); - } - } - -@@ -1054,31 +930,6 @@ void BilinearForm::EliminateVDofsInRHS( - mat->PartMult(vdofs_, x, b); - } - --void BilinearForm::Mult(const Vector &x, Vector &y) const --{ -- if (ext) -- { -- ext->Mult(x, y); -- } -- else -- { -- mat->Mult(x, y); -- } --} -- --void BilinearForm::MultTranspose(const Vector & x, Vector & y) const --{ -- if (ext) -- { -- ext->MultTranspose(x, y); -- } -- else -- { -- y = 0.0; -- AddMultTranspose (x, y); -- } --} -- - void BilinearForm::Update(FiniteElementSpace *nfes) - { - bool full_update; -@@ -1098,7 +949,6 @@ void BilinearForm::Update(FiniteElementSpace *nfes) - - delete mat_e; - mat_e = NULL; -- FreeElementMatrices(); - delete static_cond; - static_cond = NULL; - -@@ -1121,16 +971,10 @@ void BilinearForm::Update(FiniteElementSpace *nfes) - if (ext) { ext->Update(); } - } - --void BilinearForm::SetDiagonalPolicy(DiagonalPolicy policy) --{ -- diag_policy = policy; --} -- - BilinearForm::~BilinearForm() - { - delete mat_e; - delete mat; -- delete element_matrices; - delete static_cond; - delete hybridization; - -@@ -1148,9 +992,8 @@ BilinearForm::~BilinearForm() - delete ext; - } - -- --MixedBilinearForm::MixedBilinearForm (FiniteElementSpace *tr_fes, -- FiniteElementSpace *te_fes) -+MixedBilinearForm::MixedBilinearForm(FiniteElementSpace *tr_fes, -+ FiniteElementSpace *te_fes) - : Matrix(te_fes->GetVSize(), tr_fes->GetVSize()) - { - trial_fes = tr_fes; -@@ -1162,9 +1005,9 @@ MixedBilinearForm::MixedBilinearForm (FiniteElementSpace *tr_fes, - ext = NULL; - } - --MixedBilinearForm::MixedBilinearForm (FiniteElementSpace *tr_fes, -- FiniteElementSpace *te_fes, -- MixedBilinearForm * mbf) -+MixedBilinearForm::MixedBilinearForm(FiniteElementSpace *tr_fes, -+ FiniteElementSpace *te_fes, -+ MixedBilinearForm *mbf) - : Matrix(te_fes->GetVSize(), tr_fes->GetVSize()) - { - trial_fes = tr_fes; -@@ -1173,6 +1016,8 @@ MixedBilinearForm::MixedBilinearForm (FiniteElementSpace *tr_fes, - mat_e = NULL; - extern_bfs = 1; - ext = NULL; -+ assembly = AssemblyLevel::LEGACY; -+ ext = NULL; - - // Copy the pointers to the integrators - domain_integs = mbf->domain_integs; -@@ -1182,9 +1027,6 @@ MixedBilinearForm::MixedBilinearForm (FiniteElementSpace *tr_fes, - - boundary_integs_marker = mbf->boundary_integs_marker; - boundary_trace_face_integs_marker = mbf->boundary_trace_face_integs_marker; -- -- assembly = AssemblyLevel::LEGACY; -- ext = NULL; - } - - void MixedBilinearForm::SetAssemblyLevel(AssemblyLevel assembly_level) -@@ -1210,31 +1052,36 @@ void MixedBilinearForm::SetAssemblyLevel(AssemblyLevel assembly_level) - ext = new PAMixedBilinearFormExtension(this); - break; - case AssemblyLevel::NONE: -- mfem_error("Matrix-free action not supported yet... stay tuned!"); -- // ext = new MFMixedBilinearFormExtension(this); -+ ext = new MFMixedBilinearFormExtension(this); - break; - default: - mfem_error("Unknown assembly level"); - } - } - --double & MixedBilinearForm::Elem (int i, int j) -+double &MixedBilinearForm::Elem(int i, int j) - { - return (*mat)(i, j); - } - --const double & MixedBilinearForm::Elem (int i, int j) const -+const double &MixedBilinearForm::Elem(int i, int j) const - { - return (*mat)(i, j); - } - --void MixedBilinearForm::Mult(const Vector & x, Vector & y) const -+void MixedBilinearForm::Mult(const Vector &x, Vector &y) const - { -- y = 0.0; -- AddMult(x, y); -+ if (ext) -+ { -+ ext->Mult(x, y); -+ } -+ else -+ { -+ mat->Mult(x, y); -+ } - } - --void MixedBilinearForm::AddMult(const Vector & x, Vector & y, -+void MixedBilinearForm::AddMult(const Vector &x, Vector &y, - const double a) const - { - if (ext) -@@ -1247,13 +1094,19 @@ void MixedBilinearForm::AddMult(const Vector & x, Vector & y, - } - } - --void MixedBilinearForm::MultTranspose(const Vector & x, Vector & y) const -+void MixedBilinearForm::MultTranspose(const Vector &x, Vector &y) const - { -- y = 0.0; -- AddMultTranspose(x, y); -+ if (ext) -+ { -+ ext->MultTranspose(x, y); -+ } -+ else -+ { -+ mat->MultTranspose(x, y); -+ } - } - --void MixedBilinearForm::AddMultTranspose(const Vector & x, Vector & y, -+void MixedBilinearForm::AddMultTranspose(const Vector &x, Vector &y, - const double a) const - { - if (ext) -@@ -1266,7 +1119,7 @@ void MixedBilinearForm::AddMultTranspose(const Vector & x, Vector & y, - } - } - --MatrixInverse * MixedBilinearForm::Inverse() const -+MatrixInverse *MixedBilinearForm::Inverse() const - { - if (assembly != AssemblyLevel::LEGACY) - { -@@ -1276,15 +1129,15 @@ MatrixInverse * MixedBilinearForm::Inverse() const - } - else - { -- return mat -> Inverse (); -+ return mat->Inverse(); - } - } - --void MixedBilinearForm::Finalize (int skip_zeros) -+void MixedBilinearForm::Finalize(int skip_zeros) - { - if (assembly == AssemblyLevel::LEGACY) - { -- mat -> Finalize (skip_zeros); -+ mat->Finalize(skip_zeros); - } - } - -@@ -1300,27 +1153,27 @@ void MixedBilinearForm::GetBlocks(Array2D &blocks) const - mat->GetBlocks(blocks); - } - --void MixedBilinearForm::AddDomainIntegrator (BilinearFormIntegrator * bfi) -+void MixedBilinearForm::AddDomainIntegrator(BilinearFormIntegrator *bfi) - { -- domain_integs.Append (bfi); -+ domain_integs.Append(bfi); - } - --void MixedBilinearForm::AddBoundaryIntegrator (BilinearFormIntegrator * bfi) -+void MixedBilinearForm::AddBoundaryIntegrator(BilinearFormIntegrator *bfi) - { -- boundary_integs.Append (bfi); -+ boundary_integs.Append(bfi); - boundary_integs_marker.Append(NULL); // NULL marker means apply everywhere - } - --void MixedBilinearForm::AddBoundaryIntegrator (BilinearFormIntegrator * bfi, -- Array &bdr_marker) -+void MixedBilinearForm::AddBoundaryIntegrator(BilinearFormIntegrator *bfi, -+ Array &bdr_marker) - { -- boundary_integs.Append (bfi); -+ boundary_integs.Append(bfi); - boundary_integs_marker.Append(&bdr_marker); - } - --void MixedBilinearForm::AddTraceFaceIntegrator (BilinearFormIntegrator * bfi) -+void MixedBilinearForm::AddTraceFaceIntegrator(BilinearFormIntegrator *bfi) - { -- trace_face_integs.Append (bfi); -+ trace_face_integs.Append(bfi); - } - - void MixedBilinearForm::AddBdrTraceFaceIntegrator(BilinearFormIntegrator *bfi) -@@ -1337,7 +1190,45 @@ void MixedBilinearForm::AddBdrTraceFaceIntegrator(BilinearFormIntegrator *bfi, - boundary_trace_face_integs_marker.Append(&bdr_marker); - } - --void MixedBilinearForm::Assemble (int skip_zeros) -+void MixedBilinearForm::AssembleElementMatrix( -+ int i, const DenseMatrix &elmat, int skip_zeros) -+{ -+ AssembleElementMatrix(i, elmat, trial_vdofs, test_vdofs, skip_zeros); -+} -+ -+void MixedBilinearForm::AssembleElementMatrix( -+ int i, const DenseMatrix &elmat, Array &trial_vdofs_, -+ Array &test_vdofs_, int skip_zeros) -+{ -+ trial_fes->GetElementVDofs(i, trial_vdofs_); -+ test_fes->GetElementVDofs(i, test_vdofs_); -+ if (mat == NULL) -+ { -+ mat = new SparseMatrix(height, width); -+ } -+ mat->AddSubMatrix(test_vdofs_, trial_vdofs_, elmat, skip_zeros); -+} -+ -+void MixedBilinearForm::AssembleBdrElementMatrix( -+ int i, const DenseMatrix &elmat, int skip_zeros) -+{ -+ AssembleBdrElementMatrix(i, elmat, trial_vdofs, test_vdofs, skip_zeros); -+} -+ -+void MixedBilinearForm::AssembleBdrElementMatrix( -+ int i, const DenseMatrix &elmat, Array &trial_vdofs_, -+ Array &test_vdofs_, int skip_zeros) -+{ -+ trial_fes->GetBdrElementVDofs(i, trial_vdofs_); -+ test_fes->GetBdrElementVDofs(i, test_vdofs_); -+ if (mat == NULL) -+ { -+ mat = new SparseMatrix(height, width); -+ } -+ mat->AddSubMatrix(test_vdofs_, trial_vdofs_, elmat, skip_zeros); -+} -+ -+void MixedBilinearForm::Assemble(int skip_zeros) - { - if (ext) - { -@@ -1346,12 +1237,11 @@ void MixedBilinearForm::Assemble (int skip_zeros) - } - - ElementTransformation *eltrans; -- DofTransformation * dom_dof_trans; -- DofTransformation * ran_dof_trans; -+ DofTransformation *dom_dof_trans; -+ DofTransformation *ran_dof_trans; -+ Mesh *mesh = test_fes->GetMesh(); - DenseMatrix elmat; - -- Mesh *mesh = test_fes -> GetMesh(); -- - if (mat == NULL) - { - mat = new SparseMatrix(height, width); -@@ -1359,26 +1249,26 @@ void MixedBilinearForm::Assemble (int skip_zeros) - - if (domain_integs.Size()) - { -- for (int i = 0; i < test_fes -> GetNE(); i++) -+ for (int i = 0; i < test_fes->GetNE(); i++) - { -- dom_dof_trans = trial_fes -> GetElementVDofs (i, trial_vdofs); -- ran_dof_trans = test_fes -> GetElementVDofs (i, test_vdofs); -- eltrans = test_fes -> GetElementTransformation (i); -+ dom_dof_trans = trial_fes->GetElementVDofs(i, trial_vdofs); -+ ran_dof_trans = test_fes->GetElementVDofs(i, test_vdofs); -+ eltrans = test_fes->GetElementTransformation(i); - - elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); - elmat = 0.0; - for (int k = 0; k < domain_integs.Size(); k++) - { -- domain_integs[k] -> AssembleElementMatrix2 (*trial_fes -> GetFE(i), -- *test_fes -> GetFE(i), -- *eltrans, elemmat); -+ domain_integs[k]->AssembleElementMatrix2(*trial_fes->GetFE(i), -+ *test_fes->GetFE(i), -+ *eltrans, elemmat); - elmat += elemmat; - } - if (ran_dof_trans || dom_dof_trans) - { - TransformDual(ran_dof_trans, dom_dof_trans, elmat); - } -- mat -> AddSubMatrix (test_vdofs, trial_vdofs, elmat, skip_zeros); -+ mat->AddSubMatrix(test_vdofs, trial_vdofs, elmat, skip_zeros); - } - } - -@@ -1405,14 +1295,14 @@ void MixedBilinearForm::Assemble (int skip_zeros) - } - } - -- for (int i = 0; i < test_fes -> GetNBE(); i++) -+ for (int i = 0; i < test_fes->GetNBE(); i++) - { - const int bdr_attr = mesh->GetBdrAttribute(i); - if (bdr_attr_marker[bdr_attr-1] == 0) { continue; } - -- dom_dof_trans = trial_fes -> GetBdrElementVDofs (i, trial_vdofs); -- ran_dof_trans = test_fes -> GetBdrElementVDofs (i, test_vdofs); -- eltrans = test_fes -> GetBdrElementTransformation (i); -+ dom_dof_trans = trial_fes->GetBdrElementVDofs(i, trial_vdofs); -+ ran_dof_trans = test_fes->GetBdrElementVDofs(i, test_vdofs); -+ eltrans = test_fes->GetBdrElementTransformation(i); - - elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); - elmat = 0.0; -@@ -1421,16 +1311,16 @@ void MixedBilinearForm::Assemble (int skip_zeros) - if (boundary_integs_marker[k] && - (*boundary_integs_marker[k])[bdr_attr-1] == 0) { continue; } - -- boundary_integs[k]->AssembleElementMatrix2 (*trial_fes -> GetBE(i), -- *test_fes -> GetBE(i), -- *eltrans, elemmat); -+ boundary_integs[k]->AssembleElementMatrix2(*trial_fes->GetBE(i), -+ *test_fes->GetBE(i), -+ *eltrans, elemmat); - elmat += elemmat; - } - if (ran_dof_trans || dom_dof_trans) - { - TransformDual(ran_dof_trans, dom_dof_trans, elmat); - } -- mat -> AddSubMatrix (test_vdofs, trial_vdofs, elmat, skip_zeros); -+ mat->AddSubMatrix(test_vdofs, trial_vdofs, elmat, skip_zeros); - } - } - -@@ -1497,7 +1387,7 @@ void MixedBilinearForm::Assemble (int skip_zeros) - } - } - -- for (int i = 0; i < trial_fes -> GetNBE(); i++) -+ for (int i = 0; i < trial_fes->GetNBE(); i++) - { - const int bdr_attr = mesh->GetBdrAttribute(i); - if (bdr_attr_marker[bdr_attr-1] == 0) { continue; } -@@ -1530,6 +1420,37 @@ void MixedBilinearForm::Assemble (int skip_zeros) - } - } - -+void MixedBilinearForm::ConformingAssemble() -+{ -+ if (assembly != AssemblyLevel::LEGACY) -+ { -+ MFEM_WARNING("Conforming assemble not supported for this assembly level!"); -+ return; -+ } -+ -+ const int remove_zeros = 0; -+ Finalize(remove_zeros); -+ -+ const SparseMatrix *test_P = test_fes->GetConformingProlongation(); -+ if (test_P) -+ { -+ SparseMatrix *RA = mfem::TransposeMult(*test_P, *mat); -+ delete mat; -+ mat = RA; -+ } -+ -+ const SparseMatrix *trial_P = trial_fes->GetConformingProlongation(); -+ if (trial_P) -+ { -+ SparseMatrix *RAP = mfem::Mult(*mat, *trial_P); -+ delete mat; -+ mat = RAP; -+ } -+ -+ height = mat->Height(); -+ width = mat->Width(); -+} -+ - void MixedBilinearForm::AssembleDiagonal_ADAt(const Vector &D, - Vector &diag) const - { -@@ -1578,259 +1499,119 @@ void MixedBilinearForm::AssembleDiagonal_ADAt(const Vector &D, - } - } - --void MixedBilinearForm::ConformingAssemble() -+void MixedBilinearForm::FormRectangularLinearSystem( -+ const Array &trial_tdof_list, -+ const Array &test_tdof_list, -+ Vector &x, Vector &b, -+ OperatorHandle &A, -+ Vector &X, Vector &B) - { -- if (assembly != AssemblyLevel::LEGACY) -+ if (ext) - { -- MFEM_WARNING("Conforming assemble not supported for this assembly level!"); -+ Operator *oper; -+ ext->FormRectangularLinearSystem(trial_tdof_list, test_tdof_list, -+ x, b, oper, X, B); -+ A.Reset(oper); - return; - } - -- Finalize(); -- -- const SparseMatrix *P2 = test_fes->GetConformingProlongation(); -- if (P2) -- { -- SparseMatrix *R = Transpose(*P2); -- SparseMatrix *RA = mfem::Mult(*R, *mat); -- delete R; -- delete mat; -- mat = RA; -- } -+ const Operator *Pi = this->GetProlongation(); -+ const Operator *Po = this->GetOutputProlongation(); -+ const Operator *Ri = this->GetRestriction(); -+ InitTVectors(Po, Ri, Pi, x, b, X, B); - -- const SparseMatrix *P1 = trial_fes->GetConformingProlongation(); -- if (P1) -+ if (!mat_e) - { -- SparseMatrix *RAP = mfem::Mult(*mat, *P1); -- delete mat; -- mat = RAP; -+ // Set A = mat_e -+ FormRectangularSystemMatrix(trial_tdof_list, test_tdof_list, A); - } -+ // Eliminate essential BCs with B -= Ab xb -+ mat_e->AddMult(X, B, -1.0); - -- height = mat->Height(); -- width = mat->Width(); --} -- -- --void MixedBilinearForm::ComputeElementMatrix(int i, DenseMatrix &elmat) --{ -- if (domain_integs.Size()) -- { -- const FiniteElement &trial_fe = *trial_fes->GetFE(i); -- const FiniteElement &test_fe = *test_fes->GetFE(i); -- ElementTransformation *eltrans = test_fes->GetElementTransformation(i); -- domain_integs[0]->AssembleElementMatrix2(trial_fe, test_fe, *eltrans, -- elmat); -- for (int k = 1; k < domain_integs.Size(); k++) -- { -- domain_integs[k]->AssembleElementMatrix2(trial_fe, test_fe, *eltrans, -- elemmat); -- elmat += elemmat; -- } -- } -- else -- { -- trial_fes->GetElementVDofs(i, trial_vdofs); -- test_fes->GetElementVDofs(i, test_vdofs); -- elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); -- elmat = 0.0; -- } -+ B.SetSubVector(test_tdof_list, 0.0); - } - --void MixedBilinearForm::ComputeBdrElementMatrix(int i, DenseMatrix &elmat) -+void MixedBilinearForm::FormRectangularSystemMatrix( -+ const Array &trial_tdof_list, -+ const Array &test_tdof_list, -+ OperatorHandle &A) - { -- if (boundary_integs.Size()) -- { -- const FiniteElement &trial_be = *trial_fes->GetBE(i); -- const FiniteElement &test_be = *test_fes->GetBE(i); -- ElementTransformation *eltrans = test_fes->GetBdrElementTransformation(i); -- boundary_integs[0]->AssembleElementMatrix2(trial_be, test_be, *eltrans, -- elmat); -- for (int k = 1; k < boundary_integs.Size(); k++) -- { -- boundary_integs[k]->AssembleElementMatrix2(trial_be, test_be, *eltrans, -- elemmat); -- elmat += elemmat; -- } -- } -- else -+ if (ext) - { -- trial_fes->GetBdrElementVDofs(i, trial_vdofs); -- test_fes->GetBdrElementVDofs(i, test_vdofs); -- elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); -- elmat = 0.0; -+ Operator *oper; -+ ext->FormRectangularSystemOperator(trial_tdof_list, test_tdof_list, oper); -+ A.Reset(oper); -+ return; - } --} - --void MixedBilinearForm::AssembleElementMatrix( -- int i, const DenseMatrix &elmat, int skip_zeros) --{ -- AssembleElementMatrix(i, elmat, trial_vdofs, test_vdofs, skip_zeros); --} -+ ConformingAssemble(); - --void MixedBilinearForm::AssembleElementMatrix( -- int i, const DenseMatrix &elmat, Array &trial_vdofs_, -- Array &test_vdofs_, int skip_zeros) --{ -- trial_fes->GetElementVDofs(i, trial_vdofs_); -- test_fes->GetElementVDofs(i, test_vdofs_); -- if (mat == NULL) -- { -- mat = new SparseMatrix(height, width); -- } -- mat->AddSubMatrix(test_vdofs_, trial_vdofs_, elmat, skip_zeros); --} -+ Array ess_trial_tdof_marker, ess_test_tdof_marker; -+ FiniteElementSpace::ListToMarker(trial_tdof_list, trial_fes->GetTrueVSize(), -+ ess_trial_tdof_marker); -+ FiniteElementSpace::ListToMarker(test_tdof_list, test_fes->GetTrueVSize(), -+ ess_test_tdof_marker); - --void MixedBilinearForm::AssembleBdrElementMatrix( -- int i, const DenseMatrix &elmat, int skip_zeros) --{ -- AssembleBdrElementMatrix(i, elmat, trial_vdofs, test_vdofs, skip_zeros); --} -+ mat_e = new SparseMatrix(mat->Height(), mat->Width()); -+ mat->EliminateCols(ess_trial_tdof_marker, *mat_e); - --void MixedBilinearForm::AssembleBdrElementMatrix( -- int i, const DenseMatrix &elmat, Array &trial_vdofs_, -- Array &test_vdofs_, int skip_zeros) --{ -- trial_fes->GetBdrElementVDofs(i, trial_vdofs_); -- test_fes->GetBdrElementVDofs(i, test_vdofs_); -- if (mat == NULL) -+ for (int i = 0; i < test_tdof_list.Size(); i++) - { -- mat = new SparseMatrix(height, width); -+ mat->EliminateRow(test_tdof_list[i]); - } -- mat->AddSubMatrix(test_vdofs_, trial_vdofs_, elmat, skip_zeros); -+ mat_e->Finalize(); -+ A.Reset(mat, false); - } - --void MixedBilinearForm::EliminateTrialDofs ( -+void MixedBilinearForm::EliminateTrialDofs( - const Array &bdr_attr_is_ess, const Vector &sol, Vector &rhs ) - { - int i, j, k; -- Array tr_vdofs, cols_marker (trial_fes -> GetVSize()); -+ Array tr_vdofs, cols_marker(trial_fes->GetVSize()); - - cols_marker = 0; -- for (i = 0; i < trial_fes -> GetNBE(); i++) -- if (bdr_attr_is_ess[trial_fes -> GetBdrAttribute (i)-1]) -+ for (i = 0; i < trial_fes->GetNBE(); i++) -+ if (bdr_attr_is_ess[trial_fes->GetBdrAttribute(i)-1]) - { -- trial_fes -> GetBdrElementVDofs (i, tr_vdofs); -+ trial_fes->GetBdrElementVDofs(i, tr_vdofs); - for (j = 0; j < tr_vdofs.Size(); j++) - { -- if ( (k = tr_vdofs[j]) < 0 ) -+ if ((k = tr_vdofs[j]) < 0) - { - k = -1-k; - } - cols_marker[k] = 1; - } - } -- mat -> EliminateCols (cols_marker, &sol, &rhs); -+ mat->EliminateCols(cols_marker, &sol, &rhs); - } - --void MixedBilinearForm::EliminateEssentialBCFromTrialDofs ( -+void MixedBilinearForm::EliminateEssentialBCFromTrialDofs( - const Array &marked_vdofs, const Vector &sol, Vector &rhs) - { -- mat -> EliminateCols (marked_vdofs, &sol, &rhs); -+ mat->EliminateCols(marked_vdofs, &sol, &rhs); - } - --void MixedBilinearForm::EliminateTestDofs (const Array &bdr_attr_is_ess) -+void MixedBilinearForm::EliminateTestDofs(const Array &bdr_attr_is_ess) - { - int i, j, k; - Array te_vdofs; - -- for (i = 0; i < test_fes -> GetNBE(); i++) -- if (bdr_attr_is_ess[test_fes -> GetBdrAttribute (i)-1]) -+ for (i = 0; i < test_fes->GetNBE(); i++) -+ if (bdr_attr_is_ess[test_fes->GetBdrAttribute(i)-1]) - { -- test_fes -> GetBdrElementVDofs (i, te_vdofs); -+ test_fes->GetBdrElementVDofs(i, te_vdofs); - for (j = 0; j < te_vdofs.Size(); j++) - { -- if ( (k = te_vdofs[j]) < 0 ) -+ if ((k = te_vdofs[j]) < 0) - { - k = -1-k; - } -- mat -> EliminateRow (k); -+ mat->EliminateRow(k); - } - } - } - --void MixedBilinearForm::FormRectangularSystemMatrix( -- const Array &trial_tdof_list, -- const Array &test_tdof_list, -- OperatorHandle &A) -- --{ -- if (ext) -- { -- ext->FormRectangularSystemOperator(trial_tdof_list, test_tdof_list, A); -- return; -- } -- -- const SparseMatrix *test_P = test_fes->GetConformingProlongation(); -- const SparseMatrix *trial_P = trial_fes->GetConformingProlongation(); -- -- mat->Finalize(); -- -- if (test_P && trial_P) -- { -- SparseMatrix *m = RAP(*test_P, *mat, *trial_P); -- delete mat; -- mat = m; -- } -- else if (test_P) -- { -- SparseMatrix *m = TransposeMult(*test_P, *mat); -- delete mat; -- mat = m; -- } -- else if (trial_P) -- { -- SparseMatrix *m = mfem::Mult(*mat, *trial_P); -- delete mat; -- mat = m; -- } -- -- Array ess_trial_tdof_marker, ess_test_tdof_marker; -- FiniteElementSpace::ListToMarker(trial_tdof_list, trial_fes->GetTrueVSize(), -- ess_trial_tdof_marker); -- FiniteElementSpace::ListToMarker(test_tdof_list, test_fes->GetTrueVSize(), -- ess_test_tdof_marker); -- -- mat_e = new SparseMatrix(mat->Height(), mat->Width()); -- mat->EliminateCols(ess_trial_tdof_marker, *mat_e); -- -- for (int i=0; iEliminateRow(test_tdof_list[i]); -- } -- mat_e->Finalize(); -- A.Reset(mat, false); --} -- --void MixedBilinearForm::FormRectangularLinearSystem( -- const Array &trial_tdof_list, -- const Array &test_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, -- Vector &X, Vector &B) --{ -- if (ext) -- { -- ext->FormRectangularLinearSystem(trial_tdof_list, test_tdof_list, -- x, b, A, X, B); -- return; -- } -- -- const Operator *Pi = this->GetProlongation(); -- const Operator *Po = this->GetOutputProlongation(); -- const Operator *Ri = this->GetRestriction(); -- InitTVectors(Po, Ri, Pi, x, b, X, B); -- -- if (!mat_e) -- { -- FormRectangularSystemMatrix(trial_tdof_list, test_tdof_list, -- A); // Set A = mat_e -- } -- // Eliminate essential BCs with B -= Ab xb -- mat_e->AddMult(X, B, -1.0); -- -- B.SetSubVector(test_tdof_list, 0.0); --} -- - void MixedBilinearForm::Update() - { - delete mat; -@@ -1895,66 +1676,97 @@ void DiscreteLinearOperator::Assemble(int skip_zeros) - return; - } - -- Array dom_vdofs, ran_vdofs; -- ElementTransformation *T; -- DofTransformation * dom_dof_trans; -- DofTransformation * ran_dof_trans; -- const FiniteElement *dom_fe, *ran_fe; -- DenseMatrix totelmat, elmat; -+ ElementTransformation *eltrans; -+ DofTransformation *dom_dof_trans; -+ DofTransformation *ran_dof_trans; -+ Mesh *mesh = test_fes->GetMesh(); -+ DenseMatrix elmat; - - if (mat == NULL) - { - mat = new SparseMatrix(height, width); - } - -- if (domain_integs.Size() > 0) -+ if (domain_integs.Size()) - { - for (int i = 0; i < test_fes->GetNE(); i++) - { -- dom_dof_trans = trial_fes->GetElementVDofs(i, dom_vdofs); -- ran_dof_trans = test_fes->GetElementVDofs(i, ran_vdofs); -- T = test_fes->GetElementTransformation(i); -- dom_fe = trial_fes->GetFE(i); -- ran_fe = test_fes->GetFE(i); -- -- domain_integs[0]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- totelmat); -- for (int j = 1; j < domain_integs.Size(); j++) -+ dom_dof_trans = trial_fes->GetElementVDofs(i, trial_vdofs); -+ ran_dof_trans = test_fes->GetElementVDofs(i, test_vdofs); -+ eltrans = test_fes->GetElementTransformation(i); -+ -+ elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); -+ elmat = 0.0; -+ for (int j = 0; j < domain_integs.Size(); j++) - { -- domain_integs[j]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- elmat); -- totelmat += elmat; -+ domain_integs[j]->AssembleElementMatrix2(*trial_fes->GetFE(i), -+ *test_fes->GetFE(i), -+ *eltrans, elemmat); -+ elmat += elemmat; - } - if (ran_dof_trans || dom_dof_trans) - { -- TransformPrimal(ran_dof_trans, dom_dof_trans, totelmat); -+ TransformPrimal(ran_dof_trans, dom_dof_trans, elmat); - } -- mat->SetSubMatrix(ran_vdofs, dom_vdofs, totelmat, skip_zeros); -+ mat->SetSubMatrix(test_vdofs, trial_vdofs, elmat, skip_zeros); - } - } - - if (trace_face_integs.Size()) - { -- const int nfaces = test_fes->GetMesh()->GetNumFaces(); -+ const int nfaces = mesh->GetNumFaces(); - for (int i = 0; i < nfaces; i++) - { -- trial_fes->GetFaceVDofs(i, dom_vdofs); -- test_fes->GetFaceVDofs(i, ran_vdofs); -- T = test_fes->GetMesh()->GetFaceTransformation(i); -- dom_fe = trial_fes->GetFaceElement(i); -- ran_fe = test_fes->GetFaceElement(i); -- -- trace_face_integs[0]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- totelmat); -- for (int j = 1; j < trace_face_integs.Size(); j++) -+ trial_fes->GetFaceVDofs(i, trial_vdofs); -+ test_fes->GetFaceVDofs(i, test_vdofs); -+ eltrans = mesh->GetFaceTransformation(i); -+ -+ elmat.SetSize(test_vdofs.Size(), trial_vdofs.Size()); -+ elmat = 0.0; -+ for (int j = 0; j < trace_face_integs.Size(); j++) - { -- trace_face_integs[j]->AssembleElementMatrix2(*dom_fe, *ran_fe, *T, -- elmat); -- totelmat += elmat; -+ trace_face_integs[j]->AssembleElementMatrix2(*trial_fes->GetFaceElement(i), -+ *test_fes->GetFaceElement(i), -+ *eltrans, elemmat); -+ elmat += elemmat; - } -- mat->SetSubMatrix(ran_vdofs, dom_vdofs, totelmat, skip_zeros); -+ mat->SetSubMatrix(test_vdofs, trial_vdofs, elmat, skip_zeros); - } - } - } - -+void DiscreteLinearOperator::FormDiscreteOperatorMatrix(OperatorHandle &A) -+{ -+ if (ext) -+ { -+ Operator *oper; -+ ext->FormDiscreteOperator(oper); -+ A.Reset(oper); -+ return; -+ } -+ -+ mat->Finalize(); -+ -+ const SparseMatrix *test_R = test_fes->GetConformingRestriction(); -+ if (test_R) -+ { -+ SparseMatrix *RA = mfem::Mult(*test_R, *mat); -+ delete mat; -+ mat = RA; -+ } -+ -+ const SparseMatrix *trial_P = trial_fes->GetConformingProlongation(); -+ if (trial_P) -+ { -+ SparseMatrix *RAP = mfem::Mult(*mat, *trial_P); -+ delete mat; -+ mat = RAP; -+ } -+ -+ height = mat->Height(); -+ width = mat->Width(); -+ -+ A.Reset(mat, false); -+} -+ - } -diff --git a/fem/bilinearform.hpp b/fem/bilinearform.hpp -index b23df9280..b878b8d27 100644 ---- a/fem/bilinearform.hpp -+++ b/fem/bilinearform.hpp -@@ -36,8 +36,6 @@ enum class AssemblyLevel - /// is fully evaluated on the fly. - /// This assembly level is ALWAYS performed on the host. - LEGACY = 0, -- /// @deprecated Use LEGACY instead. -- LEGACYFULL = 0, - /// Fully assembled form, i.e. a global sparse matrix in MFEM format. This - /// assembly is compatible with device execution. - FULL, -@@ -66,7 +64,7 @@ protected: - SparseMatrix *mat; - - /** @brief Sparse Matrix \f$ M_e \f$ used to store the eliminations -- from the b.c. Owned. -+ from the b.c. Owned. - \f$ M + M_e = M_{original} \f$ */ - SparseMatrix *mat_e; - -@@ -75,11 +73,11 @@ protected: - - /// The assembly level of the form (full, partial, etc.) - AssemblyLevel assembly; -- /// Element batch size used in the form action (1, 8, num_elems, etc.) -- int batch; -+ - /** @brief Extension for supporting Full Assembly (FA), Element Assembly (EA), - Partial Assembly (PA), or Matrix Free assembly (MF). */ - BilinearFormExtension *ext; -+ - /** Indicates if the sparse matrix is sorted after assembly when using - Full Assembly (FA). */ - bool sort_sparse_matrix = false; -@@ -113,11 +111,6 @@ protected: - Array boundary_face_integs; - Array*> boundary_face_integs_marker; ///< Entries are not owned. - -- DenseMatrix elemmat; -- Array vdofs; -- -- DenseTensor *element_matrices; ///< Owned. -- - StaticCondensation *static_cond; ///< Owned. - Hybridization *hybridization; ///< Owned. - -@@ -126,31 +119,29 @@ protected: - the constrained DoFs. */ - DiagonalPolicy diag_policy; - -- int precompute_sparsity; -- // Allocate appropriate SparseMatrix and assign it to mat -- void AllocMat(); -- -- void ConformingAssemble(); -+ DenseMatrix elemmat; -+ Array vdofs; - - // may be used in the construction of derived classes -- BilinearForm() : Matrix (0) -+ BilinearForm() : Matrix(0) - { -- fes = NULL; sequence = -1; -- mat = mat_e = NULL; extern_bfs = 0; element_matrices = NULL; -- static_cond = NULL; hybridization = NULL; -- precompute_sparsity = 0; -+ fes = NULL; -+ sequence = -1; -+ mat = mat_e = NULL; -+ extern_bfs = 0; -+ static_cond = NULL; -+ hybridization = NULL; - diag_policy = DIAG_KEEP; - assembly = AssemblyLevel::LEGACY; -- batch = 1; - ext = NULL; - } - - private: -- /// Copy construction is not supported; body is undefined. -- BilinearForm(const BilinearForm &); -+ /// Copy construction is not supported. -+ BilinearForm(const BilinearForm &) = delete; - -- /// Copy assignment is not supported; body is undefined. -- BilinearForm &operator=(const BilinearForm &); -+ /// Copy assignment is not supported. -+ BilinearForm &operator=(const BilinearForm &) = delete; - - public: - /// Creates bilinear form associated with FE space @a *f. -@@ -163,11 +154,8 @@ public: - The pointer @a f is not owned by the newly constructed object. - - The integrators in @a bf are copied as pointers and they are not owned by -- the newly constructed BilinearForm. -- -- The optional parameter @a ps is used to initialize the internal flag -- #precompute_sparsity, see UsePrecomputedSparsity() for details. */ -- BilinearForm(FiniteElementSpace *f, BilinearForm *bf, int ps = 0); -+ the newly constructed BilinearForm. */ -+ BilinearForm(FiniteElementSpace *f, BilinearForm *bf); - - /// Get the size of the BilinearForm as a square matrix. - int Size() const { return height; } -@@ -184,6 +172,18 @@ public: - If used, this method must be called before assembly. */ - void SetAssemblyLevel(AssemblyLevel assembly_level); - -+ /// Returns the assembly level -+ AssemblyLevel GetAssemblyLevel() const { return assembly; } -+ -+ /// Sets diagonal policy used upon construction of the linear system. -+ /** Policies include: -+ -+ - DIAG_ZERO (Set the diagonal values to zero) -+ - DIAG_ONE (Set the diagonal values to one) -+ - DIAG_KEEP (Keep the diagonal values) -+ */ -+ void SetDiagonalPolicy(DiagonalPolicy policy) { diag_policy = policy; } -+ - /** @brief Force the sparse matrix column indices to be sorted when using - AssemblyLevel::FULL. - -@@ -199,8 +199,16 @@ public: - sort_sparse_matrix = enable_it; - } - -- /// Returns the assembly level -- AssemblyLevel GetAssemblyLevel() const { return assembly; } -+ /// Indicate that integrators are not owned by the BilinearForm -+ void UseExternalIntegrators() { extern_bfs = 1; } -+ -+ /// Enable hybridization. -+ /** For details see the description for class -+ Hybridization in fem/hybridization.hpp. This method should be called -+ before assembly. */ -+ void EnableHybridization(FiniteElementSpace *constr_space, -+ BilinearFormIntegrator *constr_integ, -+ const Array &ess_tdof_list); - - Hybridization *GetHybridization() const { return hybridization; } - -@@ -218,60 +226,6 @@ public: - FiniteElementSpace *SCFESpace() const - { return static_cond ? static_cond->GetTraceFESpace() : NULL; } - -- /// Enable hybridization. -- /** For details see the description for class -- Hybridization in fem/hybridization.hpp. This method should be called -- before assembly. */ -- void EnableHybridization(FiniteElementSpace *constr_space, -- BilinearFormIntegrator *constr_integ, -- const Array &ess_tdof_list); -- -- /** @brief For scalar FE spaces, precompute the sparsity pattern of the matrix -- (assuming dense element matrices) based on the types of integrators -- present in the bilinear form. */ -- void UsePrecomputedSparsity(int ps = 1) { precompute_sparsity = ps; } -- -- /** @brief Use the given CSR sparsity pattern to allocate the internal -- SparseMatrix. -- -- - The @a I and @a J arrays must define a square graph with size equal to -- GetVSize() of the associated FiniteElementSpace. -- - This method should be called after enabling static condensation or -- hybridization, if used. -- - In the case of static condensation, @a I and @a J are not used. -- - The ownership of the arrays @a I and @a J remains with the caller. */ -- void UseSparsity(int *I, int *J, bool isSorted); -- -- /// Use the sparsity of @a A to allocate the internal SparseMatrix. -- void UseSparsity(SparseMatrix &A); -- -- /// Pre-allocate the internal SparseMatrix before assembly. -- /** If the flag 'precompute sparsity' -- is set, the matrix is allocated in CSR format (i.e. -- finalized) and the entries are initialized with zeros. */ -- void AllocateMatrix() { if (mat == NULL) { AllocMat(); } } -- -- /// Access all the integrators added with AddDomainIntegrator(). -- Array *GetDBFI() { return &domain_integs; } -- -- /// Access all the integrators added with AddBoundaryIntegrator(). -- Array *GetBBFI() { return &boundary_integs; } -- /** @brief Access all boundary markers added with AddBoundaryIntegrator(). -- If no marker was specified when the integrator was added, the -- corresponding pointer (to Array) will be NULL. */ -- Array*> *GetBBFI_Marker() { return &boundary_integs_marker; } -- -- /// Access all integrators added with AddInteriorFaceIntegrator(). -- Array *GetFBFI() { return &interior_face_integs; } -- -- /// Access all integrators added with AddBdrFaceIntegrator(). -- Array *GetBFBFI() { return &boundary_face_integs; } -- /** @brief Access all boundary markers added with AddBdrFaceIntegrator(). -- If no marker was specified when the integrator was added, the -- corresponding pointer (to Array) will be NULL. */ -- Array*> *GetBFBFI_Marker() -- { return &boundary_face_integs_marker; } -- - /// Returns a reference to: \f$ M_{ij} \f$ - const double &operator()(int i, int j) { return (*mat)(i,j); } - -@@ -284,42 +238,52 @@ public: - /// Matrix vector multiplication: \f$ y = M x \f$ - virtual void Mult(const Vector &x, Vector &y) const; - -+ /// Add the matrix vector multiple to a vector: \f$ y += a M x \f$ -+ virtual void AddMult(const Vector &x, Vector &y, -+ const double a = 1.0) const; -+ -+ /// Matrix transpose vector multiplication: \f$ y = M^T x \f$ -+ virtual void MultTranspose(const Vector &x, Vector &y) const; -+ -+ /// Add the matrix transpose vector multiplication: \f$ y += a M^T x \f$ -+ virtual void AddMultTranspose(const Vector &x, Vector &y, -+ const double a = 1.0) const; -+ - /** @brief Matrix vector multiplication with the original uneliminated - matrix. The original matrix is \f$ M + M_e \f$ so we have: - \f$ y = M x + M_e x \f$ */ - void FullMult(const Vector &x, Vector &y) const - { mat->Mult(x, y); mat_e->AddMult(x, y); } - -- /// Add the matrix vector multiple to a vector: \f$ y += a M x \f$ -- virtual void AddMult(const Vector &x, Vector &y, const double a = 1.0) const -- { mat -> AddMult (x, y, a); } -- - /** @brief Add the original uneliminated matrix vector multiple to a vector. - The original matrix is \f$ M + Me \f$ so we have: - \f$ y += M x + M_e x \f$ */ - void FullAddMult(const Vector &x, Vector &y) const - { mat->AddMult(x, y); mat_e->AddMult(x, y); } - -- /// Add the matrix transpose vector multiplication: \f$ y += a M^T x \f$ -- virtual void AddMultTranspose(const Vector & x, Vector & y, -- const double a = 1.0) const -- { mat->AddMultTranspose(x, y, a); } -- - /** @brief Add the original uneliminated matrix transpose vector - multiple to a vector. The original matrix is \f$ M + M_e \f$ - so we have: \f$ y += M^T x + {M_e}^T x \f$ */ -- void FullAddMultTranspose(const Vector & x, Vector & y) const -+ void FullAddMultTranspose(const Vector &x, Vector &y) const - { mat->AddMultTranspose(x, y); mat_e->AddMultTranspose(x, y); } - -- /// Matrix transpose vector multiplication: \f$ y = M^T x \f$ -- virtual void MultTranspose(const Vector & x, Vector & y) const; -+ /// Compute inner product for full uneliminated matrix \f$ y^T M x + y^T M_e x \f$ -+ double FullInnerProduct(const Vector &x, const Vector &y) const -+ { return mat->InnerProduct(x, y) + mat_e->InnerProduct(x, y); } -+ -+ /// Returns a pointer to (approximation) of the matrix inverse: \f$ M^{-1} \f$ -+ virtual MatrixInverse *Inverse() const; - - /// Compute \f$ y^T M x \f$ - double InnerProduct(const Vector &x, const Vector &y) const -- { return mat->InnerProduct (x, y); } -+ { return mat->InnerProduct(x, y); } - -- /// Returns a pointer to (approximation) of the matrix inverse: \f$ M^{-1} \f$ -- virtual MatrixInverse *Inverse() const; -+ /// Sets all sparse values of \f$ M \f$ and \f$ M_e \f$ to 'a'. -+ void operator=(const double a) -+ { -+ if (mat != NULL) { *mat = a; } -+ if (mat_e != NULL) { *mat_e = a; } -+ } - - /// Finalizes the matrix initialization. - virtual void Finalize(int skip_zeros = 1); -@@ -345,15 +309,7 @@ public: - /** @brief Returns true if the sparse matrix is not null, false otherwise. - - @sa SpMat(). */ -- bool HasSpMat() -- { -- return mat != nullptr; -- } -- -- -- /** @brief Nullifies the internal matrix \f$ M \f$ and returns a pointer -- to it. Used for transferring ownership. */ -- SparseMatrix *LoseMat() { SparseMatrix *tmp = mat; mat = NULL; return tmp; } -+ bool HasSpMat() const { return mat != nullptr; } - - /** @brief Returns a const reference to the sparse matrix of eliminated b.c.: - \f$ M_e \f$ -@@ -379,11 +335,31 @@ public: - false otherwise. - - @sa SpMatElim(). */ -- bool HasSpMatElim() -+ bool HasSpMatElim()const { return mat_e != nullptr; } -+ -+ /** @brief Nullifies the internal matrix \f$ M \f$ and returns a pointer -+ to it. Used for transferring ownership. */ -+ SparseMatrix *LoseMat() { SparseMatrix *tmp = mat; mat = NULL; return tmp; } -+ -+ /** Returns a const reference to the extension for assembly levels other -+ than AssemblyLevel::LEGACY. */ -+ const BilinearFormExtension &Ext() const - { -- return mat_e != nullptr; -+ MFEM_VERIFY(ext, "ext is NULL and can't be dereferenced"); -+ return *ext; - } - -+ /** Returns a reference to the extension for assembly levels other than -+ AssemblyLevel::LEGACY. */ -+ BilinearFormExtension &Ext() -+ { -+ MFEM_VERIFY(ext, "ext is NULL and can't be dereferenced"); -+ return *ext; -+ } -+ -+ /// Returns true if the extension is not null, false otherwise. -+ bool HasExt() const { return ext != nullptr; } -+ - /// Adds new Domain Integrator. Assumes ownership of @a bfi. - void AddDomainIntegrator(BilinearFormIntegrator *bfi); - /// Adds new Domain Integrator restricted to certain elements specified by -@@ -416,16 +392,72 @@ public: - void AddBdrFaceIntegrator(BilinearFormIntegrator *bfi, - Array &bdr_marker); - -- /// Sets all sparse values of \f$ M \f$ and \f$ M_e \f$ to 'a'. -- void operator=(const double a) -- { -- if (mat != NULL) { *mat = a; } -- if (mat_e != NULL) { *mat_e = a; } -- } -+ /// Access all the integrators added with AddDomainIntegrator(). -+ Array *GetDBFI() { return &domain_integs; } -+ -+ /// Access all the integrators added with AddBoundaryIntegrator(). -+ Array *GetBBFI() { return &boundary_integs; } -+ /** @brief Access all boundary markers added with AddBoundaryIntegrator(). -+ If no marker was specified when the integrator was added, the -+ corresponding pointer (to Array) will be NULL. */ -+ Array*> *GetBBFI_Marker() { return &boundary_integs_marker; } -+ -+ /// Access all integrators added with AddInteriorFaceIntegrator(). -+ Array *GetFBFI() { return &interior_face_integs; } -+ -+ /// Access all integrators added with AddBdrFaceIntegrator(). -+ Array *GetBFBFI() { return &boundary_face_integs; } -+ /** @brief Access all boundary markers added with AddBdrFaceIntegrator(). -+ If no marker was specified when the integrator was added, the -+ corresponding pointer (to Array) will be NULL. */ -+ Array*> *GetBFBFI_Marker() -+ { return &boundary_face_integs_marker; } -+ -+ /// Assemble the given element matrix -+ /** The element matrix @a elmat is assembled for the element @a i, i.e. -+ added to the system matrix. The flag @a skip_zeros skips the zero -+ elements of the matrix, unless they are breaking the symmetry of -+ the system matrix. -+ */ -+ void AssembleElementMatrix(int i, const DenseMatrix &elmat, -+ int skip_zeros = 1); -+ -+ /// Assemble the given element matrix -+ /** The element matrix @a elmat is assembled for the element @a i, i.e. -+ added to the system matrix. The vdofs of the element are returned -+ in @a vdofs. The flag @a skip_zeros skips the zero elements of the -+ matrix, unless they are breaking the symmetry of the system matrix. -+ */ -+ void AssembleElementMatrix(int i, const DenseMatrix &elmat, -+ Array &vdofs, int skip_zeros = 1); -+ -+ /// Assemble the given boundary element matrix -+ /** The boundary element matrix @a elmat is assembled for the boundary -+ element @a i, i.e. added to the system matrix. The flag @a skip_zeros -+ skips the zero elements of the matrix, unless they are breaking the -+ symmetry of the system matrix. -+ */ -+ void AssembleBdrElementMatrix(int i, const DenseMatrix &elmat, -+ int skip_zeros = 1); -+ -+ /// Assemble the given boundary element matrix -+ /** The boundary element matrix @a elmat is assembled for the boundary -+ element @a i, i.e. added to the system matrix. The vdofs of the element -+ are returned in @a vdofs. The flag @a skip_zeros skips the zero elements -+ of the matrix, unless they are breaking the symmetry of the system matrix. -+ */ -+ void AssembleBdrElementMatrix(int i, const DenseMatrix &elmat, -+ Array &vdofs, int skip_zeros = 1); - - /// Assembles the form i.e. sums over all domain/bdr integrators. - void Assemble(int skip_zeros = 1); - -+ /** For a partially conforming FE space, complete the assembly process by -+ performing A := P^t A P where A is the internal sparse matrix; P is the -+ conforming prolongation matrices of the FE space. After this call the -+ BilinearForm becomes an operator on the conforming FE spaces. */ -+ void ConformingAssemble(); -+ - /** @brief Assemble the diagonal of the bilinear form into @a diag. Note that - @a diag is a tdof Vector. - -@@ -439,32 +471,20 @@ public: - - /// Get the finite element space prolongation operator. - virtual const Operator *GetProlongation() const -- { return fes->GetConformingProlongation(); } -+ { return fes->GetProlongationMatrix(); } -+ - /// Get the finite element space restriction operator - virtual const Operator *GetRestriction() const -- { return fes->GetConformingRestriction(); } -+ { return fes->GetRestrictionMatrix(); } -+ - /// Get the output finite element space prolongation matrix - virtual const Operator *GetOutputProlongation() const - { return GetProlongation(); } -- /** @brief Returns the output fe space restriction matrix, transposed - -- Logically, this is the transpose of GetOutputRestriction, but in -- practice it is convenient to have it in transposed form for -- construction of RAP operators in matrix-free methods. */ -- virtual const Operator *GetOutputRestrictionTranspose() const -- { return GetOutputProlongation(); } - /// Get the output finite element space restriction matrix - virtual const Operator *GetOutputRestriction() const - { return GetRestriction(); } - -- /// @brief Compute serial RAP operator and store it in @a A as a SparseMatrix. -- void SerialRAP(OperatorHandle &A) -- { -- MFEM_ASSERT(mat, "SerialRAP requires the SparseMatrix to be assembled."); -- ConformingAssemble(); -- A.Reset(mat, false); -- } -- - /** @brief Form the linear system A X = B, corresponding to this bilinear - form and the linear form @a b(.). */ - /** This method applies any necessary transformations to the linear system -@@ -541,59 +561,6 @@ public: - */ - virtual void RecoverFEMSolution(const Vector &X, const Vector &b, Vector &x); - -- /// Compute and store internally all element matrices. -- void ComputeElementMatrices(); -- -- /// Free the memory used by the element matrices. -- void FreeElementMatrices() -- { delete element_matrices; element_matrices = NULL; } -- -- /// Compute the element matrix of the given element -- /** The element matrix is computed by calling the domain integrators -- or the one stored internally by a prior call of ComputeElementMatrices() -- is returned when available. -- */ -- void ComputeElementMatrix(int i, DenseMatrix &elmat); -- -- /// Compute the boundary element matrix of the given boundary element -- void ComputeBdrElementMatrix(int i, DenseMatrix &elmat); -- -- /// Assemble the given element matrix -- /** The element matrix @a elmat is assembled for the element @a i, i.e. -- added to the system matrix. The flag @a skip_zeros skips the zero -- elements of the matrix, unless they are breaking the symmetry of -- the system matrix. -- */ -- void AssembleElementMatrix(int i, const DenseMatrix &elmat, -- int skip_zeros = 1); -- -- /// Assemble the given element matrix -- /** The element matrix @a elmat is assembled for the element @a i, i.e. -- added to the system matrix. The vdofs of the element are returned -- in @a vdofs. The flag @a skip_zeros skips the zero elements of the -- matrix, unless they are breaking the symmetry of the system matrix. -- */ -- void AssembleElementMatrix(int i, const DenseMatrix &elmat, -- Array &vdofs, int skip_zeros = 1); -- -- /// Assemble the given boundary element matrix -- /** The boundary element matrix @a elmat is assembled for the boundary -- element @a i, i.e. added to the system matrix. The flag @a skip_zeros -- skips the zero elements of the matrix, unless they are breaking the -- symmetry of the system matrix. -- */ -- void AssembleBdrElementMatrix(int i, const DenseMatrix &elmat, -- int skip_zeros = 1); -- -- /// Assemble the given boundary element matrix -- /** The boundary element matrix @a elmat is assembled for the boundary -- element @a i, i.e. added to the system matrix. The vdofs of the element -- are returned in @a vdofs. The flag @a skip_zeros skips the zero elements -- of the matrix, unless they are breaking the symmetry of the system matrix. -- */ -- void AssembleBdrElementMatrix(int i, const DenseMatrix &elmat, -- Array &vdofs, int skip_zeros = 1); -- - /// Eliminate essential boundary DOFs from the system. - /** The array @a bdr_attr_is_ess marks boundary attributes that constitute - the essential part of the boundary. By default, the diagonal at the -@@ -645,34 +612,14 @@ public: - void EliminateVDofsInRHS(const Array &vdofs, const Vector &x, - Vector &b); - -- /// Compute inner product for full uneliminated matrix \f$ y^T M x + y^T M_e x \f$ -- double FullInnerProduct(const Vector &x, const Vector &y) const -- { return mat->InnerProduct(x, y) + mat_e->InnerProduct(x, y); } -- - /// Update the @a FiniteElementSpace and delete all data associated with the old one. - virtual void Update(FiniteElementSpace *nfes = NULL); - -- /// (DEPRECATED) Return the FE space associated with the BilinearForm. -- /** @deprecated Use FESpace() instead. */ -- MFEM_DEPRECATED FiniteElementSpace *GetFES() { return fes; } -- - /// Return the FE space associated with the BilinearForm. - FiniteElementSpace *FESpace() { return fes; } - /// Read-only access to the associated FiniteElementSpace. - const FiniteElementSpace *FESpace() const { return fes; } - -- /// Sets diagonal policy used upon construction of the linear system. -- /** Policies include: -- -- - DIAG_ZERO (Set the diagonal values to zero) -- - DIAG_ONE (Set the diagonal values to one) -- - DIAG_KEEP (Keep the diagonal values) -- */ -- void SetDiagonalPolicy(DiagonalPolicy policy); -- -- /// Indicate that integrators are not owned by the BilinearForm -- void UseExternalIntegrators() { extern_bfs = 1; } -- - /// Destroys bilinear form. - virtual ~BilinearForm(); - }; -@@ -696,11 +643,12 @@ public: - class MixedBilinearForm : public Matrix - { - protected: -- SparseMatrix *mat; ///< Owned. -- SparseMatrix *mat_e; ///< Owned. -+ /** Sparse matrices associated with the form and the eliminations from -+ the b.c. Owned. */ -+ SparseMatrix *mat, *mat_e; - -- FiniteElementSpace *trial_fes, ///< Not owned -- *test_fes; ///< Not owned -+ /// FE space on which the form lives. Not owned. -+ FiniteElementSpace *trial_fes, *test_fes; - - /// The form assembly level (full, partial, etc.) - AssemblyLevel assembly; -@@ -733,11 +681,11 @@ protected: - Array trial_vdofs, test_vdofs; - - private: -- /// Copy construction is not supported; body is undefined. -- MixedBilinearForm(const MixedBilinearForm &); -+ /// Copy construction is not supported. -+ MixedBilinearForm(const MixedBilinearForm &) = delete; - -- /// Copy assignment is not supported; body is undefined. -- MixedBilinearForm &operator=(const MixedBilinearForm &); -+ /// Copy assignment is not supported. -+ MixedBilinearForm &operator=(const MixedBilinearForm &) = delete; - - public: - /** @brief Construct a MixedBilinearForm on the given trial, @a tr_fes, and -@@ -760,6 +708,13 @@ public: - FiniteElementSpace *te_fes, - MixedBilinearForm *mbf); - -+ /// Set the desired assembly level. The default is AssemblyLevel::LEGACY. -+ /** This method must be called before assembly. */ -+ void SetAssemblyLevel(AssemblyLevel assembly_level); -+ -+ /// Returns the assembly level -+ AssemblyLevel GetAssemblyLevel() const { return assembly; } -+ - /// Returns a reference to: \f$ M_{ij} \f$ - virtual double &Elem(int i, int j); - -@@ -767,17 +722,21 @@ public: - virtual const double &Elem(int i, int j) const; - - /// Matrix multiplication: \f$ y = M x \f$ -- virtual void Mult(const Vector & x, Vector & y) const; -+ virtual void Mult(const Vector &x, Vector &y) const; - -- virtual void AddMult(const Vector & x, Vector & y, -+ virtual void AddMult(const Vector &x, Vector &y, - const double a = 1.0) const; - -- virtual void MultTranspose(const Vector & x, Vector & y) const; -- virtual void AddMultTranspose(const Vector & x, Vector & y, -+ virtual void MultTranspose(const Vector &x, Vector &y) const; -+ -+ virtual void AddMultTranspose(const Vector &x, Vector &y, - const double a = 1.0) const; - - virtual MatrixInverse *Inverse() const; - -+ /// Sets all sparse values of \f$ M \f$ to @a a. -+ void operator=(const double a) { *mat = a; } -+ - /// Finalizes the matrix initialization. - virtual void Finalize(int skip_zeros = 1); - -@@ -787,15 +746,45 @@ public: - void GetBlocks(Array2D &blocks) const; - - /// Returns a const reference to the sparse matrix: \f$ M \f$ -- const SparseMatrix &SpMat() const { return *mat; } -+ const SparseMatrix &SpMat() const -+ { -+ MFEM_VERIFY(mat, "mat is NULL and can't be dereferenced"); -+ return *mat; -+ } - - /// Returns a reference to the sparse matrix: \f$ M \f$ -- SparseMatrix &SpMat() { return *mat; } -+ SparseMatrix &SpMat() -+ { -+ MFEM_VERIFY(mat, "mat is NULL and can't be dereferenced"); -+ return *mat; -+ } -+ -+ /// Returns true if the sparse matrix is not null, false otherwise. -+ bool HasSpMat() const { return mat != nullptr; } - - /** @brief Nullifies the internal matrix \f$ M \f$ and returns a pointer - to it. Used for transferring ownership. */ - SparseMatrix *LoseMat() { SparseMatrix *tmp = mat; mat = NULL; return tmp; } - -+ /** Returns a const reference to the extension for assembly levels other -+ than AssemblyLevel::LEGACY. */ -+ const MixedBilinearFormExtension &Ext() const -+ { -+ MFEM_VERIFY(ext, "ext is NULL and can't be dereferenced"); -+ return *ext; -+ } -+ -+ /** Returns a reference to the extension for assembly levels other than -+ AssemblyLevel::LEGACY. */ -+ MixedBilinearFormExtension &Ext() -+ { -+ MFEM_VERIFY(ext, "ext is NULL and can't be dereferenced"); -+ return *ext; -+ } -+ -+ /// Returns true if the extension is not null, false otherwise. -+ bool HasExt() const { return ext != nullptr; } -+ - /// Adds a domain integrator. Assumes ownership of @a bfi. - void AddDomainIntegrator(BilinearFormIntegrator *bfi); - -@@ -803,8 +792,8 @@ public: - void AddBoundaryIntegrator(BilinearFormIntegrator *bfi); - - /// Adds a boundary integrator. Assumes ownership of @a bfi. -- void AddBoundaryIntegrator (BilinearFormIntegrator * bfi, -- Array &bdr_marker); -+ void AddBoundaryIntegrator(BilinearFormIntegrator *bfi, -+ Array &bdr_marker); - - /** @brief Add a trace face integrator. Assumes ownership of @a bfi. - -@@ -814,11 +803,11 @@ public: - void AddTraceFaceIntegrator(BilinearFormIntegrator *bfi); - - /// Adds a boundary trace face integrator. Assumes ownership of @a bfi. -- void AddBdrTraceFaceIntegrator (BilinearFormIntegrator * bfi); -+ void AddBdrTraceFaceIntegrator(BilinearFormIntegrator *bfi); - - /// Adds a boundary trace face integrator. Assumes ownership of @a bfi. -- void AddBdrTraceFaceIntegrator (BilinearFormIntegrator * bfi, -- Array &bdr_marker); -+ void AddBdrTraceFaceIntegrator(BilinearFormIntegrator *bfi, -+ Array &bdr_marker); - - /// Access all integrators added with AddDomainIntegrator(). - Array *GetDBFI() { return &domain_integs; } -@@ -842,48 +831,6 @@ public: - Array*> *GetBTFBFI_Marker() - { return &boundary_trace_face_integs_marker; } - -- /// Sets all sparse values of \f$ M \f$ to @a a. -- void operator=(const double a) { *mat = a; } -- -- /// Set the desired assembly level. The default is AssemblyLevel::LEGACY. -- /** This method must be called before assembly. */ -- void SetAssemblyLevel(AssemblyLevel assembly_level); -- -- void Assemble(int skip_zeros = 1); -- -- /** @brief Assemble the diagonal of ADA^T into diag, where A is this mixed -- bilinear form and D is a diagonal. */ -- void AssembleDiagonal_ADAt(const Vector &D, Vector &diag) const; -- -- /// Get the input finite element space prolongation matrix -- virtual const Operator *GetProlongation() const -- { return trial_fes->GetProlongationMatrix(); } -- -- /// Get the input finite element space restriction matrix -- virtual const Operator *GetRestriction() const -- { return trial_fes->GetRestrictionMatrix(); } -- -- /// Get the test finite element space prolongation matrix -- virtual const Operator *GetOutputProlongation() const -- { return test_fes->GetProlongationMatrix(); } -- -- /// Get the test finite element space restriction matrix -- virtual const Operator *GetOutputRestriction() const -- { return test_fes->GetRestrictionMatrix(); } -- -- /** For partially conforming trial and/or test FE spaces, complete the -- assembly process by performing A := P2^t A P1 where A is the internal -- sparse matrix; P1 and P2 are the conforming prolongation matrices of the -- trial and test FE spaces, respectively. After this call the -- MixedBilinearForm becomes an operator on the conforming FE spaces. */ -- void ConformingAssemble(); -- -- /// Compute the element matrix of the given element -- void ComputeElementMatrix(int i, DenseMatrix &elmat); -- -- /// Compute the boundary element matrix of the given boundary element -- void ComputeBdrElementMatrix(int i, DenseMatrix &elmat); -- - /// Assemble the given element matrix - /** The element matrix @a elmat is assembled for the element @a i, i.e. - added to the system matrix. The flag @a skip_zeros skips the zero -@@ -924,40 +871,34 @@ public: - Array &trial_vdofs, Array &test_vdofs, - int skip_zeros = 1); - -- void EliminateTrialDofs(const Array &bdr_attr_is_ess, -- const Vector &sol, Vector &rhs); -+ void Assemble(int skip_zeros = 1); - -- void EliminateEssentialBCFromTrialDofs(const Array &marked_vdofs, -- const Vector &sol, Vector &rhs); -+ /** For partially conforming trial and/or test FE spaces, complete the -+ assembly process by performing A := P2^t A P1 where A is the internal -+ sparse matrix; P1 and P2 are the conforming prolongation matrices of the -+ trial and test FE spaces, respectively. After this call the -+ MixedBilinearForm becomes an operator on the conforming FE spaces. */ -+ void ConformingAssemble(); - -- virtual void EliminateTestDofs(const Array &bdr_attr_is_ess); -+ /** @brief Assemble the diagonal of ADA^T into diag, where A is this mixed -+ bilinear form and D is a diagonal. */ -+ void AssembleDiagonal_ADAt(const Vector &D, Vector &diag) const; - -- /** @brief Return in @a A that is column-constrained. -+ /// Get the input finite element space prolongation matrix -+ virtual const Operator *GetProlongation() const -+ { return trial_fes->GetProlongationMatrix(); } - -- This returns the same operator as FormRectangularLinearSystem(), but does -- without the transformations of the right-hand side. */ -- virtual void FormRectangularSystemMatrix(const Array &trial_tdof_list, -- const Array &test_tdof_list, -- OperatorHandle &A); -+ /// Get the input finite element space restriction matrix -+ virtual const Operator *GetRestriction() const -+ { return trial_fes->GetRestrictionMatrix(); } - -- /** @brief Form the column-constrained linear system matrix A. -- See FormRectangularSystemMatrix() for details. -+ /// Get the test finite element space prolongation matrix -+ virtual const Operator *GetOutputProlongation() const -+ { return test_fes->GetProlongationMatrix(); } - -- Version of the method FormRectangularSystemMatrix() where the system matrix is -- returned in the variable @a A, of type OpType, holding a *reference* to -- the system matrix (created with the method OpType::MakeRef()). The -- reference will be invalidated when SetOperatorType(), Update(), or the -- destructor is called. */ -- template -- void FormRectangularSystemMatrix(const Array &trial_tdof_list, -- const Array &test_tdof_list, OpType &A) -- { -- OperatorHandle Ah; -- FormRectangularSystemMatrix(trial_tdof_list, test_tdof_list, Ah); -- OpType *A_ptr = Ah.Is(); -- MFEM_VERIFY(A_ptr, "invalid OpType used"); -- A.MakeRef(*A_ptr); -- } -+ /// Get the test finite element space restriction matrix -+ virtual const Operator *GetOutputRestriction() const -+ { return test_fes->GetRestrictionMatrix(); } - - /** @brief Form the linear system A X = B, corresponding to this mixed bilinear - form and the linear form @a b(.). -@@ -992,6 +933,41 @@ public: - A.MakeRef(*A_ptr); - } - -+ /** @brief Return in @a A that is column-constrained. -+ -+ This returns the same operator as FormRectangularLinearSystem(), but does -+ without the transformations of the right-hand side. */ -+ virtual void FormRectangularSystemMatrix(const Array &trial_tdof_list, -+ const Array &test_tdof_list, -+ OperatorHandle &A); -+ -+ /** @brief Form the column-constrained linear system matrix A. -+ See FormRectangularSystemMatrix() for details. -+ -+ Version of the method FormRectangularSystemMatrix() where the system matrix is -+ returned in the variable @a A, of type OpType, holding a *reference* to -+ the system matrix (created with the method OpType::MakeRef()). The -+ reference will be invalidated when SetOperatorType(), Update(), or the -+ destructor is called. */ -+ template -+ void FormRectangularSystemMatrix(const Array &trial_tdof_list, -+ const Array &test_tdof_list, OpType &A) -+ { -+ OperatorHandle Ah; -+ FormRectangularSystemMatrix(trial_tdof_list, test_tdof_list, Ah); -+ OpType *A_ptr = Ah.Is(); -+ MFEM_VERIFY(A_ptr, "invalid OpType used"); -+ A.MakeRef(*A_ptr); -+ } -+ -+ void EliminateTrialDofs(const Array &bdr_attr_is_ess, -+ const Vector &sol, Vector &rhs); -+ -+ void EliminateEssentialBCFromTrialDofs(const Array &marked_vdofs, -+ const Vector &sol, Vector &rhs); -+ -+ virtual void EliminateTestDofs(const Array &bdr_attr_is_ess); -+ - void Update(); - - /// Return the trial FE space associated with the BilinearForm. -@@ -1041,11 +1017,11 @@ public: - class DiscreteLinearOperator : public MixedBilinearForm - { - private: -- /// Copy construction is not supported; body is undefined. -- DiscreteLinearOperator(const DiscreteLinearOperator &); -+ /// Copy construction is not supported. -+ DiscreteLinearOperator(const DiscreteLinearOperator &) = delete; - -- /// Copy assignment is not supported; body is undefined. -- DiscreteLinearOperator &operator=(const DiscreteLinearOperator &); -+ /// Copy assignment is not supported. -+ DiscreteLinearOperator &operator=(const DiscreteLinearOperator &) = delete; - - public: - /** @brief Construct a DiscreteLinearOperator on the given -@@ -1065,9 +1041,12 @@ public: - { AddTraceFaceIntegrator(di); } - - /// Access all interpolators added with AddDomainInterpolator(). -- Array *GetDI() { return &domain_integs; } -+ Array *GetDI() { return GetDBFI(); } -+ -+ /// Access all interpolators added with AddTraceFaceInterpolator(). -+ Array *GetTFI() { return GetTFBFI(); } - -- /// Set the desired assembly level. The default is AssemblyLevel::FULL. -+ /// Set the desired assembly level. The default is AssemblyLevel::LEGACY. - /** This method must be called before assembly. */ - void SetAssemblyLevel(AssemblyLevel assembly_level); - -@@ -1075,10 +1054,26 @@ public: - linear operator. */ - virtual void Assemble(int skip_zeros = 1); - -- /** @brief Get the output finite element space restriction matrix in -- transposed form. */ -- virtual const Operator *GetOutputRestrictionTranspose() const -- { return test_fes->GetRestrictionTransposeOperator(); } -+ /** @brief Return in @a A that is column-constrained. */ -+ virtual void FormDiscreteOperatorMatrix(OperatorHandle &A); -+ -+ /** @brief Form the column-constrained discrete linear operator matrix A. -+ See FormDiscreteOperatorMatrix() for details. -+ -+ Version of the method FormDiscreteOperatorMatrix() where the discrete -+ operator matrix is returned in the variable @a A, of type OpType, -+ holding a *reference* to the discrete operator matrix (created with the -+ method OpType::MakeRef()). The reference will be invalidated when -+ SetOperatorType(), Update(), or the destructor is called. */ -+ template -+ void FormDiscreteOperatorMatrix(OpType &A) -+ { -+ OperatorHandle Ah; -+ FormDiscreteOperatorMatrix(Ah); -+ OpType *A_ptr = Ah.Is(); -+ MFEM_VERIFY(A_ptr, "invalid OpType used"); -+ A.MakeRef(*A_ptr); -+ } - }; - - } -diff --git a/fem/bilinearform_ext.cpp b/fem/bilinearform_ext.cpp -index 50c2cf198..90a1655f4 100644 ---- a/fem/bilinearform_ext.cpp -+++ b/fem/bilinearform_ext.cpp -@@ -21,10 +21,10 @@ - namespace mfem - { - -+/// Base class for extensions to the BilinearForm class - BilinearFormExtension::BilinearFormExtension(BilinearForm *form) - : Operator(form->Size()), a(form) - { -- // empty - } - - const Operator *BilinearFormExtension::GetProlongation() const -@@ -37,812 +37,1226 @@ const Operator *BilinearFormExtension::GetRestriction() const - return a->GetRestriction(); - } - --// Data and methods for partially-assembled bilinear forms -+/// Data and methods for matrix-free bilinear forms - MFBilinearFormExtension::MFBilinearFormExtension(BilinearForm *form) -- : BilinearFormExtension(form), -- trial_fes(a->FESpace()), -- test_fes(a->FESpace()) -+ : BilinearFormExtension(form) - { -- elem_restrict = NULL; -- int_face_restrict_lex = NULL; -- bdr_face_restrict_lex = NULL; -+ Update(); - } - --void MFBilinearFormExtension::Assemble() -+void MFBilinearFormExtension::SetupRestrictionOperators(const L2FaceValues m) - { -- Array &integrators = *a->GetDBFI(); -- const int integratorCount = integrators.Size(); -- for (int i = 0; i < integratorCount; ++i) -+ if (DeviceCanUseCeed()) { return; } -+ ElementDofOrdering ordering = UsesTensorBasis(*fes) ? -+ ElementDofOrdering::LEXICOGRAPHIC : -+ ElementDofOrdering::NATIVE; -+ elem_restrict = fes->GetElementRestriction(ordering); -+ if (elem_restrict) -+ { -+ local_x.SetSize(elem_restrict->Height(), Device::GetDeviceMemoryType()); -+ local_y.SetSize(elem_restrict->Height(), Device::GetDeviceMemoryType()); -+ local_y.UseDevice(true); // ensure 'local_y = 0.0' is done on device -+ } -+ -+ // Construct face restriction operators only if the bilinear form has -+ // interior or boundary face integrators -+ if (int_face_restrict_lex == nullptr && a->GetFBFI()->Size() > 0) -+ { -+ int_face_restrict_lex = fes->GetFaceRestriction( -+ ElementDofOrdering::LEXICOGRAPHIC, -+ FaceType::Interior); -+ int_face_x.SetSize(int_face_restrict_lex->Height(), -+ Device::GetDeviceMemoryType()); -+ int_face_y.SetSize(int_face_restrict_lex->Height(), -+ Device::GetDeviceMemoryType()); -+ int_face_y.UseDevice(true); -+ } -+ -+ const bool has_bdr_integs = (a->GetBFBFI()->Size() > 0 || -+ a->GetBBFI()->Size() > 0); -+ if (bdr_face_restrict_lex == nullptr && has_bdr_integs) - { -- integrators[i]->AssembleMF(*a->FESpace()); -+ bdr_face_restrict_lex = fes->GetFaceRestriction( -+ ElementDofOrdering::LEXICOGRAPHIC, -+ FaceType::Boundary, -+ m); -+ bdr_face_x.SetSize(bdr_face_restrict_lex->Height(), -+ Device::GetDeviceMemoryType()); -+ bdr_face_y.SetSize(bdr_face_restrict_lex->Height(), -+ Device::GetDeviceMemoryType()); -+ bdr_face_y.UseDevice(true); - } - } - --void MFBilinearFormExtension::AssembleDiagonal(Vector &y) const -+void MFBilinearFormExtension::Assemble() - { -- Array &integrators = *a->GetDBFI(); -+ SetupRestrictionOperators(L2FaceValues::DoubleValued); -+ -+ Array &integrators = *a->GetDBFI(); -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AssembleMF(*fes); -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AssembleMFBoundary(*fes); -+ } -+ -+ MFEM_VERIFY(a->GetFBFI()->Size() == 0, "AddInteriorFaceIntegrator is not " -+ "currently supported in MFBilinearFormExtension"); -+ -+ MFEM_VERIFY(a->GetBFBFI()->Size() == 0, "AddBdrFaceIntegrator is not " -+ "currently supported in MFBilinearFormExtension"); -+} - -- const int iSz = integrators.Size(); -- if (elem_restrict && !DeviceCanUseCeed()) -+void MFBilinearFormExtension::AssembleDiagonal(Vector &diag) const -+{ -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- localY = 0.0; -- for (int i = 0; i < iSz; ++i) -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AssembleDiagonalMF(localY); -+ integ->AssembleDiagonalMF(local_y); - } -- const ElementRestriction* H1elem_restrict = -- dynamic_cast(elem_restrict); -- if (H1elem_restrict) -+ elem_restrict->MultTransposeUnsigned(local_y, diag); -+ } -+ else -+ { -+ diag.UseDevice(true); // typically this is a large vector, so store on device -+ diag = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- H1elem_restrict->MultTransposeUnsigned(localY, y); -+ integ->AssembleDiagonalMF(diag); - } -- else -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex && bdr_integrators.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- elem_restrict->MultTranspose(localY, y); -+ integ->AssembleDiagonalMF(bdr_face_y); - } -+ bdr_face_restrict_lex->AddMultTransposeUnsigned(bdr_face_y, diag); - } - else - { -- y.UseDevice(true); // typically this is a large vector, so store on device -- y = 0.0; -- for (int i = 0; i < iSz; ++i) -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- integrators[i]->AssembleDiagonalMF(y); -+ integ->AssembleDiagonalMF(diag); - } - } - } - --void MFBilinearFormExtension::Update() --{ -- FiniteElementSpace *fes = a->FESpace(); -- height = width = fes->GetVSize(); -- trial_fes = fes; -- test_fes = fes; -- -- elem_restrict = nullptr; -- int_face_restrict_lex = nullptr; -- bdr_face_restrict_lex = nullptr; --} -- --void MFBilinearFormExtension::FormSystemMatrix(const Array &ess_tdof_list, -- OperatorHandle &A) --{ -- Operator *oper; -- Operator::FormSystemOperator(ess_tdof_list, oper); -- A.Reset(oper); // A will own oper --} -- --void MFBilinearFormExtension::FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, -- Vector &X, Vector &B, -- int copy_interior) --{ -- Operator *oper; -- Operator::FormLinearSystem(ess_tdof_list, x, b, oper, X, B, copy_interior); -- A.Reset(oper); // A will own oper --} -- - void MFBilinearFormExtension::Mult(const Vector &x, Vector &y) const - { -- Array &integrators = *a->GetDBFI(); -- -- const int iSz = integrators.Size(); -- if (DeviceCanUseCeed() || !elem_restrict) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- y.UseDevice(true); // typically this is a large vector, so store on device -- y = 0.0; -- for (int i = 0; i < iSz; ++i) -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultMF(x, y); -+ integ->AddMultMF(local_x, local_y); - } -+ elem_restrict->MultTranspose(local_y, y); - } - else - { -- elem_restrict->Mult(x, localX); -- localY = 0.0; -- for (int i = 0; i < iSz; ++i) -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultMF(localX, localY); -+ integ->AddMultMF(x, y); - } -- elem_restrict->MultTranspose(localY, y); - } - -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int iFISz = intFaceIntegrators.Size(); -- if (int_face_restrict_lex && iFISz>0) -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex && bdr_integrators.Size() > 0) - { -- int_face_restrict_lex->Mult(x, int_face_X); -- if (int_face_X.Size()>0) -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) - { -- int_face_Y = 0.0; -- for (int i = 0; i < iFISz; ++i) -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- intFaceIntegrators[i]->AddMultMF(int_face_X, int_face_Y); -+ integ->AddMultMF(bdr_face_x, bdr_face_y); - } -- int_face_restrict_lex->AddMultTransposeInPlace(int_face_Y, y); -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); - } - } -- -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int bFISz = bdrFaceIntegrators.Size(); -- if (bdr_face_restrict_lex && bFISz>0) -+ else - { -- bdr_face_restrict_lex->Mult(x, bdr_face_X); -- if (bdr_face_X.Size()>0) -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- bdr_face_Y = 0.0; -- for (int i = 0; i < bFISz; ++i) -- { -- bdrFaceIntegrators[i]->AddMultMF(bdr_face_X, bdr_face_Y); -- } -- bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_Y, y); -+ integ->AddMultMF(x, y); - } - } - } - --void MFBilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const -+void MFBilinearFormExtension::AddMult(const Vector &x, Vector &y, -+ const double c) const - { -- Array &integrators = *a->GetDBFI(); -- const int iSz = integrators.Size(); -- if (elem_restrict) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- elem_restrict->Mult(x, localX); -- localY = 0.0; -- for (int i = 0; i < iSz; ++i) -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultMF(local_x, local_y); -+ } -+ if (c != 1.0) - { -- integrators[i]->AddMultTransposeMF(localX, localY); -+ local_y *= c; - } -- elem_restrict->MultTranspose(localY, y); -+ elem_restrict->AddMultTranspose(local_y, y); - } - else - { -- y.UseDevice(true); -- y = 0.0; -- for (int i = 0; i < iSz; ++i) -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && integrators.Size() > 0) - { -- integrators[i]->AddMultTransposeMF(x, y); -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultMF(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultMF(x, y); -+ } - } - } - -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int iFISz = intFaceIntegrators.Size(); -- if (int_face_restrict_lex && iFISz>0) -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex && bdr_integrators.Size() > 0) - { -- int_face_restrict_lex->Mult(x, int_face_X); -- if (int_face_X.Size()>0) -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) - { -- int_face_Y = 0.0; -- for (int i = 0; i < iFISz; ++i) -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultMF(bdr_face_x, bdr_face_y); -+ } -+ if (c != 1.0) - { -- intFaceIntegrators[i]->AddMultTransposeMF(int_face_X, int_face_Y); -+ bdr_face_y *= c; - } -- int_face_restrict_lex->AddMultTransposeInPlace(int_face_Y, y); -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); - } - } -- -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int bFISz = bdrFaceIntegrators.Size(); -- if (bdr_face_restrict_lex && bFISz>0) -+ else - { -- bdr_face_restrict_lex->Mult(x, bdr_face_X); -- if (bdr_face_X.Size()>0) -+ if (c != 1.0 && bdr_integrators.Size() > 0) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultMF(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else - { -- bdr_face_Y = 0.0; -- for (int i = 0; i < bFISz; ++i) -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- bdrFaceIntegrators[i]->AddMultTransposeMF(bdr_face_X, bdr_face_Y); -+ integ->AddMultMF(x, y); - } -- bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_Y, y); - } - } - } - --// Data and methods for partially-assembled bilinear forms --PABilinearFormExtension::PABilinearFormExtension(BilinearForm *form) -- : BilinearFormExtension(form), -- trial_fes(a->FESpace()), -- test_fes(a->FESpace()) --{ -- elem_restrict = NULL; -- int_face_restrict_lex = NULL; -- bdr_face_restrict_lex = NULL; --} -- --void PABilinearFormExtension::SetupRestrictionOperators(const L2FaceValues m) -+void MFBilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const - { -- if ( Device::Allows(Backend::CEED_MASK) ) { return; } -- ElementDofOrdering ordering = UsesTensorBasis(*a->FESpace())? -- ElementDofOrdering::LEXICOGRAPHIC: -- ElementDofOrdering::NATIVE; -- elem_restrict = trial_fes->GetElementRestriction(ordering); -- if (elem_restrict) -- { -- localX.SetSize(elem_restrict->Height(), Device::GetDeviceMemoryType()); -- localY.SetSize(elem_restrict->Height(), Device::GetDeviceMemoryType()); -- localY.UseDevice(true); // ensure 'localY = 0.0' is done on device -- } -- -- // Construct face restriction operators only if the bilinear form has -- // interior or boundary face integrators -- if (int_face_restrict_lex == NULL && a->GetFBFI()->Size() > 0) -- { -- int_face_restrict_lex = trial_fes->GetFaceRestriction( -- ElementDofOrdering::LEXICOGRAPHIC, -- FaceType::Interior); -- int_face_X.SetSize(int_face_restrict_lex->Height(), Device::GetMemoryType()); -- int_face_Y.SetSize(int_face_restrict_lex->Height(), Device::GetMemoryType()); -- int_face_Y.UseDevice(true); // ensure 'int_face_Y = 0.0' is done on device -- } -- -- if (bdr_face_restrict_lex == NULL && a->GetBFBFI()->Size() > 0) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- bdr_face_restrict_lex = trial_fes->GetFaceRestriction( -- ElementDofOrdering::LEXICOGRAPHIC, -- FaceType::Boundary, -- m); -- bdr_face_X.SetSize(bdr_face_restrict_lex->Height(), Device::GetMemoryType()); -- bdr_face_Y.SetSize(bdr_face_restrict_lex->Height(), Device::GetMemoryType()); -- bdr_face_Y.UseDevice(true); // ensure 'faceBoundY = 0.0' is done on device -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(local_x, local_y); -+ } -+ elem_restrict->MultTranspose(local_y, y); - } --} -- --void PABilinearFormExtension::Assemble() --{ -- SetupRestrictionOperators(L2FaceValues::DoubleValued); -- -- Array &integrators = *a->GetDBFI(); -- const int integratorCount = integrators.Size(); -- for (int i = 0; i < integratorCount; ++i) -+ else - { -- integrators[i]->AssemblePA(*a->FESpace()); -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(x, y); -+ } - } - -- MFEM_VERIFY(a->GetBBFI()->Size() == 0, -- "Partial assembly does not support AddBoundaryIntegrator yet."); -- -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int intFaceIntegratorCount = intFaceIntegrators.Size(); -- for (int i = 0; i < intFaceIntegratorCount; ++i) -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex && bdr_integrators.Size() > 0) - { -- intFaceIntegrators[i]->AssemblePAInteriorFaces(*a->FESpace()); -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(bdr_face_x, bdr_face_y); -+ } -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); -+ } - } -- -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int boundFaceIntegratorCount = bdrFaceIntegrators.Size(); -- for (int i = 0; i < boundFaceIntegratorCount; ++i) -+ else - { -- bdrFaceIntegrators[i]->AssemblePABoundaryFaces(*a->FESpace()); -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(x, y); -+ } - } - } - --void PABilinearFormExtension::AssembleDiagonal(Vector &y) const -+void MFBilinearFormExtension::AddMultTranspose(const Vector &x, Vector &y, -+ const double c) const - { -- Array &integrators = *a->GetDBFI(); -- -- const int iSz = integrators.Size(); -- if (elem_restrict && !DeviceCanUseCeed()) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- localY = 0.0; -- for (int i = 0; i < iSz; ++i) -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AssembleDiagonalPA(localY); -+ integ->AddMultTransposeMF(local_x, local_y); - } -- const ElementRestriction* H1elem_restrict = -- dynamic_cast(elem_restrict); -- if (H1elem_restrict) -+ if (c != 1.0) -+ { -+ local_y *= c; -+ } -+ elem_restrict->AddMultTranspose(local_y, y); -+ } -+ else -+ { -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && integrators.Size() > 0) - { -- H1elem_restrict->MultTransposeUnsigned(localY, y); -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(x, temp_y); -+ } -+ y.Add(c, temp_y); - } - else - { -- elem_restrict->MultTranspose(localY, y); -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(x, y); -+ } -+ } -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex && bdr_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(bdr_face_x, bdr_face_y); -+ } -+ if (c != 1.0) -+ { -+ bdr_face_y *= c; -+ } -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); - } - } - else - { -- y.UseDevice(true); // typically this is a large vector, so store on device -- y = 0.0; -- for (int i = 0; i < iSz; ++i) -+ if (c != 1.0 && bdr_integrators.Size() > 0) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else - { -- integrators[i]->AssembleDiagonalPA(y); -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(x, y); -+ } - } - } - } - --void PABilinearFormExtension::Update() -+void MFBilinearFormExtension::Update() - { -- FiniteElementSpace *fes = a->FESpace(); -+ fes = a->FESpace(); - height = width = fes->GetVSize(); -- trial_fes = fes; -- test_fes = fes; - - elem_restrict = nullptr; - int_face_restrict_lex = nullptr; - bdr_face_restrict_lex = nullptr; - } - --void PABilinearFormExtension::FormSystemMatrix(const Array &ess_tdof_list, -- OperatorHandle &A) -+/// Data and methods for partially-assembled bilinear forms -+PABilinearFormExtension::PABilinearFormExtension(BilinearForm *form) -+ : MFBilinearFormExtension(form) - { -- Operator *oper; -- Operator::FormSystemOperator(ess_tdof_list, oper); -- A.Reset(oper); // A will own oper - } - --void PABilinearFormExtension::FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, -- Vector &X, Vector &B, -- int copy_interior) -+void PABilinearFormExtension::Assemble() - { -- Operator *oper; -- Operator::FormLinearSystem(ess_tdof_list, x, b, oper, X, B, copy_interior); -- A.Reset(oper); // A will own oper -+ SetupRestrictionOperators(L2FaceValues::DoubleValued); -+ -+ Array &integrators = *a->GetDBFI(); -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AssemblePA(*fes); -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AssemblePABoundary(*fes); -+ } -+ -+ Array &int_face_integrators = *a->GetFBFI(); -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AssemblePAInteriorFaces(*fes); -+ } -+ -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AssemblePABoundaryFaces(*fes); -+ } - } - --void PABilinearFormExtension::Mult(const Vector &x, Vector &y) const -+void PABilinearFormExtension::AssembleDiagonal(Vector &diag) const - { -- Array &integrators = *a->GetDBFI(); -- -- const int iSz = integrators.Size(); -- if (DeviceCanUseCeed() || !elem_restrict) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- y.UseDevice(true); // typically this is a large vector, so store on device -- y = 0.0; -- for (int i = 0; i < iSz; ++i) -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultPA(x, y); -+ integ->AssembleDiagonalPA(local_y); - } -+ elem_restrict->MultTransposeUnsigned(local_y, diag); - } - else - { -- elem_restrict->Mult(x, localX); -- localY = 0.0; -- for (int i = 0; i < iSz; ++i) -+ diag.UseDevice(true); // typically this is a large vector, so store on device -+ diag = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultPA(localX, localY); -+ integ->AssembleDiagonalPA(diag); - } -- elem_restrict->MultTranspose(localY, y); - } - -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int iFISz = intFaceIntegrators.Size(); -- if (int_face_restrict_lex && iFISz>0) -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex && bdr_integrators.Size() > 0) - { -- int_face_restrict_lex->Mult(x, int_face_X); -- if (int_face_X.Size()>0) -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- int_face_Y = 0.0; -- for (int i = 0; i < iFISz; ++i) -- { -- intFaceIntegrators[i]->AddMultPA(int_face_X, int_face_Y); -- } -- int_face_restrict_lex->AddMultTransposeInPlace(int_face_Y, y); -+ integ->AssembleDiagonalPA(bdr_face_y); - } -+ bdr_face_restrict_lex->AddMultTransposeUnsigned(bdr_face_y, diag); - } -- -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int bFISz = bdrFaceIntegrators.Size(); -- if (bdr_face_restrict_lex && bFISz>0) -+ else - { -- bdr_face_restrict_lex->Mult(x, bdr_face_X); -- if (bdr_face_X.Size()>0) -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- bdr_face_Y = 0.0; -- for (int i = 0; i < bFISz; ++i) -- { -- bdrFaceIntegrators[i]->AddMultPA(bdr_face_X, bdr_face_Y); -- } -- bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_Y, y); -+ integ->AssembleDiagonalPA(diag); - } - } - } - --void PABilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const -+void PABilinearFormExtension::Mult(const Vector &x, Vector &y) const - { -- Array &integrators = *a->GetDBFI(); -- const int iSz = integrators.Size(); -- if (elem_restrict) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- elem_restrict->Mult(x, localX); -- localY = 0.0; -- for (int i = 0; i < iSz; ++i) -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultTransposePA(localX, localY); -+ integ->AddMultPA(local_x, local_y); - } -- elem_restrict->MultTranspose(localY, y); -+ elem_restrict->MultTranspose(local_y, y); - } - else - { -- y.UseDevice(true); -+ y.UseDevice(true); // typically this is a large vector, so store on device - y = 0.0; -- for (int i = 0; i < iSz; ++i) -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultTransposePA(x, y); -+ integ->AddMultPA(x, y); - } - } - -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int iFISz = intFaceIntegrators.Size(); -- if (int_face_restrict_lex && iFISz>0) -+ Array &int_face_integrators = *a->GetFBFI(); -+ if (int_face_restrict_lex && int_face_integrators.Size() > 0) - { -- int_face_restrict_lex->Mult(x, int_face_X); -- if (int_face_X.Size()>0) -+ int_face_restrict_lex->Mult(x, int_face_x); -+ if (int_face_x.Size() > 0) - { -- int_face_Y = 0.0; -- for (int i = 0; i < iFISz; ++i) -+ int_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) - { -- intFaceIntegrators[i]->AddMultTransposePA(int_face_X, int_face_Y); -+ integ->AddMultPA(int_face_x, int_face_y); - } -- int_face_restrict_lex->AddMultTransposeInPlace(int_face_Y, y); -+ int_face_restrict_lex->AddMultTransposeInPlace(int_face_y, y); -+ } -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultPA(x, y); - } - } - -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int bFISz = bdrFaceIntegrators.Size(); -- if (bdr_face_restrict_lex && bFISz>0) -+ Array &bdr_integrators = *a->GetBBFI(); -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (bdr_face_restrict_lex && (bdr_integrators.Size() > 0 || -+ bdr_face_integrators.Size() > 0)) - { -- bdr_face_restrict_lex->Mult(x, bdr_face_X); -- if (bdr_face_X.Size()>0) -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) - { -- bdr_face_Y = 0.0; -- for (int i = 0; i < bFISz; ++i) -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(bdr_face_x, bdr_face_y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) - { -- bdrFaceIntegrators[i]->AddMultTransposePA(bdr_face_X, bdr_face_Y); -+ integ->AddMultPA(bdr_face_x, bdr_face_y); - } -- bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_Y, y); -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); - } - } --} -- --// Data and methods for element-assembled bilinear forms --EABilinearFormExtension::EABilinearFormExtension(BilinearForm *form) -- : PABilinearFormExtension(form), -- factorize_face_terms(false) --{ -- if (form->FESpace()->IsDGSpace() && form->FESpace()->Conforming()) -+ else - { -- factorize_face_terms = true; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(x, y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultPA(x, y); -+ } - } - } - --void EABilinearFormExtension::Assemble() -+void PABilinearFormExtension::AddMult(const Vector &x, Vector &y, -+ const double c) const - { -- SetupRestrictionOperators(L2FaceValues::SingleValued); -- -- ne = trial_fes->GetMesh()->GetNE(); -- elemDofs = trial_fes->GetFE(0)->GetDof(); -- -- ea_data.SetSize(ne*elemDofs*elemDofs, Device::GetMemoryType()); -- ea_data.UseDevice(true); -- -- Array &integrators = *a->GetDBFI(); -- const int integratorCount = integrators.Size(); -- if ( integratorCount == 0 ) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- ea_data = 0.0; -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultPA(local_x, local_y); -+ } -+ if (c != 1.0) -+ { -+ local_y *= c; -+ } -+ elem_restrict->AddMultTranspose(local_y, y); - } -- for (int i = 0; i < integratorCount; ++i) -+ else - { -- integrators[i]->AssembleEA(*a->FESpace(), ea_data, i); -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && integrators.Size() > 0) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultPA(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultPA(x, y); -+ } -+ } - } - -- faceDofs = trial_fes -> -- GetTraceElement(0, trial_fes->GetMesh()->GetFaceGeometry(0)) -> -- GetDof(); -- -- MFEM_VERIFY(a->GetBBFI()->Size() == 0, -- "Element assembly does not support AddBoundaryIntegrator yet."); -- -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int intFaceIntegratorCount = intFaceIntegrators.Size(); -- if (intFaceIntegratorCount>0) -+ Array &int_face_integrators = *a->GetFBFI(); -+ if (int_face_restrict_lex && int_face_integrators.Size() > 0) - { -- nf_int = trial_fes->GetNFbyType(FaceType::Interior); -- ea_data_int.SetSize(2*nf_int*faceDofs*faceDofs, Device::GetMemoryType()); -- ea_data_ext.SetSize(2*nf_int*faceDofs*faceDofs, Device::GetMemoryType()); -+ int_face_restrict_lex->Mult(x, int_face_x); -+ if (int_face_x.Size() > 0) -+ { -+ int_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultPA(int_face_x, int_face_y); -+ } -+ if (c != 1.0) -+ { -+ int_face_y *= c; -+ } -+ int_face_restrict_lex->AddMultTransposeInPlace(int_face_y, y); -+ } - } -- for (int i = 0; i < intFaceIntegratorCount; ++i) -+ else - { -- intFaceIntegrators[i]->AssembleEAInteriorFaces(*a->FESpace(), -- ea_data_int, -- ea_data_ext, -- i); -+ if (c != 1.0 && int_face_integrators.Size() > 0) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultPA(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultPA(x, y); -+ } -+ } - } - -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int boundFaceIntegratorCount = bdrFaceIntegrators.Size(); -- if (boundFaceIntegratorCount>0) -- { -- nf_bdr = trial_fes->GetNFbyType(FaceType::Boundary); -- ea_data_bdr.SetSize(nf_bdr*faceDofs*faceDofs, Device::GetMemoryType()); -- ea_data_bdr = 0.0; -- } -- for (int i = 0; i < boundFaceIntegratorCount; ++i) -+ Array &bdr_integrators = *a->GetBBFI(); -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (bdr_face_restrict_lex && (bdr_integrators.Size() > 0 || -+ bdr_face_integrators.Size() > 0)) - { -- bdrFaceIntegrators[i]->AssembleEABoundaryFaces(*a->FESpace(),ea_data_bdr,i); -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(bdr_face_x, bdr_face_y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultPA(bdr_face_x, bdr_face_y); -+ } -+ if (c != 1.0) -+ { -+ bdr_face_y *= c; -+ } -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); -+ } - } -+ else -+ { -+ if (c != 1.0 && (bdr_integrators.Size() > 0 || bdr_face_integrators.Size() > 0)) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(x, temp_y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultPA(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(x, y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultPA(x, y); -+ } -+ } -+ } -+} - -- if (factorize_face_terms && int_face_restrict_lex) -+void PABilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const -+{ -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) - { -- auto restFint = dynamic_cast(int_face_restrict_lex); -- restFint->AddFaceMatricesToElementMatrices(ea_data_int, ea_data); -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(local_x, local_y); -+ } -+ elem_restrict->MultTranspose(local_y, y); - } -- if (factorize_face_terms && bdr_face_restrict_lex) -+ else -+ { -+ y.UseDevice(true); -+ y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ } -+ -+ Array &int_face_integrators = *a->GetFBFI(); -+ if (int_face_restrict_lex && int_face_integrators.Size() > 0) - { -- auto restFbdr = dynamic_cast(bdr_face_restrict_lex); -- restFbdr->AddFaceMatricesToElementMatrices(ea_data_bdr, ea_data); -+ int_face_restrict_lex->Mult(x, int_face_x); -+ if (int_face_x.Size() > 0) -+ { -+ int_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultTransposePA(int_face_x, int_face_y); -+ } -+ int_face_restrict_lex->AddMultTransposeInPlace(int_face_y, y); -+ } -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (bdr_face_restrict_lex && (bdr_integrators.Size() > 0 || -+ bdr_face_integrators.Size() > 0)) -+ { -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposePA(bdr_face_x, bdr_face_y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultTransposePA(bdr_face_x, bdr_face_y); -+ } -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); -+ } -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } - } - } - --void EABilinearFormExtension::Mult(const Vector &x, Vector &y) const -+void PABilinearFormExtension::AddMultTranspose(const Vector &x, Vector &y, -+ const double c) const - { -- // Apply the Element Restriction -- const bool useRestrict = !DeviceCanUseCeed() && elem_restrict; -- if (!useRestrict) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict && integrators.Size() > 0) -+ { -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(local_x, local_y); -+ } -+ if (c != 1.0) -+ { -+ local_y *= c; -+ } -+ elem_restrict->AddMultTranspose(local_y, y); -+ } -+ else - { - y.UseDevice(true); // typically this is a large vector, so store on device -- y = 0.0; -+ if (c != 1.0 && integrators.Size() > 0) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ } -+ } -+ -+ Array &int_face_integrators = *a->GetFBFI(); -+ if (int_face_restrict_lex && int_face_integrators.Size() > 0) -+ { -+ int_face_restrict_lex->Mult(x, int_face_x); -+ if (int_face_x.Size() > 0) -+ { -+ int_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultTransposePA(int_face_x, int_face_y); -+ } -+ if (c != 1.0) -+ { -+ int_face_y *= c; -+ } -+ int_face_restrict_lex->AddMultTransposeInPlace(int_face_y, y); -+ } - } - else - { -- elem_restrict->Mult(x, localX); -- localY = 0.0; -+ if (c != 1.0 && int_face_integrators.Size() > 0) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultTransposePA(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ } -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (bdr_face_restrict_lex && (bdr_integrators.Size() > 0 || -+ bdr_face_integrators.Size() > 0)) -+ { -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposePA(bdr_face_x, bdr_face_y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultTransposePA(bdr_face_x, bdr_face_y); -+ } -+ if (c != 1.0) -+ { -+ bdr_face_y *= c; -+ } -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); -+ } -+ } -+ else -+ { -+ if (c != 1.0 && (bdr_integrators.Size() > 0 || bdr_face_integrators.Size() > 0)) -+ { -+ temp_y.SetSize(y.Size()); -+ temp_y.UseDevice(true); -+ temp_y = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposePA(x, temp_y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultTransposePA(x, temp_y); -+ } -+ y.Add(c, temp_y); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AddMultTransposePA(x, y); -+ } -+ } -+ } -+} -+ -+/// Data and methods for element-assembled bilinear forms -+EABilinearFormExtension::EABilinearFormExtension(BilinearForm *form) -+ : PABilinearFormExtension(form), -+ factorize_face_terms(fes->IsDGSpace() && fes->Conforming()) -+{ -+} -+ -+void EABilinearFormExtension::Assemble() -+{ -+ SetupRestrictionOperators(L2FaceValues::SingleValued); -+ -+ ne = fes->GetNE(); -+ elem_dofs = fes->GetFE(0)->GetDof(); -+ -+ Array &integrators = *a->GetDBFI(); -+ if (integrators.Size() > 0) -+ { -+ ea_data.SetSize(ne * elem_dofs * elem_dofs, Device::GetMemoryType()); -+ ea_data.UseDevice(true); -+ ea_data = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AssembleEA(*fes, ea_data); -+ } -+ } -+ -+ MFEM_VERIFY(a->GetBBFI()->Size() == 0, -+ "Element assembly does not support AddBoundaryIntegrator yet."); -+ -+ nf_int = fes->GetNFbyType(FaceType::Interior); -+ nf_bdr = fes->GetNFbyType(FaceType::Boundary); -+ face_dofs = fes->GetTraceElement(0, -+ fes->GetMesh()->GetFaceGeometry(0))->GetDof(); -+ -+ Array &int_face_integrators = *a->GetFBFI(); -+ if (int_face_integrators.Size() > 0) -+ { -+ ea_data_int.SetSize(2 * nf_int * face_dofs * face_dofs, -+ Device::GetMemoryType()); -+ ea_data_ext.SetSize(2 * nf_int * face_dofs * face_dofs, -+ Device::GetMemoryType()); -+ ea_data_int = 0.0; -+ ea_data_ext = 0.0; -+ for (BilinearFormIntegrator *integ : int_face_integrators) -+ { -+ integ->AssembleEAInteriorFaces(*fes, ea_data_int, ea_data_ext); -+ } -+ } -+ -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (bdr_face_integrators.Size() > 0) -+ { -+ ea_data_bdr.SetSize(nf_bdr * face_dofs * face_dofs, Device::GetMemoryType()); -+ ea_data_bdr = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_face_integrators) -+ { -+ integ->AssembleEABoundaryFaces(*fes, ea_data_bdr); -+ } -+ } -+ -+ if (factorize_face_terms && int_face_restrict_lex) -+ { -+ auto l2_face_restrict = dynamic_cast -+ (*int_face_restrict_lex); -+ l2_face_restrict.AddFaceMatricesToElementMatrices(ea_data_int, ea_data); - } -- // Apply the Element Matrices -+ if (factorize_face_terms && bdr_face_restrict_lex) - { -- const int NDOFS = elemDofs; -- auto X = Reshape(useRestrict?localX.Read():x.Read(), NDOFS, ne); -- auto Y = Reshape(useRestrict?localY.ReadWrite():y.ReadWrite(), NDOFS, ne); -- auto A = Reshape(ea_data.Read(), NDOFS, NDOFS, ne); -- mfem::forall(ne*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -+ auto l2_face_restrict = dynamic_cast -+ (*bdr_face_restrict_lex); -+ l2_face_restrict.AddFaceMatricesToElementMatrices(ea_data_bdr, ea_data); -+ } -+} -+ -+void EABilinearFormExtension::Mult(const Vector &x, Vector &y) const -+{ -+ Array &integrators = *a->GetDBFI(); -+ auto Apply = [](const int nelem, const int ndofs, const Vector &data, -+ const Vector &x, Vector &y) -+ { -+ auto X = Reshape(x.Read(), ndofs, nelem); -+ auto Y = Reshape(y.ReadWrite(), ndofs, nelem); -+ auto A = Reshape(data.Read(), ndofs, ndofs, nelem); -+ mfem::forall(nelem * ndofs, [=] MFEM_HOST_DEVICE (int k) - { -- const int e = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -+ const int e = k / ndofs; -+ const int j = k % ndofs; - double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -+ for (int i = 0; i < ndofs; i++) - { -- res += A(i, j, e)*X(i, e); -+ res += A(i, j, e) * X(i, e); - } - Y(j, e) += res; - }); -- // Apply the Element Restriction transposed -- if (useRestrict) -+ }; -+ if (elem_restrict) -+ { -+ if (integrators.Size() > 0) - { -- elem_restrict->MultTranspose(localY, y); -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ Apply(ne, elem_dofs, ea_data, local_x, local_y); -+ elem_restrict->MultTranspose(local_y, y); -+ } -+ else -+ { -+ y = 0.0; -+ } -+ } -+ else -+ { -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ y = 0.0; -+ if (integrators.Size() > 0) -+ { -+ Apply(ne, elem_dofs, ea_data, x, y); - } - } - - // Treatment of interior faces -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int iFISz = intFaceIntegrators.Size(); -- if (int_face_restrict_lex && iFISz>0) -- { -- // Apply the Interior Face Restriction -- int_face_restrict_lex->Mult(x, int_face_X); -- if (int_face_X.Size()>0) -- { -- int_face_Y = 0.0; -- // Apply the interior face matrices -- const int NDOFS = faceDofs; -- auto X = Reshape(int_face_X.Read(), NDOFS, 2, nf_int); -- auto Y = Reshape(int_face_Y.ReadWrite(), NDOFS, 2, nf_int); -+ Array &int_face_integrators = *a->GetFBFI(); -+ auto ApplyIntFace = [](const int nface, const int ndofs, const Vector &data, -+ const Vector &x, Vector &y) -+ { -+ auto X = Reshape(x.Read(), ndofs, 2, nface); -+ auto Y = Reshape(y.ReadWrite(), ndofs, 2, nface); -+ auto A = Reshape(data.Read(), ndofs, ndofs, 2, nface); -+ mfem::forall(nface * ndofs, [=] MFEM_HOST_DEVICE (int k) -+ { -+ const int f = k / ndofs; -+ const int j = k % ndofs; -+ double res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(i, j, 0, f) * X(i, 0, f); -+ } -+ Y(j, 0, f) += res; -+ res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(i, j, 1, f) * X(i, 1, f); -+ } -+ Y(j, 1, f) += res; -+ }); -+ }; -+ auto ApplyExtFace = [](const int nface, const int ndofs, const Vector &data, -+ const Vector &x, Vector &y) -+ { -+ auto X = Reshape(x.Read(), ndofs, 2, nface); -+ auto Y = Reshape(y.ReadWrite(), ndofs, 2, nface); -+ auto A = Reshape(data.Read(), ndofs, ndofs, 2, nface); -+ mfem::forall(nface * ndofs, [=] MFEM_HOST_DEVICE (int k) -+ { -+ const int f = k / ndofs; -+ const int j = k % ndofs; -+ double res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(i, j, 0, f) * X(i, 0, f); -+ } -+ Y(j, 1, f) += res; -+ res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(i, j, 1, f) * X(i, 1, f); -+ } -+ Y(j, 0, f) += res; -+ }); -+ }; -+ if (int_face_restrict_lex && int_face_integrators.Size() > 0) -+ { -+ int_face_restrict_lex->Mult(x, int_face_x); -+ if (int_face_x.Size() > 0) -+ { -+ int_face_y = 0.0; - if (!factorize_face_terms) - { -- auto A_int = Reshape(ea_data_int.Read(), NDOFS, NDOFS, 2, nf_int); -- mfem::forall(nf_int*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -- { -- const int f = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -- double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_int(i, j, 0, f)*X(i, 0, f); -- } -- Y(j, 0, f) += res; -- res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_int(i, j, 1, f)*X(i, 1, f); -- } -- Y(j, 1, f) += res; -- }); -- } -- auto A_ext = Reshape(ea_data_ext.Read(), NDOFS, NDOFS, 2, nf_int); -- mfem::forall(nf_int*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -- { -- const int f = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -- double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_ext(i, j, 0, f)*X(i, 0, f); -- } -- Y(j, 1, f) += res; -- res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_ext(i, j, 1, f)*X(i, 1, f); -- } -- Y(j, 0, f) += res; -- }); -- // Apply the Interior Face Restriction transposed -- int_face_restrict_lex->AddMultTransposeInPlace(int_face_Y, y); -+ ApplyIntFace(nf_int, face_dofs, ea_data_int, int_face_x, int_face_y); -+ } -+ ApplyExtFace(nf_int, face_dofs, ea_data_ext, int_face_x, int_face_y); -+ int_face_restrict_lex->AddMultTransposeInPlace(int_face_y, y); - } - } - - // Treatment of boundary faces -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int bFISz = bdrFaceIntegrators.Size(); -- if (!factorize_face_terms && bdr_face_restrict_lex && bFISz>0) -- { -- // Apply the Boundary Face Restriction -- bdr_face_restrict_lex->Mult(x, bdr_face_X); -- if (bdr_face_X.Size()>0) -- { -- bdr_face_Y = 0.0; -- // Apply the boundary face matrices -- const int NDOFS = faceDofs; -- auto X = Reshape(bdr_face_X.Read(), NDOFS, nf_bdr); -- auto Y = Reshape(bdr_face_Y.ReadWrite(), NDOFS, nf_bdr); -- auto A = Reshape(ea_data_bdr.Read(), NDOFS, NDOFS, nf_bdr); -- mfem::forall(nf_bdr*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -- { -- const int f = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -- double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A(i, j, f)*X(i, f); -- } -- Y(j, f) += res; -- }); -- // Apply the Boundary Face Restriction transposed -- bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_Y, y); -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (!factorize_face_terms && bdr_face_restrict_lex && -+ bdr_face_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ Apply(nf_bdr, face_dofs, ea_data_bdr, bdr_face_x, bdr_face_y); -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); - } - } - } - - void EABilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const - { -- // Apply the Element Restriction -- const bool useRestrict = !DeviceCanUseCeed() && elem_restrict; -- if (!useRestrict) -- { -- y.UseDevice(true); // typically this is a large vector, so store on device -- y = 0.0; -- } -- else -- { -- elem_restrict->Mult(x, localX); -- localY = 0.0; -- } -- // Apply the Element Matrices transposed -- { -- const int NDOFS = elemDofs; -- auto X = Reshape(useRestrict?localX.Read():x.Read(), NDOFS, ne); -- auto Y = Reshape(useRestrict?localY.ReadWrite():y.ReadWrite(), NDOFS, ne); -- auto A = Reshape(ea_data.Read(), NDOFS, NDOFS, ne); -- mfem::forall(ne*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -+ Array &integrators = *a->GetDBFI(); -+ auto ApplyTranspose = [](const int nelem, const int ndofs, const Vector &data, -+ const Vector &x, Vector &y) -+ { -+ auto X = Reshape(x.Read(), ndofs, nelem); -+ auto Y = Reshape(y.ReadWrite(), ndofs, nelem); -+ auto A = Reshape(data.Read(), ndofs, ndofs, nelem); -+ mfem::forall(nelem * ndofs, [=] MFEM_HOST_DEVICE (int k) - { -- const int e = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -+ const int e = k / ndofs; -+ const int j = k % ndofs; - double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -+ for (int i = 0; i < ndofs; i++) - { -- res += A(j, i, e)*X(i, e); -+ res += A(j, i, e) * X(i, e); - } - Y(j, e) += res; - }); -- // Apply the Element Restriction transposed -- if (useRestrict) -+ }; -+ if (elem_restrict) -+ { -+ if (integrators.Size() > 0) -+ { -+ elem_restrict->Mult(x, local_x); -+ local_y = 0.0; -+ ApplyTranspose(ne, elem_dofs, ea_data, local_x, local_y); -+ elem_restrict->MultTranspose(local_y, y); -+ } -+ else - { -- elem_restrict->MultTranspose(localY, y); -+ y = 0.0; -+ } -+ } -+ else -+ { -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ y = 0.0; -+ if (integrators.Size() > 0) -+ { -+ ApplyTranspose(ne, elem_dofs, ea_data, x, y); - } - } - - // Treatment of interior faces -- Array &intFaceIntegrators = *a->GetFBFI(); -- const int iFISz = intFaceIntegrators.Size(); -- if (int_face_restrict_lex && iFISz>0) -- { -- // Apply the Interior Face Restriction -- int_face_restrict_lex->Mult(x, int_face_X); -- if (int_face_X.Size()>0) -- { -- int_face_Y = 0.0; -- // Apply the interior face matrices transposed -- const int NDOFS = faceDofs; -- auto X = Reshape(int_face_X.Read(), NDOFS, 2, nf_int); -- auto Y = Reshape(int_face_Y.ReadWrite(), NDOFS, 2, nf_int); -+ Array &int_face_integrators = *a->GetFBFI(); -+ auto ApplyIntFaceTranspose = [](const int nface, const int ndofs, -+ const Vector &data, const Vector &x, Vector &y) -+ { -+ auto X = Reshape(x.Read(), ndofs, 2, nface); -+ auto Y = Reshape(y.ReadWrite(), ndofs, 2, nface); -+ auto A = Reshape(data.Read(), ndofs, ndofs, 2, nface); -+ mfem::forall(nface * ndofs, [=] MFEM_HOST_DEVICE (int k) -+ { -+ const int f = k / ndofs; -+ const int j = k % ndofs; -+ double res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(j, i, 0, f) * X(i, 0, f); -+ } -+ Y(j, 0, f) += res; -+ res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(j, i, 1, f) * X(i, 1, f); -+ } -+ Y(j, 1, f) += res; -+ }); -+ }; -+ auto ApplyExtFaceTranspose = [](const int nface, const int ndofs, -+ const Vector &data, const Vector &x, Vector &y) -+ { -+ auto X = Reshape(x.Read(), ndofs, 2, nface); -+ auto Y = Reshape(y.ReadWrite(), ndofs, 2, nface); -+ auto A = Reshape(data.Read(), ndofs, ndofs, 2, nface); -+ mfem::forall(nface * ndofs, [=] MFEM_HOST_DEVICE (int k) -+ { -+ const int f = k / ndofs; -+ const int j = k % ndofs; -+ double res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(j, i, 1, f) * X(i, 0, f); -+ } -+ Y(j, 1, f) += res; -+ res = 0.0; -+ for (int i = 0; i < ndofs; i++) -+ { -+ res += A(j, i, 0, f) * X(i, 1, f); -+ } -+ Y(j, 0, f) += res; -+ }); -+ }; -+ if (int_face_restrict_lex && int_face_integrators.Size() > 0) -+ { -+ int_face_restrict_lex->Mult(x, int_face_x); -+ if (int_face_x.Size() > 0) -+ { -+ int_face_y = 0.0; - if (!factorize_face_terms) - { -- auto A_int = Reshape(ea_data_int.Read(), NDOFS, NDOFS, 2, nf_int); -- mfem::forall(nf_int*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -- { -- const int f = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -- double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_int(j, i, 0, f)*X(i, 0, f); -- } -- Y(j, 0, f) += res; -- res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_int(j, i, 1, f)*X(i, 1, f); -- } -- Y(j, 1, f) += res; -- }); -- } -- auto A_ext = Reshape(ea_data_ext.Read(), NDOFS, NDOFS, 2, nf_int); -- mfem::forall(nf_int*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -- { -- const int f = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -- double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_ext(j, i, 1, f)*X(i, 0, f); -- } -- Y(j, 1, f) += res; -- res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A_ext(j, i, 0, f)*X(i, 1, f); -- } -- Y(j, 0, f) += res; -- }); -- // Apply the Interior Face Restriction transposed -- int_face_restrict_lex->AddMultTransposeInPlace(int_face_Y, y); -+ ApplyIntFaceTranspose(nf_int, face_dofs, ea_data_int, int_face_x, int_face_y); -+ } -+ ApplyExtFaceTranspose(nf_int, face_dofs, ea_data_ext, int_face_x, int_face_y); -+ int_face_restrict_lex->AddMultTransposeInPlace(int_face_y, y); - } - } - - // Treatment of boundary faces -- Array &bdrFaceIntegrators = *a->GetBFBFI(); -- const int bFISz = bdrFaceIntegrators.Size(); -- if (!factorize_face_terms && bdr_face_restrict_lex && bFISz>0) -- { -- // Apply the Boundary Face Restriction -- bdr_face_restrict_lex->Mult(x, bdr_face_X); -- if (bdr_face_X.Size()>0) -- { -- bdr_face_Y = 0.0; -- // Apply the boundary face matrices transposed -- const int NDOFS = faceDofs; -- auto X = Reshape(bdr_face_X.Read(), NDOFS, nf_bdr); -- auto Y = Reshape(bdr_face_Y.ReadWrite(), NDOFS, nf_bdr); -- auto A = Reshape(ea_data_bdr.Read(), NDOFS, NDOFS, nf_bdr); -- mfem::forall(nf_bdr*NDOFS, [=] MFEM_HOST_DEVICE (int glob_j) -- { -- const int f = glob_j/NDOFS; -- const int j = glob_j%NDOFS; -- double res = 0.0; -- for (int i = 0; i < NDOFS; i++) -- { -- res += A(j, i, f)*X(i, f); -- } -- Y(j, f) += res; -- }); -- // Apply the Boundary Face Restriction transposed -- bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_Y, y); -+ Array &bdr_face_integrators = *a->GetBFBFI(); -+ if (!factorize_face_terms && bdr_face_restrict_lex && -+ bdr_face_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex->Mult(x, bdr_face_x); -+ if (bdr_face_x.Size() > 0) -+ { -+ bdr_face_y = 0.0; -+ ApplyTranspose(nf_bdr, face_dofs, ea_data_bdr, bdr_face_x, bdr_face_y); -+ bdr_face_restrict_lex->AddMultTransposeInPlace(bdr_face_y, y); - } - } - } - --// Data and methods for fully-assembled bilinear forms -+/// Data and methods for fully-assembled bilinear forms - FABilinearFormExtension::FABilinearFormExtension(BilinearForm *form) - : EABilinearFormExtension(form), - mat(a->mat) - { - #ifdef MFEM_USE_MPI -- ParFiniteElementSpace *pfes = nullptr; -- if ( a->GetFBFI()->Size()>0 && -- (pfes = dynamic_cast(form->FESpace())) ) -+ const ParFiniteElementSpace *pfes = nullptr; -+ if (a->GetFBFI()->Size() > 0 && -+ (pfes = dynamic_cast(form->FESpace()))) - { -- pfes->ExchangeFaceNbrData(); -+ const_cast(pfes)->ExchangeFaceNbrData(); - } - #endif - } -@@ -850,20 +1264,20 @@ FABilinearFormExtension::FABilinearFormExtension(BilinearForm *form) - void FABilinearFormExtension::Assemble() - { - EABilinearFormExtension::Assemble(); -- FiniteElementSpace &fes = *a->FESpace(); -- int width = fes.GetVSize(); -- int height = fes.GetVSize(); -+ -+ int width = fes->GetVSize(); -+ int height = fes->GetVSize(); - bool keep_nbr_block = false; - #ifdef MFEM_USE_MPI -- ParFiniteElementSpace *pfes = nullptr; -- if ( a->GetFBFI()->Size()>0 && -- (pfes = dynamic_cast(&fes)) ) -+ const ParFiniteElementSpace *pfes = nullptr; -+ if (a->GetFBFI()->Size() > 0 && -+ (pfes = dynamic_cast(fes))) - { -- pfes->ExchangeFaceNbrData(); -+ const_cast(pfes)->ExchangeFaceNbrData(); - width += pfes->GetFaceNbrVSize(); - dg_x.SetSize(width); - ParBilinearForm *pb = nullptr; -- if ((pb = dynamic_cast(a)) && (pb->keep_nbr_block)) -+ if ((pb = dynamic_cast(a)) && pb->keep_nbr_block) - { - height += pfes->GetFaceNbrVSize(); - dg_y.SetSize(height); -@@ -873,15 +1287,14 @@ void FABilinearFormExtension::Assemble() - #endif - if (a->mat) // We reuse the sparse matrix memory - { -- if (fes.IsDGSpace()) -+ if (fes->IsDGSpace()) - { -- const L2ElementRestriction *restE = -- static_cast(elem_restrict); -- const L2FaceRestriction *restF = -- static_cast(int_face_restrict_lex); -- MFEM_VERIFY( -- fes.Conforming(), -- "Full Assembly not yet supported on NCMesh."); -+ const auto *restE = -+ static_cast(elem_restrict); -+ const auto *restF = -+ static_cast(int_face_restrict_lex); -+ MFEM_VERIFY(fes->Conforming(), -+ "Full Assembly not yet supported on NCMesh."); - // 1. Fill J and Data - // 1.1 Fill J and Data with Elem ea_data - restE->FillJAndData(ea_data, *mat); -@@ -897,8 +1310,8 @@ void FABilinearFormExtension::Assemble() - } - else - { -- const ElementRestriction &rest = -- static_cast(*elem_restrict); -+ const auto &rest = -+ static_cast(*elem_restrict); - rest.FillJAndData(ea_data, *mat); - } - } -@@ -906,15 +1319,14 @@ void FABilinearFormExtension::Assemble() - { - mat = new SparseMatrix; - mat->OverrideSize(height, width); -- if (fes.IsDGSpace()) -- { -- const L2ElementRestriction *restE = -- static_cast(elem_restrict); -- const L2FaceRestriction *restF = -- static_cast(int_face_restrict_lex); -- MFEM_VERIFY( -- fes.Conforming(), -- "Full Assembly not yet supported on NCMesh."); -+ if (fes->IsDGSpace()) -+ { -+ const auto *restE = -+ static_cast(elem_restrict); -+ const auto *restF = -+ static_cast(int_face_restrict_lex); -+ MFEM_VERIFY(fes->Conforming(), -+ "Full Assembly not yet supported on NCMesh."); - // 1. Fill I - mat->GetMemoryI().New(height+1, mat->GetMemoryI().GetMemoryType()); - // 1.1 Increment with restE -@@ -947,87 +1359,32 @@ void FABilinearFormExtension::Assemble() - } - I[0] = 0; - } -- else // continuous Galerkin case -+ else - { -- const ElementRestriction &rest = -- static_cast(*elem_restrict); -+ const auto &rest = -+ static_cast(*elem_restrict); - rest.FillSparseMatrix(ea_data, *mat); - } - a->mat = mat; - } -- if ( a->sort_sparse_matrix ) -+ if (a->sort_sparse_matrix) - { - a->mat->SortColumnIndices(); - } - } - -- --void FABilinearFormExtension::RAP(OperatorHandle &A) --{ --#ifdef MFEM_USE_MPI -- if ( auto pa = dynamic_cast(a) ) -- { -- pa->ParallelRAP(*pa->mat, A); -- } -- else --#endif -- { -- a->SerialRAP(A); -- } --} -- --void FABilinearFormExtension::EliminateBC(const Array &ess_dofs, -- OperatorHandle &A) --{ -- MFEM_VERIFY(a->diag_policy == DiagonalPolicy::DIAG_ONE, -- "Only DiagonalPolicy::DIAG_ONE supported with" -- " FABilinearFormExtension."); --#ifdef MFEM_USE_MPI -- if ( dynamic_cast(a) ) -- { -- A.As()->EliminateBC(ess_dofs, -- DiagonalPolicy::DIAG_ONE); -- } -- else --#endif -- { -- A.As()->EliminateBC(ess_dofs, -- DiagonalPolicy::DIAG_ONE); -- } --} -- --void FABilinearFormExtension::FormSystemMatrix(const Array &ess_dofs, -- OperatorHandle &A) --{ -- RAP(A); -- EliminateBC(ess_dofs, A); --} -- --void FABilinearFormExtension::FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, -- Vector &X, Vector &B, -- int copy_interior) --{ -- Operator *A_out; -- Operator::FormLinearSystem(ess_tdof_list, x, b, A_out, X, B, copy_interior); -- delete A_out; -- FormSystemMatrix(ess_tdof_list, A); --} -- - void FABilinearFormExtension::DGMult(const Vector &x, Vector &y) const - { - #ifdef MFEM_USE_MPI -- const ParFiniteElementSpace *pfes; -- if ( (pfes = dynamic_cast(test_fes)) ) -+ if (const auto pfes = dynamic_cast(fes)) - { - // DG Prolongation - ParGridFunction x_gf; -- x_gf.MakeRef(const_cast(pfes), -- const_cast(x),0); -+ x_gf.MakeRef(const_cast(pfes), -+ const_cast(x), 0); - x_gf.ExchangeFaceNbrData(); - Vector &shared_x = x_gf.FaceNbrData(); -- const int local_size = a->FESpace()->GetVSize(); -+ const int local_size = fes->GetVSize(); - auto dg_x_ptr = dg_x.Write(); - auto x_ptr = x.Read(); - mfem::forall(local_size, [=] MFEM_HOST_DEVICE (int i) -@@ -1040,8 +1397,8 @@ void FABilinearFormExtension::DGMult(const Vector &x, Vector &y) const - { - dg_x_ptr[local_size+i] = shared_x_ptr[i]; - }); -- ParBilinearForm *pform = nullptr; -- if ((pform = dynamic_cast(a)) && (pform->keep_nbr_block)) -+ ParBilinearForm *pb = nullptr; -+ if ((pb = dynamic_cast(a)) && pb->keep_nbr_block) - { - mat->Mult(dg_x, dg_y); - // DG Restriction -@@ -1066,7 +1423,7 @@ void FABilinearFormExtension::DGMult(const Vector &x, Vector &y) const - - void FABilinearFormExtension::Mult(const Vector &x, Vector &y) const - { -- if ( a->GetFBFI()->Size()>0 ) -+ if (a->GetFBFI()->Size() > 0) - { - DGMult(x, y); - } -@@ -1079,16 +1436,15 @@ void FABilinearFormExtension::Mult(const Vector &x, Vector &y) const - void FABilinearFormExtension::DGMultTranspose(const Vector &x, Vector &y) const - { - #ifdef MFEM_USE_MPI -- const ParFiniteElementSpace *pfes; -- if ( (pfes = dynamic_cast(test_fes)) ) -+ if (const auto pfes = dynamic_cast(fes)) - { - // DG Prolongation - ParGridFunction x_gf; -- x_gf.MakeRef(const_cast(pfes), -- const_cast(x),0); -+ x_gf.MakeRef(const_cast(pfes), -+ const_cast(x), 0); - x_gf.ExchangeFaceNbrData(); - Vector &shared_x = x_gf.FaceNbrData(); -- const int local_size = a->FESpace()->GetVSize(); -+ const int local_size = fes->GetVSize(); - auto dg_x_ptr = dg_x.Write(); - auto x_ptr = x.Read(); - mfem::forall(local_size, [=] MFEM_HOST_DEVICE (int i) -@@ -1102,7 +1458,7 @@ void FABilinearFormExtension::DGMultTranspose(const Vector &x, Vector &y) const - dg_x_ptr[local_size+i] = shared_x_ptr[i]; - }); - ParBilinearForm *pb = nullptr; -- if ((pb = dynamic_cast(a)) && (pb->keep_nbr_block)) -+ if ((pb = dynamic_cast(a)) && (pb->keep_nbr_block)) - { - mat->MultTranspose(dg_x, dg_y); - // DG Restriction -@@ -1127,7 +1483,7 @@ void FABilinearFormExtension::DGMultTranspose(const Vector &x, Vector &y) const - - void FABilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const - { -- if ( a->GetFBFI()->Size()>0 ) -+ if (a->GetFBFI()->Size() > 0) - { - DGMultTranspose(x, y); - } -@@ -1138,10 +1494,10 @@ void FABilinearFormExtension::MultTranspose(const Vector &x, Vector &y) const - } - - -+/// Base class for extensions to the MixedBilinearForm class - MixedBilinearFormExtension::MixedBilinearFormExtension(MixedBilinearForm *form) - : Operator(form->Height(), form->Width()), a(form) - { -- // empty - } - - const Operator *MixedBilinearFormExtension::GetProlongation() const -@@ -1164,377 +1520,690 @@ const Operator *MixedBilinearFormExtension::GetOutputRestriction() const - return a->GetOutputRestriction(); - } - --// Data and methods for partially-assembled bilinear forms -- --PAMixedBilinearFormExtension::PAMixedBilinearFormExtension( -+/// Data and methods for matrix-free mixed bilinear forms -+MFMixedBilinearFormExtension::MFMixedBilinearFormExtension( - MixedBilinearForm *form) -- : MixedBilinearFormExtension(form), -- trial_fes(form->TrialFESpace()), -- test_fes(form->TestFESpace()), -- elem_restrict_trial(NULL), -- elem_restrict_test(NULL) -+ : MixedBilinearFormExtension(form) - { - Update(); - } - --void PAMixedBilinearFormExtension::Assemble() -+void MFMixedBilinearFormExtension::SetupRestrictionOperators( -+ const L2FaceValues m) - { -- Array &integrators = *a->GetDBFI(); -- const int integratorCount = integrators.Size(); -- for (int i = 0; i < integratorCount; ++i) -+ if (DeviceCanUseCeed()) { return; } -+ ElementDofOrdering trial_ordering = UsesTensorBasis(*trial_fes) ? -+ ElementDofOrdering::LEXICOGRAPHIC : -+ ElementDofOrdering::NATIVE; -+ ElementDofOrdering test_ordering = UsesTensorBasis(*test_fes) ? -+ ElementDofOrdering::LEXICOGRAPHIC : -+ ElementDofOrdering::NATIVE; -+ elem_restrict_trial = trial_fes->GetElementRestriction(trial_ordering); -+ elem_restrict_test = test_fes->GetElementRestriction(test_ordering); -+ if (elem_restrict_trial) - { -- integrators[i]->AssemblePA(*trial_fes, *test_fes); -+ local_trial.SetSize(elem_restrict_trial->Height(), -+ Device::GetDeviceMemoryType()); -+ local_trial.UseDevice(true); // ensure 'local_trial = 0.0' is done on device -+ } -+ if (elem_restrict_test) -+ { -+ local_test.SetSize(elem_restrict_test->Height(), -+ Device::GetDeviceMemoryType()); -+ local_test.UseDevice(true); // ensure 'local_test = 0.0' is done on device -+ } -+ -+ // Construct face restriction operators only if the bilinear form has -+ // interior or boundary face integrators -+ if (a->GetTFBFI()->Size() > 0) -+ { -+ if (int_face_restrict_lex_trial == nullptr) -+ { -+ int_face_restrict_lex_trial = trial_fes->GetFaceRestriction( -+ ElementDofOrdering::LEXICOGRAPHIC, -+ FaceType::Interior); -+ int_face_trial.SetSize(int_face_restrict_lex_trial->Height(), -+ Device::GetDeviceMemoryType()); -+ int_face_trial.UseDevice(true); -+ } -+ if (int_face_restrict_lex_test == nullptr) -+ { -+ int_face_restrict_lex_test = test_fes->GetFaceRestriction( -+ ElementDofOrdering::LEXICOGRAPHIC, -+ FaceType::Interior); -+ int_face_test.SetSize(int_face_restrict_lex_test->Height(), -+ Device::GetDeviceMemoryType()); -+ int_face_test.UseDevice(true); -+ } -+ } -+ -+ const bool has_bdr_integs = (a->GetBTFBFI()->Size() > 0 || -+ a->GetBBFI()->Size() > 0); -+ if (has_bdr_integs) -+ { -+ if (bdr_face_restrict_lex_trial == nullptr) -+ { -+ bdr_face_restrict_lex_trial = trial_fes->GetFaceRestriction( -+ ElementDofOrdering::LEXICOGRAPHIC, -+ FaceType::Boundary, -+ m); -+ bdr_face_trial.SetSize(bdr_face_restrict_lex_trial->Height(), -+ Device::GetDeviceMemoryType()); -+ bdr_face_trial.UseDevice(true); -+ } -+ if (bdr_face_restrict_lex_test == nullptr) -+ { -+ bdr_face_restrict_lex_test = test_fes->GetFaceRestriction( -+ ElementDofOrdering::LEXICOGRAPHIC, -+ FaceType::Boundary, -+ m); -+ bdr_face_test.SetSize(bdr_face_restrict_lex_test->Height(), -+ Device::GetDeviceMemoryType()); -+ bdr_face_test.UseDevice(true); -+ } - } -- MFEM_VERIFY(a->GetBBFI()->Size() == 0, -- "Partial assembly does not support AddBoundaryIntegrator yet."); -- MFEM_VERIFY(a->GetTFBFI()->Size() == 0, -- "Partial assembly does not support AddTraceFaceIntegrator yet."); -- MFEM_VERIFY(a->GetBTFBFI()->Size() == 0, -- "Partial assembly does not support AddBdrTraceFaceIntegrator yet."); - } - --void PAMixedBilinearFormExtension::Update() -+void MFMixedBilinearFormExtension::Assemble() - { -- trial_fes = a->TrialFESpace(); -- test_fes = a->TestFESpace(); -- height = test_fes->GetVSize(); -- width = trial_fes->GetVSize(); -- elem_restrict_trial = trial_fes->GetElementRestriction( -- ElementDofOrdering::LEXICOGRAPHIC); -- elem_restrict_test = test_fes->GetElementRestriction( -- ElementDofOrdering::LEXICOGRAPHIC); -- if (elem_restrict_trial) -+ SetupRestrictionOperators(L2FaceValues::DoubleValued); -+ -+ Array &integrators = *a->GetDBFI(); -+ for (BilinearFormIntegrator *integ : integrators) - { -- localTrial.UseDevice(true); -- localTrial.SetSize(elem_restrict_trial->Height(), -- Device::GetMemoryType()); -+ integ->AssembleMF(*trial_fes, *test_fes); - } -- if (elem_restrict_test) -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- localTest.UseDevice(true); // ensure 'localY = 0.0' is done on device -- localTest.SetSize(elem_restrict_test->Height(), Device::GetMemoryType()); -+ integ->AssembleMFBoundary(*trial_fes, *test_fes); - } --} - --void PAMixedBilinearFormExtension::FormRectangularSystemOperator( -- const Array &trial_tdof_list, -- const Array &test_tdof_list, -- OperatorHandle &A) --{ -- Operator * oper; -- Operator::FormRectangularSystemOperator(trial_tdof_list, test_tdof_list, -- oper); -- A.Reset(oper); // A will own oper -+ MFEM_VERIFY(a->GetTFBFI()->Size() == 0, "AddInteriorFaceIntegrator is not " -+ "currently supported in MFMixedBilinearFormExtension"); -+ -+ MFEM_VERIFY(a->GetBTFBFI()->Size() == 0, "AddBdrFaceIntegrator is not " -+ "currently supported in MFMixedBilinearFormExtension"); - } - --void PAMixedBilinearFormExtension::FormRectangularLinearSystem( -- const Array &trial_tdof_list, -- const Array &test_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, -- Vector &X, Vector &B) -+void MFMixedBilinearFormExtension::Mult(const Vector &x, Vector &y) const - { -- Operator *oper; -- Operator::FormRectangularLinearSystem(trial_tdof_list, test_tdof_list, x, b, -- oper, X, B); -- A.Reset(oper); // A will own oper -+ y = 0.0; -+ AddMult(x, y); - } - --void PAMixedBilinearFormExtension::SetupMultInputs( -- const Operator *elem_restrict_x, -- const Vector &x, -- Vector &localX, -- const Operator *elem_restrict_y, -- Vector &y, -- Vector &localY, -- const double c) const -+void MFMixedBilinearFormExtension::AddMult(const Vector &x, Vector &y, -+ const double c) const - { -- // * G operation: localX = c*local(x) -- if (elem_restrict_x) -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict_trial && integrators.Size() > 0) -+ { -+ elem_restrict_trial->Mult(x, local_trial); -+ } -+ if (elem_restrict_test && integrators.Size() > 0) - { -- elem_restrict_x->Mult(x, localX); -+ local_test = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultMF(elem_restrict_trial ? local_trial : x, local_test); -+ } - if (c != 1.0) - { -- localX *= c; -+ local_test *= c; - } -+ elem_restrict_test->AddMultTranspose(local_test, y); - } - else - { -- if (c == 1.0) -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && integrators.Size() > 0) - { -- localX.SyncAliasMemory(x); -+ temp_test.SetSize(y.Size()); -+ temp_test.UseDevice(true); -+ temp_test = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultMF(elem_restrict_trial ? local_trial : x, temp_test); -+ } -+ y.Add(c, temp_test); - } - else - { -- localX.Set(c, x); -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultMF(elem_restrict_trial ? local_trial : x, y); -+ } - } - } -- if (elem_restrict_y) -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex_trial && bdr_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex_trial->Mult(x, bdr_face_trial); -+ } -+ if (bdr_face_restrict_lex_test && bdr_integrators.Size() > 0) - { -- localY = 0.0; -+ bdr_face_test = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultMF(bdr_face_restrict_lex_trial ? bdr_face_trial : x, -+ bdr_face_test); -+ } -+ if (c != 1.0) -+ { -+ bdr_face_test *= c; -+ } -+ bdr_face_restrict_lex_test->AddMultTranspose(bdr_face_test, y); - } - else - { -- y.UseDevice(true); -- localY.SyncAliasMemory(y); -+ if (c != 1.0 && bdr_integrators.Size() > 0) -+ { -+ temp_test.SetSize(y.Size()); -+ temp_test.UseDevice(true); -+ temp_test = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultMF(bdr_face_restrict_lex_trial ? bdr_face_trial : x, -+ temp_test); -+ } -+ y.Add(c, temp_test); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultMF(bdr_face_restrict_lex_trial ? bdr_face_trial : x, y); -+ } -+ } - } - } - --void PAMixedBilinearFormExtension::Mult(const Vector &x, Vector &y) const -+void MFMixedBilinearFormExtension::MultTranspose(const Vector &x, -+ Vector &y) const - { - y = 0.0; -- AddMult(x, y); -+ AddMultTranspose(x, y); - } - --void PAMixedBilinearFormExtension::AddMult(const Vector &x, Vector &y, -- const double c) const -+void MFMixedBilinearFormExtension::AddMultTranspose(const Vector &x, Vector &y, -+ const double c) const - { -- Array &integrators = *a->GetDBFI(); -- const int iSz = integrators.Size(); -- -- // * G operation -- SetupMultInputs(elem_restrict_trial, x, localTrial, -- elem_restrict_test, y, localTest, c); -- -- // * B^TDB operation -- for (int i = 0; i < iSz; ++i) -+ Array &integrators = *a->GetDBFI(); -+ if (integrators.Size() > 0) - { -- integrators[i]->AddMultPA(localTrial, localTest); -+ if (elem_restrict_test) -+ { -+ elem_restrict_test->Mult(x, local_test); -+ } -+ if (elem_restrict_trial) -+ { -+ local_trial = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(elem_restrict_test ? local_test : x, -+ local_trial); -+ } -+ if (c != 1.0) -+ { -+ local_trial *= c; -+ } -+ elem_restrict_trial->AddMultTranspose(local_trial, y); -+ } -+ else -+ { -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0) -+ { -+ temp_trial.SetSize(y.Size()); -+ temp_trial.UseDevice(true); -+ temp_trial = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(elem_restrict_test ? local_test : x, -+ temp_trial); -+ } -+ y.Add(c, temp_trial); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(elem_restrict_test ? local_test : x, y); -+ } -+ } -+ } - } - -- // * G^T operation -- if (elem_restrict_test) -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex_test && bdr_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex_test->Mult(x, bdr_face_test); -+ } -+ if (bdr_face_restrict_lex_trial && bdr_integrators.Size() > 0) -+ { -+ bdr_face_trial = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(bdr_face_restrict_lex_test ? bdr_face_test : x, -+ bdr_face_trial); -+ } -+ if (c != 1.0) -+ { -+ bdr_face_trial *= c; -+ } -+ bdr_face_restrict_lex_trial->AddMultTranspose(bdr_face_trial, y); -+ } -+ else - { -- tempY.SetSize(y.Size()); -- elem_restrict_test->MultTranspose(localTest, tempY); -- y += tempY; -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && bdr_integrators.Size() > 0) -+ { -+ temp_trial.SetSize(y.Size()); -+ temp_trial.UseDevice(true); -+ temp_trial = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultTransposeMF(bdr_face_restrict_lex_test ? bdr_face_test : x, -+ temp_trial); -+ } -+ y.Add(c, temp_trial); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposeMF(bdr_face_restrict_lex_test ? bdr_face_test : x, y); -+ } -+ } - } - } - --void PAMixedBilinearFormExtension::MultTranspose(const Vector &x, -- Vector &y) const -+void MFMixedBilinearFormExtension::Update() - { -- y = 0.0; -- AddMultTranspose(x, y); -+ trial_fes = a->TrialFESpace(); -+ test_fes = a->TestFESpace(); -+ height = test_fes->GetVSize(); -+ width = trial_fes->GetVSize(); -+ -+ elem_restrict_trial = nullptr; -+ elem_restrict_test = nullptr; -+ int_face_restrict_lex_trial = nullptr; -+ int_face_restrict_lex_test = nullptr; -+ bdr_face_restrict_lex_trial = nullptr; -+ bdr_face_restrict_lex_test = nullptr; - } - --void PAMixedBilinearFormExtension::AddMultTranspose(const Vector &x, Vector &y, -- const double c) const -+/// Data and methods for partially-assembled mixed bilinear forms -+PAMixedBilinearFormExtension::PAMixedBilinearFormExtension( -+ MixedBilinearForm *form) -+ : MFMixedBilinearFormExtension(form) - { -- Array &integrators = *a->GetDBFI(); -- const int iSz = integrators.Size(); -+} - -- // * G operation -- SetupMultInputs(elem_restrict_test, x, localTest, -- elem_restrict_trial, y, localTrial, c); -+void PAMixedBilinearFormExtension::Assemble() -+{ -+ SetupRestrictionOperators(L2FaceValues::DoubleValued); - -- // * B^TD^TB operation -- for (int i = 0; i < iSz; ++i) -+ Array &integrators = *a->GetDBFI(); -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AddMultTransposePA(localTest, localTrial); -+ integ->AssemblePA(*trial_fes, *test_fes); - } - -- // * G^T operation -- if (elem_restrict_trial) -+ Array &bdr_integrators = *a->GetBBFI(); -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- tempY.SetSize(y.Size()); -- elem_restrict_trial->MultTranspose(localTrial, tempY); -- y += tempY; -+ integ->AssemblePABoundary(*trial_fes, *test_fes); - } -+ -+ MFEM_VERIFY(a->GetTFBFI()->Size() == 0, "AddInteriorFaceIntegrator is not " -+ "currently supported in PAMixedBilinearFormExtension"); -+ -+ MFEM_VERIFY(a->GetBTFBFI()->Size() == 0, "AddBdrFaceIntegrator is not " -+ "currently supported in PAMixedBilinearFormExtension"); - } - - void PAMixedBilinearFormExtension::AssembleDiagonal_ADAt(const Vector &D, - Vector &diag) const - { -- Array &integrators = *a->GetDBFI(); -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict_trial && integrators.Size() > 0) -+ { -+ elem_restrict_trial->MultUnsigned(D, local_trial); -+ } -+ if (elem_restrict_test && integrators.Size() > 0) -+ { -+ local_test = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AssembleDiagonalPA_ADAt(elem_restrict_trial ? local_trial : D, -+ local_test); -+ } -+ elem_restrict_test->MultTransposeUnsigned(local_test, diag); -+ } -+ else -+ { -+ diag.UseDevice(true); // typically this is a large vector, so store on device -+ diag = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AssembleDiagonalPA_ADAt(elem_restrict_trial ? local_trial : D, diag); -+ } -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex_trial && bdr_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex_trial->MultUnsigned(D, bdr_face_trial); -+ } -+ if (bdr_face_restrict_lex_test && bdr_integrators.Size() > 0) -+ { -+ bdr_face_test = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AssembleDiagonalPA_ADAt(bdr_face_restrict_lex_trial ? bdr_face_trial : D, -+ bdr_face_test); -+ } -+ bdr_face_restrict_lex_test->AddMultTransposeUnsigned(bdr_face_test, diag); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AssembleDiagonalPA_ADAt(bdr_face_restrict_lex_trial ? bdr_face_trial : D, -+ diag); -+ } -+ } -+} - -- const int iSz = integrators.Size(); -+void PAMixedBilinearFormExtension::AddMult(const Vector &x, Vector &y, -+ const double c) const -+{ -+ Array &integrators = *a->GetDBFI(); -+ if (elem_restrict_trial && integrators.Size() > 0) -+ { -+ elem_restrict_trial->Mult(x, local_trial); -+ } -+ if (elem_restrict_test && integrators.Size() > 0) -+ { -+ local_test = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultPA(elem_restrict_trial ? local_trial : x, local_test); -+ } -+ if (c != 1.0) -+ { -+ local_test *= c; -+ } -+ elem_restrict_test->AddMultTranspose(local_test, y); -+ } -+ else -+ { -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && integrators.Size() > 0) -+ { -+ temp_test.SetSize(y.Size()); -+ temp_test.UseDevice(true); -+ temp_test = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultPA(elem_restrict_trial ? local_trial : x, temp_test); -+ } -+ y.Add(c, temp_test); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultPA(elem_restrict_trial ? local_trial : x, y); -+ } -+ } -+ } - -- if (elem_restrict_trial) -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex_trial && bdr_integrators.Size() > 0) - { -- const ElementRestriction* H1elem_restrict_trial = -- dynamic_cast(elem_restrict_trial); -- if (H1elem_restrict_trial) -+ bdr_face_restrict_lex_trial->Mult(x, bdr_face_trial); -+ } -+ if (bdr_face_restrict_lex_test && bdr_integrators.Size() > 0) -+ { -+ bdr_face_test = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(bdr_face_restrict_lex_trial ? bdr_face_trial : x, -+ bdr_face_test); -+ } -+ if (c != 1.0) -+ { -+ bdr_face_test *= c; -+ } -+ bdr_face_restrict_lex_test->AddMultTranspose(bdr_face_test, y); -+ } -+ else -+ { -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && bdr_integrators.Size() > 0) - { -- H1elem_restrict_trial->MultUnsigned(D, localTrial); -+ temp_test.SetSize(y.Size()); -+ temp_test.UseDevice(true); -+ temp_test = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(bdr_face_restrict_lex_trial ? bdr_face_trial : x, -+ temp_test); -+ } -+ y.Add(c, temp_test); - } - else - { -- elem_restrict_trial->Mult(D, localTrial); -+ for (BilinearFormIntegrator *integ : bdr_integrators) -+ { -+ integ->AddMultPA(bdr_face_restrict_lex_trial ? bdr_face_trial : x, y); -+ } - } - } -+} - -- if (elem_restrict_test) -+void PAMixedBilinearFormExtension::AddMultTranspose(const Vector &x, Vector &y, -+ const double c) const -+{ -+ Array &integrators = *a->GetDBFI(); -+ if (integrators.Size() > 0) - { -- localTest = 0.0; -- for (int i = 0; i < iSz; ++i) -+ if (elem_restrict_test) -+ { -+ elem_restrict_test->Mult(x, local_test); -+ } -+ if (elem_restrict_trial) -+ { -+ local_trial = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(elem_restrict_test ? local_test : x, -+ local_trial); -+ } -+ if (c != 1.0) -+ { -+ local_trial *= c; -+ } -+ elem_restrict_trial->AddMultTranspose(local_trial, y); -+ } -+ else - { -- if (elem_restrict_trial) -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0) - { -- integrators[i]->AssembleDiagonalPA_ADAt(localTrial, localTest); -+ temp_trial.SetSize(y.Size()); -+ temp_trial.UseDevice(true); -+ temp_trial = 0.0; -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(elem_restrict_test ? local_test : x, -+ temp_trial); -+ } -+ y.Add(c, temp_trial); - } - else - { -- integrators[i]->AssembleDiagonalPA_ADAt(D, localTest); -+ for (BilinearFormIntegrator *integ : integrators) -+ { -+ integ->AddMultTransposePA(elem_restrict_test ? local_test : x, y); -+ } - } - } -- const ElementRestriction* H1elem_restrict_test = -- dynamic_cast(elem_restrict_test); -- if (H1elem_restrict_test) -+ } -+ -+ Array &bdr_integrators = *a->GetBBFI(); -+ if (bdr_face_restrict_lex_test && bdr_integrators.Size() > 0) -+ { -+ bdr_face_restrict_lex_test->Mult(x, bdr_face_test); -+ } -+ if (bdr_face_restrict_lex_trial && bdr_integrators.Size() > 0) -+ { -+ bdr_face_trial = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- H1elem_restrict_test->MultTransposeUnsigned(localTest, diag); -+ integ->AddMultTransposePA(bdr_face_restrict_lex_test ? bdr_face_test : x, -+ bdr_face_trial); - } -- else -+ if (c != 1.0) - { -- elem_restrict_test->MultTranspose(localTest, diag); -+ bdr_face_trial *= c; - } -+ bdr_face_restrict_lex_trial->AddMultTranspose(bdr_face_trial, y); - } - else - { -- diag.UseDevice(true); // typically this is a large vector, so store on device -- diag = 0.0; -- for (int i = 0; i < iSz; ++i) -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0 && bdr_integrators.Size() > 0) - { -- if (elem_restrict_trial) -+ temp_trial.SetSize(y.Size()); -+ temp_trial.UseDevice(true); -+ temp_trial = 0.0; -+ for (BilinearFormIntegrator *integ : bdr_integrators) - { -- integrators[i]->AssembleDiagonalPA_ADAt(localTrial, diag); -+ integ->AddMultTransposePA(bdr_face_restrict_lex_test ? bdr_face_test : x, -+ temp_trial); - } -- else -+ y.Add(c, temp_trial); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *integ : integrators) - { -- integrators[i]->AssembleDiagonalPA_ADAt(D, diag); -+ integ->AddMultTransposePA(bdr_face_restrict_lex_test ? bdr_face_test : x, y); - } - } - } - } - -+/// Data and methods for partially-assembled discrete linear operators - PADiscreteLinearOperatorExtension::PADiscreteLinearOperatorExtension( - DiscreteLinearOperator *linop) : - PAMixedBilinearFormExtension(linop) - { - } - --const --Operator *PADiscreteLinearOperatorExtension::GetOutputRestrictionTranspose() --const --{ -- return a->GetOutputRestrictionTranspose(); --} -- - void PADiscreteLinearOperatorExtension::Assemble() - { -- Array &integrators = *a->GetDBFI(); -- const int integratorCount = integrators.Size(); -- for (int i = 0; i < integratorCount; ++i) -- { -- integrators[i]->AssemblePA(*trial_fes, *test_fes); -- } -+ PAMixedBilinearFormExtension::Assemble(); - -+ // Construct element vdof multiplicity (avoid use of elem_restrict_test -+ // because it might not exist for libCEED) -+ test_multiplicity.SetSize(height); - test_multiplicity.UseDevice(true); -- test_multiplicity.SetSize(elem_restrict_test->Width()); // l-vector -- Vector ones(elem_restrict_test->Height()); // e-vector -- ones = 1.0; -- -- const ElementRestriction* elem_restrict = -- dynamic_cast(elem_restrict_test); -- if (elem_restrict) -- { -- elem_restrict->MultTransposeUnsigned(ones, test_multiplicity); -- } -- else -- { -- mfem_error("A real ElementRestriction is required in this setting!"); -+ test_multiplicity = 0.0; -+ Array dofs; -+ for (int i = 0; i < test_fes->GetNE(); i++) -+ { -+ test_fes->GetElementVDofs(i, dofs); -+ const int ndofs = dofs.Size(); -+ auto d_mult = test_multiplicity.HostReadWrite(); -+ auto d_dofs = dofs.HostRead(); -+ mfem::forall(ndofs, [=] MFEM_HOST_DEVICE (int i) -+ { -+ const int j = d_dofs[i]; -+ d_mult[(j >= 0) ? j : -1 - j] += 1.0; -+ }); - } -- -- auto tm = test_multiplicity.ReadWrite(); -- mfem::forall(test_multiplicity.Size(), [=] MFEM_HOST_DEVICE (int i) -- { -- tm[i] = 1.0 / tm[i]; -- }); -+ test_multiplicity.Reciprocal(); - } - --void PADiscreteLinearOperatorExtension::AddMult( -- const Vector &x, Vector &y, const double c) const -+void PADiscreteLinearOperatorExtension::AddMult(const Vector &x, Vector &y, -+ const double c) const - { -- Array &integrators = *a->GetDBFI(); -- const int iSz = integrators.Size(); -- -- // * G operation -- SetupMultInputs(elem_restrict_trial, x, localTrial, -- elem_restrict_test, y, localTest, c); -- -- // * B^TDB operation -- for (int i = 0; i < iSz; ++i) -+ Array &interpolators = *a->GetDBFI(); -+ temp_test.SetSize(y.Size()); -+ temp_test.UseDevice(true); -+ if (elem_restrict_trial) - { -- integrators[i]->AddMultPA(localTrial, localTest); -+ elem_restrict_trial->Mult(x, local_trial); - } -- -- // do a kind of "set" rather than "add" in the below -- // operation as compared to the BilinearForm case -- // * G^T operation (kind of...) -- const ElementRestriction* elem_restrict = -- dynamic_cast(elem_restrict_test); -- if (elem_restrict) -+ if (elem_restrict_test) - { -- tempY.SetSize(y.Size()); -- elem_restrict->MultLeftInverse(localTest, tempY); -- y += tempY; -+ local_test = 0.0; -+ for (BilinearFormIntegrator *interp : interpolators) -+ { -+ interp->AddMultPA(elem_restrict_trial ? local_trial : x, local_test); -+ } -+ elem_restrict_test->MultTranspose(local_test, temp_test); - } - else - { -- mfem_error("In this setting you need a real ElementRestriction!"); -+ for (BilinearFormIntegrator *interp : interpolators) -+ { -+ interp->AddMultPA(elem_restrict_trial ? local_trial : x, temp_test); -+ } - } -+ temp_test *= test_multiplicity; -+ y.Add(c, temp_test); - } - --void PADiscreteLinearOperatorExtension::AddMultTranspose( -- const Vector &x, Vector &y, const double c) const -+void PADiscreteLinearOperatorExtension::AddMultTranspose(const Vector &x, -+ Vector &y, -+ const double c) const - { -- Array &integrators = *a->GetDBFI(); -- const int iSz = integrators.Size(); -- -- // do a kind of "set" rather than "add" in the below -- // operation as compared to the BilinearForm case -- // * G operation (kinda) -- Vector xscaled(x); -- MFEM_VERIFY(x.Size() == test_multiplicity.Size(), "Input vector of wrong size"); -- auto xs = xscaled.ReadWrite(); -- auto tm = test_multiplicity.Read(); -- mfem::forall(x.Size(), [=] MFEM_HOST_DEVICE (int i) -- { -- xs[i] *= tm[i]; -- }); -- SetupMultInputs(elem_restrict_test, xscaled, localTest, -- elem_restrict_trial, y, localTrial, c); -- -- // * B^TD^TB operation -- for (int i = 0; i < iSz; ++i) -+ Array &interpolators = *a->GetDBFI(); -+ temp_test.SetSize(y.Size()); -+ temp_test.UseDevice(true); -+ temp_test = x; -+ temp_test *= test_multiplicity; -+ if (elem_restrict_test) - { -- integrators[i]->AddMultTransposePA(localTest, localTrial); -+ elem_restrict_test->Mult(temp_test, local_test); - } -- -- // * G^T operation - if (elem_restrict_trial) - { -- tempY.SetSize(y.Size()); -- elem_restrict_trial->MultTranspose(localTrial, tempY); -- y += tempY; -+ local_trial = 0.0; -+ for (BilinearFormIntegrator *interp : interpolators) -+ { -+ interp->AddMultTransposePA(elem_restrict_test ? local_test : temp_test, -+ local_trial); -+ } -+ if (c != 1.0) -+ { -+ local_trial *= c; -+ } -+ elem_restrict_trial->AddMultTranspose(local_trial, y); - } - else - { -- mfem_error("Trial ElementRestriction not defined"); -+ y.UseDevice(true); // typically this is a large vector, so store on device -+ if (c != 1.0) -+ { -+ MFEM_ABORT("General coefficient case for PADiscreteLinearOperatorExtension::" -+ "AddMultTranspose is not yet supported!"); -+ } -+ else -+ { -+ for (BilinearFormIntegrator *interp : interpolators) -+ { -+ interp->AddMultTransposePA(elem_restrict_test ? local_test : temp_test, y); -+ } -+ } - } - } - --void PADiscreteLinearOperatorExtension::FormRectangularSystemOperator( -- const Array& ess1, const Array& ess2, OperatorHandle &A) --{ -- const Operator *Pi = this->GetProlongation(); -- const Operator *RoT = this->GetOutputRestrictionTranspose(); -- Operator *rap = SetupRAP(Pi, RoT); -- -- RectangularConstrainedOperator *Arco -- = new RectangularConstrainedOperator(rap, ess1, ess2, rap != this); -- -- A.Reset(Arco); --} -- - } // namespace mfem -diff --git a/fem/bilinearform_ext.hpp b/fem/bilinearform_ext.hpp -index ef54dc71c..db26eb801 100644 ---- a/fem/bilinearform_ext.hpp -+++ b/fem/bilinearform_ext.hpp -@@ -25,8 +25,8 @@ class DiscreteLinearOperator; - - /// Class extending the BilinearForm class to support different AssemblyLevels. - /** FA - Full Assembly -- PA - Partial Assembly - EA - Element Assembly -+ PA - Partial Assembly - MF - Matrix Free - */ - class BilinearFormExtension : public Operator -@@ -54,57 +54,59 @@ public: - MFEM_ABORT("AssembleDiagonal not implemented for this assembly level!"); - } - -- virtual void FormSystemMatrix(const Array &ess_tdof_list, -- OperatorHandle &A) = 0; -- virtual void FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, Vector &X, Vector &B, -- int copy_interior = 0) = 0; - virtual void Update() = 0; - }; - --/// Data and methods for partially-assembled bilinear forms --class PABilinearFormExtension : public BilinearFormExtension -+/// Data and methods for matrix-free bilinear forms -+class MFBilinearFormExtension : public BilinearFormExtension - { - protected: -- const FiniteElementSpace *trial_fes, *test_fes; // Not owned -- mutable Vector localX, localY; -- mutable Vector int_face_X, int_face_Y; -- mutable Vector bdr_face_X, bdr_face_Y; -- const Operator *elem_restrict; // Not owned -+ const FiniteElementSpace *fes; // Not owned -+ mutable Vector local_x, local_y, temp_y; -+ mutable Vector int_face_x, int_face_y; -+ mutable Vector bdr_face_x, bdr_face_y; -+ const ElementRestriction *elem_restrict; // Not owned - const FaceRestriction *int_face_restrict_lex; // Not owned - const FaceRestriction *bdr_face_restrict_lex; // Not owned - - public: -- PABilinearFormExtension(BilinearForm*); -+ MFBilinearFormExtension(BilinearForm *form); - - void Assemble(); - void AssembleDiagonal(Vector &diag) const; -- void FormSystemMatrix(const Array &ess_tdof_list, OperatorHandle &A); -- void FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, Vector &X, Vector &B, -- int copy_interior = 0); - void Mult(const Vector &x, Vector &y) const; -+ void AddMult(const Vector &x, Vector &y, const double c = 1.0) const; - void MultTranspose(const Vector &x, Vector &y) const; -+ void AddMultTranspose(const Vector &x, Vector &y, const double c = 1.0) const; - void Update(); - - protected: - void SetupRestrictionOperators(const L2FaceValues m); - }; - -+/// Data and methods for partially-assembled bilinear forms -+class PABilinearFormExtension : public MFBilinearFormExtension -+{ -+public: -+ PABilinearFormExtension(BilinearForm *form); -+ -+ void Assemble(); -+ void AssembleDiagonal(Vector &diag) const; -+ void Mult(const Vector &x, Vector &y) const; -+ void AddMult(const Vector &x, Vector &y, const double c = 1.0) const; -+ void MultTranspose(const Vector &x, Vector &y) const; -+ void AddMultTranspose(const Vector &x, Vector &y, const double c = 1.0) const; -+}; -+ - /// Data and methods for element-assembled bilinear forms - class EABilinearFormExtension : public PABilinearFormExtension - { - protected: -- int ne; -- int elemDofs; -- // The element matrices are stored row major -- Vector ea_data; -- int nf_int, nf_bdr; -- int faceDofs; -+ const bool factorize_face_terms; -+ int ne, elem_dofs; -+ Vector ea_data; // The element matrices are stored row major -+ int nf_int, nf_bdr, face_dofs; - Vector ea_data_int, ea_data_ext, ea_data_bdr; -- bool factorize_face_terms; - - public: - EABilinearFormExtension(BilinearForm *form); -@@ -125,15 +127,6 @@ public: - FABilinearFormExtension(BilinearForm *form); - - void Assemble(); -- void RAP(OperatorHandle &A); -- /** @note Always does `DIAG_ONE` policy to be consistent with -- `Operator::FormConstrainedSystemOperator`. */ -- void EliminateBC(const Array &ess_dofs, OperatorHandle &A); -- void FormSystemMatrix(const Array &ess_tdof_list, OperatorHandle &A); -- void FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, Vector &X, Vector &B, -- int copy_interior = 0); - void Mult(const Vector &x, Vector &y) const; - void MultTranspose(const Vector &x, Vector &y) const; - -@@ -143,37 +136,10 @@ public: - void DGMultTranspose(const Vector &x, Vector &y) const; - }; - --/// Data and methods for matrix-free bilinear forms --class MFBilinearFormExtension : public BilinearFormExtension --{ --protected: -- const FiniteElementSpace *trial_fes, *test_fes; // Not owned -- mutable Vector localX, localY; -- mutable Vector int_face_X, int_face_Y; -- mutable Vector bdr_face_X, bdr_face_Y; -- const Operator *elem_restrict; // Not owned -- const FaceRestriction *int_face_restrict_lex; // Not owned -- const FaceRestriction *bdr_face_restrict_lex; // Not owned -- --public: -- MFBilinearFormExtension(BilinearForm *form); -- -- void Assemble(); -- void AssembleDiagonal(Vector &diag) const; -- void FormSystemMatrix(const Array &ess_tdof_list, OperatorHandle &A); -- void FormLinearSystem(const Array &ess_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, Vector &X, Vector &B, -- int copy_interior = 0); -- void Mult(const Vector &x, Vector &y) const; -- void MultTranspose(const Vector &x, Vector &y) const; -- void Update(); --}; -- - /// Class extending the MixedBilinearForm class to support different AssemblyLevels. - /** FA - Full Assembly -- PA - Partial Assembly - EA - Element Assembly -+ PA - Partial Assembly - MF - Matrix Free - */ - class MixedBilinearFormExtension : public Operator -@@ -185,7 +151,7 @@ public: - MixedBilinearFormExtension(MixedBilinearForm *form); - - virtual MemoryClass GetMemoryClass() const -- { return Device::GetMemoryClass(); } -+ { return Device::GetDeviceMemoryClass(); } - - /// Get the finite element space prolongation matrix - virtual const Operator *GetProlongation() const; -@@ -199,101 +165,70 @@ public: - /// Get the output finite element space restriction matrix - virtual const Operator *GetOutputRestriction() const; - -+ /// Assemble at the level given for the BilinearFormExtension subclass - virtual void Assemble() = 0; -- virtual void FormRectangularSystemOperator(const Array &trial_tdof_list, -- const Array &test_tdof_list, -- OperatorHandle &A) = 0; -- virtual void FormRectangularLinearSystem(const Array &trial_tdof_list, -- const Array &test_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, Vector &X, Vector &B) = 0; - -- virtual void AssembleDiagonal_ADAt(const Vector &D, Vector &diag) const = 0; -+ virtual void AssembleDiagonal_ADAt(const Vector &D, Vector &diag) const -+ { -+ MFEM_ABORT("AssembleDiagonal_ADAt not implemented for this assembly level!"); -+ } - - virtual void Update() = 0; - }; - --/// Data and methods for partially-assembled mixed bilinear forms --class PAMixedBilinearFormExtension : public MixedBilinearFormExtension -+/// Data and methods for matrix-free mixed bilinear forms -+class MFMixedBilinearFormExtension : public MixedBilinearFormExtension - { - protected: - const FiniteElementSpace *trial_fes, *test_fes; // Not owned -- mutable Vector localTrial, localTest, tempY; -- const Operator *elem_restrict_trial; // Not owned -- const Operator *elem_restrict_test; // Not owned -- -- /// Helper function to set up inputs/outputs for Mult or MultTranspose -- void SetupMultInputs(const Operator *elem_restrict_x, -- const Vector &x, Vector &localX, -- const Operator *elem_restrict_y, -- Vector &y, Vector &localY, const double c) const; -+ mutable Vector local_trial, local_test, temp_trial, temp_test; -+ mutable Vector int_face_trial, int_face_test, int_face_y; -+ mutable Vector bdr_face_trial, bdr_face_test, bdr_face_y; -+ const ElementRestriction *elem_restrict_trial; // Not owned -+ const ElementRestriction *elem_restrict_test; // Not owned -+ const FaceRestriction *int_face_restrict_lex_trial; // Not owned -+ const FaceRestriction *int_face_restrict_lex_test; // Not owned -+ const FaceRestriction *bdr_face_restrict_lex_trial; // Not owned -+ const FaceRestriction *bdr_face_restrict_lex_test; // Not owned - - public: -- PAMixedBilinearFormExtension(MixedBilinearForm *form); -+ MFMixedBilinearFormExtension(MixedBilinearForm *form); - -- /// Partial assembly of all internal integrators - void Assemble(); -- /** -- @brief Setup OperatorHandle A to contain constrained linear operator -- -- OperatorHandle A contains matrix-free constrained operator formed for RAP -- system where ess_tdof_list are in trial space and eliminated from -- "columns" of A. -- */ -- void FormRectangularSystemOperator(const Array &trial_tdof_list, -- const Array &test_tdof_list, -- OperatorHandle &A); -- /** -- Setup OperatorHandle A to contain constrained linear operator and -- eliminate columns corresponding to essential dofs from system, -- updating RHS B vector with the results. -- */ -- void FormRectangularLinearSystem(const Array &trial_tdof_list, -- const Array &test_tdof_list, -- Vector &x, Vector &b, -- OperatorHandle &A, Vector &X, Vector &B); -- /// y = A*x - void Mult(const Vector &x, Vector &y) const; -- /// y += c*A*x -- void AddMult(const Vector &x, Vector &y, const double c=1.0) const; -- /// y = A^T*x -+ void AddMult(const Vector &x, Vector &y, const double c = 1.0) const; - void MultTranspose(const Vector &x, Vector &y) const; -- /// y += c*A^T*x -- void AddMultTranspose(const Vector &x, Vector &y, const double c=1.0) const; -- /// Assemble the diagonal of ADA^T for a diagonal vector D. -- void AssembleDiagonal_ADAt(const Vector &D, Vector &diag) const; -- -- /// Update internals for when a new MixedBilinearForm is given to this class -+ void AddMultTranspose(const Vector &x, Vector &y, const double c = 1.0) const; - void Update(); -+ -+protected: -+ void SetupRestrictionOperators(const L2FaceValues m); - }; - -+/// Data and methods for partially-assembled mixed bilinear forms -+class PAMixedBilinearFormExtension : public MFMixedBilinearFormExtension -+{ -+public: -+ PAMixedBilinearFormExtension(MixedBilinearForm *form); - --/** -- @brief Partial assembly extension for DiscreteLinearOperator -+ void Assemble(); -+ void AssembleDiagonal_ADAt(const Vector &D, Vector &diag) const; -+ void AddMult(const Vector &x, Vector &y, const double c = 1.0) const; -+ void AddMultTranspose(const Vector &x, Vector &y, const double c = 1.0) const; -+}; - -- This acts very much like PAMixedBilinearFormExtension, but its -- FormRectangularSystemOperator implementation emulates 'Set' rather than -- 'Add' in the assembly case. --*/ -+/// Data and methods for partially-assembled discrete linear operators - class PADiscreteLinearOperatorExtension : public PAMixedBilinearFormExtension - { -+private: -+ Vector test_multiplicity; -+ - public: - PADiscreteLinearOperatorExtension(DiscreteLinearOperator *linop); - -- /// Partial assembly of all internal integrators - void Assemble(); -- -- void AddMult(const Vector &x, Vector &y, const double c=1.0) const; -- -- void AddMultTranspose(const Vector &x, Vector &y, const double c=1.0) const; -- -- void FormRectangularSystemOperator(const Array&, const Array&, -- OperatorHandle& A); -- -- const Operator * GetOutputRestrictionTranspose() const; -- --private: -- Vector test_multiplicity; -+ void AddMult(const Vector &x, Vector &y, const double c = 1.0) const; -+ void AddMultTranspose(const Vector &x, Vector &y, const double c = 1.0) const; - }; - - } -diff --git a/fem/bilininteg.cpp b/fem/bilininteg.cpp -index c552e9510..e6fc2a6ee 100644 ---- a/fem/bilininteg.cpp -+++ b/fem/bilininteg.cpp -@@ -22,126 +22,162 @@ namespace mfem - - void BilinearFormIntegrator::AssemblePA(const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::AssemblePA(fes)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssemblePA(fes)\n" -+ " is not implemented for this class."); - } - - void BilinearFormIntegrator::AssemblePA(const FiniteElementSpace&, - const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::AssemblePA(fes, fes)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssemblePA(fes, fes)\n" -+ " is not implemented for this class."); -+} -+ -+void BilinearFormIntegrator::AssemblePABoundary(const FiniteElementSpace&) -+{ -+ MFEM_ABORT("BilinearFormIntegrator::AssemblePABoundary(fes)\n" -+ " is not implemented for this class."); -+} -+ -+void BilinearFormIntegrator::AssemblePABoundary(const FiniteElementSpace&, -+ const FiniteElementSpace&) -+{ -+ MFEM_ABORT("BilinearFormIntegrator::AssemblePABoundary(fes, fes)\n" -+ " is not implemented for this class."); - } - - void BilinearFormIntegrator::AssemblePAInteriorFaces(const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::AssemblePAInteriorFaces(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssemblePAInteriorFaces(fes)\n" -+ " is not implemented for this class."); - } - - void BilinearFormIntegrator::AssemblePABoundaryFaces(const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::AssemblePABoundaryFaces(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssemblePABoundaryFaces(fes)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleDiagonalPA(Vector &) -+void BilinearFormIntegrator::AssembleDiagonalPA(Vector&) - { -- mfem_error ("BilinearFormIntegrator::AssembleDiagonalPA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleDiagonalPA(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleEA(const FiniteElementSpace &fes, -- Vector &emat, -- const bool add) -+void BilinearFormIntegrator::AssembleDiagonalPA_ADAt(const Vector&, Vector&) - { -- mfem_error ("BilinearFormIntegrator::AssembleEA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleDiagonalPA_ADAt(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace -- &fes, -- Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add) -+void BilinearFormIntegrator::AddMultPA(const Vector&, Vector&) const - { -- mfem_error ("BilinearFormIntegrator::AssembleEAInteriorFaces(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::MultAssembled(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace -- &fes, -- Vector &ea_data_bdr, -- const bool add) -+void BilinearFormIntegrator::AddMultTransposePA(const Vector&, Vector&) const - { -- mfem_error ("BilinearFormIntegrator::AssembleEABoundaryFaces(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AddMultTransposePA(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleDiagonalPA_ADAt(const Vector &, Vector &) -+void BilinearFormIntegrator::AssembleMF(const FiniteElementSpace&) - { -- MFEM_ABORT("BilinearFormIntegrator::AssembleDiagonalPA_ADAt(...)\n" -+ MFEM_ABORT("BilinearFormIntegrator::AssembleMF(fes)\n" - " is not implemented for this class."); - } - --void BilinearFormIntegrator::AddMultPA(const Vector &, Vector &) const -+void BilinearFormIntegrator::AssembleMF(const FiniteElementSpace&, -+ const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::MultAssembled(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleMF(fes, fes)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AddMultTransposePA(const Vector &, Vector &) const -+void BilinearFormIntegrator::AssembleMFBoundary(const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::AddMultTransposePA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleMFBoundary(fes)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleMF(const FiniteElementSpace &fes) -+void BilinearFormIntegrator::AssembleMFBoundary(const FiniteElementSpace&, -+ const FiniteElementSpace&) - { -- mfem_error ("BilinearFormIntegrator::AssembleMF(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleMFBoundary(fes, fes)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AddMultMF(const Vector &, Vector &) const -+void BilinearFormIntegrator::AssembleDiagonalMF(Vector&) - { -- mfem_error ("BilinearFormIntegrator::AddMultMF(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleDiagonalMF(...)\n" -+ " is not implemented for this class."); -+} -+ -+void BilinearFormIntegrator::AddMultMF(const Vector&, Vector&) const -+{ -+ MFEM_ABORT("BilinearFormIntegrator::AddMultMF(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AddMultTransposeMF(const Vector &, Vector &) const -+void BilinearFormIntegrator::AddMultTransposeMF(const Vector&, Vector&) const - { -- mfem_error ("BilinearFormIntegrator::AddMultTransposeMF(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AddMultTransposeMF(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleDiagonalMF(Vector &) -+void BilinearFormIntegrator::AssembleEA(const FiniteElementSpace&, -+ Vector&) - { -- mfem_error ("BilinearFormIntegrator::AssembleDiagonalMF(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleEA(...)\n" -+ " is not implemented for this class."); -+} -+ -+void BilinearFormIntegrator::AssembleEA(const FiniteElementSpace&, -+ const FiniteElementSpace&, -+ Vector&) -+{ -+ MFEM_ABORT("BilinearFormIntegrator::AssembleEA(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleElementMatrix ( -+void BilinearFormIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace &, -+ Vector&, -+ Vector&) -+{ -+ MFEM_ABORT("BilinearFormIntegrator::AssembleEAInteriorFaces(...)\n" -+ " is not implemented for this class."); -+} -+ -+void BilinearFormIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace&, -+ Vector&) -+{ -+ MFEM_ABORT("BilinearFormIntegrator::AssembleEABoundaryFaces(...)\n" -+ " is not implemented for this class."); -+} -+ -+void BilinearFormIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat ) -+ DenseMatrix &elmat) - { -- mfem_error ("BilinearFormIntegrator::AssembleElementMatrix(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleElementMatrix(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleElementMatrix2 ( -+void BilinearFormIntegrator::AssembleElementMatrix2( - const FiniteElement &el1, const FiniteElement &el2, -- ElementTransformation &Trans, DenseMatrix &elmat ) -+ ElementTransformation &Trans, DenseMatrix &elmat) - { -- mfem_error ("BilinearFormIntegrator::AssembleElementMatrix2(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleElementMatrix2(...)\n" -+ " is not implemented for this class."); - } - --void BilinearFormIntegrator::AssembleFaceMatrix ( -+void BilinearFormIntegrator::AssembleFaceMatrix( - const FiniteElement &el1, const FiniteElement &el2, - FaceElementTransformations &Trans, DenseMatrix &elmat) - { -- mfem_error ("BilinearFormIntegrator::AssembleFaceMatrix(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("BilinearFormIntegrator::AssembleFaceMatrix(...)\n" -+ " is not implemented for this class."); - } - - void BilinearFormIntegrator::AssembleFaceMatrix( -@@ -191,30 +227,30 @@ void TransposeIntegrator::SetIntRule(const IntegrationRule *ir) - bfi->SetIntRule(ir); - } - --void TransposeIntegrator::AssembleElementMatrix ( -+void TransposeIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -- bfi -> AssembleElementMatrix (el, Trans, bfi_elmat); -+ bfi->AssembleElementMatrix(el, Trans, bfi_elmat); - // elmat = bfi_elmat^t -- elmat.Transpose (bfi_elmat); -+ elmat.Transpose(bfi_elmat); - } - --void TransposeIntegrator::AssembleElementMatrix2 ( -+void TransposeIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) - { -- bfi -> AssembleElementMatrix2 (test_fe, trial_fe, Trans, bfi_elmat); -+ bfi->AssembleElementMatrix2(test_fe, trial_fe, Trans, bfi_elmat); - // elmat = bfi_elmat^t -- elmat.Transpose (bfi_elmat); -+ elmat.Transpose(bfi_elmat); - } - --void TransposeIntegrator::AssembleFaceMatrix ( -+void TransposeIntegrator::AssembleFaceMatrix( - const FiniteElement &el1, const FiniteElement &el2, - FaceElementTransformations &Trans, DenseMatrix &elmat) - { -- bfi -> AssembleFaceMatrix (el1, el2, Trans, bfi_elmat); -+ bfi->AssembleFaceMatrix(el1, el2, Trans, bfi_elmat); - // elmat = bfi_elmat^t -- elmat.Transpose (bfi_elmat); -+ elmat.Transpose(bfi_elmat); - } - - void LumpedIntegrator::SetIntRule(const IntegrationRule *ir) -@@ -223,10 +259,10 @@ void LumpedIntegrator::SetIntRule(const IntegrationRule *ir) - bfi->SetIntRule(ir); - } - --void LumpedIntegrator::AssembleElementMatrix ( -+void LumpedIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -- bfi -> AssembleElementMatrix (el, Trans, elmat); -+ bfi->AssembleElementMatrix(el, Trans, elmat); - elmat.Lump(); - } - -@@ -316,6 +352,15 @@ void SumIntegrator::AssemblePA(const FiniteElementSpace& fes) - } - } - -+void SumIntegrator::AssemblePA(const FiniteElementSpace& trial_fes, -+ const FiniteElementSpace& test_fes) -+{ -+ for (int i = 0; i < integrators.Size(); i++) -+ { -+ integrators[i]->AssemblePA(trial_fes, test_fes); -+ } -+} -+ - void SumIntegrator::AssembleDiagonalPA(Vector &diag) - { - for (int i = 0; i < integrators.Size(); i++) -@@ -364,57 +409,63 @@ void SumIntegrator::AssembleMF(const FiniteElementSpace &fes) - } - } - --void SumIntegrator::AddMultMF(const Vector& x, Vector& y) const -+void SumIntegrator::AssembleMF(const FiniteElementSpace& trial_fes, -+ const FiniteElementSpace& test_fes) - { - for (int i = 0; i < integrators.Size(); i++) - { -- integrators[i]->AddMultTransposeMF(x, y); -+ integrators[i]->AssembleMF(trial_fes, test_fes); - } - } - --void SumIntegrator::AddMultTransposeMF(const Vector &x, Vector &y) const -+void SumIntegrator::AssembleDiagonalMF(Vector &diag) - { - for (int i = 0; i < integrators.Size(); i++) - { -- integrators[i]->AddMultMF(x, y); -+ integrators[i]->AssembleDiagonalMF(diag); - } - } - --void SumIntegrator::AssembleDiagonalMF(Vector &diag) -+void SumIntegrator::AddMultMF(const Vector& x, Vector& y) const - { - for (int i = 0; i < integrators.Size(); i++) - { -- integrators[i]->AssembleDiagonalMF(diag); -+ integrators[i]->AddMultTransposeMF(x, y); -+ } -+} -+ -+void SumIntegrator::AddMultTransposeMF(const Vector &x, Vector &y) const -+{ -+ for (int i = 0; i < integrators.Size(); i++) -+ { -+ integrators[i]->AddMultMF(x, y); - } - } - --void SumIntegrator::AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add) -+void SumIntegrator::AssembleEA(const FiniteElementSpace &fes, Vector &emat) - { - for (int i = 0; i < integrators.Size(); i++) - { -- integrators[i]->AssembleEA(fes, emat, add); -+ integrators[i]->AssembleEA(fes, emat); - } - } - - void SumIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace &fes, - Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add) -+ Vector &ea_data_ext) - { - for (int i = 0; i < integrators.Size(); i++) - { -- integrators[i]->AssembleEAInteriorFaces(fes,ea_data_int,ea_data_ext,add); -+ integrators[i]->AssembleEAInteriorFaces(fes, ea_data_int, ea_data_ext); - } - } - - void SumIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace &fes, -- Vector &ea_data_bdr, -- const bool add) -+ Vector &ea_data_bdr) - { - for (int i = 0; i < integrators.Size(); i++) - { -- integrators[i]->AssembleEABoundaryFaces(fes, ea_data_bdr, add); -+ integrators[i]->AssembleEABoundaryFaces(fes, ea_data_bdr); - } - } - -@@ -642,15 +693,15 @@ void MixedVectorIntegrator::AssembleElementMatrix2( - { - if (Q) - { -- w *= Q -> Eval (Trans, ip); -+ w *= Q->Eval(Trans, ip); - } - if (same_shapes) - { -- AddMult_a_AAt (w, test_shape, elmat); -+ AddMult_a_AAt(w, test_shape, elmat); - } - else - { -- AddMult_a_ABt (w, test_shape, trial_shape, elmat); -+ AddMult_a_ABt(w, test_shape, trial_shape, elmat); - } - } - } -@@ -724,7 +775,7 @@ void MixedScalarVectorIntegrator::AssembleElementMatrix2( - VQ->Eval(V, Trans, ip); - V *= w; - -- if ( vdim == 2 && cross_2d ) -+ if (vdim == 2 && cross_2d) - { - vtmp = V[0]; - V[0] = -V[1]; -@@ -736,7 +787,6 @@ void MixedScalarVectorIntegrator::AssembleElementMatrix2( - } - } - -- - void GradientIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, const FiniteElement &test_fe, - ElementTransformation &Trans, DenseMatrix &elmat) -@@ -793,19 +843,18 @@ void GradientIntegrator::AssembleElementMatrix2( - } - } - --const IntegrationRule &GradientIntegrator::GetRule(const FiniteElement -- &trial_fe, -- const FiniteElement &test_fe, -- ElementTransformation &Trans) -+const IntegrationRule &GradientIntegrator::GetRule( -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, -+ ElementTransformation &Trans) - { - int order = Trans.OrderGrad(&trial_fe) + test_fe.GetOrder() + Trans.OrderJ(); - return IntRules.Get(trial_fe.GetGeomType(), order); - } - -- --void DiffusionIntegrator::AssembleElementMatrix --( const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat ) -+void DiffusionIntegrator::AssembleElementMatrix( -+ const FiniteElement &el, ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - dim = el.GetDim(); -@@ -1044,10 +1093,14 @@ void DiffusionIntegrator::AssembleElementVector( - } - } - --void DiffusionIntegrator::ComputeElementFlux --( const FiniteElement &el, ElementTransformation &Trans, -- Vector &u, const FiniteElement &fluxelem, Vector &flux, bool with_coef, -- const IntegrationRule *ir) -+void DiffusionIntegrator::ComputeElementFlux( -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ Vector &u, -+ const FiniteElement &fluxelem, -+ Vector &flux, -+ bool with_coef, -+ const IntegrationRule *ir) - { - int nd, spaceDim, fnd; - -@@ -1087,7 +1140,7 @@ void DiffusionIntegrator::ComputeElementFlux - ir = &fluxelem.GetNodes(); - } - fnd = ir->GetNPoints(); -- flux.SetSize( fnd * spaceDim ); -+ flux.SetSize(fnd * spaceDim); - - for (int i = 0; i < fnd; i++) - { -@@ -1095,7 +1148,7 @@ void DiffusionIntegrator::ComputeElementFlux - el.CalcDShape(ip, dshape); - dshape.MultTranspose(u, vec); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - CalcInverse(Trans.Jacobian(), invdfdx); - invdfdx.MultTranspose(vec, vecdxt); - -@@ -1143,9 +1196,11 @@ void DiffusionIntegrator::ComputeElementFlux - } - } - --double DiffusionIntegrator::ComputeFluxEnergy --( const FiniteElement &fluxelem, ElementTransformation &Trans, -- Vector &flux, Vector* d_energy) -+double DiffusionIntegrator::ComputeFluxEnergy( -+ const FiniteElement &fluxelem, -+ ElementTransformation &Trans, -+ Vector &flux, -+ Vector* d_energy) - { - int nd = fluxelem.GetDof(); - dim = fluxelem.GetDim(); -@@ -1220,7 +1275,8 @@ double DiffusionIntegrator::ComputeFluxEnergy - } - - const IntegrationRule &DiffusionIntegrator::GetRule( -- const FiniteElement &trial_fe, const FiniteElement &test_fe) -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) - { - int order; - if (trial_fe.Space() == FunctionSpace::Pk) -@@ -1232,7 +1288,6 @@ const IntegrationRule &DiffusionIntegrator::GetRule( - // order = 2*el.GetOrder() - 2; // <-- this seems to work fine too - order = trial_fe.GetOrder() + test_fe.GetOrder() + trial_fe.GetDim() - 1; - } -- - if (trial_fe.Space() == FunctionSpace::rQk) - { - return RefinedIntRules.Get(trial_fe.GetGeomType(), order); -@@ -1240,10 +1295,10 @@ const IntegrationRule &DiffusionIntegrator::GetRule( - return IntRules.Get(trial_fe.GetGeomType(), order); - } - -- --void MassIntegrator::AssembleElementMatrix --( const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat ) -+void MassIntegrator::AssembleElementMatrix( -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - // int dim = el.GetDim(); -@@ -1252,8 +1307,8 @@ void MassIntegrator::AssembleElementMatrix - #ifdef MFEM_THREAD_SAFE - Vector shape; - #endif -- elmat.SetSize(nd); - shape.SetSize(nd); -+ elmat.SetSize(nd); - - const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, Trans); - -@@ -1261,14 +1316,14 @@ void MassIntegrator::AssembleElementMatrix - for (int i = 0; i < ir->GetNPoints(); i++) - { - const IntegrationPoint &ip = ir->IntPoint(i); -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - - el.CalcPhysShape(Trans, shape); - - w = Trans.Weight() * ip.weight; - if (Q) - { -- w *= Q -> Eval(Trans, ip); -+ w *= Q->Eval(Trans, ip); - } - - AddMult_a_VVt(w, shape, elmat); -@@ -1300,11 +1355,11 @@ void MassIntegrator::AssembleElementMatrix2( - trial_fe.CalcShape(ip, shape); - test_fe.CalcShape(ip, te_shape); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - w = Trans.Weight() * ip.weight; - if (Q) - { -- w *= Q -> Eval(Trans, ip); -+ w *= Q->Eval(Trans, ip); - } - - te_shape *= w; -@@ -1326,7 +1381,6 @@ const IntegrationRule &MassIntegrator::GetRule(const FiniteElement &trial_fe, - return IntRules.Get(trial_fe.GetGeomType(), order); - } - -- - void BoundaryMassIntegrator::AssembleFaceMatrix( - const FiniteElement &el1, const FiniteElement &el2, - FaceElementTransformations &Trans, DenseMatrix &elmat) -@@ -1366,7 +1420,7 @@ void BoundaryMassIntegrator::AssembleFaceMatrix( - w = Trans.Weight() * ip.weight; - if (Q) - { -- w *= Q -> Eval(Trans, ip); -+ w *= Q->Eval(Trans, ip); - } - - AddMult_a_VVt(w, shape, elmat); -@@ -1420,9 +1474,18 @@ void ConvectionIntegrator::AssembleElementMatrix( - } - } - -+const IntegrationRule &ConvectionIntegrator::GetRule( -+ const FiniteElement &fe, -+ ElementTransformation &Trans) -+{ -+ int order = Trans.OrderGrad(&fe) + Trans.Order() + fe.GetOrder(); -+ return IntRules.Get(fe.GetGeomType(), order); -+} - - void GroupConvectionIntegrator::AssembleElementMatrix( -- const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - int dim = el.GetDim(); -@@ -1473,24 +1536,10 @@ void GroupConvectionIntegrator::AssembleElementMatrix( - } - } - --const IntegrationRule &ConvectionIntegrator::GetRule( -- const FiniteElement &trial_fe, const FiniteElement &test_fe, -- ElementTransformation &Trans) --{ -- int order = Trans.OrderGrad(&trial_fe) + Trans.Order() + test_fe.GetOrder(); -- -- return IntRules.Get(trial_fe.GetGeomType(), order); --} -- --const IntegrationRule &ConvectionIntegrator::GetRule( -- const FiniteElement &el, ElementTransformation &Trans) --{ -- return GetRule(el,el,Trans); --} -- --void VectorMassIntegrator::AssembleElementMatrix --( const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat ) -+void VectorMassIntegrator::AssembleElementMatrix( -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - int spaceDim = Trans.GetSpaceDim(); -@@ -1533,7 +1582,7 @@ void VectorMassIntegrator::AssembleElementMatrix - const IntegrationPoint &ip = ir->IntPoint(s); - el.CalcShape(ip, shape); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - norm = ip.weight * Trans.Weight(); - - MultVVt(shape, partelmat); -@@ -1790,7 +1839,7 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - "At least one of the finite elements must be in H(Curl)"); - - int curl_nd, vec_nd; -- if ( trial_fe.GetMapType() == mfem::FiniteElement::H_CURL ) -+ if (trial_fe.GetMapType() == mfem::FiniteElement::H_CURL) - { - curl_nd = trial_nd; - vec_nd = test_nd; -@@ -1829,7 +1878,7 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - Trans.SetIntPoint(&ip); - if (dim == 3) - { -- if ( trial_fe.GetMapType() == mfem::FiniteElement::H_CURL ) -+ if (trial_fe.GetMapType() == mfem::FiniteElement::H_CURL) - { - trial_fe.CalcCurlShape(ip, curlshapeTrial); - test_fe.CalcVShape(Trans, vshapeTest); -@@ -1843,7 +1892,7 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - } - else - { -- if ( trial_fe.GetMapType() == mfem::FiniteElement::H_CURL ) -+ if (trial_fe.GetMapType() == mfem::FiniteElement::H_CURL) - { - trial_fe.CalcCurlShape(ip, curlshapeTrial_dFT); - test_fe.CalcShape(ip, shapeTest); -@@ -1863,7 +1912,7 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - } - // Note: shapeTest points to the same data as vshapeTest - vshapeTest *= w; -- if ( trial_fe.GetMapType() == mfem::FiniteElement::H_CURL ) -+ if (trial_fe.GetMapType() == mfem::FiniteElement::H_CURL) - { - AddMultABt(vshapeTest, curlshapeTrial_dFT, elmat); - } -@@ -1874,7 +1923,7 @@ void VectorFECurlIntegrator::AssembleElementMatrix2( - } - } - --void DerivativeIntegrator::AssembleElementMatrix2 ( -+void DerivativeIntegrator::AssembleElementMatrix2( - const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -1888,12 +1937,12 @@ void DerivativeIntegrator::AssembleElementMatrix2 ( - int i, l; - double det; - -- elmat.SetSize (test_nd,trial_nd); -- dshape.SetSize (trial_nd,dim); -+ elmat.SetSize(test_nd,trial_nd); -+ dshape.SetSize(trial_nd,dim); - dshapedxt.SetSize(trial_nd, spaceDim); - dshapedxi.SetSize(trial_nd); - invdfdx.SetSize(dim, spaceDim); -- shape.SetSize (test_nd); -+ shape.SetSize(test_nd); - - const IntegrationRule *ir = IntRule; - if (ir == NULL) -@@ -1925,10 +1974,10 @@ void DerivativeIntegrator::AssembleElementMatrix2 ( - - trial_fe.CalcDShape(ip, dshape); - -- Trans.SetIntPoint (&ip); -- CalcInverse (Trans.Jacobian(), invdfdx); -+ Trans.SetIntPoint(&ip); -+ CalcInverse(Trans.Jacobian(), invdfdx); - det = Trans.Weight(); -- Mult (dshape, invdfdx, dshapedxt); -+ Mult(dshape, invdfdx, dshapedxt); - - test_fe.CalcShape(ip, shape); - -@@ -1938,13 +1987,14 @@ void DerivativeIntegrator::AssembleElementMatrix2 ( - } - - shape *= Q->Eval(Trans,ip) * det * ip.weight; -- AddMultVWt (shape, dshapedxi, elmat); -+ AddMultVWt(shape, dshapedxi, elmat); - } - } - --void CurlCurlIntegrator::AssembleElementMatrix --( const FiniteElement &el, ElementTransformation &Trans, -- DenseMatrix &elmat ) -+void CurlCurlIntegrator::AssembleElementMatrix( -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int nd = el.GetDof(); - dim = el.GetDim(); -@@ -1959,6 +2009,7 @@ void CurlCurlIntegrator::AssembleElementMatrix - curlshape_dFt.SetSize(nd,dimc); - #endif - elmat.SetSize(nd); -+ - if (MQ) { M.SetSize(dimc); } - if (DQ) { D.SetSize(dimc); } - -@@ -1983,7 +2034,7 @@ void CurlCurlIntegrator::AssembleElementMatrix - { - const IntegrationPoint &ip = ir->IntPoint(i); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - - w = ip.weight * Trans.Weight(); - el.CalcPhysCurlShape(Trans, curlshape_dFt); -@@ -2090,10 +2141,10 @@ void CurlCurlIntegrator::AssembleElementMatrix2(const FiniteElement &trial_fe, - } - } - --void CurlCurlIntegrator --::ComputeElementFlux(const FiniteElement &el, ElementTransformation &Trans, -- Vector &u, const FiniteElement &fluxelem, Vector &flux, -- bool with_coef, const IntegrationRule *ir) -+void CurlCurlIntegrator::ComputeElementFlux( -+ const FiniteElement &el, ElementTransformation &Trans, -+ Vector &u, const FiniteElement &fluxelem, Vector &flux, -+ bool with_coef, const IntegrationRule *ir) - { - #ifdef MFEM_THREAD_SAFE - DenseMatrix projcurl; -@@ -2215,7 +2266,9 @@ double CurlCurlIntegrator::ComputeFluxEnergy(const FiniteElement &fluxelem, - } - - void VectorCurlCurlIntegrator::AssembleElementMatrix( -- const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) -+ const FiniteElement &el, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat) - { - int dim = el.GetDim(); - int dof = el.GetDof(); -@@ -2405,7 +2458,6 @@ void MixedCurlIntegrator::AssembleElementMatrix2( - } - } - -- - void VectorFEMassIntegrator::AssembleElementMatrix( - const FiniteElement &el, - ElementTransformation &Trans, -@@ -2443,7 +2495,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix( - { - const IntegrationPoint &ip = ir->IntPoint(i); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - - el.CalcVShape(Trans, trial_vshape); - -@@ -2465,9 +2517,9 @@ void VectorFEMassIntegrator::AssembleElementMatrix( - { - if (Q) - { -- w *= Q -> Eval (Trans, ip); -+ w *= Q->Eval (Trans, ip); - } -- AddMult_a_AAt (w, trial_vshape, elmat); -+ AddMult_a_AAt(w, trial_vshape, elmat); - } - } - } -@@ -2512,7 +2564,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - { - const IntegrationPoint &ip = ir->IntPoint(i); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - - trial_fe.CalcVShape(Trans, trial_vshape); - test_fe.CalcShape(ip, shape); -@@ -2598,7 +2650,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - #endif - DenseMatrix tmp(test_vshape.Height(), K.Width()); - -- elmat.SetSize (test_dof, trial_dof); -+ elmat.SetSize(test_dof, trial_dof); - - const IntegrationRule *ir = IntRule; - if (ir == NULL) -@@ -2612,7 +2664,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - { - const IntegrationPoint &ip = ir->IntPoint(i); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - - trial_fe.CalcVShape(Trans, trial_vshape); - test_fe.CalcVShape(Trans, test_vshape); -@@ -2635,7 +2687,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - { - if (Q) - { -- w *= Q -> Eval (Trans, ip); -+ w *= Q->Eval (Trans, ip); - } - AddMult_a_ABt(w,test_vshape,trial_vshape,elmat); - } -@@ -2643,7 +2695,7 @@ void VectorFEMassIntegrator::AssembleElementMatrix2( - } - else - { -- mfem_error("VectorFEMassIntegrator::AssembleElementMatrix2(...)\n" -+ MFEM_ABORT("VectorFEMassIntegrator::AssembleElementMatrix2(...)\n" - " is not implemented for given trial and test bases."); - } - } -@@ -2659,42 +2711,42 @@ void VectorDivergenceIntegrator::AssembleElementMatrix2( - int test_dof = test_fe.GetDof(); - double c; - -- dshape.SetSize (trial_dof, dim); -- gshape.SetSize (trial_dof, dim); -- Jadj.SetSize (dim); -- divshape.SetSize (dim*trial_dof); -- shape.SetSize (test_dof); -+ dshape.SetSize(trial_dof, dim); -+ gshape.SetSize(trial_dof, dim); -+ Jadj.SetSize(dim); -+ divshape.SetSize(dim*trial_dof); -+ shape.SetSize(test_dof); - -- elmat.SetSize (test_dof, dim*trial_dof); -+ elmat.SetSize(test_dof, dim*trial_dof); - - const IntegrationRule *ir = IntRule ? IntRule : &GetRule(trial_fe, test_fe, - Trans); - - elmat = 0.0; - -- for (int i = 0; i < ir -> GetNPoints(); i++) -+ for (int i = 0; i < ir->GetNPoints(); i++) - { - const IntegrationPoint &ip = ir->IntPoint(i); - -- trial_fe.CalcDShape (ip, dshape); -- test_fe.CalcShape (ip, shape); -+ trial_fe.CalcDShape(ip, dshape); -+ test_fe.CalcShape(ip, shape); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - CalcAdjugate(Trans.Jacobian(), Jadj); - -- Mult (dshape, Jadj, gshape); -+ Mult(dshape, Jadj, gshape); - - gshape.GradToDiv (divshape); - - c = ip.weight; - if (Q) - { -- c *= Q -> Eval (Trans, ip); -+ c *= Q->Eval (Trans, ip); - } - - // elmat += c * shape * divshape ^ t - shape *= c; -- AddMultVWt (shape, divshape, elmat); -+ AddMultVWt(shape, divshape, elmat); - } - } - -@@ -2707,7 +2759,6 @@ const IntegrationRule &VectorDivergenceIntegrator::GetRule( - return IntRules.Get(trial_fe.GetGeomType(), order); - } - -- - void DivDivIntegrator::AssembleElementMatrix( - const FiniteElement &el, - ElementTransformation &Trans, -@@ -2731,23 +2782,22 @@ void DivDivIntegrator::AssembleElementMatrix( - } - - elmat = 0.0; -- -- for (int i = 0; i < ir -> GetNPoints(); i++) -+ for (int i = 0; i < ir->GetNPoints(); i++) - { - const IntegrationPoint &ip = ir->IntPoint(i); - -- el.CalcDivShape (ip, divshape); -+ el.CalcDivShape(ip, divshape); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - c = ip.weight / Trans.Weight(); - - if (Q) - { -- c *= Q -> Eval (Trans, ip); -+ c *= Q->Eval (Trans, ip); - } - - // elmat += c * divshape * divshape ^ t -- AddMult_a_VVt (c, divshape, elmat); -+ AddMult_a_VVt(c, divshape, elmat); - } - } - -@@ -2779,20 +2829,19 @@ void DivDivIntegrator::AssembleElementMatrix2( - } - - elmat = 0.0; -- -- for (int i = 0; i < ir -> GetNPoints(); i++) -+ for (int i = 0; i < ir->GetNPoints(); i++) - { - const IntegrationPoint &ip = ir->IntPoint(i); - - trial_fe.CalcDivShape(ip,divshape); - test_fe.CalcDivShape(ip,te_divshape); - -- Trans.SetIntPoint (&ip); -+ Trans.SetIntPoint(&ip); - c = ip.weight / Trans.Weight(); - - if (Q) - { -- c *= Q -> Eval (Trans, ip); -+ c *= Q->Eval (Trans, ip); - } - - te_divshape *= c; -@@ -2836,7 +2885,7 @@ void VectorDiffusionIntegrator::AssembleElementMatrix( - - elmat = 0.0; - -- for (int i = 0; i < ir -> GetNPoints(); i++) -+ for (int i = 0; i < ir->GetNPoints(); i++) - { - - const IntegrationPoint &ip = ir->IntPoint(i); -@@ -2972,7 +3021,6 @@ void VectorDiffusionIntegrator::AssembleElementVector( - } - } - -- - void ElasticityIntegrator::AssembleElementMatrix( - const FiniteElement &el, ElementTransformation &Trans, DenseMatrix &elmat) - { -@@ -3003,7 +3051,7 @@ void ElasticityIntegrator::AssembleElementMatrix( - - elmat = 0.0; - -- for (int i = 0; i < ir -> GetNPoints(); i++) -+ for (int i = 0; i < ir->GetNPoints(); i++) - { - const IntegrationPoint &ip = ir->IntPoint(i); - -@@ -3038,7 +3086,7 @@ void ElasticityIntegrator::AssembleElementMatrix( - for (int k = 0; k < dof; k++) - for (int l = 0; l < dof; l++) - { -- elmat (dof*d+k, dof*d+l) += (M * w) * pelmat(k, l); -+ elmat(dof*d+k, dof*d+l) += (M * w) * pelmat(k, l); - } - } - for (int ii = 0; ii < dim; ii++) -@@ -3360,7 +3408,6 @@ void DGTraceIntegrator::AssembleFaceMatrix(const FiniteElement &el1, - } - } - -- - const IntegrationRule &DGTraceIntegrator::GetRule( - Geometry::Type geom, int order, FaceElementTransformations &T) - { -@@ -3603,8 +3650,6 @@ void DGDiffusionIntegrator::AssembleFaceMatrix( - } - } - -- --// static method - void DGElasticityIntegrator::AssembleBlock( - const int dim, const int row_ndofs, const int col_ndofs, - const int row_offset, const int col_offset, -@@ -3827,7 +3872,6 @@ void DGElasticityIntegrator::AssembleFaceMatrix( - } - } - -- - void TraceJumpIntegrator::AssembleFaceMatrix( - const FiniteElement &trial_face_fe, const FiniteElement &test_fe1, - const FiniteElement &test_fe2, FaceElementTransformations &Trans, -@@ -4243,7 +4287,6 @@ void NormalInterpolator::AssembleElementMatrix2( - } - } - -- - namespace internal - { - -@@ -4284,7 +4327,6 @@ ScalarProductInterpolator::AssembleElementMatrix2(const FiniteElement &dom_fe, - ran_fe.Project(dom_shape_coeff, Trans, elmat_as_vec); - } - -- - void - ScalarVectorProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, -@@ -4319,7 +4361,6 @@ ScalarVectorProductInterpolator::AssembleElementMatrix2( - ran_fe.ProjectMatrixCoefficient(dom_shape_coeff, Trans, elmat_as_vec); - } - -- - void - VectorScalarProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, -@@ -4357,7 +4398,6 @@ VectorScalarProductInterpolator::AssembleElementMatrix2( - ran_fe.ProjectMatrixCoefficient(dom_shape_coeff, Trans, elmat_as_vec); - } - -- - void - ScalarCrossProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, -@@ -4453,7 +4493,6 @@ VectorCrossProductInterpolator::AssembleElementMatrix2( - ran_fe.ProjectMatrixCoefficient(dom_shape_coeff, Trans, elmat_as_vec); - } - -- - namespace internal - { - -@@ -4483,8 +4522,7 @@ struct VDotVShapeCoefficient : public VectorCoefficient - - } - --void --VectorInnerProductInterpolator::AssembleElementMatrix2( -+void VectorInnerProductInterpolator::AssembleElementMatrix2( - const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, -diff --git a/fem/bilininteg.hpp b/fem/bilininteg.hpp -index 11922cff0..209898714 100644 ---- a/fem/bilininteg.hpp -+++ b/fem/bilininteg.hpp -@@ -27,7 +27,6 @@ constexpr int HCURL_MAX_Q1D = 5; - #else - constexpr int HCURL_MAX_Q1D = 6; - #endif -- - constexpr int HDIV_MAX_D1D = 5; - constexpr int HDIV_MAX_Q1D = 6; - -@@ -36,7 +35,7 @@ class BilinearFormIntegrator : public NonlinearFormIntegrator - { - protected: - BilinearFormIntegrator(const IntegrationRule *ir = NULL) -- : NonlinearFormIntegrator(ir) { } -+ : NonlinearFormIntegrator(ir) {} - - public: - // TODO: add support for other assembly levels (in addition to PA) and their -@@ -51,8 +50,6 @@ public: - // make sense for the action of the nonlinear operator (but they all make - // sense for its Jacobian). - -- using NonlinearFormIntegrator::AssemblePA; -- - /// Method defining partial assembly. - /** The result of the partial assembly is stored internally so that it can be - used later in the methods AddMultPA() and AddMultTransposePA(). */ -@@ -61,6 +58,11 @@ public: - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); - -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); -+ /** Used with BilinearFormIntegrators that have different spaces. */ -+ virtual void AssemblePABoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ - virtual void AssemblePAInteriorFaces(const FiniteElementSpace &fes); - - virtual void AssemblePABoundaryFaces(const FiniteElementSpace &fes); -@@ -89,20 +91,21 @@ public: - called. */ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -- /// Method defining element assembly. -- /** The result of the element assembly is added to the @a emat Vector if -- @a add is true. Otherwise, if @a add is false, we set @a emat. */ -- virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add = true); -- /** Used with BilinearFormIntegrators that have different spaces. */ -- // virtual void AssembleEA(const FiniteElementSpace &trial_fes, -- // const FiniteElementSpace &test_fes, -- // Vector &emat); -- - /// Method defining matrix-free assembly. - /** The result of fully matrix-free assembly is stored internally so that it - can be used later in the methods AddMultMF() and AddMultTransposeMF(). */ - virtual void AssembleMF(const FiniteElementSpace &fes); -+ /** Used with BilinearFormIntegrators that have different spaces. */ -+ virtual void AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AssembleMFBoundary(const FiniteElementSpace &fes); -+ /** Used with BilinearFormIntegrators that have different spaces. */ -+ virtual void AssembleMFBoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ /// Assemble diagonal and add it to Vector @a diag. -+ virtual void AssembleDiagonalMF(Vector &diag); - - /** Perform the action of integrator on the input @a x and add the result to - the output @a y. Both @a x and @a y are E-vectors, i.e. they represent -@@ -120,17 +123,20 @@ public: - called. */ - virtual void AddMultTransposeMF(const Vector &x, Vector &y) const; - -- /// Assemble diagonal and add it to Vector @a diag. -- virtual void AssembleDiagonalMF(Vector &diag); -+ /// Method defining element assembly. -+ /** The result of the element assembly is added to the @a emat Vector. */ -+ virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); -+ /** Used with BilinearFormIntegrators that have different spaces. */ -+ virtual void AssembleEA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes, -+ Vector &emat); - - virtual void AssembleEAInteriorFaces(const FiniteElementSpace &fes, - Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add = true); -+ Vector &ea_data_ext); - - virtual void AssembleEABoundaryFaces(const FiniteElementSpace &fes, -- Vector &ea_data_bdr, -- const bool add = true); -+ Vector &ea_data_bdr); - - /// Given a particular Finite Element computes the element matrix elmat. - virtual void AssembleElementMatrix(const FiniteElement &el, -@@ -234,7 +240,7 @@ public: - Vector &u, - const FiniteElement &fluxelem, - Vector &flux, bool with_coef = true, -- const IntegrationRule *ir = NULL) { } -+ const IntegrationRule *ir = NULL) {} - - /** @brief Virtual method required for Zienkiewicz-Zhu type error estimators. - -@@ -260,7 +266,7 @@ public: - Vector &flux, Vector *d_energy = NULL) - { return 0.0; } - -- virtual ~BilinearFormIntegrator() { } -+ virtual ~BilinearFormIntegrator() {} - }; - - /** Wraps a given @a BilinearFormIntegrator and transposes the resulting element -@@ -268,13 +274,12 @@ public: - class TransposeIntegrator : public BilinearFormIntegrator - { - private: -- int own_bfi; -+ bool own_bfi; - BilinearFormIntegrator *bfi; -- - DenseMatrix bfi_elmat; - - public: -- TransposeIntegrator (BilinearFormIntegrator *bfi_, int own_bfi_ = 1) -+ TransposeIntegrator(BilinearFormIntegrator *bfi_, bool own_bfi_ = true) - { bfi = bfi_; own_bfi = own_bfi_; } - - virtual void SetIntRule(const IntegrationRule *ir); -@@ -294,12 +299,25 @@ public: - FaceElementTransformations &Trans, - DenseMatrix &elmat); - -- using BilinearFormIntegrator::AssemblePA; -- -- virtual void AssemblePA(const FiniteElementSpace& fes) -+ virtual void AssemblePA(const FiniteElementSpace &fes) - { - bfi->AssemblePA(fes); - } -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+ { -+ bfi->AssemblePA(trial_fes, test_fes); -+ } -+ -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes) -+ { -+ bfi->AssemblePABoundary(fes); -+ } -+ virtual void AssemblePABoundary(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+ { -+ bfi->AssemblePABoundary(trial_fes, test_fes); -+ } - - virtual void AssemblePAInteriorFaces(const FiniteElementSpace &fes) - { -@@ -316,22 +334,20 @@ public: - bfi->AddMultPA(x, y); - } - -- virtual void AddMultPA(const Vector& x, Vector& y) const -+ virtual void AddMultPA(const Vector &x, Vector &y) const - { - bfi->AddMultTransposePA(x, y); - } - -- virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add); -+ using BilinearFormIntegrator::AssembleEA; -+ virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - - virtual void AssembleEAInteriorFaces(const FiniteElementSpace &fes, - Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add); -+ Vector &ea_data_ext); - - virtual void AssembleEABoundaryFaces(const FiniteElementSpace &fes, -- Vector &ea_data_bdr, -- const bool add); -+ Vector &ea_data_bdr); - - virtual ~TransposeIntegrator() { if (own_bfi) { delete bfi; } } - }; -@@ -339,11 +355,11 @@ public: - class LumpedIntegrator : public BilinearFormIntegrator - { - private: -- int own_bfi; -+ bool own_bfi; - BilinearFormIntegrator *bfi; - - public: -- LumpedIntegrator (BilinearFormIntegrator *bfi_, int own_bfi_ = 1) -+ LumpedIntegrator(BilinearFormIntegrator *bfi_, bool own_bfi_ = true) - { bfi = bfi_; own_bfi = own_bfi_; } - - virtual void SetIntRule(const IntegrationRule *ir); -@@ -359,11 +375,11 @@ public: - class InverseIntegrator : public BilinearFormIntegrator - { - private: -- int own_integrator; -+ bool own_integrator; - BilinearFormIntegrator *integrator; - - public: -- InverseIntegrator(BilinearFormIntegrator *integ, int own_integ = 1) -+ InverseIntegrator(BilinearFormIntegrator *integ, bool own_integ = 1) - { integrator = integ; own_integrator = own_integ; } - - virtual void SetIntRule(const IntegrationRule *ir); -@@ -379,12 +395,12 @@ public: - class SumIntegrator : public BilinearFormIntegrator - { - private: -- int own_integrators; -+ bool own_integrators; - mutable DenseMatrix elem_mat; - Array integrators; - - public: -- SumIntegrator(int own_integs = 1) { own_integrators = own_integs; } -+ SumIntegrator(bool own_integs = true) { own_integrators = own_integs; } - - virtual void SetIntRule(const IntegrationRule *ir); - -@@ -411,8 +427,9 @@ public: - FaceElementTransformations &Trans, - DenseMatrix &elmat); - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace& fes); -+ virtual void AssemblePA(const FiniteElementSpace &fes); -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); - - virtual void AssembleDiagonalPA(Vector &diag); - -@@ -422,27 +439,27 @@ public: - - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -- virtual void AddMultPA(const Vector& x, Vector& y) const; -+ virtual void AddMultPA(const Vector &x, Vector &y) const; - - virtual void AssembleMF(const FiniteElementSpace &fes); -+ virtual void AssembleMF(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AssembleDiagonalMF(Vector &diag); - - virtual void AddMultMF(const Vector &x, Vector &y) const; - - virtual void AddMultTransposeMF(const Vector &x, Vector &y) const; - -- virtual void AssembleDiagonalMF(Vector &diag); -- -- virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add); -+ using BilinearFormIntegrator::AssembleEA; -+ virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - - virtual void AssembleEAInteriorFaces(const FiniteElementSpace &fes, - Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add); -+ Vector &ea_data_ext); - - virtual void AssembleEABoundaryFaces(const FiniteElementSpace &fes, -- Vector &ea_data_bdr, -- const bool add); -+ Vector &ea_data_bdr); - - virtual ~SumIntegrator(); - }; -@@ -452,7 +469,6 @@ public: - class MixedScalarIntegrator: public BilinearFormIntegrator - { - public: -- - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -474,44 +490,39 @@ protected: - MixedScalarIntegrator(Coefficient &q) : same_calc_shape(false), Q(&q) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarIntegrator: " - "Trial and test spaces must both be scalar fields."; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); } - -- -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { test_fe.CalcPhysShape(Trans, shape); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { trial_fe.CalcPhysShape(Trans, shape); } - - Coefficient *Q; - - private: -- - #ifndef MFEM_THREAD_SAFE -- Vector test_shape; -- Vector trial_shape; -+ Vector test_shape, trial_shape; - #endif -- - }; - - /** An abstract class for integrating the inner product of two vector basis -@@ -519,7 +530,6 @@ private: - class MixedVectorIntegrator: public BilinearFormIntegrator - { - public: -- - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -548,39 +558,38 @@ protected: - : same_calc_shape(false), Q(NULL), VQ(NULL), DQ(NULL), MQ(&mq) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorIntegrator: " - "Trial and test spaces must both be vector fields"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); } - -- -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return std::max(space_dim, test_fe.GetVDim()); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcVShape(Trans, shape); } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return std::max(space_dim, trial_fe.GetVDim()); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcVShape(Trans, shape); } - - int space_dim; -@@ -590,16 +599,10 @@ protected: - MatrixCoefficient *MQ; - - private: -- - #ifndef MFEM_THREAD_SAFE -- Vector V; -- Vector D; -- DenseMatrix M; -- DenseMatrix test_shape; -- DenseMatrix trial_shape; -- DenseMatrix shape_tmp; -+ Vector V, D; -+ DenseMatrix M, test_shape, trial_shape, shape_tmp; - #endif -- - }; - - /** An abstract class for integrating the product of a scalar basis function and -@@ -608,7 +611,6 @@ private: - class MixedScalarVectorIntegrator: public BilinearFormIntegrator - { - public: -- - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -625,14 +627,13 @@ public: - { AssembleElementMatrix2(fe, fe, Trans, elmat); } - - protected: -- - MixedScalarVectorIntegrator(VectorCoefficient &vq, bool transpose_ = false, - bool cross_2d_ = false) - : VQ(&vq), transpose(transpose_), cross_2d(cross_2d_) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return ((transpose && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -643,9 +644,9 @@ protected: - ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { -- if ( transpose ) -+ if (transpose) - { - return "MixedScalarVectorIntegrator: " - "Trial space must be a vector field " -@@ -659,23 +660,22 @@ protected: - } - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW(); } - -- -- inline virtual int GetVDim(const FiniteElement & vector_fe) -+ inline virtual int GetVDim(const FiniteElement &vector_fe) - { return std::max(space_dim, vector_fe.GetVDim()); } - -- inline virtual void CalcVShape(const FiniteElement & vector_fe, -+ inline virtual void CalcVShape(const FiniteElement &vector_fe, - ElementTransformation &Trans, -- DenseMatrix & shape_) -+ DenseMatrix &shape_) - { vector_fe.CalcVShape(Trans, shape_); } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape_) -+ Vector &shape_) - { scalar_fe.CalcPhysShape(Trans, shape_); } - - VectorCoefficient *VQ; -@@ -684,14 +684,12 @@ protected: - bool cross_2d; // In 2D use a cross product rather than a dot product - - private: -- - #ifndef MFEM_THREAD_SAFE - Vector V; - DenseMatrix vshape; - Vector shape; - Vector vshape_tmp; - #endif -- - }; - - /** Class for integrating the bilinear form a(u,v) := (Q u, v) in either 1D, 2D, -@@ -726,24 +724,24 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 1 && test_fe.GetDim() == 1 && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarDerivativeIntegrator: " - "Trial and test spaces must both be scalar fields in 1D " - "and the trial space must implement CalcDShape."; - } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - trial_fe.CalcPhysDShape(Trans, dshape); -@@ -761,15 +759,15 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 1 && test_fe.GetDim() == 1 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakDerivativeIntegrator: " - "Trial and test spaces must both be scalar fields in 1D " -@@ -777,9 +775,9 @@ protected: - "map type \"VALUE\"."; - } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - test_fe.CalcPhysDShape(Trans, dshape); -@@ -799,28 +797,28 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDerivType() == mfem::FiniteElement::DIV && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarDivergenceIntegrator: " - "Trial must be H(Div) and the test space must be a " - "scalar field"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { trial_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -835,14 +833,14 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDerivType() == mfem::FiniteElement::DIV && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorDivergenceIntegrator: " - "Trial must be H(Div) and the test space must be a " -@@ -851,14 +849,14 @@ protected: - - // Subtract one due to the divergence and add one for the coefficient - // which is assumed to be at least linear. -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1 + 1; } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -874,28 +872,28 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetDerivType() == mfem::FiniteElement::DIV ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakGradientIntegrator: " - "Trial space must be a scalar field " - "and the test space must be H(Div)"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1; } - -- virtual void CalcTestShape(const FiniteElement & test_fe, -+ virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - test_fe.CalcPhysDivShape(Trans, shape); - shape *= -1.0; -@@ -914,29 +912,29 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetDerivType() == mfem::FiniteElement::CURL && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarCurlIntegrator: " - "Trial must be H(Curl) and the test space must be a " - "scalar field"; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - trial_fe.CalcPhysCurlShape(Trans, dshape); -@@ -946,7 +944,8 @@ protected: - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); - -- virtual void AddMultPA(const Vector&, Vector&) const; -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - - // PA extension -@@ -970,24 +969,24 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakCurlIntegrator: " - "Trial space must be a scalar field " - "and the test space must be H(Curl)"; - } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - test_fe.CalcPhysCurlShape(Trans, dshape); -@@ -1028,14 +1027,14 @@ public: - : MixedScalarVectorIntegrator(vq, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedDotProductIntegrator: " - "Trial space must be a vector field " -@@ -1053,15 +1052,15 @@ public: - : MixedScalarVectorIntegrator(vq, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetDerivType() == mfem::FiniteElement::DIV ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedWeakGradDotIntegrator: " - "Trial space must be a vector field " -@@ -1070,14 +1069,14 @@ public: - - // Subtract one due to the gradient and add one for the coefficient - // which is assumed to be at least linear. -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { return trial_fe.GetOrder() + test_fe.GetOrder() + Trans.OrderW() - 1 + 1; } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1090,8 +1089,8 @@ public: - : MixedVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetVDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1099,19 +1098,19 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedWeakDivCrossIntegrator: " - "Trial space must be a vector field in 3D " - "and the test space must be a scalar field with a gradient"; - } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1130,8 +1129,8 @@ public: - : MixedVectorIntegrator(mq) { same_calc_shape = true; } - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -@@ -1139,15 +1138,15 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedGradGradIntegrator: " - "Trial and test spaces must both be scalar fields " - "with a gradient operator."; - } - -- inline virtual int GetIntegrationOrder(const FiniteElement & trial_fe, -- const FiniteElement & test_fe, -+ inline virtual int GetIntegrationOrder(const FiniteElement &trial_fe, -+ const FiniteElement &test_fe, - ElementTransformation &Trans) - { - // Same as DiffusionIntegrator -@@ -1156,20 +1155,20 @@ public: - trial_fe.GetOrder() + test_fe.GetOrder() + test_fe.GetDim() - 1; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1182,8 +1181,8 @@ public: - : MixedVectorIntegrator(vq, false) { same_calc_shape = true; } - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -@@ -1191,27 +1190,27 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossGradGradIntegrator: " - "Trial and test spaces must both be scalar fields " - "with a gradient operator."; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1230,8 +1229,8 @@ public: - : MixedVectorIntegrator(mq) { same_calc_shape = true; } - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && test_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1240,27 +1239,27 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCurlCurlIntegrator" - "Trial and test spaces must both be vector fields in 3D " - "with a curl."; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1273,8 +1272,8 @@ public: - : MixedVectorIntegrator(vq, false) { same_calc_shape = true; } - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && trial_fe.GetVDim() == 3 && - test_fe.GetCurlDim() == 3 && test_fe.GetVDim() == 3 && -@@ -1284,27 +1283,27 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlCurlIntegrator: " - "Trial and test spaces must both be vector fields in 3D " - "with a curl."; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1317,8 +1316,8 @@ public: - : MixedVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1327,27 +1326,27 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlGradIntegrator" - "Trial space must be a vector field in 3D with a curl" - "and the test space must be a scalar field with a gradient"; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1360,8 +1359,8 @@ public: - : MixedVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (test_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -@@ -1370,27 +1369,27 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossGradCurlIntegrator" - "Trial space must be a scalar field in 3D with a gradient" - "and the test space must be a vector field with a curl"; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1404,8 +1403,8 @@ public: - : MixedVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetVDim() == 3 && test_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1413,19 +1412,19 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedWeakCurlCrossIntegrator: " - "Trial space must be a vector field in 3D " - "and the test space must be a vector field with a curl"; - } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1439,8 +1438,8 @@ public: - : MixedScalarVectorIntegrator(vq, true, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1448,16 +1447,16 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakCurlCrossIntegrator: " - "Trial space must be a vector field in 2D " - "and the test space must be a vector field with a curl"; - } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - scalar_fe.CalcPhysCurlShape(Trans, dshape); -@@ -1474,8 +1473,8 @@ public: - : MixedVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (test_fe.GetVDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -@@ -1483,24 +1482,24 @@ public: - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossGradIntegrator: " - "Trial space must be a scalar field with a gradient operator" - " and the test space must be a vector field both in 3D."; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysDShape(Trans, shape); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { test_fe.CalcVShape(Trans, shape); } - }; - -@@ -1514,8 +1513,8 @@ public: - : MixedVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && test_fe.GetVDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1523,19 +1522,19 @@ public: - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlIntegrator: " - "Trial space must be a vector field in 3D with a curl " - "and the test space must be a vector field"; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { trial_fe.CalcPhysCurlShape(Trans, shape); } - }; - -@@ -1549,8 +1548,8 @@ public: - : MixedScalarVectorIntegrator(vq, false, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && -@@ -1558,16 +1557,16 @@ public: - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedCrossCurlIntegrator: " - "Trial space must be a vector field in 2D with a curl " - "and the test space must be a vector field"; - } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { - DenseMatrix dshape(shape.GetData(), shape.Size(), 1); - scalar_fe.CalcPhysCurlShape(Trans, dshape); shape *= -1.0; -@@ -1583,8 +1582,8 @@ public: - : MixedScalarVectorIntegrator(vq, true, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && -@@ -1592,19 +1591,19 @@ public: - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarCrossGradIntegrator: " - "Trial space must be a scalar field in 2D with a gradient " - "and the test space must be a scalar field"; - } - -- inline int GetVDim(const FiniteElement & vector_fe) -+ inline int GetVDim(const FiniteElement &vector_fe) - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement & vector_fe, -+ inline virtual void CalcVShape(const FiniteElement &vector_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1617,15 +1616,15 @@ public: - : MixedScalarVectorIntegrator(vq, true, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarCrossProductIntegrator: " - "Trial space must be a vector field in 2D " -@@ -1642,24 +1641,24 @@ public: - : MixedScalarVectorIntegrator(vq, false, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDim() == 2 && test_fe.GetDim() == 2 && - trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakCrossProductIntegrator: " - "Trial space must be a scalar field in 2D " - "and the test space must be a vector field"; - } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { scalar_fe.CalcPhysShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1672,27 +1671,27 @@ public: - : MixedScalarVectorIntegrator(vq, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedDirectionalDerivativeIntegrator: " - "Trial space must be a scalar field with a gradient " - "and the test space must be a scalar field"; - } - -- inline virtual int GetVDim(const FiniteElement & vector_fe) -+ inline virtual int GetVDim(const FiniteElement &vector_fe) - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement & vector_fe, -+ inline virtual void CalcVShape(const FiniteElement &vector_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); } - }; - -@@ -1705,8 +1704,8 @@ public: - : MixedScalarVectorIntegrator(vq, true) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && -@@ -1714,24 +1713,24 @@ public: - test_fe.GetDerivType() == mfem::FiniteElement::DIV ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedGradDivIntegrator: " - "Trial space must be a scalar field with a gradient" - "and the test space must be a vector field with a divergence"; - } - -- inline virtual int GetVDim(const FiniteElement & vector_fe) -+ inline virtual int GetVDim(const FiniteElement &vector_fe) - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement & vector_fe, -+ inline virtual void CalcVShape(const FiniteElement &vector_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -1744,8 +1743,8 @@ public: - : MixedScalarVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - trial_fe.GetDerivType() == mfem::FiniteElement::DIV && -@@ -1754,24 +1753,24 @@ public: - ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedDivGradIntegrator: " - "Trial space must be a vector field with a divergence" - "and the test space must be a scalar field with a gradient"; - } - -- inline virtual int GetVDim(const FiniteElement & vector_fe) -+ inline virtual int GetVDim(const FiniteElement &vector_fe) - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement & vector_fe, -+ inline virtual void CalcVShape(const FiniteElement &vector_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - -- inline virtual void CalcShape(const FiniteElement & scalar_fe, -+ inline virtual void CalcShape(const FiniteElement &scalar_fe, - ElementTransformation &Trans, -- Vector & shape) -+ Vector &shape) - { scalar_fe.CalcPhysDivShape(Trans, shape); } - }; - -@@ -1784,27 +1783,27 @@ public: - : MixedScalarVectorIntegrator(vq, false) {} - - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetRangeType() == mfem::FiniteElement::SCALAR && - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedScalarWeakDivergenceIntegrator: " - "Trial space must be a scalar field " - "and the test space must be a scalar field with a gradient"; - } - -- inline int GetVDim(const FiniteElement & vector_fe) -+ inline int GetVDim(const FiniteElement &vector_fe) - { return space_dim; } - -- inline virtual void CalcVShape(const FiniteElement & vector_fe, -+ inline virtual void CalcVShape(const FiniteElement &vector_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { vector_fe.CalcPhysDShape(Trans, shape); shape *= -1.0; } - }; - -@@ -1825,40 +1824,40 @@ public: - MixedVectorGradientIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -+ - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetDerivType() == mfem::FiniteElement::GRAD && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorGradientIntegrator: " - "Trial spaces must be H1 and the test space must be a " - "vector field in 2D or 3D"; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return space_dim; } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { - trial_fe.CalcPhysDShape(Trans, shape); - } - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes); -- -- virtual void AddMultPA(const Vector&, Vector&) const; -- virtual void AddMultTransposePA(const Vector&, Vector&) const; -- --private: - DenseMatrix Jinv; - - // PA extension -@@ -1883,40 +1882,41 @@ public: - MixedVectorCurlIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -+ - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetCurlDim() == 3 && test_fe.GetVDim() == 3 && - trial_fe.GetDerivType() == mfem::FiniteElement::CURL && - test_fe.GetRangeType() == mfem::FiniteElement::VECTOR ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorCurlIntegrator: " - "Trial space must be H(Curl) and the test space must be a " - "vector field in 3D"; - } - -- inline virtual int GetTrialVDim(const FiniteElement & trial_fe) -+ inline virtual int GetTrialVDim(const FiniteElement &trial_fe) - { return trial_fe.GetCurlDim(); } - -- inline virtual void CalcTrialShape(const FiniteElement & trial_fe, -+ inline virtual void CalcTrialShape(const FiniteElement &trial_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { - trial_fe.CalcPhysCurlShape(Trans, shape); - } - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes); -- -- virtual void AddMultPA(const Vector&, Vector&) const; -- virtual void AddMultTransposePA(const Vector&, Vector&) const; -- - private: - // PA extension - Vector pa_data; -@@ -1942,40 +1942,41 @@ public: - MixedVectorWeakCurlIntegrator(MatrixCoefficient &mq) - : MixedVectorIntegrator(mq) {} - -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -+ - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetVDim() == 3 && test_fe.GetCurlDim() == 3 && - trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetDerivType() == mfem::FiniteElement::CURL ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorWeakCurlIntegrator: " - "Trial space must be vector field in 3D and the " - "test space must be H(Curl)"; - } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return test_fe.GetCurlDim(); } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { - test_fe.CalcPhysCurlShape(Trans, shape); - } - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes); -- -- virtual void AddMultPA(const Vector&, Vector&) const; -- virtual void AddMultTransposePA(const Vector&, Vector&) const; -- - private: - // PA extension - Vector pa_data; -@@ -2001,26 +2002,26 @@ public: - - protected: - inline virtual bool VerifyFiniteElementTypes( -- const FiniteElement & trial_fe, -- const FiniteElement & test_fe) const -+ const FiniteElement &trial_fe, -+ const FiniteElement &test_fe) const - { - return (trial_fe.GetRangeType() == mfem::FiniteElement::VECTOR && - test_fe.GetDerivType() == mfem::FiniteElement::GRAD ); - } - -- inline virtual const char * FiniteElementTypeFailureMessage() const -+ inline virtual const char *FiniteElementTypeFailureMessage() const - { - return "MixedVectorWeakDivergenceIntegrator: " - "Trial space must be vector field and the " - "test space must be H1"; - } - -- inline virtual int GetTestVDim(const FiniteElement & test_fe) -+ inline virtual int GetTestVDim(const FiniteElement &test_fe) - { return space_dim; } - -- inline virtual void CalcTestShape(const FiniteElement & test_fe, -+ inline virtual void CalcTestShape(const FiniteElement &test_fe, - ElementTransformation &Trans, -- DenseMatrix & shape) -+ DenseMatrix &shape) - { - test_fe.CalcPhysDShape(Trans, shape); - shape *= -1.0; -@@ -2043,6 +2044,7 @@ private: - DenseMatrix gshape; - DenseMatrix Jadj; - DenseMatrix elmat_comp; -+ - // PA extension - Vector pa_data; - const DofToQuad *trial_maps, *test_maps; ///< Not owned -@@ -2053,13 +2055,13 @@ private: - public: - GradientIntegrator() : - Q{NULL}, trial_maps{NULL}, test_maps{NULL}, geom{NULL} -- { } -+ {} - GradientIntegrator(Coefficient *q_) : - Q{q_}, trial_maps{NULL}, test_maps{NULL}, geom{NULL} -- { } -+ {} - GradientIntegrator(Coefficient &q) : - Q{&q}, trial_maps{NULL}, test_maps{NULL}, geom{NULL} -- { } -+ {} - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2071,6 +2073,7 @@ public: - const FiniteElementSpace &test_fes); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - - static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -@@ -2096,7 +2099,6 @@ private: - #endif - - // PA extension -- const FiniteElementSpace *fespace; - const DofToQuad *maps; ///< Not owned - const GeometricFactors *geom; ///< Not owned - int dim, ne, dofs1D, quad1D; -@@ -2107,30 +2109,31 @@ public: - /// Construct a diffusion integrator with coefficient Q = 1 - DiffusionIntegrator(const IntegrationRule *ir = nullptr) - : BilinearFormIntegrator(ir), -- Q(NULL), VQ(NULL), MQ(NULL), maps(NULL), geom(NULL) { } -+ Q(NULL), VQ(NULL), MQ(NULL), maps(NULL), geom(NULL) {} - - /// Construct a diffusion integrator with a scalar coefficient q - DiffusionIntegrator(Coefficient &q, const IntegrationRule *ir = nullptr) - : BilinearFormIntegrator(ir), -- Q(&q), VQ(NULL), MQ(NULL), maps(NULL), geom(NULL) { } -+ Q(&q), VQ(NULL), MQ(NULL), maps(NULL), geom(NULL) {} - - /// Construct a diffusion integrator with a vector coefficient q - DiffusionIntegrator(VectorCoefficient &q, - const IntegrationRule *ir = nullptr) - : BilinearFormIntegrator(ir), -- Q(NULL), VQ(&q), MQ(NULL), maps(NULL), geom(NULL) { } -+ Q(NULL), VQ(&q), MQ(NULL), maps(NULL), geom(NULL) {} - - /// Construct a diffusion integrator with a matrix coefficient q - DiffusionIntegrator(MatrixCoefficient &q, - const IntegrationRule *ir = nullptr) - : BilinearFormIntegrator(ir), -- Q(NULL), VQ(NULL), MQ(&q), maps(NULL), geom(NULL) { } -+ Q(NULL), VQ(NULL), MQ(&q), maps(NULL), geom(NULL) {} - - /** Given a particular Finite Element computes the element stiffness matrix - elmat. */ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - /** Given a trial and test Finite Element computes the element stiffness - matrix elmat. */ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, -@@ -2154,23 +2157,23 @@ public: - Vector &flux, Vector *d_energy = NULL); - - using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &fes); - -- virtual void AssembleMF(const FiniteElementSpace &fes); -+ virtual void AssembleDiagonalPA(Vector &diag); - -- virtual void AssemblePA(const FiniteElementSpace &fes); -+ virtual void AddMultPA(const Vector &x, Vector &y) const; - -- virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add); -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -- virtual void AssembleDiagonalPA(Vector &diag); -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); - - virtual void AssembleDiagonalMF(Vector &diag); - -- virtual void AddMultMF(const Vector&, Vector&) const; -- -- virtual void AddMultPA(const Vector&, Vector&) const; -+ virtual void AddMultMF(const Vector &x, Vector &y) const; - -- virtual void AddMultTransposePA(const Vector&, Vector&) const; -+ using BilinearFormIntegrator::AssembleEA; -+ virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - - static const IntegrationRule &GetRule(const FiniteElement &trial_fe, - const FiniteElement &test_fe); -@@ -2183,55 +2186,62 @@ public: - /** Class for local mass matrix assembling a(u,v) := (Q u, v) */ - class MassIntegrator: public BilinearFormIntegrator - { -+private: - friend class DGMassInverse; -+ - protected: - #ifndef MFEM_THREAD_SAFE - Vector shape, te_shape; - #endif - Coefficient *Q; -+ - // PA extension -- const FiniteElementSpace *fespace; - Vector pa_data; -- const DofToQuad *maps; ///< Not owned -- const GeometricFactors *geom; ///< Not owned -+ const DofToQuad *maps; ///< Not owned -+ const GeometricFactors *geom; ///< Not owned -+ const FaceGeometricFactors *face_geom; ///< Not owned - int dim, ne, nq, dofs1D, quad1D; - - public: - MassIntegrator(const IntegrationRule *ir = NULL) -- : BilinearFormIntegrator(ir), Q(NULL), maps(NULL), geom(NULL) { } -+ : BilinearFormIntegrator(ir), Q(NULL), maps(NULL), geom(NULL) {} - - /// Construct a mass integrator with coefficient q - MassIntegrator(Coefficient &q, const IntegrationRule *ir = NULL) -- : BilinearFormIntegrator(ir), Q(&q), maps(NULL), geom(NULL) { } -+ : BilinearFormIntegrator(ir), Q(&q), maps(NULL), geom(NULL) {} - - /** Given a particular Finite Element computes the element mass matrix - elmat. */ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); - - using BilinearFormIntegrator::AssemblePA; -- -- virtual void AssembleMF(const FiniteElementSpace &fes); -- - virtual void AssemblePA(const FiniteElementSpace &fes); - -- virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add); -+ using BilinearFormIntegrator::AssemblePABoundary; -+ virtual void AssemblePABoundary(const FiniteElementSpace &fes); - - virtual void AssembleDiagonalPA(Vector &diag); - -- virtual void AssembleDiagonalMF(Vector &diag); -+ virtual void AddMultPA(const Vector &x, Vector &y) const; - -- virtual void AddMultMF(const Vector&, Vector&) const; -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -- virtual void AddMultPA(const Vector&, Vector&) const; -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); - -- virtual void AddMultTransposePA(const Vector&, Vector&) const; -+ virtual void AssembleDiagonalMF(Vector &diag); -+ -+ virtual void AddMultMF(const Vector &x, Vector &y) const; -+ -+ using BilinearFormIntegrator::AssembleEA; -+ virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - - static const IntegrationRule &GetRule(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2246,7 +2256,7 @@ public: - class BoundaryMassIntegrator : public MassIntegrator - { - public: -- BoundaryMassIntegrator(Coefficient &q) : MassIntegrator(q) { } -+ BoundaryMassIntegrator(Coefficient &q) : MassIntegrator(q) {} - - using BilinearFormIntegrator::AssembleFaceMatrix; - -@@ -2262,6 +2272,7 @@ class ConvectionIntegrator : public BilinearFormIntegrator - protected: - VectorCoefficient *Q; - double alpha; -+ - // PA extension - Vector pa_data; - const DofToQuad *maps; ///< Not owned -@@ -2278,34 +2289,30 @@ public: - ConvectionIntegrator(VectorCoefficient &q, double a = 1.0) - : Q(&q) { alpha = a; } - -- virtual void AssembleElementMatrix(const FiniteElement &, -- ElementTransformation &, -- DenseMatrix &); -+ virtual void AssembleElementMatrix(const FiniteElement &fes, -+ ElementTransformation &Trans, -+ DenseMatrix &elmat); - - using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &fes); - -- virtual void AssembleMF(const FiniteElementSpace &fes); -+ virtual void AssembleDiagonalPA(Vector &diag); - -- virtual void AssemblePA(const FiniteElementSpace&); -+ virtual void AddMultPA(const Vector &x, Vector &y) const; - -- virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat, -- const bool add); -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - -- virtual void AssembleDiagonalPA(Vector &diag); -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); - - virtual void AssembleDiagonalMF(Vector &diag); - -- virtual void AddMultMF(const Vector&, Vector&) const; -- -- virtual void AddMultPA(const Vector&, Vector&) const; -- -- virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -+ virtual void AddMultMF(const Vector &x, Vector &y) const; - -- static const IntegrationRule &GetRule(const FiniteElement &el, -- ElementTransformation &Trans); -+ using BilinearFormIntegrator::AssembleEA; -+ virtual void AssembleEA(const FiniteElementSpace &fes, Vector &emat); - -- static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -- const FiniteElement &test_fe, -+ static const IntegrationRule &GetRule(const FiniteElement &fe, - ElementTransformation &Trans); - - bool SupportsCeed() const { return DeviceCanUseCeed(); } -@@ -2319,7 +2326,7 @@ class ConservativeConvectionIntegrator : public TransposeIntegrator - { - public: - ConservativeConvectionIntegrator(VectorCoefficient &q, double a = 1.0) -- : TransposeIntegrator(new ConvectionIntegrator(q, -a)) { } -+ : TransposeIntegrator(new ConvectionIntegrator(q, -a)) {} - }; - - /// alpha (q . grad u, v) using the "group" FE discretization -@@ -2336,6 +2343,7 @@ private: - public: - GroupConvectionIntegrator(VectorCoefficient &q, double a = 1.0) - : Q(&q) { alpha = a; } -+ - virtual void AssembleElementMatrix(const FiniteElement &, - ElementTransformation &, - DenseMatrix &); -@@ -2357,6 +2365,7 @@ protected: - Coefficient *Q; - VectorCoefficient *VQ; - MatrixCoefficient *MQ; -+ - // PA extension - Vector pa_data; - const DofToQuad *maps; ///< Not owned -@@ -2366,21 +2375,21 @@ protected: - public: - /// Construct an integrator with coefficient 1.0 - VectorMassIntegrator() -- : vdim(-1), Q_order(0), Q(NULL), VQ(NULL), MQ(NULL) { } -+ : vdim(-1), Q_order(0), Q(NULL), VQ(NULL), MQ(NULL) {} - /** Construct an integrator with scalar coefficient q. If possible, save - memory by using a scalar integrator since the resulting matrix is block - diagonal with the same diagonal block repeated. */ - VectorMassIntegrator(Coefficient &q, int qo = 0) -- : vdim(-1), Q_order(qo), Q(&q), VQ(NULL), MQ(NULL) { } -+ : vdim(-1), Q_order(qo), Q(&q), VQ(NULL), MQ(NULL) {} - VectorMassIntegrator(Coefficient &q, const IntegrationRule *ir) - : BilinearFormIntegrator(ir), vdim(-1), Q_order(0), Q(&q), VQ(NULL), -- MQ(NULL) { } -+ MQ(NULL) {} - /// Construct an integrator with diagonal coefficient q - VectorMassIntegrator(VectorCoefficient &q, int qo = 0) -- : vdim(q.GetVDim()), Q_order(qo), Q(NULL), VQ(&q), MQ(NULL) { } -+ : vdim(q.GetVDim()), Q_order(qo), Q(NULL), VQ(&q), MQ(NULL) {} - /// Construct an integrator with matrix coefficient q - VectorMassIntegrator(MatrixCoefficient &q, int qo = 0) -- : vdim(q.GetVDim()), Q_order(qo), Q(NULL), VQ(NULL), MQ(&q) { } -+ : vdim(q.GetVDim()), Q_order(qo), Q(NULL), VQ(NULL), MQ(&q) {} - - int GetVDim() const { return vdim; } - void SetVDim(int vdim_) { vdim = vdim_; } -@@ -2388,21 +2397,29 @@ public: - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); -- virtual void AssembleMF(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); -- virtual void AssembleDiagonalMF(Vector &diag); -+ - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalMF(Vector &diag); -+ - virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - bool SupportsCeed() const { return DeviceCanUseCeed(); } - }; - -- - /** Class for integrating (div u, p) where u is a vector field given by - VectorFiniteElement through Piola transformation (for RT elements); p is - scalar function given by FiniteElement through standard transformation. -@@ -2416,13 +2433,6 @@ class VectorFEDivergenceIntegrator : public BilinearFormIntegrator - protected: - Coefficient *Q; - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes); -- -- virtual void AddMultPA(const Vector&, Vector&) const; -- virtual void AddMultTransposePA(const Vector&, Vector&) const; -- - private: - #ifndef MFEM_THREAD_SAFE - Vector divshape, shape; -@@ -2438,17 +2448,26 @@ private: - public: - VectorFEDivergenceIntegrator() { Q = NULL; } - VectorFEDivergenceIntegrator(Coefficient &q) { Q = &q; } -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, -- DenseMatrix &elmat) { } -+ DenseMatrix &elmat) {} -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); - -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes); -+ - virtual void AssembleDiagonalPA_ADAt(const Vector &D, Vector &diag); --}; - -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -+}; - - /** Integrator for `(-Q u, grad v)` for Nedelec (`u`) and H1 (`v`) elements. - This is equivalent to a weak divergence of the Nedelec basis functions. */ -@@ -2468,9 +2487,11 @@ private: - public: - VectorFEWeakDivergenceIntegrator() { Q = NULL; } - VectorFEWeakDivergenceIntegrator(Coefficient &q) { Q = &q; } -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, -- DenseMatrix &elmat) { } -+ DenseMatrix &elmat) {} -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -2494,9 +2515,11 @@ private: - public: - VectorFECurlIntegrator() { Q = NULL; } - VectorFECurlIntegrator(Coefficient &q) { Q = &q; } -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, -- DenseMatrix &elmat) { } -+ DenseMatrix &elmat) {} -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -2515,11 +2538,13 @@ private: - Vector shape, dshapedxi; - - public: -- DerivativeIntegrator(Coefficient &q, int i) : Q(&q), xi(i) { } -+ DerivativeIntegrator(Coefficient &q, int i) : Q(&q), xi(i) {} -+ - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat) - { AssembleElementMatrix2(el,el,Trans,elmat); } -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, -@@ -2555,12 +2580,12 @@ public: - CurlCurlIntegrator() { Q = NULL; DQ = NULL; MQ = NULL; } - /// Construct a bilinear form integrator for Nedelec elements - CurlCurlIntegrator(Coefficient &q, const IntegrationRule *ir = NULL) : -- BilinearFormIntegrator(ir), Q(&q), DQ(NULL), MQ(NULL) { } -+ BilinearFormIntegrator(ir), Q(&q), DQ(NULL), MQ(NULL) {} - CurlCurlIntegrator(DiagonalMatrixCoefficient &dq, - const IntegrationRule *ir = NULL) : -- BilinearFormIntegrator(ir), Q(NULL), DQ(&dq), MQ(NULL) { } -+ BilinearFormIntegrator(ir), Q(NULL), DQ(&dq), MQ(NULL) {} - CurlCurlIntegrator(MatrixCoefficient &mq, const IntegrationRule *ir = NULL) : -- BilinearFormIntegrator(ir), Q(NULL), DQ(NULL), MQ(&mq) { } -+ BilinearFormIntegrator(ir), Q(NULL), DQ(NULL), MQ(&mq) {} - - /* Given a particular Finite Element, compute the - element curl-curl matrix elmat */ -@@ -2585,8 +2610,10 @@ public: - - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalPA(Vector &diag); -+ - virtual void AddMultPA(const Vector &x, Vector &y) const; -- virtual void AssembleDiagonalPA(Vector& diag); - - const Coefficient *GetCoefficient() const { return Q; } - }; -@@ -2606,7 +2633,7 @@ protected: - public: - VectorCurlCurlIntegrator() { Q = NULL; } - -- VectorCurlCurlIntegrator(Coefficient &q) : Q(&q) { } -+ VectorCurlCurlIntegrator(Coefficient &q) : Q(&q) {} - - /// Assemble an element matrix - virtual void AssembleElementMatrix(const FiniteElement &el, -@@ -2637,9 +2664,9 @@ private: - DenseMatrix curlshape; - DenseMatrix elmat_comp; - public: -- MixedCurlIntegrator() : Q{NULL} { } -- MixedCurlIntegrator(Coefficient *q_) : Q{q_} { } -- MixedCurlIntegrator(Coefficient &q) : Q{&q} { } -+ MixedCurlIntegrator() : Q{NULL} {} -+ MixedCurlIntegrator(Coefficient *q_) : Q{q_} {} -+ MixedCurlIntegrator(Coefficient &q) : Q{&q} {} - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2693,18 +2720,21 @@ public: - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &fes); -+ virtual void AssemblePA(const FiniteElementSpace &fes) { AssemblePA(fes, fes); } - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); -+ -+ virtual void AssembleDiagonalPA(Vector &diag); -+ - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -- virtual void AssembleDiagonalPA(Vector& diag); - - const Coefficient *GetCoefficient() const { return Q; } - }; -@@ -2722,6 +2752,7 @@ private: - DenseMatrix dshape; - DenseMatrix gshape; - DenseMatrix Jadj; -+ - // PA extension - Vector pa_data; - const DofToQuad *trial_maps, *test_maps; ///< Not owned -@@ -2735,10 +2766,10 @@ public: - { } - VectorDivergenceIntegrator(Coefficient *q_) : - Q(q_), trial_maps(NULL), test_maps(NULL), geom(NULL) -- { } -+ {} - VectorDivergenceIntegrator(Coefficient &q) : - Q(&q), trial_maps(NULL), test_maps(NULL), geom(NULL) -- { } -+ {} - - virtual void AssembleElementMatrix2(const FiniteElement &trial_fe, - const FiniteElement &test_fe, -@@ -2750,6 +2781,7 @@ public: - const FiniteElementSpace &test_fes); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - - static const IntegrationRule &GetRule(const FiniteElement &trial_fe, -@@ -2763,11 +2795,6 @@ class DivDivIntegrator: public BilinearFormIntegrator - protected: - Coefficient *Q; - -- using BilinearFormIntegrator::AssemblePA; -- virtual void AssemblePA(const FiniteElementSpace &fes); -- virtual void AddMultPA(const Vector &x, Vector &y) const; -- virtual void AssembleDiagonalPA(Vector& diag); -- - private: - #ifndef MFEM_THREAD_SAFE - Vector divshape, te_divshape; -@@ -2783,7 +2810,7 @@ private: - public: - DivDivIntegrator() { Q = NULL; } - DivDivIntegrator(Coefficient &q, const IntegrationRule *ir = NULL) : -- BilinearFormIntegrator(ir), Q(&q) { } -+ BilinearFormIntegrator(ir), Q(&q) {} - - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, -@@ -2794,6 +2821,13 @@ public: - ElementTransformation &Trans, - DenseMatrix &elmat); - -+ using BilinearFormIntegrator::AssemblePA; -+ virtual void AssemblePA(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalPA(Vector &diag); -+ -+ virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - const Coefficient *GetCoefficient() const { return Q; } - }; - -@@ -2830,7 +2864,7 @@ private: - Vector vcoeff; - - public: -- VectorDiffusionIntegrator() { } -+ VectorDiffusionIntegrator() {} - - /** \brief Integrator with unit coefficient for caller-specified vector - dimension. -@@ -2838,13 +2872,13 @@ public: - If the vector dimension does not match the true dimension of the space, - the resulting element matrix will be mathematically invalid. */ - VectorDiffusionIntegrator(int vector_dimension) -- : vdim(vector_dimension) { } -+ : vdim(vector_dimension) {} - - VectorDiffusionIntegrator(Coefficient &q) -- : Q(&q) { } -+ : Q(&q) {} - - VectorDiffusionIntegrator(Coefficient &q, const IntegrationRule *ir) -- : BilinearFormIntegrator(ir), Q(&q) { } -+ : BilinearFormIntegrator(ir), Q(&q) {} - - /** \brief Integrator with scalar coefficient for caller-specified vector - dimension. -@@ -2855,7 +2889,7 @@ public: - If the vector dimension does not match the true dimension of the space, - the resulting element matrix will be mathematically invalid. */ - VectorDiffusionIntegrator(Coefficient &q, int vector_dimension) -- : Q(&q), vdim(vector_dimension) { } -+ : Q(&q), vdim(vector_dimension) {} - - /** \brief Integrator with \c VectorCoefficient. The vector dimension of the - \c FiniteElementSpace is assumed to be the same as the dimension of the -@@ -2867,7 +2901,7 @@ public: - If the vector dimension does not match the true dimension of the space, - the resulting element matrix will be mathematically invalid. */ - VectorDiffusionIntegrator(VectorCoefficient &vq) -- : VQ(&vq), vdim(vq.GetVDim()) { } -+ : VQ(&vq), vdim(vq.GetVDim()) {} - - /** \brief Integrator with \c MatrixCoefficient. The vector dimension of the - \c FiniteElementSpace is assumed to be the same as the dimension of the -@@ -2879,21 +2913,30 @@ public: - If the vector dimension does not match the true dimension of the space, - the resulting element matrix will be mathematically invalid. */ - VectorDiffusionIntegrator(MatrixCoefficient& mq) -- : MQ(&mq), vdim(mq.GetVDim()) { } -+ : MQ(&mq), vdim(mq.GetVDim()) {} - - virtual void AssembleElementMatrix(const FiniteElement &el, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - virtual void AssembleElementVector(const FiniteElement &el, - ElementTransformation &Tr, - const Vector &elfun, Vector &elvect); -+ - using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &fes); -- virtual void AssembleMF(const FiniteElementSpace &fes); -+ - virtual void AssembleDiagonalPA(Vector &diag); -- virtual void AssembleDiagonalMF(Vector &diag); -+ - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ -+ using BilinearFormIntegrator::AssembleMF; -+ virtual void AssembleMF(const FiniteElementSpace &fes); -+ -+ virtual void AssembleDiagonalMF(Vector &diag); -+ - virtual void AddMultMF(const Vector &x, Vector &y) const; -+ - bool SupportsCeed() const { return DeviceCanUseCeed(); } - }; - -@@ -2988,6 +3031,7 @@ protected: - Coefficient *rho; - VectorCoefficient *u; - double alpha, beta; -+ - // PA extension - Vector pa_data; - const DofToQuad *maps; ///< Not owned -@@ -3016,24 +3060,20 @@ public: - FaceElementTransformations &Trans, - DenseMatrix &elmat); - -- using BilinearFormIntegrator::AssemblePA; -- - virtual void AssemblePAInteriorFaces(const FiniteElementSpace &fes); - - virtual void AssemblePABoundaryFaces(const FiniteElementSpace &fes); - -- virtual void AddMultTransposePA(const Vector &x, Vector &y) const; -+ virtual void AddMultPA(const Vector &x, Vector &y) const; - -- virtual void AddMultPA(const Vector&, Vector&) const; -+ virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - - virtual void AssembleEAInteriorFaces(const FiniteElementSpace& fes, - Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add); -+ Vector &ea_data_ext); - - virtual void AssembleEABoundaryFaces(const FiniteElementSpace& fes, -- Vector &ea_data_bdr, -- const bool add); -+ Vector &ea_data_bdr); - - static const IntegrationRule &GetRule(Geometry::Type geom, int order, - FaceElementTransformations &T); -@@ -3056,14 +3096,14 @@ class NonconservativeDGTraceIntegrator : public TransposeIntegrator - { - public: - NonconservativeDGTraceIntegrator(VectorCoefficient &u, double a) -- : TransposeIntegrator(new DGTraceIntegrator(u, -a, 0.5*a)) { } -+ : TransposeIntegrator(new DGTraceIntegrator(u, -a, 0.5*a)) {} - - NonconservativeDGTraceIntegrator(VectorCoefficient &u, double a, double b) -- : TransposeIntegrator(new DGTraceIntegrator(u, -a, b)) { } -+ : TransposeIntegrator(new DGTraceIntegrator(u, -a, b)) {} - - NonconservativeDGTraceIntegrator(Coefficient &rho, VectorCoefficient &u, - double a, double b) -- : TransposeIntegrator(new DGTraceIntegrator(rho, u, -a, b)) { } -+ : TransposeIntegrator(new DGTraceIntegrator(rho, u, -a, b)) {} - }; - - /** Integrator for the DG form: -@@ -3091,11 +3131,12 @@ protected: - - public: - DGDiffusionIntegrator(const double s, const double k) -- : Q(NULL), MQ(NULL), sigma(s), kappa(k) { } -+ : Q(NULL), MQ(NULL), sigma(s), kappa(k) {} - DGDiffusionIntegrator(Coefficient &q, const double s, const double k) -- : Q(&q), MQ(NULL), sigma(s), kappa(k) { } -+ : Q(&q), MQ(NULL), sigma(s), kappa(k) {} - DGDiffusionIntegrator(MatrixCoefficient &q, const double s, const double k) -- : Q(NULL), MQ(&q), sigma(s), kappa(k) { } -+ : Q(NULL), MQ(&q), sigma(s), kappa(k) {} -+ - using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &el1, - const FiniteElement &el2, -@@ -3227,11 +3268,11 @@ class DGElasticityIntegrator : public BilinearFormIntegrator - { - public: - DGElasticityIntegrator(double alpha_, double kappa_) -- : lambda(NULL), mu(NULL), alpha(alpha_), kappa(kappa_) { } -+ : lambda(NULL), mu(NULL), alpha(alpha_), kappa(kappa_) {} - - DGElasticityIntegrator(Coefficient &lambda_, Coefficient &mu_, - double alpha_, double kappa_) -- : lambda(&lambda_), mu(&mu_), alpha(alpha_), kappa(kappa_) { } -+ : lambda(&lambda_), mu(&mu_), alpha(alpha_), kappa(kappa_) {} - - using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &el1, -@@ -3282,7 +3323,8 @@ private: - Vector face_shape, shape1, shape2; - - public: -- TraceJumpIntegrator() { } -+ TraceJumpIntegrator() {} -+ - using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &trial_face_fe, - const FiniteElement &test_fe1, -@@ -3301,7 +3343,8 @@ private: - DenseMatrix shape1, shape2; - - public: -- NormalTraceJumpIntegrator() { } -+ NormalTraceJumpIntegrator() {} -+ - using BilinearFormIntegrator::AssembleFaceMatrix; - virtual void AssembleFaceMatrix(const FiniteElement &trial_face_fe, - const FiniteElement &test_fe1, -@@ -3393,8 +3436,7 @@ public: - - /** Abstract class to serve as a base for local interpolators to be used in the - DiscreteLinearOperator class. */ --class DiscreteInterpolator : public BilinearFormIntegrator { }; -- -+class DiscreteInterpolator : public BilinearFormIntegrator {}; - - /** Class for constructing the gradient as a DiscreteLinearOperator from an - H1-conforming space to an H(curl)-conforming space. The range space can be -@@ -3402,7 +3444,7 @@ class DiscreteInterpolator : public BilinearFormIntegrator { }; - class GradientInterpolator : public DiscreteInterpolator - { - public: -- GradientInterpolator() : dofquad_fe(NULL) { } -+ GradientInterpolator() : dofquad_fe(NULL) {} - virtual ~GradientInterpolator() { delete dofquad_fe; } - - virtual void AssembleElementMatrix2(const FiniteElement &h1_fe, -@@ -3411,17 +3453,17 @@ public: - DenseMatrix &elmat) - { nd_fe.ProjectGrad(h1_fe, Trans, elmat); } - -- using BilinearFormIntegrator::AssemblePA; -- - /** @brief Setup method for PA data. - - @param[in] trial_fes H1 Lagrange space - @param[in] test_fes H(curl) Nedelec space - */ -+ using BilinearFormIntegrator::AssemblePA; - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - - private: -@@ -3434,7 +3476,6 @@ private: - int dim, ne, o_dofs1D, c_dofs1D; - }; - -- - /** Class for constructing the identity map as a DiscreteLinearOperator. This - is the discrete embedding matrix when the domain space is a subspace of - the range space. Otherwise, a dof projection matrix is constructed. */ -@@ -3450,11 +3491,11 @@ public: - { ran_fe.Project(dom_fe, Trans, elmat); } - - using BilinearFormIntegrator::AssemblePA; -- - virtual void AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes); - - virtual void AddMultPA(const Vector &x, Vector &y) const; -+ - virtual void AddMultTransposePA(const Vector &x, Vector &y) const; - - virtual ~IdentityInterpolator() { delete dofquad_fe; } -@@ -3470,7 +3511,6 @@ private: - Vector pa_data; - }; - -- - /** Class for constructing the (local) discrete curl matrix which can be used - as an integrator in a DiscreteLinearOperator object to assemble the global - discrete curl matrix. */ -@@ -3484,7 +3524,6 @@ public: - { ran_fe.ProjectCurl(dom_fe, Trans, elmat); } - }; - -- - /** Class for constructing the (local) discrete divergence matrix which can - be used as an integrator in a DiscreteLinearOperator object to assemble - the global discrete divergence matrix. -@@ -3503,7 +3542,6 @@ public: - { ran_fe.ProjectDiv(dom_fe, Trans, elmat); } - }; - -- - /** A trace face interpolator class for interpolating the normal component of - the domain space, e.g. vector H1, into the range space, e.g. the trace of - RT which uses FiniteElement::INTEGRAL map type. */ -@@ -3522,7 +3560,7 @@ public: - class ScalarProductInterpolator : public DiscreteInterpolator - { - public: -- ScalarProductInterpolator(Coefficient & sc) : Q(&sc) { } -+ ScalarProductInterpolator(Coefficient &sc) : Q(&sc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, -@@ -3539,13 +3577,14 @@ protected: - class ScalarVectorProductInterpolator : public DiscreteInterpolator - { - public: -- ScalarVectorProductInterpolator(Coefficient & sc) -- : Q(&sc) { } -+ ScalarVectorProductInterpolator(Coefficient &sc) -+ : Q(&sc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - protected: - Coefficient *Q; - }; -@@ -3556,13 +3595,14 @@ protected: - class VectorScalarProductInterpolator : public DiscreteInterpolator - { - public: -- VectorScalarProductInterpolator(VectorCoefficient & vc) -- : VQ(&vc) { } -+ VectorScalarProductInterpolator(VectorCoefficient &vc) -+ : VQ(&vc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &dom_fe, - const FiniteElement &ran_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - protected: - VectorCoefficient *VQ; - }; -@@ -3572,13 +3612,14 @@ protected: - class ScalarCrossProductInterpolator : public DiscreteInterpolator - { - public: -- ScalarCrossProductInterpolator(VectorCoefficient & vc) -- : VQ(&vc) { } -+ ScalarCrossProductInterpolator(VectorCoefficient &vc) -+ : VQ(&vc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &nd_fe, - const FiniteElement &l2_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - protected: - VectorCoefficient *VQ; - }; -@@ -3589,13 +3630,14 @@ protected: - class VectorCrossProductInterpolator : public DiscreteInterpolator - { - public: -- VectorCrossProductInterpolator(VectorCoefficient & vc) -- : VQ(&vc) { } -+ VectorCrossProductInterpolator(VectorCoefficient &vc) -+ : VQ(&vc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &nd_fe, - const FiniteElement &rt_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - protected: - VectorCoefficient *VQ; - }; -@@ -3606,27 +3648,16 @@ protected: - class VectorInnerProductInterpolator : public DiscreteInterpolator - { - public: -- VectorInnerProductInterpolator(VectorCoefficient & vc) : VQ(&vc) { } -+ VectorInnerProductInterpolator(VectorCoefficient &vc) : VQ(&vc) {} - - virtual void AssembleElementMatrix2(const FiniteElement &rt_fe, - const FiniteElement &l2_fe, - ElementTransformation &Trans, - DenseMatrix &elmat); -+ - protected: - VectorCoefficient *VQ; - }; - -- -- --// PA Diffusion Assemble 2D kernel --template --void PADiffusionSetup2D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- const Vector &c, -- Vector &d); -- - } - #endif -diff --git a/fem/bilininteg_hcurl.cpp b/fem/bilininteg_hcurl.cpp -deleted file mode 100644 -index e8762a71e..000000000 ---- a/fem/bilininteg_hcurl.cpp -+++ /dev/null -@@ -1,7764 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qspace.hpp" -- --using namespace std; -- --namespace mfem --{ -- --void PAHcurlHdivSetup3D(const int Q1D, -- const int coeffDim, -- const int NE, -- const bool transpose, -- const Array &w_, -- const Vector &j, -- Vector &coeff_, -- Vector &op); -- --void PAHcurlMassApply2D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE); -- auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qy][qx][c] = 0.0; -- } -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + (dy * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qy][qx][c] += massX[qx] * wy; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,0,e); -- const double O21 = op(qx,qy,1,e); -- const double O12 = symmetric ? O21 : op(qx,qy,2,e); -- const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e); -- const double massX = mass[qy][qx][0]; -- const double massY = mass[qy][qx][1]; -- mass[qy][qx][0] = (O11*massX)+(O12*massY); -- mass[qy][qx][1] = (O21*massX)+(O22*massY); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double massX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy; -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- } // loop qy -- }); // end of element loop --} -- --void PAHcurlMassAssembleDiagonal2D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE); -- auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double mass[MAX_Q1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qx] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- -- mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) : -- op(qx,qy,symmetric ? 2 : 3, e)); -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- }); // end of element loop --} -- --void PAHcurlMassAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag) --{ -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -- auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) : -- (symmetric ? 5 : 8)); -- -- double mass[MAX_Q1D]; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qx] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -- -- mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e); -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx; -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- }); // end of element loop --} -- --template --void SmemPAHcurlMassAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag) --{ -- MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -- auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- constexpr int VDIM = 3; -- constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D; -- constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D; -- -- MFEM_SHARED double sBo[tQ1D][tD1D]; -- MFEM_SHARED double sBc[tQ1D][tD1D]; -- -- double op3[3]; -- MFEM_SHARED double sop[3][tQ1D][tQ1D]; -- -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- op3[0] = op(qx,qy,qz,0,e); -- op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e); -- op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e); -- } -- } -- } -- -- const int tidx = MFEM_THREAD_ID(x); -- const int tidy = MFEM_THREAD_ID(y); -- const int tidz = MFEM_THREAD_ID(z); -- -- if (tidz == 0) -- { -- MFEM_FOREACH_THREAD(d,y,D1D) -- { -- MFEM_FOREACH_THREAD(q,x,Q1D) -- { -- sBc[q][d] = Bc(q,d); -- if (d < D1D-1) -- { -- sBo[q][d] = Bo(q,d); -- } -- } -- } -- } -- MFEM_SYNC_THREAD; -- -- int osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double dxyz = 0.0; -- -- for (int qz=0; qz < Q1D; ++qz) -- { -- if (tidz == qz) -- { -- for (int i=0; i<3; ++i) -- { -- sop[i][tidx][tidy] = op3[i]; -- } -- } -- -- MFEM_SYNC_THREAD; -- -- MFEM_FOREACH_THREAD(dz,z,D1Dz) -- { -- const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]); -- -- MFEM_FOREACH_THREAD(dy,y,D1Dy) -- { -- MFEM_FOREACH_THREAD(dx,x,D1Dx) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]); -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]); -- dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz; -- } -- } -- } -- } -- } -- -- MFEM_SYNC_THREAD; -- } // qz loop -- -- MFEM_FOREACH_THREAD(dz,z,D1Dz) -- { -- MFEM_FOREACH_THREAD(dy,y,D1Dy) -- { -- MFEM_FOREACH_THREAD(dx,x,D1Dx) -- { -- D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // c loop -- }); // end of element loop --} -- --void PAHcurlMassApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double massXY[MAX_Q1D][MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massXY[qy][qx] = 0.0; -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- massXY[qy][qx] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,qz,0,e); -- const double O12 = op(qx,qy,qz,1,e); -- const double O13 = op(qx,qy,qz,2,e); -- const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e); -- const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e); -- const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e); -- const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e); -- const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e); -- const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e); -- const double massX = mass[qz][qy][qx][0]; -- const double massY = mass[qz][qy][qx][1]; -- const double massZ = mass[qz][qy][qx][2]; -- mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -- mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ); -- mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ); -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double massXY[MAX_D1D][MAX_D1D]; -- -- osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] += massX[dx] * wy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- } // loop qz -- }); // end of element loop --} -- --template --void SmemPAHcurlMassApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- const int dataSize = symmetric ? 6 : 9; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- constexpr int VDIM = 3; -- constexpr int tD1D = T_D1D ? T_D1D : HCURL_MAX_D1D; -- constexpr int tQ1D = T_Q1D ? T_Q1D : HCURL_MAX_Q1D; -- -- MFEM_SHARED double sBo[tQ1D][tD1D]; -- MFEM_SHARED double sBc[tQ1D][tD1D]; -- -- double op9[9]; -- MFEM_SHARED double sop[9*tQ1D*tQ1D]; -- MFEM_SHARED double mass[tQ1D][tQ1D][3]; -- -- MFEM_SHARED double sX[tD1D][tD1D][tD1D]; -- -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- for (int i=0; i &w, -- const Vector &j, -- Vector &coeff, -- Vector &op) --{ -- const int NQ = Q1D*Q1D; -- auto W = w.Read(); -- auto J = Reshape(j.Read(), NQ, 2, 2, NE); -- auto C = Reshape(coeff.Read(), NQ, NE); -- auto y = Reshape(op.Write(), NQ, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int q = 0; q < NQ; ++q) -- { -- const double J11 = J(q,0,0,e); -- const double J21 = J(q,1,0,e); -- const double J12 = J(q,0,1,e); -- const double J22 = J(q,1,1,e); -- const double detJ = (J11*J22)-(J21*J12); -- y(q,e) = W[q] * C(q,e) / detJ; -- } -- }); --} -- --// PA H(curl) curl-curl assemble 3D kernel --static void PACurlCurlSetup3D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff, -- Vector &op) --{ -- const int NQ = Q1D*Q1D*Q1D; -- const bool symmetric = (coeffDim != 9); -- auto W = w.Read(); -- auto J = Reshape(j.Read(), NQ, 3, 3, NE); -- auto C = Reshape(coeff.Read(), coeffDim, NQ, NE); -- auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int q = 0; q < NQ; ++q) -- { -- const double J11 = J(q,0,0,e); -- const double J21 = J(q,1,0,e); -- const double J31 = J(q,2,0,e); -- const double J12 = J(q,0,1,e); -- const double J22 = J(q,1,1,e); -- const double J32 = J(q,2,1,e); -- const double J13 = J(q,0,2,e); -- const double J23 = J(q,1,2,e); -- const double J33 = J(q,2,2,e); -- const double detJ = J11 * (J22 * J33 - J32 * J23) - -- J21 * (J12 * J33 - J32 * J13) + -- J31 * (J12 * J23 - J22 * J13); -- -- const double c_detJ = W[q] / detJ; -- -- if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version -- { -- // Set y to the 6 or 9 entries of J^T M J / det -- const double M11 = C(0, q, e); -- const double M12 = C(1, q, e); -- const double M13 = C(2, q, e); -- const double M21 = (!symmetric) ? C(3, q, e) : M12; -- const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e); -- const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e); -- const double M31 = (!symmetric) ? C(6, q, e) : M13; -- const double M32 = (!symmetric) ? C(7, q, e) : M23; -- const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e); -- -- // First compute R = MJ -- const double R11 = M11*J11 + M12*J21 + M13*J31; -- const double R12 = M11*J12 + M12*J22 + M13*J32; -- const double R13 = M11*J13 + M12*J23 + M13*J33; -- const double R21 = M21*J11 + M22*J21 + M23*J31; -- const double R22 = M21*J12 + M22*J22 + M23*J32; -- const double R23 = M21*J13 + M22*J23 + M23*J33; -- const double R31 = M31*J11 + M32*J21 + M33*J31; -- const double R32 = M31*J12 + M32*J22 + M33*J32; -- const double R33 = M31*J13 + M32*J23 + M33*J33; -- -- // Now set y to J^T R / det -- y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1 -- const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32); -- y(q,1,e) = Y12; // 1,2 -- y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3 -- -- const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31); -- const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32); -- const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33); -- -- const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33); -- -- y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1 -- y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2 -- y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3 -- -- if (!symmetric) -- { -- y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1 -- y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2 -- y(q,8,e) = Y33; // 3,3 -- } -- } -- else // Vector or scalar coefficient version -- { -- // Set y to the 6 entries of J^T D J / det^2 -- const double D1 = C(0, q, e); -- const double D2 = coeffDim == 3 ? C(1, q, e) : D1; -- const double D3 = coeffDim == 3 ? C(2, q, e) : D1; -- -- y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1 -- y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2 -- y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3 -- y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2 -- y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3 -- y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3 -- } -- } -- }); --} -- --// PA H(curl)-L2 assemble 2D kernel --static void PACurlL2Setup2D(const int Q1D, -- const int NE, -- const Array &w, -- Vector &coeff, -- Vector &op) --{ -- const int NQ = Q1D*Q1D; -- auto W = w.Read(); -- auto C = Reshape(coeff.Read(), NQ, NE); -- auto y = Reshape(op.Write(), NQ, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int q = 0; q < NQ; ++q) -- { -- y(q,e) = W[q] * C(q,e); -- } -- }); --} -- --void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes) --{ -- // Assumes tensor-product elements -- Mesh *mesh = fes.GetMesh(); -- const FiniteElement *fel = fes.GetFE(0); -- -- const VectorTensorFiniteElement *el = -- dynamic_cast(fel); -- MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el, -- *mesh->GetElementTransformation(0)); -- -- const int dims = el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- ne = fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC); -- if (Q) { coeff.Project(*Q); } -- else if (MQ) { coeff.ProjectTranspose(*MQ); } -- else if (DQ) { coeff.Project(*DQ); } -- else { coeff.SetConstant(1.0); } -- -- const int coeff_dim = coeff.GetVDim(); -- symmetric = (coeff_dim != dim*dim); -- const int sym_dims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- const int ndata = (dim == 2) ? 1 : (symmetric ? sym_dims : dim*dim); -- pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType()); -- -- if (el->GetDerivType() != mfem::FiniteElement::CURL) -- { -- MFEM_ABORT("Unknown kernel."); -- } -- -- if (dim == 3) -- { -- PACurlCurlSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, coeff, -- pa_data); -- } -- else -- { -- PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -- } --} -- --static void PACurlCurlApply2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bo, -- const Array &bot, -- const Array &gc, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto Gct = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double curl[MAX_Q1D][MAX_Q1D]; -- -- // curl[qy][qx] will be computed as du_y/dx - du_x/dy -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- curl[qy][qx] = 0.0; -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double gradX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx] = 0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + (dy * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- curl[qy][qx] += gradX[qx] * wy; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- curl[qy][qx] *= op(qx,qy,e); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double gradX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradX[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy; -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- } // loop qy -- }); // end of element loop --} -- --template --static void PACurlCurlApply3D(const int D1D, -- const int Q1D, -- const bool symmetric, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Array &gc, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -- // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto Gct = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point. -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- curl[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- // We treat x, y, z components separately for optimization specific to each. -- -- int osc = 0; -- -- { -- // x component -- const int D1Dz = D1D; -- const int D1Dy = D1D; -- const int D1Dx = D1D - 1; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][2]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradXY[qy][qx][d] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * Bo(qx,dx); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- gradXY[qy][qx][0] += wx * wDy; -- gradXY[qy][qx][1] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2} -- curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1} -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- { -- // y component -- const int D1Dz = D1D; -- const int D1Dy = D1D - 1; -- const int D1Dx = D1D; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][2]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradXY[qy][qx][d] = 0.0; -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double massY[MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- massY[qy] = 0.0; -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- massY[qy] += t * Bo(qy,dy); -- } -- } -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = Bc(qx,dx); -- const double wDx = Gc(qx,dx); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = massY[qy]; -- gradXY[qy][qx][0] += wDx * wy; -- gradXY[qy][qx][1] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2} -- curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0} -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- { -- // z component -- const int D1Dz = D1D - 1; -- const int D1Dy = D1D; -- const int D1Dx = D1D; -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double gradYZ[MAX_Q1D][MAX_Q1D][2]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradYZ[qz][qy][d] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massZ[MAX_Q1D]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- massZ[qz] = 0.0; -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- massZ[qz] += t * Bo(qz,dz); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = massZ[qz]; -- gradYZ[qz][qy][0] += wz * wy; -- gradYZ[qz][qy][1] += wz * wDy; -- } -- } -- } -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = Bc(qx,dx); -- const double wDx = Gc(qx,dx); -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qz = 0; qz < Q1D; ++qz) -- { -- // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1} -- curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0} -- } -- } -- } -- } -- } -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,qz,0,e); -- const double O12 = op(qx,qy,qz,1,e); -- const double O13 = op(qx,qy,qz,2,e); -- const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e); -- const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e); -- const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e); -- const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e); -- const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e); -- const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e); -- -- const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) + -- (O13 * curl[qz][qy][qx][2]); -- const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) + -- (O23 * curl[qz][qy][qx][2]); -- const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) + -- (O33 * curl[qz][qy][qx][2]); -- -- curl[qz][qy][qx][0] = c1; -- curl[qz][qy][qx][1] = c2; -- curl[qz][qy][qx][2] = c3; -- } -- } -- } -- -- // x component -- osc = 0; -- { -- const int D1Dz = D1D; -- const int D1Dy = D1D; -- const int D1Dx = D1D - 1; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY12[MAX_D1D][MAX_D1D]; -- double gradXY21[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY12[dy][dx] = 0.0; -- gradXY21[dy][dx] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[MAX_D1D][2]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- for (int n = 0; n < 2; ++n) -- { -- massX[dx][n] = 0.0; -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bot(dx,qx); -- -- massX[dx][0] += wx * curl[qz][qy][qx][1]; -- massX[dx][1] += wx * curl[qz][qy][qx][2]; -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bct(dy,qy); -- const double wDy = Gct(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY21[dy][dx] += massX[dx][0] * wy; -- gradXY12[dy][dx] += massX[dx][1] * wDy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bct(dz,qz); -- const double wDz = Gct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz); -- } -- } -- } -- } // loop qz -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- // y component -- { -- const int D1Dz = D1D; -- const int D1Dy = D1D - 1; -- const int D1Dx = D1D; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY02[MAX_D1D][MAX_D1D]; -- double gradXY20[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY02[dy][dx] = 0.0; -- gradXY20[dy][dx] = 0.0; -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- double massY[MAX_D1D][2]; -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- massY[dy][0] = 0.0; -- massY[dy][1] = 0.0; -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bot(dy,qy); -- -- massY[dy][0] += wy * curl[qz][qy][qx][2]; -- massY[dy][1] += wy * curl[qz][qy][qx][0]; -- } -- } -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bct(dx,qx); -- const double wDx = Gct(dx,qx); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- gradXY02[dy][dx] += massY[dy][0] * wDx; -- gradXY20[dy][dx] += massY[dy][1] * wx; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bct(dz,qz); -- const double wDz = Gct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz); -- } -- } -- } -- } // loop qz -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- // z component -- { -- const int D1Dz = D1D - 1; -- const int D1Dy = D1D; -- const int D1Dx = D1D; -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- double gradYZ01[MAX_D1D][MAX_D1D]; -- double gradYZ10[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- gradYZ01[dz][dy] = 0.0; -- gradYZ10[dz][dy] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massZ[MAX_D1D][2]; -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- for (int n = 0; n < 2; ++n) -- { -- massZ[dz][n] = 0.0; -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bot(dz,qz); -- -- massZ[dz][0] += wz * curl[qz][qy][qx][0]; -- massZ[dz][1] += wz * curl[qz][qy][qx][1]; -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bct(dy,qy); -- const double wDy = Gct(dy,qy); -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- gradYZ01[dz][dy] += wy * massZ[dz][1]; -- gradYZ10[dz][dy] += wDy * massZ[dz][0]; -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bct(dx,qx); -- const double wDx = Gct(dx,qx); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx); -- } -- } -- } -- } // loop qx -- } -- }); // end of element loop --} -- --template --static void SmemPACurlCurlApply3D(const int D1D, -- const int Q1D, -- const bool symmetric, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Array &gc, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -- // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- const int s = symmetric ? 6 : 9; -- -- auto device_kernel = [=] MFEM_DEVICE (int e) -- { -- constexpr int VDIM = 3; -- -- MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D]; -- MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D]; -- MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D]; -- -- double ope[9]; -- MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D]; -- MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3]; -- -- MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D]; -- -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- for (int i=0; i(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D); --} -- --static void PACurlL2Apply2D(const int D1D, -- const int D1Dtest, -- const int Q1D, -- const int NE, -- const Array &bo, -- const Array &bot, -- const Array &bt, -- const Array &gc, -- const Vector &pa_data, -- const Vector &x, // trial = H(curl) -- Vector &y) // test = L2 or H1 --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- const int H1 = (D1Dtest == D1D); -- -- MFEM_VERIFY(y.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bt = Reshape(bt.Read(), D1D, Q1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), D1Dtest, D1Dtest, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double curl[MAX_Q1D][MAX_Q1D]; -- -- // curl[qy][qx] will be computed as du_y/dx - du_x/dy -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- curl[qy][qx] = 0.0; -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double gradX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx] = 0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + (dy * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- curl[qy][qx] += gradX[qx] * wy; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- curl[qy][qx] *= op(qx,qy,e); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double sol_x[MAX_D1D]; -- for (int dx = 0; dx < D1Dtest; ++dx) -- { -- sol_x[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double s = curl[qy][qx]; -- for (int dx = 0; dx < D1Dtest; ++dx) -- { -- sol_x[dx] += s * ((H1 == 1) ? Bt(dx,qx) : Bot(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dtest; ++dy) -- { -- const double wy = (H1 == 1) ? Bt(dy,qy) : Bot(dy,qy); -- -- for (int dx = 0; dx < D1Dtest; ++dx) -- { -- Y(dx,dy,e) += sol_x[dx] * wy; -- } -- } -- } // loop qy -- }); // end of element loop --} -- --static void PACurlL2ApplyTranspose2D(const int D1D, -- const int D1Dtest, -- const int Q1D, -- const int NE, -- const Array &bo, -- const Array &bot, -- const Array &b, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, // trial = H(curl) -- Vector &y) // test = L2 or H1 --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- const int H1 = (D1Dtest == D1D); -- -- MFEM_VERIFY(x.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto B = Reshape(b.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Gct = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), D1Dtest, D1Dtest, NE); -- auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D]; -- -- // Zero-order term in L2 or H1 test space -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qy][qx] = 0.0; -- } -- } -- -- for (int dy = 0; dy < D1Dtest; ++dy) -- { -- double sol_x[MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- sol_x[qy] = 0.0; -- } -- for (int dx = 0; dx < D1Dtest; ++dx) -- { -- const double s = X(dx,dy,e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_x[qx] += s * ((H1 == 1) ? B(qx,dx) : Bo(qx,dx)); -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double d2q = (H1 == 1) ? B(qy,dy) : Bo(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qy][qx] += d2q * sol_x[qx]; -- } -- } -- } -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qy][qx] *= op(qx,qy,e); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double gradX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradX[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradX[dx] += mass[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy; -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- } // loop qy -- }); // end of element loop --} -- --void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- if (Device::Allows(Backend::DEVICE_MASK)) -- { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -- { -- case 0x23: return SmemPACurlCurlApply3D<2,3>(dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y); -- case 0x34: return SmemPACurlCurlApply3D<3,4>(dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y); -- case 0x45: return SmemPACurlCurlApply3D<4,5>(dofs1D, quad1D, symmetric, ne, -- mapsO->B, -- mapsC->B, mapsO->Bt, -- mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y); -- case 0x56: return SmemPACurlCurlApply3D<5,6>(dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y); -- default: return SmemPACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- } -- } -- else -- PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, mapsC->G, mapsC->Gt, pa_data, x, y); -- } -- else if (dim == 2) -- { -- PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt, -- mapsC->G, mapsC->Gt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --static void PACurlCurlAssembleDiagonal2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bo, -- const Array &gc, -- const Vector &pa_data, -- Vector &diag) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -- auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double t[MAX_Q1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- t[qx] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy); -- t[qx] += wy * wy * op(qx,qy,e); -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx)); -- D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- }); // end of element loop --} -- --template --static void PACurlCurlAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const bool symmetric, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &go, -- const Array &gc, -- const Vector &pa_data, -- Vector &diag) --{ -- constexpr static int VDIM = 3; -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Go = Reshape(go.Read(), Q1D, D1D-1); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE); -- auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- const int s = symmetric ? 6 : 9; -- const int i11 = 0; -- const int i12 = 1; -- const int i13 = 2; -- const int i21 = symmetric ? i12 : 3; -- const int i22 = symmetric ? 3 : 4; -- const int i23 = symmetric ? 4 : 5; -- const int i31 = symmetric ? i13 : 6; -- const int i32 = symmetric ? i23 : 7; -- const int i33 = symmetric ? 5 : 8; -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -- // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF), -- // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient. -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3]; -- -- // z contraction -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- for (int i=0; i --static void SmemPACurlCurlAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const bool symmetric, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &go, -- const Array &gc, -- const Vector &pa_data, -- Vector &diag) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Go = Reshape(go.Read(), Q1D, D1D-1); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE); -- auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- const int s = symmetric ? 6 : 9; -- const int i11 = 0; -- const int i12 = 1; -- const int i13 = 2; -- const int i21 = symmetric ? i12 : 3; -- const int i22 = symmetric ? 3 : 4; -- const int i23 = symmetric ? 4 : 5; -- const int i31 = symmetric ? i13 : 6; -- const int i32 = symmetric ? i23 : 7; -- const int i33 = symmetric ? 5 : 8; -- -- mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -- // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- constexpr int VDIM = 3; -- -- MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D]; -- MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D]; -- MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D]; -- MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D]; -- -- double ope[9]; -- MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D]; -- -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- for (int i=0; i(dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- case 0x34: return SmemPACurlCurlAssembleDiagonal3D<3,4>(dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- case 0x45: return SmemPACurlCurlAssembleDiagonal3D<4,5>(dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- case 0x56: return SmemPACurlCurlAssembleDiagonal3D<5,6>(dofs1D, quad1D, -- symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- default: return SmemPACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- } -- } -- else -- PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne, -- mapsO->B, mapsC->B, -- mapsO->G, mapsC->G, -- pa_data, diag); -- } -- else if (dim == 2) -- { -- PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne, -- mapsO->B, mapsC->G, pa_data, diag); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --// Apply to x corresponding to DOFs in H^1 (trial), whose gradients are --// integrated against H(curl) test functions corresponding to y. --void PAHcurlH1Apply3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &gc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- constexpr static int VDIM = 3; -- -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -- auto X = Reshape(x.Read(), D1D, D1D, D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- for (int dz = 0; dz < D1D; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][3]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradXY[qy][qx][0] = 0.0; -- gradXY[qy][qx][1] = 0.0; -- gradXY[qy][qx][2] = 0.0; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- double gradX[MAX_Q1D][2]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx][0] = 0.0; -- gradX[qx][1] = 0.0; -- } -- for (int dx = 0; dx < D1D; ++dx) -- { -- const double s = X(dx,dy,dz,e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx][0] += s * Bc(qx,dx); -- gradX[qx][1] += s * Gc(qx,dx); -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = gradX[qx][0]; -- const double wDx = gradX[qx][1]; -- gradXY[qy][qx][0] += wDx * wy; -- gradXY[qy][qx][1] += wx * wDy; -- gradXY[qy][qx][2] += wx * wy; -- } -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz; -- mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz; -- mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz; -- } -- } -- } -- } -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,qz,0,e); -- const double O12 = op(qx,qy,qz,1,e); -- const double O13 = op(qx,qy,qz,2,e); -- const double O22 = op(qx,qy,qz,3,e); -- const double O23 = op(qx,qy,qz,4,e); -- const double O33 = op(qx,qy,qz,5,e); -- const double massX = mass[qz][qy][qx][0]; -- const double massY = mass[qz][qy][qx][1]; -- const double massZ = mass[qz][qy][qx][2]; -- mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -- mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ); -- mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ); -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double massXY[MAX_D1D][MAX_D1D]; -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] += massX[dx] * wy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- } // loop qz -- }); // end of element loop --} -- --// Apply to x corresponding to DOFs in H(curl), integrated --// against gradients of H^1 functions corresponding to y. --void PAHcurlH1ApplyTranspose3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &bo, -- const Array &bct, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- constexpr static int VDIM = 3; -- -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bt = Reshape(bct.Read(), D1D, Q1D); -- auto Gt = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double massXY[MAX_Q1D][MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massXY[qy][qx] = 0.0; -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- massXY[qy][qx] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,qz,0,e); -- const double O12 = op(qx,qy,qz,1,e); -- const double O13 = op(qx,qy,qz,2,e); -- const double O22 = op(qx,qy,qz,3,e); -- const double O23 = op(qx,qy,qz,4,e); -- const double O33 = op(qx,qy,qz,5,e); -- const double massX = mass[qz][qy][qx][0]; -- const double massY = mass[qz][qy][qx][1]; -- const double massZ = mass[qz][qy][qx][2]; -- mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -- mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ); -- mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ); -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY[MAX_D1D][MAX_D1D][3]; -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- gradXY[dy][dx][0] = 0; -- gradXY[dy][dx][1] = 0; -- gradXY[dy][dx][2] = 0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double gradX[MAX_D1D][3]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- gradX[dx][0] = 0; -- gradX[dx][1] = 0; -- gradX[dx][2] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double gX = mass[qz][qy][qx][0]; -- const double gY = mass[qz][qy][qx][1]; -- const double gZ = mass[qz][qy][qx][2]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- const double wx = Bt(dx,qx); -- const double wDx = Gt(dx,qx); -- gradX[dx][0] += gX * wDx; -- gradX[dx][1] += gY * wx; -- gradX[dx][2] += gZ * wx; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- const double wy = Bt(dy,qy); -- const double wDy = Gt(dy,qy); -- for (int dx = 0; dx < D1D; ++dx) -- { -- gradXY[dy][dx][0] += gradX[dx][0] * wy; -- gradXY[dy][dx][1] += gradX[dx][1] * wDy; -- gradXY[dy][dx][2] += gradX[dx][2] * wy; -- } -- } -- } -- for (int dz = 0; dz < D1D; ++dz) -- { -- const double wz = Bt(dz,qz); -- const double wDz = Gt(dz,qz); -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- Y(dx,dy,dz,e) += -- ((gradXY[dy][dx][0] * wz) + -- (gradXY[dy][dx][1] * wz) + -- (gradXY[dy][dx][2] * wDz)); -- } -- } -- } -- } // loop qz -- }); // end of element loop --} -- --// Apply to x corresponding to DOFs in H^1 (trial), whose gradients are --// integrated against H(curl) test functions corresponding to y. --void PAHcurlH1Apply2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &gc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE); -- auto X = Reshape(x.Read(), D1D, D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qy][qx][c] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1D; ++dy) -- { -- double gradX[MAX_Q1D][2]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx][0] = 0.0; -- gradX[qx][1] = 0.0; -- } -- for (int dx = 0; dx < D1D; ++dx) -- { -- const double s = X(dx,dy,e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- gradX[qx][0] += s * Bc(qx,dx); -- gradX[qx][1] += s * Gc(qx,dx); -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = gradX[qx][0]; -- const double wDx = gradX[qx][1]; -- mass[qy][qx][0] += wDx * wy; -- mass[qy][qx][1] += wx * wDy; -- } -- } -- } -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,0,e); -- const double O12 = op(qx,qy,1,e); -- const double O22 = op(qx,qy,2,e); -- const double massX = mass[qy][qx][0]; -- const double massY = mass[qy][qx][1]; -- mass[qy][qx][0] = (O11*massX)+(O12*massY); -- mass[qy][qx][1] = (O12*massX)+(O22*massY); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- double massX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy; -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- } -- }); // end of element loop --} -- --// Apply to x corresponding to DOFs in H(curl), integrated --// against gradients of H^1 functions corresponding to y. --void PAHcurlH1ApplyTranspose2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &bo, -- const Array &bct, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bt = Reshape(bct.Read(), D1D, Q1D); -- auto Gt = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE); -- auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qy][qx][c] = 0.0; -- } -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + (dy * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qy][qx][c] += massX[qx] * wy; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,0,e); -- const double O12 = op(qx,qy,1,e); -- const double O22 = op(qx,qy,2,e); -- const double massX = mass[qy][qx][0]; -- const double massY = mass[qy][qx][1]; -- mass[qy][qx][0] = (O11*massX)+(O12*massY); -- mass[qy][qx][1] = (O12*massX)+(O22*massY); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double gradX[MAX_D1D][2]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- gradX[dx][0] = 0; -- gradX[dx][1] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double gX = mass[qy][qx][0]; -- const double gY = mass[qy][qx][1]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- const double wx = Bt(dx,qx); -- const double wDx = Gt(dx,qx); -- gradX[dx][0] += gX * wDx; -- gradX[dx][1] += gY * wx; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- const double wy = Bt(dy,qy); -- const double wDy = Gt(dy,qy); -- for (int dx = 0; dx < D1D; ++dx) -- { -- Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy)); -- } -- } -- } -- }); // end of element loop --} -- --// PA H(curl) Mass Assemble 3D kernel --void PAHcurlL2Setup(const int NQ, -- const int coeffDim, -- const int NE, -- const Array &w, -- Vector &coeff, -- Vector &op) --{ -- auto W = w.Read(); -- auto C = Reshape(coeff.Read(), coeffDim, NQ, NE); -- auto y = Reshape(op.Write(), coeffDim, NQ, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int q = 0; q < NQ; ++q) -- { -- for (int c=0; c(fel); -- MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- if (el->GetDerivType() != mfem::FiniteElement::CURL) -- { -- MFEM_ABORT("Unknown kernel."); -- } -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest, -- *mesh->GetElementTransformation(0)); -- -- const int dims = el->GetDim(); -- MFEM_VERIFY(dims == 2, ""); -- -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2, ""); -- -- ne = test_fes.GetNE(); -- mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- if (el->GetOrder() == eltest->GetOrder()) -- { -- dofs1Dtest = dofs1D; -- } -- else -- { -- dofs1Dtest = dofs1D - 1; -- } -- -- pa_data.SetSize(nq * ne, Device::GetMemoryType()); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -- -- if (dim == 2) -- { -- PACurlL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --void MixedScalarCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 2) -- { -- PACurlL2Apply2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt, -- mapsC->Bt, mapsC->G, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --void MixedScalarCurlIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const --{ -- if (dim == 2) -- { -- PACurlL2ApplyTranspose2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, mapsO->Bt, -- mapsC->B, mapsC->Gt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with vector test and trial spaces. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const VectorTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 3, ""); -- -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -- -- ne = trial_fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- dofs1Dtest = mapsCtest->ndof; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- testType = test_el->GetDerivType(); -- trialType = trial_el->GetDerivType(); -- -- const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- coeffDim = (DQ ? 3 : 1); -- -- const bool curlSpaces = (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL); -- -- const int ndata = curlSpaces ? (coeffDim == 1 ? 1 : 9) : symmDims; -- pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType()); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(qs, CoefficientStorage::FULL); -- if (Q) { coeff.Project(*Q); } -- else if (DQ) { coeff.Project(*DQ); } -- else { coeff.SetConstant(1.0); } -- -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL && dim == 3) -- { -- if (coeffDim == 1) -- { -- PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data); -- } -- else -- { -- PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), -- geom->J, coeff, pa_data); -- } -- } -- else if (testType == mfem::FiniteElement::DIV && -- trialType == mfem::FiniteElement::CURL && dim == 3 && -- test_fel->GetOrder() == trial_fel->GetOrder()) -- { -- PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff, -- pa_data); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } --} -- --// Apply to x corresponding to DOFs in H(curl) (trial), whose curl is --// integrated against H(curl) test functions corresponding to y. --template --static void PAHcurlL2Apply3D(const int D1D, -- const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Array &gc, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -- // (\nabla\times u) \cdot v = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v} -- // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point. -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- curl[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- // We treat x, y, z components separately for optimization specific to each. -- -- int osc = 0; -- -- { -- // x component -- const int D1Dz = D1D; -- const int D1Dy = D1D; -- const int D1Dx = D1D - 1; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][2]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradXY[qy][qx][d] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * Bo(qx,dx); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- gradXY[qy][qx][0] += wx * wDy; -- gradXY[qy][qx][1] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2} -- curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1} -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- { -- // y component -- const int D1Dz = D1D; -- const int D1Dy = D1D - 1; -- const int D1Dx = D1D; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][2]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradXY[qy][qx][d] = 0.0; -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double massY[MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- massY[qy] = 0.0; -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- massY[qy] += t * Bo(qy,dy); -- } -- } -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = Bc(qx,dx); -- const double wDx = Gc(qx,dx); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = massY[qy]; -- gradXY[qy][qx][0] += wDx * wy; -- gradXY[qy][qx][1] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2} -- curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0} -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- { -- // z component -- const int D1Dz = D1D - 1; -- const int D1Dy = D1D; -- const int D1Dx = D1D; -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double gradYZ[MAX_Q1D][MAX_Q1D][2]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradYZ[qz][qy][d] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massZ[MAX_Q1D]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- massZ[qz] = 0.0; -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- massZ[qz] += t * Bo(qz,dz); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = massZ[qz]; -- gradYZ[qz][qy][0] += wz * wy; -- gradYZ[qz][qy][1] += wz * wDy; -- } -- } -- } -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = Bc(qx,dx); -- const double wDx = Gc(qx,dx); -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qz = 0; qz < Q1D; ++qz) -- { -- // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1} -- curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0} -- } -- } -- } -- } -- } -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(0,qx,qy,qz,e); -- if (coeffDim == 1) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- curl[qz][qy][qx][c] *= O11; -- } -- } -- else -- { -- const double O21 = op(1,qx,qy,qz,e); -- const double O31 = op(2,qx,qy,qz,e); -- const double O12 = op(3,qx,qy,qz,e); -- const double O22 = op(4,qx,qy,qz,e); -- const double O32 = op(5,qx,qy,qz,e); -- const double O13 = op(6,qx,qy,qz,e); -- const double O23 = op(7,qx,qy,qz,e); -- const double O33 = op(8,qx,qy,qz,e); -- const double curlX = curl[qz][qy][qx][0]; -- const double curlY = curl[qz][qy][qx][1]; -- const double curlZ = curl[qz][qy][qx][2]; -- curl[qz][qy][qx][0] = (O11*curlX)+(O12*curlY)+(O13*curlZ); -- curl[qz][qy][qx][1] = (O21*curlX)+(O22*curlY)+(O23*curlZ); -- curl[qz][qy][qx][2] = (O31*curlX)+(O32*curlY)+(O33*curlZ); -- } -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double massXY[MAX_D1D][MAX_D1D]; -- -- osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] = 0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] += massX[dx] * wy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- } // loop qz -- }); // end of element loop --} -- --// Apply to x corresponding to DOFs in H(curl) (trial), whose curl is --// integrated against H(curl) test functions corresponding to y. --template --static void SmemPAHcurlL2Apply3D(const int D1D, -- const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &gc, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- auto device_kernel = [=] MFEM_DEVICE (int e) -- { -- constexpr int VDIM = 3; -- constexpr int maxCoeffDim = 9; -- -- MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D]; -- MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D]; -- MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D]; -- -- double opc[maxCoeffDim]; -- MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D]; -- MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3]; -- -- MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D]; -- -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- for (int i=0; i(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D); --} -- --// Apply to x corresponding to DOFs in H(curl) (trial), whose curl is --// integrated against H(div) test functions corresponding to y. --template --static void PAHcurlHdivApply3D(const int D1D, -- const int D1Dtest, -- const int Q1D, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Array &gc, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} -- // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get -- // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1Dtest, Q1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point. -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- curl[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- // We treat x, y, z components separately for optimization specific to each. -- -- int osc = 0; -- -- { -- // x component -- const int D1Dz = D1D; -- const int D1Dy = D1D; -- const int D1Dx = D1D - 1; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][2]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradXY[qy][qx][d] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * Bo(qx,dx); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- gradXY[qy][qx][0] += wx * wDy; -- gradXY[qy][qx][1] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2} -- curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1} -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- { -- // y component -- const int D1Dz = D1D; -- const int D1Dy = D1D - 1; -- const int D1Dx = D1D; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double gradXY[MAX_Q1D][MAX_Q1D][2]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradXY[qy][qx][d] = 0.0; -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double massY[MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- massY[qy] = 0.0; -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- massY[qy] += t * Bo(qy,dy); -- } -- } -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = Bc(qx,dx); -- const double wDx = Gc(qx,dx); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = massY[qy]; -- gradXY[qy][qx][0] += wDx * wy; -- gradXY[qy][qx][1] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = Bc(qz,dz); -- const double wDz = Gc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2} -- curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0} -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- { -- // z component -- const int D1Dz = D1D - 1; -- const int D1Dy = D1D; -- const int D1Dx = D1D; -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double gradYZ[MAX_Q1D][MAX_Q1D][2]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int d = 0; d < 2; ++d) -- { -- gradYZ[qz][qy][d] = 0.0; -- } -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massZ[MAX_Q1D]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- massZ[qz] = 0.0; -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- massZ[qz] += t * Bo(qz,dz); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = Bc(qy,dy); -- const double wDy = Gc(qy,dy); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = massZ[qz]; -- gradYZ[qz][qy][0] += wz * wy; -- gradYZ[qz][qy][1] += wz * wDy; -- } -- } -- } -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = Bc(qx,dx); -- const double wDx = Gc(qx,dx); -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qz = 0; qz < Q1D; ++qz) -- { -- // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1} -- curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0} -- } -- } -- } -- } -- } -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,qz,0,e); -- const double O12 = op(qx,qy,qz,1,e); -- const double O13 = op(qx,qy,qz,2,e); -- const double O22 = op(qx,qy,qz,3,e); -- const double O23 = op(qx,qy,qz,4,e); -- const double O33 = op(qx,qy,qz,5,e); -- -- const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) + -- (O13 * curl[qz][qy][qx][2]); -- const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) + -- (O23 * curl[qz][qy][qx][2]); -- const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) + -- (O33 * curl[qz][qy][qx][2]); -- -- curl[qz][qy][qx][0] = c1; -- curl[qz][qy][qx][1] = c2; -- curl[qz][qy][qx][2] = c3; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D -- -- osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1; -- const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1; -- const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] = 0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[HCURL_MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += curl[qz][qy][qx][c] * -- ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] += massX[dx] * wy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += -- massXY[dy][dx] * wz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- } // loop qz -- }); // end of element loop --} -- --// Apply to x corresponding to DOFs in H(div) (test), integrated against the --// curl of H(curl) trial functions corresponding to y. --template --static void PAHcurlHdivApply3DTranspose(const int D1D, -- const int D1Dtest, -- const int Q1D, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} -- // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get -- // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w} -- // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1Dtest, Q1D); -- auto Gct = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -- auto X = Reshape(x.Read(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D : D1D - 1; -- const int D1Dy = (c == 1) ? D1D : D1D - 1; -- const int D1Dx = (c == 0) ? D1D : D1D - 1; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double massXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massXY[qy][qx] = 0.0; -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[HDIV_MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- massXY[qy][qx] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(qx,qy,qz,0,e); -- const double O12 = op(qx,qy,qz,1,e); -- const double O13 = op(qx,qy,qz,2,e); -- const double O22 = op(qx,qy,qz,3,e); -- const double O23 = op(qx,qy,qz,4,e); -- const double O33 = op(qx,qy,qz,5,e); -- const double massX = mass[qz][qy][qx][0]; -- const double massY = mass[qz][qy][qx][1]; -- const double massZ = mass[qz][qy][qx][2]; -- mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -- mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ); -- mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ); -- } -- } -- } -- -- // x component -- osc = 0; -- { -- const int D1Dz = D1D; -- const int D1Dy = D1D; -- const int D1Dx = D1D - 1; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY12[MAX_D1D][MAX_D1D]; -- double gradXY21[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY12[dy][dx] = 0.0; -- gradXY21[dy][dx] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[MAX_D1D][2]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- for (int n = 0; n < 2; ++n) -- { -- massX[dx][n] = 0.0; -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bot(dx,qx); -- -- massX[dx][0] += wx * mass[qz][qy][qx][1]; -- massX[dx][1] += wx * mass[qz][qy][qx][2]; -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bct(dy,qy); -- const double wDy = Gct(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY21[dy][dx] += massX[dx][0] * wy; -- gradXY12[dy][dx] += massX[dx][1] * wDy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bct(dz,qz); -- const double wDz = Gct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz); -- } -- } -- } -- } // loop qz -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- // y component -- { -- const int D1Dz = D1D; -- const int D1Dy = D1D - 1; -- const int D1Dx = D1D; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY02[MAX_D1D][MAX_D1D]; -- double gradXY20[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY02[dy][dx] = 0.0; -- gradXY20[dy][dx] = 0.0; -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- double massY[MAX_D1D][2]; -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- massY[dy][0] = 0.0; -- massY[dy][1] = 0.0; -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bot(dy,qy); -- -- massY[dy][0] += wy * mass[qz][qy][qx][2]; -- massY[dy][1] += wy * mass[qz][qy][qx][0]; -- } -- } -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bct(dx,qx); -- const double wDx = Gct(dx,qx); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- gradXY02[dy][dx] += massY[dy][0] * wDx; -- gradXY20[dy][dx] += massY[dy][1] * wx; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bct(dz,qz); -- const double wDz = Gct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz); -- } -- } -- } -- } // loop qz -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- // z component -- { -- const int D1Dz = D1D - 1; -- const int D1Dy = D1D; -- const int D1Dx = D1D; -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- double gradYZ01[MAX_D1D][MAX_D1D]; -- double gradYZ10[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- gradYZ01[dz][dy] = 0.0; -- gradYZ10[dz][dy] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massZ[MAX_D1D][2]; -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- for (int n = 0; n < 2; ++n) -- { -- massZ[dz][n] = 0.0; -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bot(dz,qz); -- -- massZ[dz][0] += wz * mass[qz][qy][qx][0]; -- massZ[dz][1] += wz * mass[qz][qy][qx][1]; -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bct(dy,qy); -- const double wDy = Gct(dy,qy); -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- gradYZ01[dz][dy] += wy * massZ[dz][1]; -- gradYZ10[dz][dy] += wDy * massZ[dz][0]; -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bct(dx,qx); -- const double wDx = Gct(dx,qx); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx); -- } -- } -- } -- } // loop qx -- } -- }); // end of element loop --} -- --void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL && dim == 3) -- { -- const int ndata = coeffDim == 1 ? 1 : 9; -- -- if (Device::Allows(Backend::DEVICE_MASK)) -- { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -- { -- case 0x23: return SmemPAHcurlL2Apply3D<2,3>(dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x34: return SmemPAHcurlL2Apply3D<3,4>(dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x45: return SmemPAHcurlL2Apply3D<4,5>(dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x56: return SmemPAHcurlL2Apply3D<5,6>(dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- default: return SmemPAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, mapsC->G, -- pa_data, x, y); -- } -- } -- else -- PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B, -- mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y); -- } -- else if (testType == mfem::FiniteElement::DIV && -- trialType == mfem::FiniteElement::CURL && dim == 3) -- PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -- mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G, -- pa_data, x, y); -- else -- { -- MFEM_ABORT("Unsupported dimension or space!"); -- } --} -- --void MixedVectorCurlIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const --{ -- if (testType == mfem::FiniteElement::DIV && -- trialType == mfem::FiniteElement::CURL && dim == 3) -- PAHcurlHdivApply3DTranspose(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -- mapsC->B, mapsOtest->Bt, mapsCtest->Bt, -- mapsC->Gt, pa_data, x, y); -- else -- { -- MFEM_ABORT("Unsupported dimension or space!"); -- } --} -- --void MixedVectorWeakCurlIntegrator::AssemblePA(const FiniteElementSpace -- &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with vector test and trial spaces. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const VectorTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 3, ""); -- -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -- -- ne = trial_fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- testType = test_el->GetDerivType(); -- trialType = trial_el->GetDerivType(); -- -- const bool curlSpaces = (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL); -- -- const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- -- coeffDim = DQ ? 3 : 1; -- const int ndata = curlSpaces ? (DQ ? 9 : 1) : symmDims; -- -- pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType()); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(qs, CoefficientStorage::FULL); -- if (Q) { coeff.Project(*Q); } -- else if (DQ) { coeff.Project(*DQ); } -- else { coeff.SetConstant(1.0); } -- -- if (trialType == mfem::FiniteElement::CURL && dim == 3) -- { -- if (coeffDim == 1) -- { -- PAHcurlL2Setup(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data); -- } -- else -- { -- PAHcurlHdivSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), -- geom->J, coeff, pa_data); -- } -- } -- else if (trialType == mfem::FiniteElement::DIV && dim == 3 && -- test_el->GetOrder() == trial_el->GetOrder()) -- { -- PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, coeff, -- pa_data); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } --} -- --// Apply to x corresponding to DOFs in H(curl) (trial), integrated against curl --// of H(curl) test functions corresponding to y. --template --static void PAHcurlL2Apply3DTranspose(const int D1D, -- const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- // See PAHcurlL2Apply3D for comments. -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -- auto Bct = Reshape(bct.Read(), D1D, Q1D); -- auto Gct = Reshape(gct.Read(), D1D, Q1D); -- auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D - 1 : D1D; -- const int D1Dy = (c == 1) ? D1D - 1 : D1D; -- const int D1Dx = (c == 0) ? D1D - 1 : D1D; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double massXY[MAX_Q1D][MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massXY[qy][qx] = 0.0; -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- massXY[qy][qx] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(0,qx,qy,qz,e); -- if (coeffDim == 1) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] *= O11; -- } -- } -- else -- { -- const double O12 = op(1,qx,qy,qz,e); -- const double O13 = op(2,qx,qy,qz,e); -- const double O21 = op(3,qx,qy,qz,e); -- const double O22 = op(4,qx,qy,qz,e); -- const double O23 = op(5,qx,qy,qz,e); -- const double O31 = op(6,qx,qy,qz,e); -- const double O32 = op(7,qx,qy,qz,e); -- const double O33 = op(8,qx,qy,qz,e); -- const double massX = mass[qz][qy][qx][0]; -- const double massY = mass[qz][qy][qx][1]; -- const double massZ = mass[qz][qy][qx][2]; -- mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -- mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ); -- mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ); -- } -- } -- } -- } -- -- // x component -- osc = 0; -- { -- const int D1Dz = D1D; -- const int D1Dy = D1D; -- const int D1Dx = D1D - 1; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY12[MAX_D1D][MAX_D1D]; -- double gradXY21[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY12[dy][dx] = 0.0; -- gradXY21[dy][dx] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[MAX_D1D][2]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- for (int n = 0; n < 2; ++n) -- { -- massX[dx][n] = 0.0; -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bot(dx,qx); -- -- massX[dx][0] += wx * mass[qz][qy][qx][1]; -- massX[dx][1] += wx * mass[qz][qy][qx][2]; -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bct(dy,qy); -- const double wDy = Gct(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY21[dy][dx] += massX[dx][0] * wy; -- gradXY12[dy][dx] += massX[dx][1] * wDy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bct(dz,qz); -- const double wDz = Gct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -- // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz); -- } -- } -- } -- } // loop qz -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- // y component -- { -- const int D1Dz = D1D; -- const int D1Dy = D1D - 1; -- const int D1Dx = D1D; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double gradXY02[MAX_D1D][MAX_D1D]; -- double gradXY20[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- gradXY02[dy][dx] = 0.0; -- gradXY20[dy][dx] = 0.0; -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- double massY[MAX_D1D][2]; -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- massY[dy][0] = 0.0; -- massY[dy][1] = 0.0; -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bot(dy,qy); -- -- massY[dy][0] += wy * mass[qz][qy][qx][2]; -- massY[dy][1] += wy * mass[qz][qy][qx][0]; -- } -- } -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bct(dx,qx); -- const double wDx = Gct(dx,qx); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- gradXY02[dy][dx] += massY[dy][0] * wDx; -- gradXY20[dy][dx] += massY[dy][1] * wx; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bct(dz,qz); -- const double wDz = Gct(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -- // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz); -- } -- } -- } -- } // loop qz -- -- osc += D1Dx * D1Dy * D1Dz; -- } -- -- // z component -- { -- const int D1Dz = D1D - 1; -- const int D1Dy = D1D; -- const int D1Dx = D1D; -- -- for (int qx = 0; qx < Q1D; ++qx) -- { -- double gradYZ01[MAX_D1D][MAX_D1D]; -- double gradYZ10[MAX_D1D][MAX_D1D]; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- gradYZ01[dz][dy] = 0.0; -- gradYZ10[dz][dy] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massZ[MAX_D1D][2]; -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- for (int n = 0; n < 2; ++n) -- { -- massZ[dz][n] = 0.0; -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = Bot(dz,qz); -- -- massZ[dz][0] += wz * mass[qz][qy][qx][0]; -- massZ[dz][1] += wz * mass[qz][qy][qx][1]; -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = Bct(dy,qy); -- const double wDy = Gct(dy,qy); -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- gradYZ01[dz][dy] += wy * massZ[dz][1]; -- gradYZ10[dz][dy] += wDy * massZ[dz][0]; -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double wx = Bct(dx,qx); -- const double wDx = Gct(dx,qx); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -- // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1 -- Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -- e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx); -- } -- } -- } -- } // loop qx -- } -- }); --} -- --template --static void SmemPAHcurlL2Apply3DTranspose(const int D1D, -- const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &bo, -- const Array &bc, -- const Array &gc, -- const Vector &pa_data, -- const Vector &x, -- Vector &y) --{ -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- -- auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -- auto Bc = Reshape(bc.Read(), Q1D, D1D); -- auto Gc = Reshape(gc.Read(), Q1D, D1D); -- auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -- auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -- auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -- -- auto device_kernel = [=] MFEM_DEVICE (int e) -- { -- constexpr int VDIM = 3; -- constexpr int maxCoeffDim = 9; -- -- MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D]; -- MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D]; -- MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D]; -- -- double opc[maxCoeffDim]; -- MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D]; -- MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3]; -- -- MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D]; -- -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- for (int i=0; i(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D); --} -- --void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::CURL && dim == 3) -- { -- const int ndata = coeffDim == 1 ? 1 : 9; -- if (Device::Allows(Backend::DEVICE_MASK)) -- { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -- { -- case 0x23: return SmemPAHcurlL2Apply3DTranspose<2,3>(dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x34: return SmemPAHcurlL2Apply3DTranspose<3,4>(dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x45: return SmemPAHcurlL2Apply3DTranspose<4,5>(dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- case 0x56: return SmemPAHcurlL2Apply3DTranspose<5,6>(dofs1D, quad1D, ndata, -- ne, mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- default: return SmemPAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, -- mapsO->B, mapsC->B, -- mapsC->G, pa_data, x, y); -- } -- } -- else -- PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, pa_data, x, y); -- } -- else if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::DIV && dim == 3) -- { -- PAHcurlHdivApply3DTranspose(dofs1D, dofs1D, quad1D, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, -- mapsC->Gt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension or space!"); -- } --} -- --void MixedVectorWeakCurlIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const --{ -- if (testType == mfem::FiniteElement::CURL && -- trialType == mfem::FiniteElement::DIV && dim == 3) -- { -- PAHcurlHdivApply3D(dofs1D, dofs1D, quad1D, ne, mapsO->B, -- mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->G, -- pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unsupported dimension or space!"); -- } --} -- --// Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient --// to get a dof in H(curl) (range). You can think of the range as the "test" space --// and the domain as the "trial" space, but there's no integration. --static void PAHcurlApplyGradient2D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &B_, -- const Array &G_, -- const Vector &x_, -- Vector &y_) --{ -- auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE); -- auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[MAX_D1D][MAX_D1D]; -- -- // horizontal part -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w[dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[dx][ey] += B(ey, dy) * x(dx, dy, e); -- } -- } -- } -- -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += G(ex, dx) * w[dx][ey]; -- } -- const int local_index = ey*o_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- -- // vertical part -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w[dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[dx][ey] += G(ey, dy) * x(dx, dy, e); -- } -- } -- } -- -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += B(ex, dx) * w[dx][ey]; -- } -- const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- }); --} -- --// Specialization of PAHcurlApplyGradient2D to the case where B is identity --static void PAHcurlApplyGradient2DBId(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &G_, -- const Vector &x_, -- Vector &y_) --{ -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE); -- auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[MAX_D1D][MAX_D1D]; -- -- // horizontal part -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- const int dy = ey; -- w[dx][ey] = x(dx, dy, e); -- } -- } -- -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += G(ex, dx) * w[dx][ey]; -- } -- const int local_index = ey*o_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- -- // vertical part -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w[dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[dx][ey] += G(ey, dy) * x(dx, dy, e); -- } -- } -- } -- -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int dx = ex; -- const double s = w[dx][ey]; -- const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- }); --} -- --static void PAHcurlApplyGradientTranspose2D( -- const int c_dofs1D, const int o_dofs1D, const int NE, -- const Array &B_, const Array &G_, -- const Vector &x_, Vector &y_) --{ -- auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[MAX_D1D][MAX_D1D]; -- -- // horizontal part (open x, closed y) -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- w[dy][ex] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- const int local_index = ey*o_dofs1D + ex; -- w[dy][ex] += B(ey, dy) * x(local_index, e); -- } -- } -- } -- -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- s += G(ex, dx) * w[dy][ex]; -- } -- y(dx, dy, e) += s; -- } -- } -- -- // vertical part (open y, closed x) -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- w[dy][ex] = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -- w[dy][ex] += G(ey, dy) * x(local_index, e); -- } -- } -- } -- -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- s += B(ex, dx) * w[dy][ex]; -- } -- y(dx, dy, e) += s; -- } -- } -- }); --} -- --// Specialization of PAHcurlApplyGradientTranspose2D to the case where --// B is identity --static void PAHcurlApplyGradientTranspose2DBId( -- const int c_dofs1D, const int o_dofs1D, const int NE, -- const Array &G_, -- const Vector &x_, Vector &y_) --{ -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[MAX_D1D][MAX_D1D]; -- -- // horizontal part (open x, closed y) -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- const int ey = dy; -- const int local_index = ey*o_dofs1D + ex; -- w[dy][ex] = x(local_index, e); -- } -- } -- -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- s += G(ex, dx) * w[dy][ex]; -- } -- y(dx, dy, e) += s; -- } -- } -- -- // vertical part (open y, closed x) -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- w[dy][ex] = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -- w[dy][ex] += G(ey, dy) * x(local_index, e); -- } -- } -- } -- -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- const int ex = dx; -- const double s = w[dy][ex]; -- y(dx, dy, e) += s; -- } -- } -- }); --} -- --static void PAHcurlApplyGradient3D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &B_, -- const Array &G_, -- const Vector &x_, -- Vector &y_) --{ -- auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -- auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -- -- // --- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- // --- -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w1[dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e); -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez]; -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += G(ex, dx) * w2[dx][ey][ez]; -- } -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- // --- -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w1[dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e); -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez]; -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += B(ex, dx) * w2[dx][ey][ez]; -- } -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- // --- -- -- // contract in z -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w1[dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e); -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez]; -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += B(ex, dx) * w2[dx][ey][ez]; -- } -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- } -- }); --} -- --// Specialization of PAHcurlApplyGradient3D to the case where --static void PAHcurlApplyGradient3DBId(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &G_, -- const Vector &x_, -- Vector &y_) --{ -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -- auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -- -- // --- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- // --- -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- const int dz = ez; -- w1[dx][dy][ez] = x(dx, dy, dz, e); -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- const int dy = ey; -- w2[dx][ey][ez] = w1[dx][dy][ez]; -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += G(ex, dx) * w2[dx][ey][ez]; -- } -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- // --- -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- const int dz = ez; -- w1[dx][dy][ez] = x(dx, dy, dz, e); -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez]; -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int dx = ex; -- const double s = w2[dx][ey][ez]; -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- // --- -- -- // contract in z -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w1[dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e); -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- const int dy = ey; -- w2[dx][ey][ez] = w1[dx][dy][ez]; -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int dx = ex; -- const double s = w2[dx][ey][ez]; -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s; -- } -- } -- } -- }); --} -- --static void PAHcurlApplyGradientTranspose3D( -- const int c_dofs1D, const int o_dofs1D, const int NE, -- const Array &B_, const Array &G_, -- const Vector &x_, Vector &y_) --{ -- auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -- // --- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- // --- -- -- // contract in z -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[ex][ey][dz] = 0.0; -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- w1[ex][ey][dz] += B(ez, dz) * x(local_index, e); -- } -- } -- } -- } -- -- // contract in y -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- w2[ex][dy][dz] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz]; -- } -- } -- } -- } -- -- // contract in x -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- s += G(ex, dx) * w2[ex][dy][dz]; -- } -- y(dx, dy, dz, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- // --- -- -- // contract in z -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w1[ex][ey][dz] = 0.0; -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- w1[ex][ey][dz] += B(ez, dz) * x(local_index, e); -- } -- } -- } -- } -- -- // contract in y -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- w2[ex][dy][dz] = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz]; -- } -- } -- } -- } -- -- // contract in x -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- s += B(ex, dx) * w2[ex][dy][dz]; -- } -- y(dx, dy, dz, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- // --- -- -- // contract in z -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[ex][ey][dz] = 0.0; -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- w1[ex][ey][dz] += G(ez, dz) * x(local_index, e); -- } -- } -- } -- } -- -- // contract in y -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- w2[ex][dy][dz] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz]; -- } -- } -- } -- } -- -- // contract in x -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- s += B(ex, dx) * w2[ex][dy][dz]; -- } -- y(dx, dy, dz, e) += s; -- } -- } -- } -- }); --} -- --// Specialization of PAHcurlApplyGradientTranspose3D to the case where --static void PAHcurlApplyGradientTranspose3DBId( -- const int c_dofs1D, const int o_dofs1D, const int NE, -- const Array &G_, -- const Vector &x_, Vector &y_) --{ -- auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -- // --- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- // --- -- -- // contract in z -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- const int ez = dz; -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- w1[ex][ey][dz] = x(local_index, e); -- } -- } -- } -- -- // contract in y -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- const int ey = dy; -- w2[ex][dy][dz] = w1[ex][ey][dz]; -- } -- } -- } -- -- // contract in x -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- double s = 0.0; -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- s += G(ex, dx) * w2[ex][dy][dz]; -- } -- y(dx, dy, dz, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- // --- -- -- // contract in z -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- const int ez = dz; -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- w1[ex][ey][dz] = x(local_index, e); -- } -- } -- } -- -- // contract in y -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- w2[ex][dy][dz] = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz]; -- } -- } -- } -- } -- -- // contract in x -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- const int ex = dx; -- double s = w2[ex][dy][dz]; -- y(dx, dy, dz, e) += s; -- } -- } -- } -- -- // --- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- // --- -- -- // contract in z -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[ex][ey][dz] = 0.0; -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- w1[ex][ey][dz] += G(ez, dz) * x(local_index, e); -- } -- } -- } -- } -- -- // contract in y -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int ey = dy; -- w2[ex][dy][dz] = w1[ex][ey][dz]; -- } -- } -- } -- -- // contract in x -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- const int ex = dx; -- double s = w2[ex][dy][dz]; -- y(dx, dy, dz, e) += s; -- } -- } -- } -- }); --} -- --void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with a vector test space and H^1 trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const NodalTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!"); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!"); -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), -- "Orders do not match!"); -- ne = trial_fes.GetNE(); -- -- const int order = trial_el->GetOrder(); -- dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType()); -- mfem::QuadratureFunctions1D qf1d; -- mfem::IntegrationRule closed_ir; -- closed_ir.SetSize(order + 1); -- qf1d.GaussLobatto(order + 1, &closed_ir); -- mfem::IntegrationRule open_ir; -- open_ir.SetSize(order); -- qf1d.GaussLegendre(order, &open_ir); -- -- maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -- o_dofs1D = maps_O_C->nqpt; -- if (trial_el->GetBasisType() == BasisType::GaussLobatto) -- { -- B_id = true; -- c_dofs1D = maps_O_C->ndof; -- } -- else -- { -- B_id = false; -- maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -- c_dofs1D = maps_C_C->nqpt; -- } --} -- --void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- if (B_id) -- { -- PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -- } -- else -- { -- PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->G, x, y); -- } -- } -- else if (dim == 2) -- { -- if (B_id) -- { -- PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -- } -- else -- { -- PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G, -- x, y); -- } -- } -- else -- { -- mfem_error("Bad dimension!"); -- } --} -- --void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- if (B_id) -- { -- PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -- } -- else -- { -- PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->G, x, y); -- } -- } -- else if (dim == 2) -- { -- if (B_id) -- { -- PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne, -- maps_O_C->G, x, y); -- } -- else -- { -- PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->G, x, y); -- } -- } -- else -- { -- mfem_error("Bad dimension!"); -- } --} -- --static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -- auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- -- auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -- NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<3; ++j) -- { -- w2[j][dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -- } -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bo(ex, dx) * w2[j][dx][ey][ez]; -- } -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- -- // contract in z -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<3; ++j) -- { -- w2[j][dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez]; -- } -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bc(ex, dx) * w2[j][dx][ey][ez]; -- } -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- } -- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- -- // contract in z -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<3; ++j) -- { -- w2[j][dx][ey][ez] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -- } -- } -- } -- } -- } -- -- // contract in x -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bc(ex, dx) * w2[j][dx][ey][ez]; -- } -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- } -- }); --} -- --static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -- -- auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -- NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y, z) -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int j=0; j<3; ++j) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] = 0.0; -- } -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -- const double xv = x(local_index, e) * vk(j, local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] += xv * Bo(ex, dx); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -- } -- } -- } -- } -- } -- -- // contract in z -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- s += w1[j][dx][dy][ez] * Bc(ez, dz); -- } -- y(dx, dy, dz, j, e) += s; -- } -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x, z) -- -- // contract in x -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int j=0; j<3; ++j) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] = 0.0; -- } -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- const double xv = x(local_index, e) * vk(j, local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] += xv * Bc(ex, dx); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy); -- } -- } -- } -- } -- } -- -- // contract in z -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int ez = 0; ez < c_dofs1D; ++ez) -- { -- s += w1[j][dx][dy][ez] * Bc(ez, dz); -- } -- y(dx, dy, dz, j, e) += s; -- } -- } -- } -- } -- -- // dofs that point parallel to z-axis (open in z, closed in x, y) -- -- // contract in x -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int j=0; j<3; ++j) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] = 0.0; -- } -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -- ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -- const double xv = x(local_index, e) * vk(j, local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- w2[j][dx][ey][ez] += xv * Bc(ex, dx); -- } -- } -- } -- } -- } -- -- // contract in y -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<3; ++j) -- { -- w1[j][dx][dy][ez] = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -- } -- } -- } -- } -- } -- -- // contract in z -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int dz = 0; dz < c_dofs1D; ++dz) -- { -- for (int j=0; j<3; ++j) -- { -- double s = 0.0; -- for (int ez = 0; ez < o_dofs1D; ++ez) -- { -- s += w1[j][dx][dy][ez] * Bo(ez, dz); -- } -- y(dx, dy, dz, j, e) += s; -- } -- } -- } -- } -- }); --} -- --static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE); -- auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE); -- -- auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[2][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y) -- -- // contract in y -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e); -- } -- } -- } -- } -- -- // contract in x -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bo(ex, dx) * w[j][dx][ey]; -- } -- const int local_index = ey*o_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x) -- -- // contract in y -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] = 0.0; -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e); -- } -- } -- } -- } -- -- // contract in x -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- s += Bc(ex, dx) * w[j][dx][ey]; -- } -- const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- y(local_index, e) += s * vk(j, local_index, e); -- } -- } -- } -- }); --} -- --static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D, -- const int o_dofs1D, -- const int NE, -- const Array &Bclosed, -- const Array &Bopen, -- const Vector &pa_data, -- const Vector &x_, -- Vector &y_) --{ -- auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -- auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -- -- auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE); -- auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE); -- -- auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -- -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- //constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double w[2][MAX_D1D][MAX_D1D]; -- -- // dofs that point parallel to x-axis (open in x, closed in y) -- -- // contract in x -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -- } -- for (int ex = 0; ex < o_dofs1D; ++ex) -- { -- const int local_index = ey*o_dofs1D + ex; -- const double xd = x(local_index, e); -- -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e); -- } -- } -- } -- } -- -- // contract in y -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int ey = 0; ey < c_dofs1D; ++ey) -- { -- s += w[j][dx][ey] * Bc(ey, dy); -- } -- y(dx, dy, j, e) += s; -- } -- } -- } -- -- // dofs that point parallel to y-axis (open in y, closed in x) -- -- // contract in x -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -- } -- for (int ex = 0; ex < c_dofs1D; ++ex) -- { -- const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -- const double xd = x(local_index, e); -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int j=0; j<2; ++j) -- { -- w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e); -- } -- } -- } -- } -- -- // contract in y -- for (int dx = 0; dx < c_dofs1D; ++dx) -- { -- for (int dy = 0; dy < c_dofs1D; ++dy) -- { -- for (int j=0; j<2; ++j) -- { -- double s = 0.0; -- for (int ey = 0; ey < o_dofs1D; ++ey) -- { -- s += w[j][dx][ey] * Bo(ey, dy); -- } -- y(dx, dy, j, e) += s; -- } -- } -- } -- }); --} -- --void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with a vector test space and H^1 trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const NodalTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -- -- ne = trial_fes.GetNE(); -- -- const int order = trial_el->GetOrder(); -- dofquad_fe = new H1_SegmentElement(order); -- mfem::QuadratureFunctions1D qf1d; -- mfem::IntegrationRule closed_ir; -- closed_ir.SetSize(order + 1); -- qf1d.GaussLobatto(order + 1, &closed_ir); -- mfem::IntegrationRule open_ir; -- open_ir.SetSize(order); -- qf1d.GaussLegendre(order, &open_ir); -- -- maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -- maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -- -- o_dofs1D = maps_O_C->nqpt; -- c_dofs1D = maps_C_C->nqpt; -- MFEM_VERIFY(maps_O_C->ndof == c_dofs1D && -- maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs"); -- -- const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D -- : 2 * c_dofs1D * o_dofs1D; -- -- const IntegrationRule & Nodes = test_el->GetNodes(); -- -- pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType()); -- auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne); -- -- const Array &dofmap = test_el->GetDofMap(); -- -- if (dim == 3) -- { -- // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with -- // the last 3 having negative signs. Here the signs are all positive, as -- // signs are applied in ElementRestriction. -- -- const double tk[9] = { 1.,0.,0., 0.,1.,0., 0.,0.,1. }; -- -- for (int c=0; c<3; ++c) -- { -- for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -- -- for (int e=0; eGetElementTransformation(e); -- tr->SetIntPoint(&Nodes.IntPoint(id)); -- tr->Jacobian().Mult(tk + dof2tk*dim, v); -- -- for (int j=0; j<3; ++j) -- { -- op(j,d,e) = v[j]; -- } -- } -- } -- } -- } -- else // 2D case -- { -- const double tk[4] = { 1.,0., 0.,1. }; -- for (int c=0; c<2; ++c) -- { -- for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -- -- for (int e=0; eGetElementTransformation(e); -- tr->SetIntPoint(&Nodes.IntPoint(id)); -- tr->Jacobian().Mult(tk + dof2tk*dim, v); -- -- for (int j=0; j<2; ++j) -- { -- op(j,d,e) = v[j]; -- } -- } -- } -- } -- } --} -- --void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -- pa_data, x, y); -- } -- else if (dim == 2) -- { -- PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -- pa_data, x, y); -- } -- else -- { -- mfem_error("Bad dimension!"); -- } --} -- --void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- { -- PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->B, pa_data, x, y); -- } -- else if (dim == 2) -- { -- PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -- maps_O_C->B, pa_data, x, y); -- } -- else -- { -- mfem_error("Bad dimension!"); -- } --} -- --template void SmemPAHcurlMassAssembleDiagonal3D<0,0>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --template void SmemPAHcurlMassAssembleDiagonal3D<2,3>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --template void SmemPAHcurlMassAssembleDiagonal3D<3,4>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --template void SmemPAHcurlMassAssembleDiagonal3D<4,5>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --template void SmemPAHcurlMassAssembleDiagonal3D<5,6>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --template void SmemPAHcurlMassApply3D<0,0>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --template void SmemPAHcurlMassApply3D<2,3>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --template void SmemPAHcurlMassApply3D<3,4>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --template void SmemPAHcurlMassApply3D<4,5>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --template void SmemPAHcurlMassApply3D<5,6>(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --} // namespace mfem -diff --git a/fem/bilininteg_mass_pa.cpp b/fem/bilininteg_mass_pa.cpp -deleted file mode 100644 -index 06156d030..000000000 ---- a/fem/bilininteg_mass_pa.cpp -+++ /dev/null -@@ -1,737 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qfunction.hpp" --#include "ceed/integrators/mass/mass.hpp" --#include "bilininteg_mass_pa.hpp" -- --using namespace std; -- --namespace mfem --{ -- --// PA Mass Integrator -- --// PA Mass Assemble kernel -- --void MassIntegrator::AssemblePA(const FiniteElementSpace &fes) --{ -- const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -- Device::GetDeviceMemoryType() : pa_mt; -- -- // Assuming the same element type -- fespace = &fes; -- Mesh *mesh = fes.GetMesh(); -- if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation *T0 = mesh->GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0); -- if (DeviceCanUseCeed()) -- { -- delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPAMassIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q); -- } -- return; -- } -- int map_type = el.GetMapType(); -- dim = mesh->Dimension(); -- ne = fes.GetMesh()->GetNE(); -- nq = ir->GetNPoints(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS, mt); -- maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -- dofs1D = maps->ndof; -- quad1D = maps->nqpt; -- pa_data.SetSize(ne*nq, mt); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -- -- if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); } -- if (dim==2) -- { -- const int NE = ne; -- const int Q1D = quad1D; -- const bool const_c = coeff.Size() == 1; -- const bool by_val = map_type == FiniteElement::VALUE; -- const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D); -- const auto J = Reshape(geom->detJ.Read(), Q1D,Q1D,NE); -- const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) : -- Reshape(coeff.Read(), Q1D,Q1D,NE); -- auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE); -- mfem::forall_2D(NE,Q1D,Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- const double detJ = J(qx,qy,e); -- const double coeff = const_c ? C(0,0,0) : C(qx,qy,e); -- v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ); -- } -- } -- }); -- } -- if (dim==3) -- { -- const int NE = ne; -- const int Q1D = quad1D; -- const bool const_c = coeff.Size() == 1; -- const bool by_val = map_type == FiniteElement::VALUE; -- const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D); -- const auto J = Reshape(geom->detJ.Read(), Q1D,Q1D,Q1D,NE); -- const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) : -- Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE); -- auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE); -- mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- const double detJ = J(qx,qy,qz,e); -- const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e); -- v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * (by_val ? detJ : 1.0/detJ); -- } -- } -- } -- }); -- } --} -- --template --static void PAMassAssembleDiagonal2D(const int NE, -- const Array &b, -- const Vector &d, -- Vector &y, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- MFEM_VERIFY(D1D <= MAX_D1D, ""); -- MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -- auto B = Reshape(b.Read(), Q1D, D1D); -- auto D = Reshape(d.Read(), Q1D, Q1D, NE); -- auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- double QD[MQ1][MD1]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- QD[qx][dy] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e); -- } -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy]; -- } -- } -- } -- }); --} -- --template --static void SmemPAMassAssembleDiagonal2D(const int NE, -- const Array &b_, -- const Vector &d_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int NBZ = T_NBZ ? T_NBZ : 1; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- MFEM_VERIFY(D1D <= MD1, ""); -- MFEM_VERIFY(Q1D <= MQ1, ""); -- auto b = Reshape(b_.Read(), Q1D, D1D); -- auto D = Reshape(d_.Read(), Q1D, Q1D, NE); -- auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE); -- mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e) -- { -- const int tidz = MFEM_THREAD_ID(z); -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int NBZ = T_NBZ ? T_NBZ : 1; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- MFEM_SHARED double B[MQ1][MD1]; -- MFEM_SHARED double QDZ[NBZ][MQ1][MD1]; -- double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz); -- if (tidz == 0) -- { -- MFEM_FOREACH_THREAD(d,y,D1D) -- { -- MFEM_FOREACH_THREAD(q,x,Q1D) -- { -- B[q][d] = b(q,d); -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- QD[qx][dy] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e); -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- // might need absolute values on next line -- Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy]; -- } -- } -- } -- }); --} -- --template --static void PAMassAssembleDiagonal3D(const int NE, -- const Array &b, -- const Vector &d, -- Vector &y, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- MFEM_VERIFY(D1D <= MAX_D1D, ""); -- MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -- auto B = Reshape(b.Read(), Q1D, D1D); -- auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE); -- auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- double QQD[MQ1][MQ1][MD1]; -- double QDD[MQ1][MD1][MD1]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- QQD[qx][qy][dz] = 0.0; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e); -- } -- } -- } -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- QDD[qx][dy][dz] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz]; -- } -- } -- } -- } -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- double t = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz]; -- } -- Y(dx, dy, dz, e) += t; -- } -- } -- } -- }); --} -- --template --static void SmemPAMassAssembleDiagonal3D(const int NE, -- const Array &b_, -- const Vector &d_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- MFEM_VERIFY(D1D <= MD1, ""); -- MFEM_VERIFY(Q1D <= MQ1, ""); -- auto b = Reshape(b_.Read(), Q1D, D1D); -- auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE); -- auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE); -- mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- const int tidz = MFEM_THREAD_ID(z); -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- MFEM_SHARED double B[MQ1][MD1]; -- MFEM_SHARED double QQD[MQ1][MQ1][MD1]; -- MFEM_SHARED double QDD[MQ1][MD1][MD1]; -- if (tidz == 0) -- { -- MFEM_FOREACH_THREAD(d,y,D1D) -- { -- MFEM_FOREACH_THREAD(q,x,Q1D) -- { -- B[q][d] = b(q,d); -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(dz,z,D1D) -- { -- QQD[qx][qy][dz] = 0.0; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e); -- } -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(dz,z,D1D) -- { -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- QDD[qx][dy][dz] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz]; -- } -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dz,z,D1D) -- { -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- double t = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz]; -- } -- Y(dx, dy, dz, e) += t; -- } -- } -- } -- }); --} -- --static void PAMassAssembleDiagonal(const int dim, const int D1D, -- const int Q1D, const int NE, -- const Array &B, -- const Vector &D, -- Vector &Y) --{ -- if (dim == 2) -- { -- switch ((D1D << 4 ) | Q1D) -- { -- case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y); -- case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y); -- case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y); -- case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y); -- case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y); -- case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y); -- case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y); -- case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y); -- default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D); -- } -- } -- else if (dim == 3) -- { -- switch ((D1D << 4 ) | Q1D) -- { -- case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y); -- case 0x24: return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y); -- case 0x26: return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y); -- case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y); -- case 0x35: return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y); -- case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y); -- case 0x48: return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y); -- case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y); -- case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y); -- case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y); -- case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y); -- default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D); -- } -- } -- MFEM_ABORT("Unknown kernel."); --} -- --void MassIntegrator::AssembleDiagonalPA(Vector &diag) --{ -- if (DeviceCanUseCeed()) -- { -- ceedOp->GetDiagonal(diag); -- } -- else -- { -- PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, diag); -- } --} -- -- --#ifdef MFEM_USE_OCCA --// OCCA PA Mass Apply 2D kernel --static void OccaPAMassApply2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &Bt, -- const Vector &D, -- const Vector &X, -- Vector &Y) --{ -- occa::properties props; -- props["defines/D1D"] = D1D; -- props["defines/Q1D"] = Q1D; -- const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size()); -- const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size()); -- const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size()); -- const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size()); -- occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size()); -- const occa_id_t id = std::make_pair(D1D,Q1D); -- if (!Device::Allows(Backend::OCCA_CUDA)) -- { -- static occa_kernel_t OccaMassApply2D_cpu; -- if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end()) -- { -- const occa::kernel MassApply2D_CPU = -- mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -- "MassApply2D_CPU", props); -- OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU); -- } -- OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -- } -- else -- { -- static occa_kernel_t OccaMassApply2D_gpu; -- if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end()) -- { -- const occa::kernel MassApply2D_GPU = -- mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -- "MassApply2D_GPU", props); -- OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU); -- } -- OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -- } --} -- --// OCCA PA Mass Apply 3D kernel --static void OccaPAMassApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &Bt, -- const Vector &D, -- const Vector &X, -- Vector &Y) --{ -- occa::properties props; -- props["defines/D1D"] = D1D; -- props["defines/Q1D"] = Q1D; -- const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size()); -- const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size()); -- const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size()); -- const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size()); -- occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size()); -- const occa_id_t id = std::make_pair(D1D,Q1D); -- if (!Device::Allows(Backend::OCCA_CUDA)) -- { -- static occa_kernel_t OccaMassApply3D_cpu; -- if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end()) -- { -- const occa::kernel MassApply3D_CPU = -- mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -- "MassApply3D_CPU", props); -- OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU); -- } -- OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -- } -- else -- { -- static occa_kernel_t OccaMassApply3D_gpu; -- if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end()) -- { -- const occa::kernel MassApply3D_GPU = -- mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -- "MassApply3D_GPU", props); -- OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU); -- } -- OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -- } --} --#endif // MFEM_USE_OCCA -- --template --static void PAMassApply2D(const int NE, -- const Array &b_, -- const Array &bt_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, ""); -- MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, ""); -- -- const auto B = b_.Read(); -- const auto Bt = bt_.Read(); -- const auto D = d_.Read(); -- const auto X = x_.Read(); -- auto Y = y_.ReadWrite(); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- internal::PAMassApply2D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d); -- }); --} -- --template --static void SmemPAMassApply2D(const int NE, -- const Array &b_, -- const Array &bt_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- MFEM_CONTRACT_VAR(bt_); -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int NBZ = T_NBZ ? T_NBZ : 1; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- MFEM_VERIFY(D1D <= MD1, ""); -- MFEM_VERIFY(Q1D <= MQ1, ""); -- const auto b = b_.Read(); -- const auto D = d_.Read(); -- const auto x = x_.Read(); -- auto Y = y_.ReadWrite(); -- mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e) -- { -- internal::SmemPAMassApply2D_Element(e, NE, b, D, x, Y, d1d, -- q1d); -- }); --} -- --template --static void PAMassApply3D(const int NE, -- const Array &b_, -- const Array &bt_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, ""); -- MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, ""); -- -- const auto B = b_.Read(); -- const auto Bt = bt_.Read(); -- const auto D = d_.Read(); -- const auto X = x_.Read(); -- auto Y = y_.ReadWrite(); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- internal::PAMassApply3D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d); -- }); --} -- --template --static void SmemPAMassApply3D(const int NE, -- const Array &b_, -- const Array &bt_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- MFEM_CONTRACT_VAR(bt_); -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int M1D = T_D1D ? T_D1D : MAX_D1D; -- MFEM_VERIFY(D1D <= M1D, ""); -- MFEM_VERIFY(Q1D <= M1Q, ""); -- auto b = b_.Read(); -- auto d = d_.Read(); -- auto x = x_.Read(); -- auto y = y_.ReadWrite(); -- mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- internal::SmemPAMassApply3D_Element(e, NE, b, d, x, y, d1d, q1d); -- }); --} -- --static void PAMassApply(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &Bt, -- const Vector &D, -- const Vector &X, -- Vector &Y) --{ --#ifdef MFEM_USE_OCCA -- if (DeviceCanUseOcca()) -- { -- if (dim == 2) -- { -- return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y); -- } -- if (dim == 3) -- { -- return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y); -- } -- MFEM_ABORT("OCCA PA Mass Apply unknown kernel!"); -- } --#endif // MFEM_USE_OCCA -- const int id = (D1D << 4) | Q1D; -- -- if (dim == 2) -- { -- switch (id) -- { -- case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y); -- case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y); -- case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y); -- case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y); -- case 0x35: return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y); -- case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y); -- case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y); -- case 0x46: return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y); -- case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y); -- case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y); -- case 0x57: return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y); -- case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y); -- case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y); -- case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y); -- case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y); -- case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y); -- default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D); -- } -- } -- else if (dim == 3) -- { -- switch (id) -- { -- case 0x22: return SmemPAMassApply3D<2,2>(NE,B,Bt,D,X,Y); -- case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y); -- case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y); -- case 0x26: return SmemPAMassApply3D<2,6>(NE,B,Bt,D,X,Y); -- case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y); -- case 0x35: return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y); -- case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y); -- case 0x37: return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y); -- case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y); -- case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y); -- case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y); -- case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y); -- case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y); -- case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y); -- case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y); -- case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y); -- case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y); -- default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D); -- } -- } -- mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl; -- MFEM_ABORT("Unknown kernel."); --} -- --void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (DeviceCanUseCeed()) -- { -- ceedOp->AddMult(x, y); -- } -- else -- { -- PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y); -- } --} -- --void MassIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const --{ -- // Mass integrator is symmetric -- AddMultPA(x, y); --} -- --} // namespace mfem -diff --git a/fem/bilininteg_mass_pa.hpp b/fem/bilininteg_mass_pa.hpp -deleted file mode 100644 -index 73c8892e1..000000000 ---- a/fem/bilininteg_mass_pa.hpp -+++ /dev/null -@@ -1,632 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#ifndef MFEM_BILININTEG_MASS_PA_HPP --#define MFEM_BILININTEG_MASS_PA_HPP -- --#include "../config/config.hpp" --#include "../general/forall.hpp" --#include "../linalg/dtensor.hpp" -- --namespace mfem --{ -- --namespace internal --{ -- --template --MFEM_HOST_DEVICE inline --void PAMassApply2D_Element(const int e, -- const int NE, -- const double *b_, -- const double *bt_, -- const double *d_, -- const double *x_, -- double *y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = d1d; -- const int Q1D = q1d; -- auto B = ConstDeviceMatrix(b_, Q1D, D1D); -- auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D); -- auto D = ConstDeviceCube(d_, Q1D, Q1D, NE); -- auto X = ConstDeviceCube(x_, D1D, D1D, NE); -- auto Y = DeviceCube(y_, D1D, D1D, NE); -- -- if (!ACCUMULATE) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- Y(dx, dy, e) = 0.0; -- } -- } -- } -- -- constexpr int max_D1D = MAX_D1D; -- constexpr int max_Q1D = MAX_Q1D; -- double sol_xy[max_Q1D][max_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xy[qy][qx] = 0.0; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- double sol_x[max_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- sol_x[qy] = 0.0; -- } -- for (int dx = 0; dx < D1D; ++dx) -- { -- const double s = X(dx,dy,e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_x[qx] += B(qx,dx)* s; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double d2q = B(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xy[qy][qx] += d2q * sol_x[qx]; -- } -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xy[qy][qx] *= D(qx,qy,e); -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double sol_x[max_D1D]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- sol_x[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double s = sol_xy[qy][qx]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- sol_x[dx] += Bt(dx,qx) * s; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- const double q2d = Bt(dy,qy); -- for (int dx = 0; dx < D1D; ++dx) -- { -- Y(dx,dy,e) += q2d * sol_x[dx]; -- } -- } -- } --} -- --template --MFEM_HOST_DEVICE inline --void SmemPAMassApply2D_Element(const int e, -- const int NE, -- const double *b_, -- const double *d_, -- const double *x_, -- double *y_, -- int d1d = 0, -- int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int NBZ = T_NBZ ? T_NBZ : 1; -- -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1; -- -- auto b = ConstDeviceMatrix(b_, Q1D, D1D); -- auto D = ConstDeviceCube(d_, Q1D, Q1D, NE); -- auto x = ConstDeviceCube(x_, D1D, D1D, NE); -- auto Y = DeviceCube(y_, D1D, D1D, NE); -- -- const int tidz = MFEM_THREAD_ID(z); -- -- MFEM_SHARED double BBt[MQ1*MD1]; -- double (*B)[MD1] = (double (*)[MD1]) BBt; -- double (*Bt)[MQ1] = (double (*)[MQ1]) BBt; -- MFEM_SHARED double sm0[NBZ][MDQ*MDQ]; -- MFEM_SHARED double sm1[NBZ][MDQ*MDQ]; -- double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz); -- double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz); -- double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz); -- double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz); -- -- -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- X[dy][dx] = x(dx,dy,e); -- } -- } -- if (tidz == 0) -- { -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(q,x,Q1D) -- { -- B[q][dy] = b(q,dy); -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- double dq = 0.0; -- for (int dx = 0; dx < D1D; ++dx) -- { -- dq += X[dy][dx] * B[qx][dx]; -- } -- DQ[dy][qx] = dq; -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- double qq = 0.0; -- for (int dy = 0; dy < D1D; ++dy) -- { -- qq += DQ[dy][qx] * B[qy][dy]; -- } -- QQ[qy][qx] = qq * D(qx, qy, e); -- } -- } -- MFEM_SYNC_THREAD; -- if (tidz == 0) -- { -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(q,x,Q1D) -- { -- Bt[dy][q] = b(q,dy); -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- double dq = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- dq += QQ[qy][qx] * Bt[dx][qx]; -- } -- QD[qy][dx] = dq; -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- double dd = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- dd += (QD[qy][dx] * Bt[dy][qy]); -- } -- if (ACCUMULATE) -- { -- Y(dx, dy, e) += dd; -- } -- else -- { -- Y(dx, dy, e) = dd; -- } -- } -- } --} -- --template --MFEM_HOST_DEVICE inline --void PAMassApply3D_Element(const int e, -- const int NE, -- const double *b_, -- const double *bt_, -- const double *d_, -- const double *x_, -- double *y_, -- const int d1d, -- const int q1d) --{ -- const int D1D = d1d; -- const int Q1D = q1d; -- auto B = ConstDeviceMatrix(b_, Q1D, D1D); -- auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D); -- auto D = DeviceTensor<4,const double>(d_, Q1D, Q1D, Q1D, NE); -- auto X = DeviceTensor<4,const double>(x_, D1D, D1D, D1D, NE); -- auto Y = DeviceTensor<4,double>(y_, D1D, D1D, D1D, NE); -- -- if (!ACCUMULATE) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- Y(dx, dy, dz, e) = 0.0; -- } -- } -- } -- } -- -- constexpr int max_D1D = MAX_D1D; -- constexpr int max_Q1D = MAX_Q1D; -- double sol_xyz[max_Q1D][max_Q1D][max_Q1D]; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xyz[qz][qy][qx] = 0.0; -- } -- } -- } -- for (int dz = 0; dz < D1D; ++dz) -- { -- double sol_xy[max_Q1D][max_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xy[qy][qx] = 0.0; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- double sol_x[max_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_x[qx] = 0; -- } -- for (int dx = 0; dx < D1D; ++dx) -- { -- const double s = X(dx,dy,dz,e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_x[qx] += B(qx,dx) * s; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = B(qy,dy); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xy[qy][qx] += wy * sol_x[qx]; -- } -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = B(qz,dz); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx]; -- } -- } -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e); -- } -- } -- } -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double sol_xy[max_D1D][max_D1D]; -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- sol_xy[dy][dx] = 0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double sol_x[max_D1D]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- sol_x[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double s = sol_xyz[qz][qy][qx]; -- for (int dx = 0; dx < D1D; ++dx) -- { -- sol_x[dx] += Bt(dx,qx) * s; -- } -- } -- for (int dy = 0; dy < D1D; ++dy) -- { -- const double wy = Bt(dy,qy); -- for (int dx = 0; dx < D1D; ++dx) -- { -- sol_xy[dy][dx] += wy * sol_x[dx]; -- } -- } -- } -- for (int dz = 0; dz < D1D; ++dz) -- { -- const double wz = Bt(dz,qz); -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- Y(dx,dy,dz,e) += wz * sol_xy[dy][dx]; -- } -- } -- } -- } --} -- --template --MFEM_HOST_DEVICE inline --void SmemPAMassApply3D_Element(const int e, -- const int NE, -- const double *b_, -- const double *d_, -- const double *x_, -- double *y_, -- const int d1d = 0, -- const int q1d = 0) --{ -- constexpr int D1D = T_D1D ? T_D1D : d1d; -- constexpr int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1; -- -- auto b = ConstDeviceMatrix(b_, Q1D, D1D); -- auto d = DeviceTensor<4,const double>(d_, Q1D, Q1D, Q1D, NE); -- auto x = DeviceTensor<4,const double>(x_, D1D, D1D, D1D, NE); -- auto y = DeviceTensor<4,double>(y_, D1D, D1D, D1D, NE); -- -- MFEM_SHARED double sDQ[MQ1*MD1]; -- double (*B)[MD1] = (double (*)[MD1]) sDQ; -- double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ; -- MFEM_SHARED double sm0[MDQ*MDQ*MDQ]; -- MFEM_SHARED double sm1[MDQ*MDQ*MDQ]; -- double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0; -- double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1; -- double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0; -- double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1; -- double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0; -- double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- X[dz][dy][dx] = x(dx,dy,dz,e); -- } -- } -- MFEM_FOREACH_THREAD(dx,x,Q1D) -- { -- B[dx][dy] = b(dx,dy); -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- double u[D1D]; -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; dz++) -- { -- u[dz] = 0; -- } -- MFEM_UNROLL(MD1) -- for (int dx = 0; dx < D1D; ++dx) -- { -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- u[dz] += X[dz][dy][dx] * B[qx][dx]; -- } -- } -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- DDQ[dz][dy][qx] = u[dz]; -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- double u[D1D]; -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; dz++) -- { -- u[dz] = 0; -- } -- MFEM_UNROLL(MD1) -- for (int dy = 0; dy < D1D; ++dy) -- { -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; dz++) -- { -- u[dz] += DDQ[dz][dy][qx] * B[qy][dy]; -- } -- } -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; dz++) -- { -- DQQ[dz][qy][qx] = u[dz]; -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- double u[Q1D]; -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; qz++) -- { -- u[qz] = 0; -- } -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; qz++) -- { -- u[qz] += DQQ[dz][qy][qx] * B[qz][dz]; -- } -- } -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; qz++) -- { -- QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e); -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(di,y,D1D) -- { -- MFEM_FOREACH_THREAD(q,x,Q1D) -- { -- Bt[di][q] = b(q,di); -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- double u[Q1D]; -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- u[qz] = 0; -- } -- MFEM_UNROLL(MQ1) -- for (int qx = 0; qx < Q1D; ++qx) -- { -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx]; -- } -- } -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- QQD[qz][qy][dx] = u[qz]; -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- double u[Q1D]; -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- u[qz] = 0; -- } -- MFEM_UNROLL(MQ1) -- for (int qy = 0; qy < Q1D; ++qy) -- { -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- u[qz] += QQD[qz][qy][dx] * Bt[dy][qy]; -- } -- } -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- QDD[qz][dy][dx] = u[qz]; -- } -- } -- } -- MFEM_SYNC_THREAD; -- MFEM_FOREACH_THREAD(dy,y,D1D) -- { -- MFEM_FOREACH_THREAD(dx,x,D1D) -- { -- double u[D1D]; -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- u[dz] = 0; -- } -- MFEM_UNROLL(MQ1) -- for (int qz = 0; qz < Q1D; ++qz) -- { -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- u[dz] += QDD[qz][dy][dx] * Bt[dz][qz]; -- } -- } -- MFEM_UNROLL(MD1) -- for (int dz = 0; dz < D1D; ++dz) -- { -- if (ACCUMULATE) -- { -- y(dx,dy,dz,e) += u[dz]; -- } -- else -- { -- y(dx,dy,dz,e) = u[dz]; -- } -- } -- } -- } -- MFEM_SYNC_THREAD; --} -- --} // namespace internal -- --} // namespace mfem -- --#endif -diff --git a/fem/bilininteg_transpose_ea.cpp b/fem/bilininteg_transpose_ea.cpp -deleted file mode 100644 -index bea53f1b3..000000000 ---- a/fem/bilininteg_transpose_ea.cpp -+++ /dev/null -@@ -1,186 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#include "../general/forall.hpp" --#include "bilininteg.hpp" -- --namespace mfem --{ -- --void TransposeIntegrator::AssembleEA(const FiniteElementSpace &fes, -- Vector &ea_data, const bool add) --{ -- if (add) -- { -- Vector ea_data_tmp(ea_data.Size()); -- bfi->AssembleEA(fes, ea_data_tmp, false); -- const int ne = fes.GetNE(); -- if (ne == 0) { return; } -- const int dofs = fes.GetFE(0)->GetDof(); -- auto A = Reshape(ea_data_tmp.Read(), dofs, dofs, ne); -- auto AT = Reshape(ea_data.ReadWrite(), dofs, dofs, ne); -- mfem::forall(ne, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int i = 0; i < dofs; i++) -- { -- for (int j = 0; j < dofs; j++) -- { -- const double a = A(i, j, e); -- AT(j, i, e) += a; -- } -- } -- }); -- } -- else -- { -- bfi->AssembleEA(fes, ea_data, false); -- const int ne = fes.GetNE(); -- if (ne == 0) { return; } -- const int dofs = fes.GetFE(0)->GetDof(); -- auto A = Reshape(ea_data.ReadWrite(), dofs, dofs, ne); -- mfem::forall(ne, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int i = 0; i < dofs; i++) -- { -- for (int j = i+1; j < dofs; j++) -- { -- const double aij = A(i, j, e); -- const double aji = A(j, i, e); -- A(j, i, e) = aij; -- A(i, j, e) = aji; -- } -- } -- }); -- } --} -- --void TransposeIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace& fes, -- Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add) --{ -- const int nf = fes.GetNFbyType(FaceType::Interior); -- if (nf == 0) { return; } -- if (add) -- { -- Vector ea_data_int_tmp(ea_data_int.Size()); -- Vector ea_data_ext_tmp(ea_data_ext.Size()); -- bfi->AssembleEAInteriorFaces(fes, ea_data_int_tmp, ea_data_ext_tmp, false); -- const int faceDofs = fes.GetTraceElement(0, -- fes.GetMesh()->GetFaceGeometry(0))->GetDof(); -- auto A_int = Reshape(ea_data_int_tmp.Read(), faceDofs, faceDofs, 2, nf); -- auto A_ext = Reshape(ea_data_ext_tmp.Read(), faceDofs, faceDofs, 2, nf); -- auto AT_int = Reshape(ea_data_int.ReadWrite(), faceDofs, faceDofs, 2, nf); -- auto AT_ext = Reshape(ea_data_ext.ReadWrite(), faceDofs, faceDofs, 2, nf); -- mfem::forall(nf, [=] MFEM_HOST_DEVICE (int f) -- { -- for (int i = 0; i < faceDofs; i++) -- { -- for (int j = 0; j < faceDofs; j++) -- { -- const double a_int0 = A_int(i, j, 0, f); -- const double a_int1 = A_int(i, j, 1, f); -- const double a_ext0 = A_ext(i, j, 0, f); -- const double a_ext1 = A_ext(i, j, 1, f); -- AT_int(j, i, 0, f) += a_int0; -- AT_int(j, i, 1, f) += a_int1; -- AT_ext(j, i, 0, f) += a_ext1; -- AT_ext(j, i, 1, f) += a_ext0; -- } -- } -- }); -- } -- else -- { -- bfi->AssembleEAInteriorFaces(fes, ea_data_int, ea_data_ext, false); -- const int faceDofs = fes.GetTraceElement(0, -- fes.GetMesh()->GetFaceGeometry(0))->GetDof(); -- auto A_int = Reshape(ea_data_int.ReadWrite(), faceDofs, faceDofs, 2, nf); -- auto A_ext = Reshape(ea_data_ext.ReadWrite(), faceDofs, faceDofs, 2, nf); -- mfem::forall(nf, [=] MFEM_HOST_DEVICE (int f) -- { -- for (int i = 0; i < faceDofs; i++) -- { -- for (int j = i+1; j < faceDofs; j++) -- { -- const double aij_int0 = A_int(i, j, 0, f); -- const double aij_int1 = A_int(i, j, 1, f); -- const double aji_int0 = A_int(j, i, 0, f); -- const double aji_int1 = A_int(j, i, 1, f); -- A_int(j, i, 0, f) = aij_int0; -- A_int(j, i, 1, f) = aij_int1; -- A_int(i, j, 0, f) = aji_int0; -- A_int(i, j, 1, f) = aji_int1; -- } -- } -- for (int i = 0; i < faceDofs; i++) -- { -- for (int j = 0; j < faceDofs; j++) -- { -- const double aij_ext0 = A_ext(i, j, 0, f); -- const double aji_ext1 = A_ext(j, i, 1, f); -- A_ext(j, i, 1, f) = aij_ext0; -- A_ext(i, j, 0, f) = aji_ext1; -- } -- } -- }); -- } --} -- --void TransposeIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace& fes, -- Vector &ea_data_bdr, -- const bool add) --{ -- const int nf = fes.GetNFbyType(FaceType::Boundary); -- if (nf == 0) { return; } -- if (add) -- { -- Vector ea_data_bdr_tmp(ea_data_bdr.Size()); -- bfi->AssembleEABoundaryFaces(fes, ea_data_bdr_tmp, false); -- const int faceDofs = fes.GetTraceElement(0, -- fes.GetMesh()->GetFaceGeometry(0))->GetDof(); -- auto A_bdr = Reshape(ea_data_bdr_tmp.Read(), faceDofs, faceDofs, nf); -- auto AT_bdr = Reshape(ea_data_bdr.ReadWrite(), faceDofs, faceDofs, nf); -- mfem::forall(nf, [=] MFEM_HOST_DEVICE (int f) -- { -- for (int i = 0; i < faceDofs; i++) -- { -- for (int j = 0; j < faceDofs; j++) -- { -- const double a_bdr = A_bdr(i, j, f); -- AT_bdr(j, i, f) += a_bdr; -- } -- } -- }); -- } -- else -- { -- bfi->AssembleEABoundaryFaces(fes, ea_data_bdr, false); -- const int faceDofs = fes.GetTraceElement(0, -- fes.GetMesh()->GetFaceGeometry(0))->GetDof(); -- auto A_bdr = Reshape(ea_data_bdr.ReadWrite(), faceDofs, faceDofs, nf); -- mfem::forall(nf, [=] MFEM_HOST_DEVICE (int f) -- { -- for (int i = 0; i < faceDofs; i++) -- { -- for (int j = i+1; j < faceDofs; j++) -- { -- const double aij_bdr = A_bdr(i, j, f); -- const double aji_bdr = A_bdr(j, i, f); -- A_bdr(j, i, f) = aij_bdr; -- A_bdr(i, j, f) = aji_bdr; -- } -- } -- }); -- } --} -- --} -diff --git a/fem/bilininteg_vectorfe.cpp b/fem/bilininteg_vectorfe.cpp -deleted file mode 100644 -index 4c2180cf5..000000000 ---- a/fem/bilininteg_vectorfe.cpp -+++ /dev/null -@@ -1,1144 +0,0 @@ --// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced --// at the Lawrence Livermore National Laboratory. All Rights reserved. See files --// LICENSE and NOTICE for details. LLNL-CODE-806117. --// --// This file is part of the MFEM library. For more information and source code --// availability visit https://mfem.org. --// --// MFEM is free software; you can redistribute it and/or modify it under the --// terms of the BSD-3 license. We welcome feedback and contributions, see file --// CONTRIBUTING.md for details. -- --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "qspace.hpp" --#include "gridfunc.hpp" -- --namespace mfem --{ -- --void PADiffusionSetup3D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- const Vector &coeff_, -- Vector &op); -- --void PAHcurlMassAssembleDiagonal2D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --void PAHcurlMassAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --template --void SmemPAHcurlMassAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Vector &pa_data, -- Vector &diag); -- --void PAHcurlMassApply2D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --void PAHcurlMassApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --template --void SmemPAHcurlMassApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &bo, -- const Array &bc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --void PAHdivSetup2D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff_, -- Vector &op); -- --void PAHdivSetup3D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff_, -- Vector &op); -- --void PAHcurlH1Apply2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &gc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --void PAHcurlH1ApplyTranspose2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &bo, -- const Array &bct, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --void PAHcurlH1Apply3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &gc, -- const Array &bot, -- const Array &bct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --void PAHcurlH1ApplyTranspose3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &bc, -- const Array &bo, -- const Array &bct, -- const Array &gct, -- const Vector &pa_data, -- const Vector &x, -- Vector &y); -- --void PAHdivMassAssembleDiagonal2D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &Bo_, -- const Array &Bc_, -- const Vector &op_, -- Vector &diag_); -- --void PAHdivMassAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &Bo_, -- const Array &Bc_, -- const Vector &op_, -- Vector &diag_); -- --void PAHdivMassApply(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &Bo, -- const Array &Bc, -- const Array &Bot, -- const Array &Bct, -- const Vector &op, -- const Vector &x, -- Vector &y); -- --void PAHcurlL2Setup(const int NQ, -- const int coeffDim, -- const int NE, -- const Array &w, -- Vector &coeff_, -- Vector &op); -- --// PA H(curl) x H(div) mass assemble 3D kernel, with factor --// dF^{-1} C dF for a vector or matrix coefficient C. --// If transpose, use dF^T C dF^{-T} for H(div) x H(curl). --void PAHcurlHdivSetup3D(const int Q1D, -- const int coeffDim, -- const int NE, -- const bool transpose, -- const Array &w_, -- const Vector &j, -- Vector &coeff_, -- Vector &op) --{ -- const bool symmetric = (coeffDim != 9); -- auto W = Reshape(w_.Read(), Q1D, Q1D, Q1D); -- auto J = Reshape(j.Read(), Q1D, Q1D, Q1D, 3, 3, NE); -- auto coeff = Reshape(coeff_.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -- auto y = Reshape(op.Write(), 9, Q1D, Q1D, Q1D, NE); -- -- const int i11 = 0; -- const int i12 = transpose ? 3 : 1; -- const int i13 = transpose ? 6 : 2; -- const int i21 = transpose ? 1 : 3; -- const int i22 = 4; -- const int i23 = transpose ? 7 : 5; -- const int i31 = transpose ? 2 : 6; -- const int i32 = transpose ? 5 : 7; -- const int i33 = 8; -- -- mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- MFEM_FOREACH_THREAD(qz,z,Q1D) -- { -- const double J11 = J(qx,qy,qz,0,0,e); -- const double J21 = J(qx,qy,qz,1,0,e); -- const double J31 = J(qx,qy,qz,2,0,e); -- const double J12 = J(qx,qy,qz,0,1,e); -- const double J22 = J(qx,qy,qz,1,1,e); -- const double J32 = J(qx,qy,qz,2,1,e); -- const double J13 = J(qx,qy,qz,0,2,e); -- const double J23 = J(qx,qy,qz,1,2,e); -- const double J33 = J(qx,qy,qz,2,2,e); -- const double detJ = J11 * (J22 * J33 - J32 * J23) - -- J21 * (J12 * J33 - J32 * J13) + -- J31 * (J12 * J23 - J22 * J13); -- const double w_detJ = W(qx,qy,qz) / detJ; -- // adj(J) -- const double A11 = (J22 * J33) - (J23 * J32); -- const double A12 = (J32 * J13) - (J12 * J33); -- const double A13 = (J12 * J23) - (J22 * J13); -- const double A21 = (J31 * J23) - (J21 * J33); -- const double A22 = (J11 * J33) - (J13 * J31); -- const double A23 = (J21 * J13) - (J11 * J23); -- const double A31 = (J21 * J32) - (J31 * J22); -- const double A32 = (J31 * J12) - (J11 * J32); -- const double A33 = (J11 * J22) - (J12 * J21); -- -- if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version -- { -- // First compute entries of R = M^T J -- const double M11 = (!symmetric) ? coeff(i11,qx,qy,qz,e) : coeff(0,qx,qy,qz,e); -- const double M12 = (!symmetric) ? coeff(i12,qx,qy,qz,e) : coeff(1,qx,qy,qz,e); -- const double M13 = (!symmetric) ? coeff(i13,qx,qy,qz,e) : coeff(2,qx,qy,qz,e); -- const double M21 = (!symmetric) ? coeff(i21,qx,qy,qz,e) : M12; -- const double M22 = (!symmetric) ? coeff(i22,qx,qy,qz,e) : coeff(3,qx,qy,qz,e); -- const double M23 = (!symmetric) ? coeff(i23,qx,qy,qz,e) : coeff(4,qx,qy,qz,e); -- const double M31 = (!symmetric) ? coeff(i31,qx,qy,qz,e) : M13; -- const double M32 = (!symmetric) ? coeff(i32,qx,qy,qz,e) : M23; -- const double M33 = (!symmetric) ? coeff(i33,qx,qy,qz,e) : coeff(5,qx,qy,qz,e); -- -- const double R11 = M11*J11 + M21*J21 + M31*J31; -- const double R12 = M11*J12 + M21*J22 + M31*J32; -- const double R13 = M11*J13 + M21*J23 + M31*J33; -- const double R21 = M12*J11 + M22*J21 + M32*J31; -- const double R22 = M12*J12 + M22*J22 + M32*J32; -- const double R23 = M12*J13 + M22*J23 + M32*J33; -- const double R31 = M13*J11 + M23*J21 + M33*J31; -- const double R32 = M13*J12 + M23*J22 + M33*J32; -- const double R33 = M13*J13 + M23*J23 + M33*J33; -- -- // y = (J^{-1} M^T J)^T -- y(i11,qx,qy,qz,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31); // 1,1 -- y(i21,qx,qy,qz,e) = w_detJ * (A11*R12 + A12*R22 + A13*R32); // 1,2 -- y(i31,qx,qy,qz,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33); // 1,3 -- y(i12,qx,qy,qz,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31); // 2,1 -- y(i22,qx,qy,qz,e) = w_detJ * (A21*R12 + A22*R22 + A23*R32); // 2,2 -- y(i32,qx,qy,qz,e) = w_detJ * (A21*R13 + A22*R23 + A23*R33); // 2,3 -- y(i13,qx,qy,qz,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31); // 3,1 -- y(i23,qx,qy,qz,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32); // 3,2 -- y(i33,qx,qy,qz,e) = w_detJ * (A31*R13 + A32*R23 + A33*R33); // 3,3 -- } -- else if (coeffDim == 3) // Vector coefficient version -- { -- const double D1 = coeff(0,qx,qy,qz,e); -- const double D2 = coeff(1,qx,qy,qz,e); -- const double D3 = coeff(2,qx,qy,qz,e); -- // detJ J^{-1} DJ = adj(J) DJ -- // transpose -- y(i11,qx,qy,qz,e) = w_detJ * (D1*A11*J11 + D2*A12*J21 + D3*A13*J31); // 1,1 -- y(i21,qx,qy,qz,e) = w_detJ * (D1*A11*J12 + D2*A12*J22 + D3*A13*J32); // 1,2 -- y(i31,qx,qy,qz,e) = w_detJ * (D1*A11*J13 + D2*A12*J23 + D3*A13*J33); // 1,3 -- y(i12,qx,qy,qz,e) = w_detJ * (D1*A21*J11 + D2*A22*J21 + D3*A23*J31); // 2,1 -- y(i22,qx,qy,qz,e) = w_detJ * (D1*A21*J12 + D2*A22*J22 + D3*A23*J32); // 2,2 -- y(i32,qx,qy,qz,e) = w_detJ * (D1*A21*J13 + D2*A22*J23 + D3*A23*J33); // 2,3 -- y(i13,qx,qy,qz,e) = w_detJ * (D1*A31*J11 + D2*A32*J21 + D3*A33*J31); // 3,1 -- y(i23,qx,qy,qz,e) = w_detJ * (D1*A31*J12 + D2*A32*J22 + D3*A33*J32); // 3,2 -- y(i33,qx,qy,qz,e) = w_detJ * (D1*A31*J13 + D2*A32*J23 + D3*A33*J33); // 3,3 -- } -- } -- } -- } -- }); --} -- --// PA H(curl) x H(div) mass assemble 2D kernel, with factor --// dF^{-1} C dF for a vector or matrix coefficient C. --// If transpose, use dF^T C dF^{-T} for H(div) x H(curl). --void PAHcurlHdivSetup2D(const int Q1D, -- const int coeffDim, -- const int NE, -- const bool transpose, -- const Array &w_, -- const Vector &j, -- Vector &coeff_, -- Vector &op) --{ -- const bool symmetric = (coeffDim != 4); -- auto W = Reshape(w_.Read(), Q1D, Q1D); -- auto J = Reshape(j.Read(), Q1D, Q1D, 2, 2, NE); -- auto coeff = Reshape(coeff_.Read(), coeffDim, Q1D, Q1D, NE); -- auto y = Reshape(op.Write(), 4, Q1D, Q1D, NE); -- -- const int i11 = 0; -- const int i12 = transpose ? 2 : 1; -- const int i21 = transpose ? 1 : 2; -- const int i22 = 3; -- -- mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -- { -- MFEM_FOREACH_THREAD(qx,x,Q1D) -- { -- MFEM_FOREACH_THREAD(qy,y,Q1D) -- { -- const double J11 = J(qx,qy,0,0,e); -- const double J21 = J(qx,qy,1,0,e); -- const double J12 = J(qx,qy,0,1,e); -- const double J22 = J(qx,qy,1,1,e); -- const double w_detJ = W(qx,qy) / ((J11*J22) - (J21*J12)); -- -- if (coeffDim == 3 || coeffDim == 4) // Matrix coefficient version -- { -- // First compute entries of R = MJ -- const double M11 = coeff(i11,qx,qy,e); -- const double M12 = (!symmetric) ? coeff(i12,qx,qy,e) : coeff(1,qx,qy,e); -- const double M21 = (!symmetric) ? coeff(i21,qx,qy,e) : M12; -- const double M22 = (!symmetric) ? coeff(i22,qx,qy,e) : coeff(2,qx,qy,e); -- -- // J^{-1} M^T -- const double R11 = ( J22*M11 - J12*M12); // 1,1 -- const double R12 = ( J22*M21 - J12*M22); // 1,2 -- const double R21 = (-J21*M11 + J11*M12); // 2,1 -- const double R22 = (-J21*M21 + J11*M22); // 2,2 -- -- // (RJ)^T -- y(i11,qx,qy,e) = w_detJ * (R11*J11 + R12*J21); // 1,1 -- y(i21,qx,qy,e) = w_detJ * (R11*J12 + R12*J22); // 1,2 (transpose) -- y(i12,qx,qy,e) = w_detJ * (R21*J11 + R22*J21); // 2,1 (transpose) -- y(i22,qx,qy,e) = w_detJ * (R21*J12 + R22*J22); // 2,2 -- } -- else if (coeffDim == 2) // Vector coefficient version -- { -- const double D1 = coeff(0,qx,qy,e); -- const double D2 = coeff(1,qx,qy,e); -- const double R11 = D1*J11; -- const double R12 = D1*J12; -- const double R21 = D2*J21; -- const double R22 = D2*J22; -- y(i11,qx,qy,e) = w_detJ * ( J22*R11 - J12*R21); // 1,1 -- y(i21,qx,qy,e) = w_detJ * ( J22*R12 - J12*R22); // 1,2 (transpose) -- y(i12,qx,qy,e) = w_detJ * (-J21*R11 + J11*R21); // 2,1 (transpose) -- y(i22,qx,qy,e) = w_detJ * (-J21*R12 + J11*R22); // 2,2 -- } -- } -- } -- }); --} -- --// Mass operator for H(curl) and H(div) functions, using Piola transformations --// u = dF^{-T} \hat{u} in H(curl), v = (1 / det dF) dF \hat{v} in H(div). --void PAHcurlHdivMassApply3D(const int D1D, -- const int D1Dtest, -- const int Q1D, -- const int NE, -- const bool scalarCoeff, -- const bool trialHcurl, -- const bool transpose, -- const Array &Bo_, -- const Array &Bc_, -- const Array &Bot_, -- const Array &Bct_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) --{ -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -- auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -- auto Bot = Reshape(Bot_.Read(), D1Dtest-1, Q1D); -- auto Bct = Reshape(Bct_.Read(), D1Dtest, Q1D); -- auto op = Reshape(op_.Read(), scalarCoeff ? 1 : 9, Q1D, Q1D, Q1D, NE); -- auto x = Reshape(x_.Read(), 3*(D1D-1)*D1D*(trialHcurl ? D1D : D1D-1), NE); -- auto y = Reshape(y_.ReadWrite(), 3*(D1Dtest-1)*D1Dtest* -- (trialHcurl ? D1Dtest-1 : D1Dtest), NE); -- -- const int i12 = transpose ? 3 : 1; -- const int i13 = transpose ? 6 : 2; -- const int i21 = transpose ? 1 : 3; -- const int i23 = transpose ? 7 : 5; -- const int i31 = transpose ? 2 : 6; -- const int i32 = transpose ? 5 : 7; -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qz][qy][qx][c] = 0.0; -- } -- } -- } -- } -- -- int osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z trial components -- { -- const int D1Dz = trialHcurl ? ((c == 2) ? D1D - 1 : D1D) : -- ((c == 2) ? D1D : D1D - 1); -- const int D1Dy = trialHcurl ? ((c == 1) ? D1D - 1 : D1D) : -- ((c == 1) ? D1D : D1D - 1); -- const int D1Dx = trialHcurl ? ((c == 0) ? D1D - 1 : D1D) : -- ((c == 0) ? D1D : D1D - 1); -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- double massXY[MAX_Q1D][MAX_Q1D]; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massXY[qy][qx] = 0.0; -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * (trialHcurl ? ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)) : -- ((c == 0) ? Bc(qx,dx) : Bo(qx,dx))); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = trialHcurl ? ((c == 1) ? Bo(qy,dy) : Bc(qy,dy)) : -- ((c == 1) ? Bc(qy,dy) : Bo(qy,dy)); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = massX[qx]; -- massXY[qy][qx] += wx * wy; -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = trialHcurl ? ((c == 2) ? Bo(qz,dz) : Bc(qz,dz)) : -- ((c == 2) ? Bc(qz,dz) : Bo(qz,dz)); -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -- } -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(0,qx,qy,qz,e); -- const double O12 = scalarCoeff ? 0.0 : op(i12,qx,qy,qz,e); -- const double O13 = scalarCoeff ? 0.0 : op(i13,qx,qy,qz,e); -- const double O21 = scalarCoeff ? 0.0 : op(i21,qx,qy,qz,e); -- const double O22 = scalarCoeff ? O11 : op(4,qx,qy,qz,e); -- const double O23 = scalarCoeff ? 0.0 : op(i23,qx,qy,qz,e); -- const double O31 = scalarCoeff ? 0.0 : op(i31,qx,qy,qz,e); -- const double O32 = scalarCoeff ? 0.0 : op(i32,qx,qy,qz,e); -- const double O33 = scalarCoeff ? O11 : op(8,qx,qy,qz,e); -- const double massX = mass[qz][qy][qx][0]; -- const double massY = mass[qz][qy][qx][1]; -- const double massZ = mass[qz][qy][qx][2]; -- mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -- mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ); -- mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ); -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double massXY[HDIV_MAX_D1D][HDIV_MAX_D1D]; -- -- osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z test components -- { -- const int D1Dz = trialHcurl ? ((c == 2) ? D1Dtest : D1Dtest - 1) : -- ((c == 2) ? D1Dtest - 1 : D1Dtest); -- const int D1Dy = trialHcurl ? ((c == 1) ? D1Dtest : D1Dtest - 1) : -- ((c == 1) ? D1Dtest - 1 : D1Dtest); -- const int D1Dx = trialHcurl ? ((c == 0) ? D1Dtest : D1Dtest - 1) : -- ((c == 0) ? D1Dtest - 1 : D1Dtest); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] = 0.0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[HDIV_MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += mass[qz][qy][qx][c] * (trialHcurl ? -- ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)) : -- ((c == 0) ? Bot(dx,qx) : Bct(dx,qx))); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = trialHcurl ? ((c == 1) ? Bct(dy,qy) : Bot(dy,qy)) : -- ((c == 1) ? Bot(dy,qy) : Bct(dy,qy)); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massXY[dy][dx] += massX[dx] * wy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = trialHcurl ? ((c == 2) ? Bct(dz,qz) : Bot(dz,qz)) : -- ((c == 2) ? Bot(dz,qz) : Bct(dz,qz)); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += -- massXY[dy][dx] * wz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- } // loop qz -- }); // end of element loop --} -- --// Mass operator for H(curl) and H(div) functions, using Piola transformations --// u = dF^{-T} \hat{u} in H(curl), v = (1 / det dF) dF \hat{v} in H(div). --void PAHcurlHdivMassApply2D(const int D1D, -- const int D1Dtest, -- const int Q1D, -- const int NE, -- const bool scalarCoeff, -- const bool trialHcurl, -- const bool transpose, -- const Array &Bo_, -- const Array &Bc_, -- const Array &Bot_, -- const Array &Bct_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) --{ -- constexpr static int MAX_D1D = HCURL_MAX_D1D; -- constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -- -- MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -- MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -- constexpr static int VDIM = 2; -- -- auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -- auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -- auto Bot = Reshape(Bot_.Read(), D1Dtest-1, Q1D); -- auto Bct = Reshape(Bct_.Read(), D1Dtest, Q1D); -- auto op = Reshape(op_.Read(), scalarCoeff ? 1 : 4, Q1D, Q1D, NE); -- auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE); -- auto y = Reshape(y_.ReadWrite(), 2*(D1Dtest-1)*D1Dtest, NE); -- -- const int i12 = transpose ? 2 : 1; -- const int i21 = transpose ? 1 : 2; -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- double mass[MAX_Q1D][MAX_Q1D][VDIM]; -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int c = 0; c < VDIM; ++c) -- { -- mass[qy][qx][c] = 0.0; -- } -- } -- } -- -- int osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y trial components -- { -- const int D1Dy = trialHcurl ? ((c == 1) ? D1D - 1 : D1D) : -- ((c == 1) ? D1D : D1D - 1); -- const int D1Dx = trialHcurl ? ((c == 0) ? D1D - 1 : D1D) : -- ((c == 0) ? D1D : D1D - 1); -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double massX[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] = 0.0; -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- const double t = x(dx + (dy * D1Dx) + osc, e); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- massX[qx] += t * (trialHcurl ? ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)) : -- ((c == 0) ? Bc(qx,dx) : Bo(qx,dx))); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = trialHcurl ? ((c == 1) ? Bo(qy,dy) : Bc(qy,dy)) : -- ((c == 1) ? Bc(qy,dy) : Bo(qy,dy)); -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qy][qx][c] += massX[qx] * wy; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop (c) over components -- -- // Apply D operator. -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double O11 = op(0,qx,qy,e); -- const double O12 = scalarCoeff ? 0.0 : op(i12,qx,qy,e); -- const double O21 = scalarCoeff ? 0.0 : op(i21,qx,qy,e); -- const double O22 = scalarCoeff ? O11 : op(3,qx,qy,e); -- const double massX = mass[qy][qx][0]; -- const double massY = mass[qy][qx][1]; -- mass[qy][qx][0] = (O11*massX)+(O12*massY); -- mass[qy][qx][1] = (O21*massX)+(O22*massY); -- } -- } -- -- osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y test components -- { -- const int D1Dy = trialHcurl ? ((c == 1) ? D1Dtest : D1Dtest - 1) : -- ((c == 1) ? D1Dtest - 1 : D1Dtest); -- const int D1Dx = trialHcurl ? ((c == 0) ? D1Dtest : D1Dtest - 1) : -- ((c == 0) ? D1Dtest - 1 : D1Dtest); -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double massX[HDIV_MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] = 0.0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- massX[dx] += mass[qy][qx][c] * (trialHcurl ? -- ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)) : -- ((c == 0) ? Bot(dx,qx) : Bct(dx,qx))); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = trialHcurl ? ((c == 1) ? Bct(dy,qy) : Bot(dy,qy)) : -- ((c == 1) ? Bot(dy,qy) : Bct(dy,qy)); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy; -- } -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- }); // end of element loop --} -- --void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &fes) --{ -- AssemblePA(fes, fes); --} -- --void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements -- Mesh *mesh = trial_fes.GetMesh(); -- -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const VectorTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const FiniteElement *test_fel = test_fes.GetFE(0); -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- ne = trial_fes.GetNE(); -- MFEM_VERIFY(ne == test_fes.GetNE(), -- "Different meshes for test and trial spaces"); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1Dtest = mapsCtest->ndof; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- trial_fetype = trial_el->GetDerivType(); -- test_fetype = test_el->GetDerivType(); -- -- const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -- const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -- const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -- const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC); -- if (Q) { coeff.Project(*Q); } -- else if (MQ) { coeff.ProjectTranspose(*MQ); } -- else if (DQ) { coeff.Project(*DQ); } -- else { coeff.SetConstant(1.0); } -- -- const int coeff_dim = coeff.GetVDim(); -- symmetric = (coeff_dim != dim*dim); -- -- if ((trial_curl && test_div) || (trial_div && test_curl)) -- pa_data.SetSize((coeff_dim == 1 ? 1 : dim*dim) * nq * ne, -- Device::GetMemoryType()); -- else -- pa_data.SetSize((symmetric ? symmDims : dims*dims) * nq * ne, -- Device::GetMemoryType()); -- -- if (trial_curl && test_curl && dim == 3) -- { -- PADiffusionSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else if (trial_curl && test_curl && dim == 2) -- { -- PADiffusionSetup2D<2>(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else if (trial_div && test_div && dim == 3) -- { -- PAHdivSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else if (trial_div && test_div && dim == 2) -- { -- PAHdivSetup2D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else if (((trial_curl && test_div) || (trial_div && test_curl)) && -- test_fel->GetOrder() == trial_fel->GetOrder()) -- { -- if (coeff_dim == 1) -- { -- PAHcurlL2Setup(nq, coeff_dim, ne, ir->GetWeights(), coeff, pa_data); -- } -- else -- { -- const bool tr = (trial_div && test_curl); -- if (dim == 3) -- PAHcurlHdivSetup3D(quad1D, coeff_dim, ne, tr, ir->GetWeights(), -- geom->J, coeff, pa_data); -- else -- PAHcurlHdivSetup2D(quad1D, coeff_dim, ne, tr, ir->GetWeights(), -- geom->J, coeff, pa_data); -- } -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } --} -- --void VectorFEMassIntegrator::AssembleDiagonalPA(Vector& diag) --{ -- if (dim == 3) -- { -- if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -- { -- if (Device::Allows(Backend::DEVICE_MASK)) -- { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -- { -- case 0x23: return SmemPAHcurlMassAssembleDiagonal3D<2,3>(dofs1D, quad1D, ne, -- symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- case 0x34: return SmemPAHcurlMassAssembleDiagonal3D<3,4>(dofs1D, quad1D, ne, -- symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- case 0x45: return SmemPAHcurlMassAssembleDiagonal3D<4,5>(dofs1D, quad1D, ne, -- symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- case 0x56: return SmemPAHcurlMassAssembleDiagonal3D<5,6>(dofs1D, quad1D, ne, -- symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- default: return SmemPAHcurlMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- } -- else -- PAHcurlMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else if (trial_fetype == mfem::FiniteElement::DIV && -- test_fetype == trial_fetype) -- { -- PAHdivMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -- } -- else // 2D -- { -- if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -- { -- PAHcurlMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else if (trial_fetype == mfem::FiniteElement::DIV && -- test_fetype == trial_fetype) -- { -- PAHdivMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -- mapsO->B, mapsC->B, pa_data, diag); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -- } --} -- --void VectorFEMassIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -- const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -- const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -- const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -- -- if (dim == 3) -- { -- if (trial_curl && test_curl) -- { -- if (Device::Allows(Backend::DEVICE_MASK)) -- { -- const int ID = (dofs1D << 4) | quad1D; -- switch (ID) -- { -- case 0x23: return SmemPAHcurlMassApply3D<2,3>(dofs1D, quad1D, ne, symmetric, -- mapsO->B, -- mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- case 0x34: return SmemPAHcurlMassApply3D<3,4>(dofs1D, quad1D, ne, symmetric, -- mapsO->B, -- mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- case 0x45: return SmemPAHcurlMassApply3D<4,5>(dofs1D, quad1D, ne, symmetric, -- mapsO->B, -- mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- case 0x56: return SmemPAHcurlMassApply3D<5,6>(dofs1D, quad1D, ne, symmetric, -- mapsO->B, -- mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- default: return SmemPAHcurlMassApply3D(dofs1D, quad1D, ne, symmetric, mapsO->B, -- mapsC->B, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -- } -- } -- else -- PAHcurlMassApply3D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- } -- else if (trial_div && test_div) -- { -- PAHdivMassApply(3, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- } -- else if (trial_curl && test_div) -- { -- const bool scalarCoeff = !(DQ || MQ); -- PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- true, false, mapsO->B, mapsC->B, mapsOtest->Bt, -- mapsCtest->Bt, pa_data, x, y); -- } -- else if (trial_div && test_curl) -- { -- const bool scalarCoeff = !(DQ || MQ); -- PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- false, false, mapsO->B, mapsC->B, mapsOtest->Bt, -- mapsCtest->Bt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -- } -- else // 2D -- { -- if (trial_curl && test_curl) -- { -- PAHcurlMassApply2D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -- } -- else if (trial_div && test_div) -- { -- PAHdivMassApply(2, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, mapsO->Bt, -- mapsC->Bt, pa_data, x, y); -- } -- else if ((trial_curl && test_div) || (trial_div && test_curl)) -- { -- const bool scalarCoeff = !(DQ || MQ); -- PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- trial_curl, false, mapsO->B, mapsC->B, -- mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -- } --} -- --void VectorFEMassIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const --{ -- const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -- const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -- const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -- const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -- -- bool symmetricSpaces = true; -- -- if (dim == 3 && ((trial_div && test_curl) || (trial_curl && test_div))) -- { -- const bool scalarCoeff = !(DQ || MQ); -- PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- trial_div, true, mapsO->B, mapsC->B, mapsOtest->Bt, -- mapsCtest->Bt, pa_data, x, y); -- symmetricSpaces = false; -- } -- else if (dim == 2 && ((trial_curl && test_div) || (trial_div && test_curl))) -- { -- const bool scalarCoeff = !(DQ || MQ); -- PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -- !trial_curl, true, mapsO->B, mapsC->B, mapsOtest->Bt, -- mapsCtest->Bt, pa_data, x, y); -- symmetricSpaces = false; -- } -- -- if (symmetricSpaces) -- { -- if (MQ && dynamic_cast(MQ) == NULL) -- { -- MFEM_ABORT("VectorFEMassIntegrator transpose not implemented for asymmetric MatrixCoefficient"); -- } -- -- this->AddMultPA(x, y); -- } --} -- --void MixedVectorGradientIntegrator::AssemblePA(const FiniteElementSpace -- &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with a vector test space and H^1 trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const NodalTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const VectorTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir -- = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -- -- ne = trial_fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType()); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -- -- // Use the same setup functions as VectorFEMassIntegrator. -- if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 3) -- { -- PADiffusionSetup3D(quad1D, 1, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 2) -- { -- PADiffusionSetup2D<2>(quad1D, 1, ne, ir->GetWeights(), geom->J, -- coeff, pa_data); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } --} -- --void MixedVectorGradientIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- PAHcurlH1Apply3D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -- else if (dim == 2) -- PAHcurlH1Apply2D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -- mapsO->Bt, mapsC->Bt, pa_data, x, y); -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --void MixedVectorGradientIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const --{ -- if (dim == 3) -- PAHcurlH1ApplyTranspose3D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -- mapsC->Bt, mapsC->Gt, pa_data, x, y); -- else if (dim == 2) -- PAHcurlH1ApplyTranspose2D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -- mapsC->Bt, mapsC->Gt, pa_data, x, y); -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --} // namespace mfem -diff --git a/fem/ceed/interface/operator.cpp b/fem/ceed/interface/operator.cpp -index 8545ccaa8..745e474e5 100644 ---- a/fem/ceed/interface/operator.cpp -+++ b/fem/ceed/interface/operator.cpp -@@ -46,7 +46,7 @@ void Operator::Mult(const mfem::Vector &x, mfem::Vector &y) const - CeedScalar *y_ptr; - CeedMemType mem; - CeedGetPreferredMemType(mfem::internal::ceed, &mem); -- if ( Device::Allows(Backend::DEVICE_MASK) && mem==CEED_MEM_DEVICE ) -+ if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { - x_ptr = x.Read(); - y_ptr = y.Write(); -@@ -78,7 +78,7 @@ void Operator::AddMult(const mfem::Vector &x, mfem::Vector &y, - CeedScalar *y_ptr; - CeedMemType mem; - CeedGetPreferredMemType(mfem::internal::ceed, &mem); -- if ( Device::Allows(Backend::DEVICE_MASK) && mem==CEED_MEM_DEVICE ) -+ if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { - x_ptr = x.Read(); - y_ptr = y.ReadWrite(); -@@ -107,7 +107,7 @@ void Operator::GetDiagonal(mfem::Vector &diag) const - CeedScalar *d_ptr; - CeedMemType mem; - CeedGetPreferredMemType(mfem::internal::ceed, &mem); -- if ( Device::Allows(Backend::DEVICE_MASK) && mem==CEED_MEM_DEVICE ) -+ if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { - d_ptr = diag.ReadWrite(); - } -diff --git a/fem/ceed/interface/operator.hpp b/fem/ceed/interface/operator.hpp -index cffea2fc7..9e4a4faaf 100644 ---- a/fem/ceed/interface/operator.hpp -+++ b/fem/ceed/interface/operator.hpp -@@ -37,11 +37,12 @@ public: - /// This class takes ownership of op and will delete it - Operator(CeedOperator op); - #endif -+ - void Mult(const mfem::Vector &x, mfem::Vector &y) const override; - void AddMult(const mfem::Vector &x, mfem::Vector &y, - const double a = 1.0) const override; - void GetDiagonal(mfem::Vector &diag) const; -- using mfem::Operator::SetupRAP; -+ - virtual ~Operator() - { - #ifdef MFEM_USE_CEED -diff --git a/fem/ceed/interface/util.cpp b/fem/ceed/interface/util.cpp -index d122c2ab5..b65fd2197 100644 ---- a/fem/ceed/interface/util.cpp -+++ b/fem/ceed/interface/util.cpp -@@ -217,7 +217,7 @@ const IntegrationRule & GetRule( - const FiniteElement &test_fe, - ElementTransformation &trans) - { -- return ConvectionIntegrator::GetRule(trial_fe, test_fe, trans); -+ return ConvectionIntegrator::GetRule(trial_fe, trans); - } - - template <> -diff --git a/fem/ceed/solvers/algebraic.cpp b/fem/ceed/solvers/algebraic.cpp -index 2cc325dbc..280a19960 100644 ---- a/fem/ceed/solvers/algebraic.cpp -+++ b/fem/ceed/solvers/algebraic.cpp -@@ -46,7 +46,7 @@ private: - Array ess_tdofs; - const mfem::Operator *P; - ceed::Operator *unconstrained_op; -- mfem::ConstrainedOperator *constrained_op; -+ mfem::Operator *constrained_op; - }; - - ConstrainedOperator::ConstrainedOperator( -@@ -56,10 +56,8 @@ ConstrainedOperator::ConstrainedOperator( - : ess_tdofs(ess_tdofs_), P(P_) - { - unconstrained_op = new ceed::Operator(oper); -- mfem::Operator *rap = unconstrained_op->SetupRAP(P, P); -- height = width = rap->Height(); -- bool own_rap = (rap != unconstrained_op); -- constrained_op = new mfem::ConstrainedOperator(rap, ess_tdofs, own_rap); -+ unconstrained_op->FormSystemOperator(ess_tdofs, constrained_op); -+ height = width = constrained_op->Height(); - } - - ConstrainedOperator::ConstrainedOperator(CeedOperator oper, -@@ -535,7 +533,7 @@ void AlgebraicInterpolation::Mult(const mfem::Vector& x, mfem::Vector& y) const - CeedScalar *out_ptr; - CeedMemType mem; - ierr = CeedGetPreferredMemType(internal::ceed, &mem); PCeedChk(ierr); -- if ( Device::Allows(Backend::DEVICE_MASK) && mem==CEED_MEM_DEVICE ) -+ if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { - in_ptr = x.Read(); - out_ptr = y.ReadWrite(); -@@ -568,7 +566,7 @@ void AlgebraicInterpolation::MultTranspose(const mfem::Vector& x, - ierr = CeedGetPreferredMemType(internal::ceed, &mem); PCeedChk(ierr); - const CeedScalar *in_ptr; - CeedScalar *out_ptr; -- if ( Device::Allows(Backend::DEVICE_MASK) && mem==CEED_MEM_DEVICE ) -+ if (Device::Allows(Backend::DEVICE_MASK) && mem == CEED_MEM_DEVICE) - { - in_ptr = x.Read(); - out_ptr = y.ReadWrite(); -@@ -808,15 +806,6 @@ ParAlgebraicCoarseSpace::ParAlgebraicCoarseSpace( - } - } - R_mat->Finalize(); -- -- if (Device::Allows(Backend::DEVICE_MASK)) -- { -- P = new DeviceConformingProlongationOperator(*gc, R_mat); -- } -- else -- { -- P = new ConformingProlongationOperator(lsize, *gc); -- } - P_mat = NULL; - } - -@@ -828,8 +817,8 @@ HypreParMatrix *ParAlgebraicCoarseSpace::GetProlongationHypreParMatrix() - MFEM_VERIFY(pmesh != NULL, ""); - Array dof_offsets, tdof_offsets, tdof_nb_offsets; - Array *offsets[2] = {&dof_offsets, &tdof_offsets}; -- int lsize = P->Height(); -- int ltsize = P->Width(); -+ int ltsize = R_mat->Height(); -+ int lsize = R_mat->Width(); - HYPRE_BigInt loc_sizes[2] = {lsize, ltsize}; - pmesh->GenerateOffsets(2, loc_sizes, offsets); - -@@ -936,7 +925,6 @@ HypreParMatrix *ParAlgebraicCoarseSpace::GetProlongationHypreParMatrix() - - ParAlgebraicCoarseSpace::~ParAlgebraicCoarseSpace() - { -- delete P; - delete R_mat; - delete P_mat; - delete gc; -diff --git a/fem/ceed/solvers/algebraic.hpp b/fem/ceed/solvers/algebraic.hpp -index 49cdbca98..8ede8324e 100644 ---- a/fem/ceed/solvers/algebraic.hpp -+++ b/fem/ceed/solvers/algebraic.hpp -@@ -33,12 +33,13 @@ class AlgebraicCoarseSpace : public FiniteElementSpace - public: - AlgebraicCoarseSpace(FiniteElementSpace &fine_fes, CeedElemRestriction fine_er, - int order, int dim, int order_reduction_); -+ ~AlgebraicCoarseSpace(); -+ - int GetOrderReduction() const { return order_reduction; } - CeedElemRestriction GetCeedElemRestriction() const { return ceed_elem_restriction; } - CeedBasis GetCeedCoarseToFine() const { return coarse_to_fine; } - virtual const mfem::Operator *GetProlongationMatrix() const override { return NULL; } - virtual const SparseMatrix *GetRestrictionMatrix() const override { return NULL; } -- ~AlgebraicCoarseSpace(); - - protected: - int *dof_map; -@@ -64,16 +65,16 @@ public: - int order_reduction_, - GroupCommunicator *gc_fine - ); -- virtual const mfem::Operator *GetProlongationMatrix() const override { return P; } -+ ~ParAlgebraicCoarseSpace(); -+ -+ virtual const mfem::Operator *GetProlongationMatrix() const override { return P_mat; } - virtual const SparseMatrix *GetRestrictionMatrix() const override { return R_mat; } - GroupCommunicator *GetGroupCommunicator() const { return gc; } - HypreParMatrix *GetProlongationHypreParMatrix(); -- ~ParAlgebraicCoarseSpace(); - - private: -- SparseMatrix *R_mat; - GroupCommunicator *gc; -- ConformingProlongationOperator *P; -+ SparseMatrix *R_mat; - HypreParMatrix *P_mat; - Array ldof_group, ldof_ltdof; - }; -@@ -92,14 +93,11 @@ public: - Ceed ceed, CeedBasis basisctof, - CeedElemRestriction erestrictu_coarse, - CeedElemRestriction erestrictu_fine); -- - ~AlgebraicInterpolation(); - - virtual void Mult(const mfem::Vector& x, mfem::Vector& y) const; -- - virtual void MultTranspose(const mfem::Vector& x, mfem::Vector& y) const; - -- using mfem::Operator::SetupRAP; - private: - int Initialize(Ceed ceed, CeedBasis basisctof, - CeedElemRestriction erestrictu_coarse, -@@ -127,11 +125,6 @@ public: - The given space is a real (geometric) space, but the coarse spaces are - constructed semi-algebraically with no mesh information. */ - AlgebraicSpaceHierarchy(FiniteElementSpace &fespace); -- AlgebraicCoarseSpace& GetAlgebraicCoarseSpace(int level) -- { -- MFEM_ASSERT(level < GetNumLevels() - 1, ""); -- return static_cast(*fespaces[level]); -- } - ~AlgebraicSpaceHierarchy() - { - for (int i=0; i(*fespaces[level]); -+ } -+ - private: - CeedElemRestriction fine_er; - Array ceed_interpolations; -@@ -200,6 +199,7 @@ public: - */ - AlgebraicSolver(BilinearForm &form, const Array& ess_tdofs); - ~AlgebraicSolver(); -+ - void Mult(const Vector& x, Vector& y) const; - void SetOperator(const mfem::Operator& op); - }; -diff --git a/fem/coefficient.cpp b/fem/coefficient.cpp -index 46ad4cf4c..e47073e3f 100644 ---- a/fem/coefficient.cpp -+++ b/fem/coefficient.cpp -@@ -144,8 +144,8 @@ double FunctionCoefficient::Eval(ElementTransformation & T, - } - } - --double GridFunctionCoefficient::Eval (ElementTransformation &T, -- const IntegrationPoint &ip) -+double GridFunctionCoefficient::Eval(ElementTransformation &T, -+ const IntegrationPoint &ip) - { - Mesh *gf_mesh = GridF->FESpace()->GetMesh(); - if (T.mesh == gf_mesh) -@@ -623,12 +623,6 @@ void PWMatrixCoefficient::UpdateCoefficient(int attr, MatrixCoefficient & coef) - MFEM_VERIFY(coef.GetWidth() == width, - "PWMatrixCoefficient::UpdateCoefficient: " - "MatrixCoefficient has incompatible width."); -- if (symmetric) -- { -- MFEM_VERIFY(coef.IsSymmetric(), -- "PWMatrixCoefficient::UpdateCoefficient: " -- "MatrixCoefficient has incompatible symmetry."); -- } - pieces[attr] = &coef; - } - -@@ -680,68 +674,17 @@ void MatrixFunctionCoefficient::Eval(DenseMatrix &K, ElementTransformation &T, - - K.SetSize(height, width); - -- if (symmetric) // Use SymmFunction (deprecated version) -- { -- MFEM_VERIFY(height == width && SymmFunction, -- "MatrixFunctionCoefficient is not symmetric"); -- -- Vector Ksym((width * (width + 1)) / 2); // 1x1: 1, 2x2: 3, 3x3: 6 -- -- SymmFunction(transip, Ksym); -- -- // Copy upper triangular values from Ksym to the full matrix K -- int os = 0; -- for (int i=0; iEval(T, ip, GetTime()); -+ TDFunction(transip, GetTime(), K); - } --} -- --void MatrixFunctionCoefficient::EvalSymmetric(Vector &K, -- ElementTransformation &T, -- const IntegrationPoint &ip) --{ -- MFEM_VERIFY(symmetric && height == width && SymmFunction, -- "MatrixFunctionCoefficient is not symmetric"); -- -- double x[3]; -- Vector transip(x, 3); -- -- T.Transform(ip, transip); -- -- K.SetSize((width * (width + 1)) / 2); // 1x1: 1, 2x2: 3, 3x3: 6 -- -- if (SymmFunction) -+ else - { -- SymmFunction(transip, K); -+ K = mat; - } - - if (Q) -@@ -782,7 +725,7 @@ void SymmetricMatrixCoefficient::Eval(DenseMatrix &K, ElementTransformation &T, - Eval(mat, T, ip); - for (int j = 0; j < width; ++j) - { -- for (int i = 0; i < height; ++ i) -+ for (int i = 0; i < height; ++i) - { - K(i, j) = mat(i, j); - } -@@ -1782,3 +1725,4 @@ CoefficientVector::~CoefficientVector() - } - - } -+ -diff --git a/fem/coefficient.hpp b/fem/coefficient.hpp -index ada5b91a3..fc54fd2ef 100644 ---- a/fem/coefficient.hpp -+++ b/fem/coefficient.hpp -@@ -106,7 +106,6 @@ private: - Vector constants; - - public: -- - /// Constructs a piecewise constant coefficient in NumOfSubD subdomains - explicit PWConstCoefficient(int NumOfSubD = 0) : constants(NumOfSubD) - { constants = 0.0; } -@@ -177,7 +176,6 @@ private: - const Array & coefs); - - public: -- - /// Constructs a piecewise coefficient - explicit PWCoefficient() {} - -@@ -340,7 +338,6 @@ protected: - double (*tdf)(double); - - public: -- - /// Construct a unit delta function centered at (0.0,0.0,0.0) - DeltaCoefficient() - { -@@ -503,15 +500,16 @@ class VectorConstantCoefficient : public VectorCoefficient - { - private: - Vector vec; -+ - public: - /// Construct the coefficient with constant vector @a v. - VectorConstantCoefficient(const Vector &v) - : VectorCoefficient(v.Size()), vec(v) { } -- using VectorCoefficient::Eval; - - /// Evaluate the vector coefficient at @a ip. - virtual void Eval(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip) { V = vec; } -+ using VectorCoefficient::Eval; - - /// Return a reference to the constant vector in this class. - const Vector& GetVec() const { return vec; } -@@ -561,7 +559,6 @@ private: - const Array & coefs); - - public: -- - /// Constructs a piecewise vector coefficient of dimension vd - explicit PWVectorCoefficient(int vd): VectorCoefficient(vd) {} - -@@ -629,10 +626,10 @@ public: - : VectorCoefficient(dim), TDFunction(std::move(TDF)), Q(q) - { } - -- using VectorCoefficient::Eval; - /// Evaluate the vector coefficient at @a ip. - virtual void Eval(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip); -+ using VectorCoefficient::Eval; - - virtual ~VectorFunctionCoefficient() { } - }; -@@ -669,11 +666,11 @@ public: - double Eval(int i, ElementTransformation &T, const IntegrationPoint &ip) - { return Coeff[i] ? Coeff[i]->Eval(T, ip, GetTime()) : 0.0; } - -- using VectorCoefficient::Eval; - /** @brief Evaluate the coefficient. Each element of vector V comes from the - associated array of scalar coefficients. */ - virtual void Eval(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip); -+ using VectorCoefficient::Eval; - - /// Destroys vector coefficient. - virtual ~VectorArrayCoefficient(); -@@ -728,7 +725,6 @@ protected: - const GridFunction *GridFunc; - - public: -- - /** @brief Construct the coefficient with a scalar grid function @a gf. The - grid function is not owned by the coefficient. */ - GradientGridFunctionCoefficient(const GridFunction *gf); -@@ -769,10 +765,10 @@ public: - /// Get the vector grid function. - const GridFunction * GetGridFunction() const { return GridFunc; } - -- using VectorCoefficient::Eval; - /// Evaluate the vector curl coefficient at @a ip. - virtual void Eval(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip); -+ using VectorCoefficient::Eval; - - virtual ~CurlGridFunctionCoefficient() { } - }; -@@ -861,12 +857,13 @@ public: - virtual void EvalDelta(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip); - -- using VectorCoefficient::Eval; - /** @brief A VectorDeltaFunction cannot be evaluated. Calling this method - will cause an MFEM error, terminating the application. */ - virtual void Eval(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip) - { mfem_error("VectorDeltaCoefficient::Eval"); } -+ using VectorCoefficient::Eval; -+ - virtual ~VectorDeltaCoefficient() { } - }; - -@@ -908,16 +905,15 @@ class MatrixCoefficient - protected: - int height, width; - double time; -- bool symmetric; // deprecated - - public: - /// Construct a dim x dim matrix coefficient. -- explicit MatrixCoefficient(int dim, bool symm=false) -- { height = width = dim; time = 0.; symmetric = symm; } -+ explicit MatrixCoefficient(int dim) -+ { height = width = dim; time = 0.; } - - /// Construct a h x w matrix coefficient. -- MatrixCoefficient(int h, int w, bool symm=false) : -- height(h), width(w), time(0.), symmetric(symm) { } -+ MatrixCoefficient(int h, int w) : -+ height(h), width(w), time(0.) { } - - /// Set the time for time dependent coefficients - virtual void SetTime(double t) { time = t; } -@@ -934,9 +930,6 @@ public: - /// For backward compatibility get the width of the matrix. - int GetVDim() const { return width; } - -- /** @deprecated Use SymmetricMatrixCoefficient instead */ -- bool IsSymmetric() const { return symmetric; } -- - /** @brief Evaluate the matrix coefficient in the element described by @a T - at the point @a ip, storing the result in @a K. */ - /** @note When this method is called, the caller must make sure that the -@@ -953,17 +946,6 @@ public: - /// the width of the matrix. - virtual void Project(QuadratureFunction &qf, bool transpose=false); - -- /// (DEPRECATED) Evaluate a symmetric matrix coefficient. -- /** @brief Evaluate the upper triangular entries of the matrix coefficient -- in the symmetric case, similarly to Eval. Matrix entry (i,j) is stored -- in K[j - i + os_i] for 0 <= i <= j < width, os_0 = 0, -- os_{i+1} = os_i + width - i. That is, K = {M(0,0), ..., M(0,w-1), -- M(1,1), ..., M(1,w-1), ..., M(w-1,w-1) with w = width. -- @deprecated Use Eval() instead. */ -- virtual void EvalSymmetric(Vector &K, ElementTransformation &T, -- const IntegrationPoint &ip) -- { mfem_error("MatrixCoefficient::EvalSymmetric"); } -- - virtual ~MatrixCoefficient() { } - }; - -@@ -973,14 +955,17 @@ class MatrixConstantCoefficient : public MatrixCoefficient - { - private: - DenseMatrix mat; -+ - public: - ///Construct using matrix @a m for the constant. - MatrixConstantCoefficient(const DenseMatrix &m) - : MatrixCoefficient(m.Height(), m.Width()), mat(m) { } -- using MatrixCoefficient::Eval; -+ - /// Evaluate the matrix coefficient at @a ip. - virtual void Eval(DenseMatrix &M, ElementTransformation &T, - const IntegrationPoint &ip) { M = mat; } -+ using MatrixCoefficient::Eval; -+ - /// Return a reference to the constant matrix. - const DenseMatrix& GetMatrix() { return mat; } - }; -@@ -1030,20 +1015,18 @@ private: - const Array & coefs); - - public: -- - /// Constructs a piecewise matrix coefficient of dimension dim by dim -- explicit PWMatrixCoefficient(int dim, bool symm = false) -- : MatrixCoefficient(dim, symm) {} -+ explicit PWMatrixCoefficient(int dim) -+ : MatrixCoefficient(dim) {} - - /// Constructs a piecewise matrix coefficient of dimension h by w -- explicit PWMatrixCoefficient(int h, int w, bool symm = false) -- : MatrixCoefficient(h, w, symm) {} -+ explicit PWMatrixCoefficient(int h, int w) -+ : MatrixCoefficient(h, w) {} - - /// Construct the coefficient using arrays describing the pieces - /** \param dim - size of the square matrix-valued result - \param attr - an array of attribute numbers for each piece - \param coefs - the corresponding array of MatrixCoefficient pointers -- \param symm - true if the result will be symmetric, false otherwise - Any missing attributes or NULL coefficient pointers will result in a - zero matrix being returned. - -@@ -1051,16 +1034,14 @@ public: - transferred to this object. - */ - PWMatrixCoefficient(int dim, const Array & attr, -- const Array & coefs, -- bool symm=false) -- : MatrixCoefficient(dim, symm) { InitMap(attr, coefs); } -+ const Array & coefs) -+ : MatrixCoefficient(dim) { InitMap(attr, coefs); } - - /// Construct the coefficient using arrays describing the pieces - /** \param h - height of the matrix-valued result - \param w - width of the matrix-valued result - \param attr - an array of attribute numbers for each piece - \param coefs - the corresponding array of MatrixCoefficient pointers -- \param symm - true if the result will be symmetric, false otherwise - Any missing attributes or NULL coefficient pointers will result in a - zero matrix being returned for that attribute. - -@@ -1068,9 +1049,8 @@ public: - transferred to this object. - */ - PWMatrixCoefficient(int h, int w, const Array & attr, -- const Array & coefs, -- bool symm=false) -- : MatrixCoefficient(h, w, symm) { InitMap(attr, coefs); } -+ const Array & coefs) -+ : MatrixCoefficient(h, w) { InitMap(attr, coefs); } - - /// Set the time for time dependent coefficients - virtual void SetTime(double t); -@@ -1099,7 +1079,6 @@ class MatrixFunctionCoefficient : public MatrixCoefficient - { - private: - std::function Function; -- std::function SymmFunction; // deprecated - std::function TDFunction; - - Coefficient *Q; -@@ -1123,28 +1102,6 @@ public: - : MatrixCoefficient(m.Height(), m.Width()), Q(&q), mat(m) - { } - -- /** @brief Define a time-independent symmetric square matrix coefficient from -- a std function */ -- /** \param dim - the size of the matrix -- \param SymmF - function used in EvalSymmetric -- \param q - optional scalar Coefficient to scale the matrix coefficient -- @deprecated Use another constructor without setting SymmFunction. */ -- MatrixFunctionCoefficient(int dim, -- std::function SymmF, -- Coefficient *q = NULL) -- : MatrixCoefficient(dim, true), SymmFunction(std::move(SymmF)), Q(q), mat(0) -- { } -- -- /// Define a time-dependent square matrix coefficient from a std function -- /** \param dim - the size of the matrix -- \param TDF - time-dependent function -- \param q - optional scalar Coefficient to scale the matrix coefficient */ -- MatrixFunctionCoefficient(int dim, -- std::function TDF, -- Coefficient *q = nullptr) -- : MatrixCoefficient(dim), TDFunction(std::move(TDF)), Q(q) -- { } -- - /// Set the time for internally stored coefficients - void SetTime(double t); - -@@ -1152,11 +1109,6 @@ public: - virtual void Eval(DenseMatrix &K, ElementTransformation &T, - const IntegrationPoint &ip); - -- /// (DEPRECATED) Evaluate the symmetric matrix coefficient at @a ip. -- /** @deprecated Use Eval() instead. */ -- virtual void EvalSymmetric(Vector &K, ElementTransformation &T, -- const IntegrationPoint &ip); -- - virtual ~MatrixFunctionCoefficient() { } - }; - -@@ -1186,12 +1138,11 @@ public: - can be overridden with the @a own parameter. */ - void Set(int i, int j, Coefficient * c, bool own=true); - -- using MatrixCoefficient::Eval; -- - /// Evaluate coefficient located at (i,j) in the matrix using integration - /// point @a ip. - double Eval(int i, int j, ElementTransformation &T, const IntegrationPoint &ip) - { return Coeff[i*width+j] ? Coeff[i*width+j] -> Eval(T, ip, GetTime()) : 0.0; } -+ using MatrixCoefficient::Eval; - - /// Evaluate the matrix coefficient @a ip. - virtual void Eval(DenseMatrix &K, ElementTransformation &T, -@@ -1294,10 +1245,11 @@ class SymmetricMatrixCoefficient : public MatrixCoefficient - protected: - /// Internal matrix used when evaluating this coefficient as a DenseMatrix. - DenseSymmetricMatrix mat; -+ - public: - /// Construct a dim x dim matrix coefficient. - explicit SymmetricMatrixCoefficient(int dimension) -- : MatrixCoefficient(dimension, true) { } -+ : MatrixCoefficient(dimension) { } - - /// Get the size of the matrix. - int GetSize() const { return height; } -@@ -1347,10 +1299,11 @@ public: - ///Construct using matrix @a m for the constant. - SymmetricMatrixConstantCoefficient(const DenseSymmetricMatrix &m) - : SymmetricMatrixCoefficient(m.Height()), mat(m) { } -- using SymmetricMatrixCoefficient::Eval; -+ - /// Evaluate the matrix coefficient at @a ip. - virtual void Eval(DenseSymmetricMatrix &M, ElementTransformation &T, - const IntegrationPoint &ip) { M = mat; } -+ using SymmetricMatrixCoefficient::Eval; - }; - - -@@ -1398,10 +1351,10 @@ public: - /// Set the time for internally stored coefficients - void SetTime(double t); - -- using SymmetricMatrixCoefficient::Eval; - /// Evaluate the matrix coefficient at @a ip. - virtual void Eval(DenseSymmetricMatrix &K, ElementTransformation &T, - const IntegrationPoint &ip); -+ using SymmetricMatrixCoefficient::Eval; - - virtual ~SymmetricMatrixFunctionCoefficient() { } - }; -@@ -1548,6 +1501,7 @@ private: - - mutable Vector va; - mutable Vector vb; -+ - public: - /// Construct with the two vector coefficients. Result is \f$ A \cdot B \f$. - InnerProductCoefficient(VectorCoefficient &A, VectorCoefficient &B); -@@ -2120,9 +2074,9 @@ public: - - const QuadratureFunction& GetQuadFunction() const { return QuadF; } - -- using VectorCoefficient::Eval; - virtual void Eval(Vector &V, ElementTransformation &T, - const IntegrationPoint &ip); -+ using VectorCoefficient::Eval; - - virtual void Project(QuadratureFunction &qf); - -@@ -2186,6 +2140,7 @@ protected: - int vdim; ///< Number of values per quadrature point. - QuadratureSpaceBase &qs; ///< Associated QuadratureSpaceBase. - QuadratureFunction *qf; ///< Internal QuadratureFunction (owned, may be NULL). -+ - public: - /// Create an empty CoefficientVector. - CoefficientVector(QuadratureSpaceBase &qs_, -@@ -2287,3 +2242,4 @@ double ComputeGlobalLpNorm(double p, VectorCoefficient &coeff, ParMesh &pmesh, - } - - #endif -+ -diff --git a/fem/dgmassinv.cpp b/fem/dgmassinv.cpp -index 88774b3ad..3cff5d05d 100644 ---- a/fem/dgmassinv.cpp -+++ b/fem/dgmassinv.cpp -@@ -107,7 +107,7 @@ void DGMassInverse::Update() - { - M->Assemble(); - M->AssembleDiagonal(diag_inv); -- internal::MakeReciprocal(diag_inv.Size(), diag_inv.ReadWrite()); -+ diag_inv.Reciprocal(); - } - - DGMassInverse::~DGMassInverse() -diff --git a/fem/dgmassinv_kernels.hpp b/fem/dgmassinv_kernels.hpp -index c497621d4..e78a9bc98 100644 ---- a/fem/dgmassinv_kernels.hpp -+++ b/fem/dgmassinv_kernels.hpp -@@ -12,9 +12,9 @@ - #ifndef MFEM_DGMASSINV_KERNELS_HPP - #define MFEM_DGMASSINV_KERNELS_HPP - --#include "bilininteg_mass_pa.hpp" - #include "../linalg/kernels.hpp" - #include "kernels.hpp" -+#include "integ/bilininteg_mass_kernels.hpp" - - namespace mfem - { -@@ -22,11 +22,6 @@ namespace mfem - namespace internal - { - --void MakeReciprocal(int n, double *x) --{ -- mfem::forall(n, [=] MFEM_HOST_DEVICE (int i) { x[i] = 1.0/x[i]; }); --} -- - template - MFEM_HOST_DEVICE inline - void DGMassApply(const int e, -diff --git a/fem/fespace.cpp b/fem/fespace.cpp -index 29a1aa980..cb82c6008 100644 ---- a/fem/fespace.cpp -+++ b/fem/fespace.cpp -@@ -428,15 +428,6 @@ void FiniteElementSpace::BuildFaceToDofTable() const - face_dof = fc_dof; - } - --void FiniteElementSpace::RebuildElementToDofTable() --{ -- delete elem_dof; -- delete elem_fos; -- elem_dof = NULL; -- elem_fos = NULL; -- BuildElementToDofTable(); --} -- - void FiniteElementSpace::ReorderElementToDofTable() - { - Array dof_marker(ndofs); -@@ -1256,7 +1247,7 @@ int FiniteElementSpace::GetNConformingDofs() const - return P ? (P->Width() / vdim) : ndofs; - } - --const ElementRestrictionOperator *FiniteElementSpace::GetElementRestriction( -+const ElementRestriction *FiniteElementSpace::GetElementRestriction( - ElementDofOrdering e_ordering) const - { - // Check if we have a discontinuous space using the FE collection: -@@ -1271,22 +1262,22 @@ const ElementRestrictionOperator *FiniteElementSpace::GetElementRestriction( - // The output E-vector layout is: ND x VDIM x NE. - L2E_nat.Reset(new L2ElementRestriction(*this)); - } -- return L2E_nat.Is(); -+ return L2E_nat.Is(); - } - if (e_ordering == ElementDofOrdering::LEXICOGRAPHIC) - { - if (L2E_lex.Ptr() == NULL) - { -- L2E_lex.Reset(new ElementRestriction(*this, e_ordering)); -+ L2E_lex.Reset(new ConformingElementRestriction(*this, e_ordering)); - } -- return L2E_lex.Is(); -+ return L2E_lex.Is(); - } - // e_ordering == ElementDofOrdering::NATIVE - if (L2E_nat.Ptr() == NULL) - { -- L2E_nat.Reset(new ElementRestriction(*this, e_ordering)); -+ L2E_nat.Reset(new ConformingElementRestriction(*this, e_ordering)); - } -- return L2E_nat.Is(); -+ return L2E_nat.Is(); - } - - const FaceRestriction *FiniteElementSpace::GetFaceRestriction( -diff --git a/fem/fespace.hpp b/fem/fespace.hpp -index f777bf871..00b290c09 100644 ---- a/fem/fespace.hpp -+++ b/fem/fespace.hpp -@@ -602,18 +602,6 @@ public: - virtual const Operator *GetProlongationMatrix() const - { return GetConformingProlongation(); } - -- /// Return an operator that performs the transpose of GetRestrictionOperator -- /** The returned operator is owned by the FiniteElementSpace. In serial this -- is the same as GetProlongationMatrix() */ -- virtual const Operator *GetRestrictionTransposeOperator() const -- { return GetConformingProlongation(); } -- -- /// An abstract operator that performs the same action as GetRestrictionMatrix -- /** In some cases this is an optimized matrix-free implementation. The -- returned operator is owned by the FiniteElementSpace. */ -- virtual const Operator *GetRestrictionOperator() const -- { return GetConformingRestriction(); } -- - /// The returned SparseMatrix is owned by the FiniteElementSpace. - virtual const SparseMatrix *GetRestrictionMatrix() const - { return GetConformingRestriction(); } -@@ -639,7 +627,7 @@ public: - L2ElementRestriction class. - - The returned Operator is owned by the FiniteElementSpace. */ -- const ElementRestrictionOperator *GetElementRestriction( -+ const ElementRestriction *GetElementRestriction( - ElementDofOrdering e_ordering) const; - - /// Return an Operator that converts L-vectors to E-vectors on each face. -@@ -1058,9 +1046,6 @@ public: - void GetEdgeInteriorVDofs(int i, Array &vdofs) const; - /// @} - -- /// (@deprecated) Use the Update() method if the space or mesh changed. -- MFEM_DEPRECATED void RebuildElementToDofTable(); -- - /** @brief Reorder the scalar DOFs based on the element ordering. - - The new ordering is constructed as follows: 1) loop over all elements as -diff --git a/fem/bilininteg_br2.cpp b/fem/integ/bilininteg_br2.cpp -similarity index 99% -rename from fem/bilininteg_br2.cpp -rename to fem/integ/bilininteg_br2.cpp -index dba87a8b5..159947029 100644 ---- a/fem/bilininteg_br2.cpp -+++ b/fem/integ/bilininteg_br2.cpp -@@ -9,8 +9,8 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "bilininteg.hpp" --#include "pfespace.hpp" -+#include "../bilininteg.hpp" -+#include "../pfespace.hpp" - #include - - namespace mfem -diff --git a/fem/bilininteg_convection_ea.cpp b/fem/integ/bilininteg_convection_ea.cpp -similarity index 85% -rename from fem/bilininteg_convection_ea.cpp -rename to fem/integ/bilininteg_convection_ea.cpp -index 52e3b4e81..08422ce86 100644 ---- a/fem/bilininteg_convection_ea.cpp -+++ b/fem/integ/bilininteg_convection_ea.cpp -@@ -9,9 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" - - namespace mfem - { -@@ -22,7 +22,6 @@ static void EAConvectionAssemble1D(const int NE, - const Array &g, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -55,14 +54,7 @@ static void EAConvectionAssemble1D(const int NE, - { - val += r_Bj[k1] * D(k1, e) * r_Gi[k1]; - } -- if (add) -- { -- A(i1, j1, e) += val; -- } -- else -- { -- A(i1, j1, e) = val; -- } -+ A(i1, j1, e) += val; - } - } - }); -@@ -74,7 +66,6 @@ static void EAConvectionAssemble2D(const int NE, - const Array &g, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -130,14 +121,7 @@ static void EAConvectionAssemble2D(const int NE, - * r_B[k1][j1]* r_B[k2][j2]; - } - } -- if (add) -- { -- A(i1, i2, j1, j2, e) += val; -- } -- else -- { -- A(i1, i2, j1, j2, e) = val; -- } -+ A(i1, i2, j1, j2, e) += val; - } - } - } -@@ -151,7 +135,6 @@ static void EAConvectionAssemble3D(const int NE, - const Array &g, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -208,14 +191,7 @@ static void EAConvectionAssemble3D(const int NE, - } - } - } -- if (add) -- { -- A(i1, i2, i3, j1, j2, j3, e) += val; -- } -- else -- { -- A(i1, i2, i3, j1, j2, j3, e) = val; -- } -+ A(i1, i2, i3, j1, j2, j3, e) += val; - } - } - } -@@ -226,8 +202,7 @@ static void EAConvectionAssemble3D(const int NE, - } - - void ConvectionIntegrator::AssembleEA(const FiniteElementSpace &fes, -- Vector &ea_data, -- const bool add) -+ Vector &ea_data) - { - AssemblePA(fes); - ne = fes.GetMesh()->GetNE(); -@@ -237,15 +212,15 @@ void ConvectionIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EAConvectionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,add); -- case 0x33: return EAConvectionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,add); -- case 0x44: return EAConvectionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,add); -- case 0x55: return EAConvectionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,add); -- case 0x66: return EAConvectionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,add); -- case 0x77: return EAConvectionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,add); -- case 0x88: return EAConvectionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,add); -- case 0x99: return EAConvectionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,add); -- default: return EAConvectionAssemble1D(ne,B,G,pa_data,ea_data,add, -+ case 0x22: return EAConvectionAssemble1D<2,2>(ne,B,G,pa_data,ea_data); -+ case 0x33: return EAConvectionAssemble1D<3,3>(ne,B,G,pa_data,ea_data); -+ case 0x44: return EAConvectionAssemble1D<4,4>(ne,B,G,pa_data,ea_data); -+ case 0x55: return EAConvectionAssemble1D<5,5>(ne,B,G,pa_data,ea_data); -+ case 0x66: return EAConvectionAssemble1D<6,6>(ne,B,G,pa_data,ea_data); -+ case 0x77: return EAConvectionAssemble1D<7,7>(ne,B,G,pa_data,ea_data); -+ case 0x88: return EAConvectionAssemble1D<8,8>(ne,B,G,pa_data,ea_data); -+ case 0x99: return EAConvectionAssemble1D<9,9>(ne,B,G,pa_data,ea_data); -+ default: return EAConvectionAssemble1D(ne,B,G,pa_data,ea_data, - dofs1D,quad1D); - } - } -@@ -253,15 +228,15 @@ void ConvectionIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EAConvectionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,add); -- case 0x33: return EAConvectionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,add); -- case 0x44: return EAConvectionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,add); -- case 0x55: return EAConvectionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,add); -- case 0x66: return EAConvectionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,add); -- case 0x77: return EAConvectionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,add); -- case 0x88: return EAConvectionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,add); -- case 0x99: return EAConvectionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,add); -- default: return EAConvectionAssemble2D(ne,B,G,pa_data,ea_data,add, -+ case 0x22: return EAConvectionAssemble2D<2,2>(ne,B,G,pa_data,ea_data); -+ case 0x33: return EAConvectionAssemble2D<3,3>(ne,B,G,pa_data,ea_data); -+ case 0x44: return EAConvectionAssemble2D<4,4>(ne,B,G,pa_data,ea_data); -+ case 0x55: return EAConvectionAssemble2D<5,5>(ne,B,G,pa_data,ea_data); -+ case 0x66: return EAConvectionAssemble2D<6,6>(ne,B,G,pa_data,ea_data); -+ case 0x77: return EAConvectionAssemble2D<7,7>(ne,B,G,pa_data,ea_data); -+ case 0x88: return EAConvectionAssemble2D<8,8>(ne,B,G,pa_data,ea_data); -+ case 0x99: return EAConvectionAssemble2D<9,9>(ne,B,G,pa_data,ea_data); -+ default: return EAConvectionAssemble2D(ne,B,G,pa_data,ea_data, - dofs1D,quad1D); - } - } -@@ -269,14 +244,14 @@ void ConvectionIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x23: return EAConvectionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,add); -- case 0x34: return EAConvectionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,add); -- case 0x45: return EAConvectionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,add); -- case 0x56: return EAConvectionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,add); -- case 0x67: return EAConvectionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,add); -- case 0x78: return EAConvectionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,add); -- case 0x89: return EAConvectionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,add); -- default: return EAConvectionAssemble3D(ne,B,G,pa_data,ea_data,add, -+ case 0x23: return EAConvectionAssemble3D<2,3>(ne,B,G,pa_data,ea_data); -+ case 0x34: return EAConvectionAssemble3D<3,4>(ne,B,G,pa_data,ea_data); -+ case 0x45: return EAConvectionAssemble3D<4,5>(ne,B,G,pa_data,ea_data); -+ case 0x56: return EAConvectionAssemble3D<5,6>(ne,B,G,pa_data,ea_data); -+ case 0x67: return EAConvectionAssemble3D<6,7>(ne,B,G,pa_data,ea_data); -+ case 0x78: return EAConvectionAssemble3D<7,8>(ne,B,G,pa_data,ea_data); -+ case 0x89: return EAConvectionAssemble3D<8,9>(ne,B,G,pa_data,ea_data); -+ default: return EAConvectionAssemble3D(ne,B,G,pa_data,ea_data, - dofs1D,quad1D); - } - } -diff --git a/fem/bilininteg_convection_mf.cpp b/fem/integ/bilininteg_convection_mf.cpp -similarity index 92% -rename from fem/bilininteg_convection_mf.cpp -rename to fem/integ/bilininteg_convection_mf.cpp -index 61520c135..bbaf82788 100644 ---- a/fem/bilininteg_convection_mf.cpp -+++ b/fem/integ/bilininteg_convection_mf.cpp -@@ -9,12 +9,10 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "ceed/integrators/convection/convection.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/convection/convection.hpp" - - namespace mfem - { -diff --git a/fem/bilininteg_convection_pa.cpp b/fem/integ/bilininteg_convection_pa.cpp -similarity index 90% -rename from fem/bilininteg_convection_pa.cpp -rename to fem/integ/bilininteg_convection_pa.cpp -index 48080ecdf..25928f002 100644 ---- a/fem/bilininteg_convection_pa.cpp -+++ b/fem/integ/bilininteg_convection_pa.cpp -@@ -9,18 +9,15 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qfunction.hpp" --#include "ceed/integrators/convection/convection.hpp" --#include "quadinterpolator.hpp" -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../ceed/integrators/convection/convection.hpp" - - namespace mfem - { - --// PA Convection Integrator -- - // PA Convection Assemble 2D kernel - static void PAConvectionSetup2D(const int NQ, - const int NE, -@@ -115,38 +112,85 @@ static void PAConvectionSetup3D(const int NQ, - }); - } - --static void PAConvectionSetup(const int dim, -- const int NQ, -- const int NE, -- const Array &W, -- const Vector &J, -- const Vector &coeff, -- const double alpha, -- Vector &op) -+void ConvectionIntegrator::AssemblePA(const FiniteElementSpace &fes) - { -- if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAConvectionSetup"); } -- if (dim == 2) -+ const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -+ Device::GetDeviceMemoryType() : pa_mt; -+ // Assumes tensor-product elements -+ Mesh *mesh = fes.GetMesh(); -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation &Trans = *fes.GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); -+ if (DeviceCanUseCeed()) - { -- PAConvectionSetup2D(NQ, NE, W, J, coeff, alpha, op); -+ delete ceedOp; -+ const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -+ fes.IsVariableOrder(); -+ if (mixed) -+ { -+ ceedOp = new ceed::MixedPAConvectionIntegrator(*this, fes, Q, alpha); -+ } -+ else -+ { -+ ceedOp = new ceed::PAConvectionIntegrator(fes, *ir, Q, alpha); -+ } -+ return; - } -- if (dim == 3) -+ const int dims = el.GetDim(); -+ const int symmDims = dims; -+ nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ ne = fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS, mt); -+ maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -+ dofs1D = maps->ndof; -+ quad1D = maps->nqpt; -+ pa_data.SetSize(symmDims * nq * ne, mt); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector vel(*Q, qs, CoefficientStorage::COMPRESSED); -+ -+ if (dim == 1) - { -- PAConvectionSetup3D(NQ, NE, W, J, coeff, alpha, op); -+ MFEM_ABORT("dim==1 not supported in ConvectionIntegrator::AssemblePA"); -+ } -+ else if (dim == 2) -+ { -+ PAConvectionSetup2D(nq, ne, ir->GetWeights(), geom->J, -+ vel, alpha, pa_data); -+ } -+ else if (dim == 3) -+ { -+ PAConvectionSetup3D(nq, ne, ir->GetWeights(), geom->J, -+ vel, alpha, pa_data); -+ } -+} -+ -+void ConvectionIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ MFEM_ABORT("AssembleDiagonalPA not yet implemented for" -+ " ConvectionIntegrator."); - } - } - - // PA Convection Apply 2D kernel --template static --void PAConvectionApply2D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PAConvectionApply2D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -255,17 +299,17 @@ void PAConvectionApply2D(const int ne, - } - - // Optimized PA Convection Apply 2D kernel --template static --void SmemPAConvectionApply2D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void SmemPAConvectionApply2D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -383,17 +427,17 @@ void SmemPAConvectionApply2D(const int ne, - } - - // PA Convection Apply 3D kernel --template static --void PAConvectionApply3D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PAConvectionApply3D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -564,17 +608,17 @@ void PAConvectionApply3D(const int ne, - } - - // Optimized PA Convection Apply 3D kernel --template static --void SmemPAConvectionApply3D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void SmemPAConvectionApply3D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -768,17 +812,17 @@ void SmemPAConvectionApply3D(const int ne, - } - - // PA Convection Apply 2D kernel --template static --void PAConvectionApplyT2D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PAConvectionApplyT2D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -883,17 +927,17 @@ void PAConvectionApplyT2D(const int ne, - } - - // Optimized PA Convection Apply 2D kernel --template static --void SmemPAConvectionApplyT2D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void SmemPAConvectionApplyT2D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -1006,17 +1050,17 @@ void SmemPAConvectionApplyT2D(const int ne, - } - - // PA Convection Apply 3D kernel --template static --void PAConvectionApplyT3D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PAConvectionApplyT3D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -1182,17 +1226,17 @@ void PAConvectionApplyT3D(const int ne, - } - - // Optimized PA Convection Apply 3D kernel --template static --void SmemPAConvectionApplyT3D(const int ne, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void SmemPAConvectionApplyT3D(const int ne, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int NE = ne; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -1375,48 +1419,6 @@ void SmemPAConvectionApplyT3D(const int ne, - }); - } - --void ConvectionIntegrator::AssemblePA(const FiniteElementSpace &fes) --{ -- const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -- Device::GetDeviceMemoryType() : pa_mt; -- // Assumes tensor-product elements -- Mesh *mesh = fes.GetMesh(); -- const FiniteElement &el = *fes.GetFE(0); -- ElementTransformation &Trans = *fes.GetElementTransformation(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, Trans); -- if (DeviceCanUseCeed()) -- { -- delete ceedOp; -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPAConvectionIntegrator(*this, fes, Q, alpha); -- } -- else -- { -- ceedOp = new ceed::PAConvectionIntegrator(fes, *ir, Q, alpha); -- } -- return; -- } -- const int dims = el.GetDim(); -- const int symmDims = dims; -- nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- ne = fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS, mt); -- maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -- dofs1D = maps->ndof; -- quad1D = maps->nqpt; -- pa_data.SetSize(symmDims * nq * ne, mt); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector vel(*Q, qs, CoefficientStorage::COMPRESSED); -- -- PAConvectionSetup(dim, nq, ne, ir->GetWeights(), geom->J, -- vel, alpha, pa_data); --} -- - static void PAConvectionApply(const int dim, - const int D1D, - const int Q1D, -@@ -1521,7 +1523,6 @@ static void PAConvectionApplyT(const int dim, - MFEM_ABORT("Unknown kernel."); - } - --// PA Convection Apply kernel - void ConvectionIntegrator::AddMultPA(const Vector &x, Vector &y) const - { - if (DeviceCanUseCeed()) -@@ -1536,12 +1537,11 @@ void ConvectionIntegrator::AddMultPA(const Vector &x, Vector &y) const - } - } - --// PA Convection Apply transpose kernel - void ConvectionIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const - { - if (DeviceCanUseCeed()) - { -- MFEM_ABORT("AddMultPA not yet implemented with libCEED for" -+ MFEM_ABORT("AddMultTransposePA not yet implemented with libCEED for" - " ConvectionIntegrator."); - } - else -@@ -1552,17 +1552,4 @@ void ConvectionIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const - } - } - --void ConvectionIntegrator::AssembleDiagonalPA(Vector &diag) --{ -- if (DeviceCanUseCeed()) -- { -- ceedOp->GetDiagonal(diag); -- } -- else -- { -- MFEM_ABORT("AssembleDiagonalPA not yet implemented for" -- " ConvectionIntegrator."); -- } --} -- - } // namespace mfem -diff --git a/fem/integ/bilininteg_curlcurl_pa.cpp b/fem/integ/bilininteg_curlcurl_pa.cpp -new file mode 100644 -index 000000000..3d12d978a ---- /dev/null -+++ b/fem/integ/bilininteg_curlcurl_pa.cpp -@@ -0,0 +1,208 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "bilininteg_hcurl_kernels.hpp" -+ -+namespace mfem -+{ -+ -+void CurlCurlIntegrator::AssemblePA(const FiniteElementSpace &fes) -+{ -+ // Assumes tensor-product elements -+ Mesh *mesh = fes.GetMesh(); -+ const FiniteElement *fel = fes.GetFE(0); -+ -+ const VectorTensorFiniteElement *el = -+ dynamic_cast(fel); -+ MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir -+ = IntRule ? IntRule : &MassIntegrator::GetRule(*el, *el, -+ *mesh->GetElementTransformation(0)); -+ -+ const int dims = el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ ne = fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC); -+ if (Q) { coeff.Project(*Q); } -+ else if (MQ) { coeff.ProjectTranspose(*MQ); } -+ else if (DQ) { coeff.Project(*DQ); } -+ else { coeff.SetConstant(1.0); } -+ -+ const int coeff_dim = coeff.GetVDim(); -+ symmetric = (coeff_dim != dim*dim); -+ const int sym_dims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ const int ndata = (dim == 2) ? 1 : (symmetric ? sym_dims : dim*dim); -+ pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType()); -+ -+ if (el->GetDerivType() != mfem::FiniteElement::CURL) -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+ -+ if (dim == 3) -+ { -+ internal::PACurlCurlSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else -+ { -+ internal::PACurlCurlSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, -+ pa_data); -+ } -+} -+ -+void CurlCurlIntegrator::AssembleDiagonalPA(Vector& diag) -+{ -+ if (dim == 3) -+ { -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<2,3>( -+ dofs1D, -+ quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ case 0x34: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<3,4>( -+ dofs1D, -+ quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ case 0x45: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<4,5>( -+ dofs1D, -+ quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ case 0x56: -+ return internal::SmemPACurlCurlAssembleDiagonal3D<5,6>( -+ dofs1D, -+ quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ default: -+ return internal::SmemPACurlCurlAssembleDiagonal3D( -+ dofs1D, quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ } -+ } -+ else -+ { -+ internal::PACurlCurlAssembleDiagonal3D(dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, -+ mapsO->G, mapsC->G, -+ pa_data, diag); -+ } -+ } -+ else if (dim == 2) -+ { -+ internal::PACurlCurlAssembleDiagonal2D(dofs1D, quad1D, ne, -+ mapsO->B, mapsC->G, pa_data, diag); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void CurlCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPACurlCurlApply3D<2,3>( -+ dofs1D, quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ case 0x34: -+ return internal::SmemPACurlCurlApply3D<3,4>( -+ dofs1D, quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ case 0x45: -+ return internal::SmemPACurlCurlApply3D<4,5>( -+ dofs1D, quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ case 0x56: -+ return internal::SmemPACurlCurlApply3D<5,6>( -+ dofs1D, quad1D, -+ symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ default: -+ return internal::SmemPACurlCurlApply3D( -+ dofs1D, quad1D, symmetric, ne, -+ mapsO->B, mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ } -+ } -+ else -+ { -+ internal::PACurlCurlApply3D(dofs1D, quad1D, symmetric, ne, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, mapsC->G, mapsC->Gt, -+ pa_data, x, y); -+ } -+ } -+ else if (dim == 2) -+ { -+ internal::PACurlCurlApply2D(dofs1D, quad1D, ne, mapsO->B, mapsO->Bt, -+ mapsC->G, mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/bilininteg_dgtrace_ea.cpp b/fem/integ/bilininteg_dgtrace_ea.cpp -similarity index 75% -rename from fem/bilininteg_dgtrace_ea.cpp -rename to fem/integ/bilininteg_dgtrace_ea.cpp -index c40d2ff46..602c266ab 100644 ---- a/fem/bilininteg_dgtrace_ea.cpp -+++ b/fem/integ/bilininteg_dgtrace_ea.cpp -@@ -9,9 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" - - namespace mfem - { -@@ -20,8 +20,7 @@ static void EADGTraceAssemble1DInt(const int NF, - const Array &basis, - const Vector &padata, - Vector &eadata_int, -- Vector &eadata_ext, -- const bool add) -+ Vector &eadata_ext) - { - auto D = Reshape(padata.Read(), 2, 2, NF); - auto A_int = Reshape(eadata_int.ReadWrite(), 2, NF); -@@ -33,41 +32,23 @@ static void EADGTraceAssemble1DInt(const int NF, - val_ext10 = D(1, 0, f); - val_ext01 = D(0, 1, f); - val_int1 = D(1, 1, f); -- if (add) -- { -- A_int(0, f) += val_int0; -- A_int(1, f) += val_int1; -- A_ext(0, f) += val_ext01; -- A_ext(1, f) += val_ext10; -- } -- else -- { -- A_int(0, f) = val_int0; -- A_int(1, f) = val_int1; -- A_ext(0, f) = val_ext01; -- A_ext(1, f) = val_ext10; -- } -+ A_int(0, f) += val_int0; -+ A_int(1, f) += val_int1; -+ A_ext(0, f) += val_ext01; -+ A_ext(1, f) += val_ext10; - }); - } - - static void EADGTraceAssemble1DBdr(const int NF, - const Array &basis, - const Vector &padata, -- Vector &eadata_bdr, -- const bool add) -+ Vector &eadata_bdr) - { - auto D = Reshape(padata.Read(), 2, 2, NF); - auto A_bdr = Reshape(eadata_bdr.ReadWrite(), NF); - mfem::forall(NF, [=] MFEM_HOST_DEVICE (int f) - { -- if (add) -- { -- A_bdr(f) += D(0, 0, f); -- } -- else -- { -- A_bdr(f) = D(0, 0, f); -- } -+ A_bdr(f) += D(0, 0, f); - }); - } - -@@ -77,7 +58,6 @@ static void EADGTraceAssemble2DInt(const int NF, - const Vector &padata, - Vector &eadata_int, - Vector &eadata_ext, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -108,20 +88,10 @@ static void EADGTraceAssemble2DInt(const int NF, - val_ext10 += B(k1,i1) * B(k1,j1) * D(k1, 1, 0, f); - val_int1 += B(k1,i1) * B(k1,j1) * D(k1, 1, 1, f); - } -- if (add) -- { -- A_int(i1, j1, 0, f) += val_int0; -- A_int(i1, j1, 1, f) += val_int1; -- A_ext(i1, j1, 0, f) += val_ext01; -- A_ext(i1, j1, 1, f) += val_ext10; -- } -- else -- { -- A_int(i1, j1, 0, f) = val_int0; -- A_int(i1, j1, 1, f) = val_int1; -- A_ext(i1, j1, 0, f) = val_ext01; -- A_ext(i1, j1, 1, f) = val_ext10; -- } -+ A_int(i1, j1, 0, f) += val_int0; -+ A_int(i1, j1, 1, f) += val_int1; -+ A_ext(i1, j1, 0, f) += val_ext01; -+ A_ext(i1, j1, 1, f) += val_ext10; - } - } - }); -@@ -132,7 +102,6 @@ static void EADGTraceAssemble2DBdr(const int NF, - const Array &basis, - const Vector &padata, - Vector &eadata_bdr, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -156,14 +125,7 @@ static void EADGTraceAssemble2DBdr(const int NF, - { - val_bdr += B(k1,i1) * B(k1,j1) * D(k1, 0, 0, f); - } -- if (add) -- { -- A_bdr(i1, j1, f) += val_bdr; -- } -- else -- { -- A_bdr(i1, j1, f) = val_bdr; -- } -+ A_bdr(i1, j1, f) += val_bdr; - } - } - }); -@@ -175,7 +137,6 @@ static void EADGTraceAssemble3DInt(const int NF, - const Vector &padata, - Vector &eadata_int, - Vector &eadata_ext, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -246,20 +207,10 @@ static void EADGTraceAssemble3DInt(const int NF, - * s_D[k1][k2][1][0]; - } - } -- if (add) -- { -- A_int(i1, i2, j1, j2, 0, f) += val_int0; -- A_int(i1, i2, j1, j2, 1, f) += val_int1; -- A_ext(i1, i2, j1, j2, 0, f) += val_ext01; -- A_ext(i1, i2, j1, j2, 1, f) += val_ext10; -- } -- else -- { -- A_int(i1, i2, j1, j2, 0, f) = val_int0; -- A_int(i1, i2, j1, j2, 1, f) = val_int1; -- A_ext(i1, i2, j1, j2, 0, f) = val_ext01; -- A_ext(i1, i2, j1, j2, 1, f) = val_ext10; -- } -+ A_int(i1, i2, j1, j2, 0, f) += val_int0; -+ A_int(i1, i2, j1, j2, 1, f) += val_int1; -+ A_ext(i1, i2, j1, j2, 0, f) += val_ext01; -+ A_ext(i1, i2, j1, j2, 1, f) += val_ext10; - } - } - } -@@ -272,7 +223,6 @@ static void EADGTraceAssemble3DBdr(const int NF, - const Array &basis, - const Vector &padata, - Vector &eadata_bdr, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -330,14 +280,7 @@ static void EADGTraceAssemble3DBdr(const int NF, - * s_D[k1][k2][0][0]; - } - } -- if (add) -- { -- A_bdr(i1, i2, j1, j2, f) += val_bdr; -- } -- else -- { -- A_bdr(i1, i2, j1, j2, f) = val_bdr; -- } -+ A_bdr(i1, i2, j1, j2, f) += val_bdr; - } - } - } -@@ -347,8 +290,7 @@ static void EADGTraceAssemble3DBdr(const int NF, - - void DGTraceIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace& fes, - Vector &ea_data_int, -- Vector &ea_data_ext, -- const bool add) -+ Vector &ea_data_ext) - { - SetupPA(fes, FaceType::Interior); - nf = fes.GetNFbyType(FaceType::Interior); -@@ -356,7 +298,7 @@ void DGTraceIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace& fes, - const Array &B = maps->B; - if (dim == 1) - { -- return EADGTraceAssemble1DInt(nf,B,pa_data,ea_data_int,ea_data_ext,add); -+ return EADGTraceAssemble1DInt(nf,B,pa_data,ea_data_int,ea_data_ext); - } - else if (dim == 2) - { -@@ -364,31 +306,31 @@ void DGTraceIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace& fes, - { - case 0x22: - return EADGTraceAssemble2DInt<2,2>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x33: - return EADGTraceAssemble2DInt<3,3>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x44: - return EADGTraceAssemble2DInt<4,4>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x55: - return EADGTraceAssemble2DInt<5,5>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x66: - return EADGTraceAssemble2DInt<6,6>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x77: - return EADGTraceAssemble2DInt<7,7>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x88: - return EADGTraceAssemble2DInt<8,8>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x99: - return EADGTraceAssemble2DInt<9,9>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - default: - return EADGTraceAssemble2DInt(nf,B,pa_data,ea_data_int, -- ea_data_ext,add,dofs1D,quad1D); -+ ea_data_ext,dofs1D,quad1D); - } - } - else if (dim == 3) -@@ -397,36 +339,35 @@ void DGTraceIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace& fes, - { - case 0x23: - return EADGTraceAssemble3DInt<2,3>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x34: - return EADGTraceAssemble3DInt<3,4>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x45: - return EADGTraceAssemble3DInt<4,5>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x56: - return EADGTraceAssemble3DInt<5,6>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x67: - return EADGTraceAssemble3DInt<6,7>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x78: - return EADGTraceAssemble3DInt<7,8>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - case 0x89: - return EADGTraceAssemble3DInt<8,9>(nf,B,pa_data,ea_data_int, -- ea_data_ext,add); -+ ea_data_ext); - default: - return EADGTraceAssemble3DInt(nf,B,pa_data,ea_data_int, -- ea_data_ext,add,dofs1D,quad1D); -+ ea_data_ext,dofs1D,quad1D); - } - } - MFEM_ABORT("Unknown kernel."); - } - - void DGTraceIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace& fes, -- Vector &ea_data_bdr, -- const bool add) -+ Vector &ea_data_bdr) - { - SetupPA(fes, FaceType::Boundary); - nf = fes.GetNFbyType(FaceType::Boundary); -@@ -434,37 +375,37 @@ void DGTraceIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace& fes, - const Array &B = maps->B; - if (dim == 1) - { -- return EADGTraceAssemble1DBdr(nf,B,pa_data,ea_data_bdr,add); -+ return EADGTraceAssemble1DBdr(nf,B,pa_data,ea_data_bdr); - } - else if (dim == 2) - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EADGTraceAssemble2DBdr<2,2>(nf,B,pa_data,ea_data_bdr,add); -- case 0x33: return EADGTraceAssemble2DBdr<3,3>(nf,B,pa_data,ea_data_bdr,add); -- case 0x44: return EADGTraceAssemble2DBdr<4,4>(nf,B,pa_data,ea_data_bdr,add); -- case 0x55: return EADGTraceAssemble2DBdr<5,5>(nf,B,pa_data,ea_data_bdr,add); -- case 0x66: return EADGTraceAssemble2DBdr<6,6>(nf,B,pa_data,ea_data_bdr,add); -- case 0x77: return EADGTraceAssemble2DBdr<7,7>(nf,B,pa_data,ea_data_bdr,add); -- case 0x88: return EADGTraceAssemble2DBdr<8,8>(nf,B,pa_data,ea_data_bdr,add); -- case 0x99: return EADGTraceAssemble2DBdr<9,9>(nf,B,pa_data,ea_data_bdr,add); -+ case 0x22: return EADGTraceAssemble2DBdr<2,2>(nf,B,pa_data,ea_data_bdr); -+ case 0x33: return EADGTraceAssemble2DBdr<3,3>(nf,B,pa_data,ea_data_bdr); -+ case 0x44: return EADGTraceAssemble2DBdr<4,4>(nf,B,pa_data,ea_data_bdr); -+ case 0x55: return EADGTraceAssemble2DBdr<5,5>(nf,B,pa_data,ea_data_bdr); -+ case 0x66: return EADGTraceAssemble2DBdr<6,6>(nf,B,pa_data,ea_data_bdr); -+ case 0x77: return EADGTraceAssemble2DBdr<7,7>(nf,B,pa_data,ea_data_bdr); -+ case 0x88: return EADGTraceAssemble2DBdr<8,8>(nf,B,pa_data,ea_data_bdr); -+ case 0x99: return EADGTraceAssemble2DBdr<9,9>(nf,B,pa_data,ea_data_bdr); - default: -- return EADGTraceAssemble2DBdr(nf,B,pa_data,ea_data_bdr,add,dofs1D,quad1D); -+ return EADGTraceAssemble2DBdr(nf,B,pa_data,ea_data_bdr,dofs1D,quad1D); - } - } - else if (dim == 3) - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x23: return EADGTraceAssemble3DBdr<2,3>(nf,B,pa_data,ea_data_bdr,add); -- case 0x34: return EADGTraceAssemble3DBdr<3,4>(nf,B,pa_data,ea_data_bdr,add); -- case 0x45: return EADGTraceAssemble3DBdr<4,5>(nf,B,pa_data,ea_data_bdr,add); -- case 0x56: return EADGTraceAssemble3DBdr<5,6>(nf,B,pa_data,ea_data_bdr,add); -- case 0x67: return EADGTraceAssemble3DBdr<6,7>(nf,B,pa_data,ea_data_bdr,add); -- case 0x78: return EADGTraceAssemble3DBdr<7,8>(nf,B,pa_data,ea_data_bdr,add); -- case 0x89: return EADGTraceAssemble3DBdr<8,9>(nf,B,pa_data,ea_data_bdr,add); -+ case 0x23: return EADGTraceAssemble3DBdr<2,3>(nf,B,pa_data,ea_data_bdr); -+ case 0x34: return EADGTraceAssemble3DBdr<3,4>(nf,B,pa_data,ea_data_bdr); -+ case 0x45: return EADGTraceAssemble3DBdr<4,5>(nf,B,pa_data,ea_data_bdr); -+ case 0x56: return EADGTraceAssemble3DBdr<5,6>(nf,B,pa_data,ea_data_bdr); -+ case 0x67: return EADGTraceAssemble3DBdr<6,7>(nf,B,pa_data,ea_data_bdr); -+ case 0x78: return EADGTraceAssemble3DBdr<7,8>(nf,B,pa_data,ea_data_bdr); -+ case 0x89: return EADGTraceAssemble3DBdr<8,9>(nf,B,pa_data,ea_data_bdr); - default: -- return EADGTraceAssemble3DBdr(nf,B,pa_data,ea_data_bdr,add,dofs1D,quad1D); -+ return EADGTraceAssemble3DBdr(nf,B,pa_data,ea_data_bdr,dofs1D,quad1D); - } - } - MFEM_ABORT("Unknown kernel."); -diff --git a/fem/bilininteg_dgtrace_pa.cpp b/fem/integ/bilininteg_dgtrace_pa.cpp -similarity index 90% -rename from fem/bilininteg_dgtrace_pa.cpp -rename to fem/integ/bilininteg_dgtrace_pa.cpp -index 6987d3455..f4b8d837c 100644 ---- a/fem/bilininteg_dgtrace_pa.cpp -+++ b/fem/integ/bilininteg_dgtrace_pa.cpp -@@ -9,16 +9,15 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qfunction.hpp" --#include "restriction.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../restriction.hpp" - - namespace mfem - { -+ - // PA DG Trace Integrator - static void PADGTraceSetup2D(const int Q1D, - const int NF, -@@ -111,30 +110,6 @@ static void PADGTraceSetup3D(const int Q1D, - }); - } - --static void PADGTraceSetup(const int dim, -- const int D1D, -- const int Q1D, -- const int NF, -- const Array &W, -- const Vector &det, -- const Vector &nor, -- const Vector &rho, -- const Vector &u, -- const double alpha, -- const double beta, -- Vector &op) --{ -- if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADGTraceSetup"); } -- if (dim == 2) -- { -- PADGTraceSetup2D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op); -- } -- if (dim == 3) -- { -- PADGTraceSetup3D(Q1D, NF, W, det, nor, rho, u, alpha, beta, op); -- } --} -- - void DGTraceIntegrator::SetupPA(const FiniteElementSpace &fes, FaceType type) - { - const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -@@ -230,9 +205,21 @@ void DGTraceIntegrator::SetupPA(const FiniteElementSpace &fes, FaceType type) - } - MFEM_VERIFY(f_ind==nf, "Incorrect number of faces."); - } -- PADGTraceSetup(dim, dofs1D, quad1D, nf, ir->GetWeights(), -- geom->detJ, geom->normal, r, vel, -- alpha, beta, pa_data); -+ -+ if (dim == 1) -+ { -+ MFEM_ABORT("dim==1 not supported in DGTraceIntegrator::SetupPA"); -+ } -+ else if (dim == 2) -+ { -+ PADGTraceSetup2D(quad1D, nf, ir->GetWeights(), geom->detJ, geom->normal, -+ r, vel, alpha, beta, pa_data); -+ } -+ else if (dim == 3) -+ { -+ PADGTraceSetup3D(quad1D, nf, ir->GetWeights(), geom->detJ, geom->normal, -+ r, vel, alpha, beta, pa_data); -+ } - } - - void DGTraceIntegrator::AssemblePAInteriorFaces(const FiniteElementSpace& fes) -@@ -246,15 +233,15 @@ void DGTraceIntegrator::AssemblePABoundaryFaces(const FiniteElementSpace& fes) - } - - // PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein --template static --void PADGTraceApply2D(const int NF, -- const Array &b, -- const Array &bt, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PADGTraceApply2D(const int NF, -+ const Array &b, -+ const Array &bt, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int VDIM = 1; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -337,15 +324,15 @@ void PADGTraceApply2D(const int NF, - } - - // PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein --template static --void PADGTraceApply3D(const int NF, -- const Array &b, -- const Array &bt, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PADGTraceApply3D(const int NF, -+ const Array &b, -+ const Array &bt, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int VDIM = 1; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -482,15 +469,15 @@ void PADGTraceApply3D(const int NF, - } - - // Optimized PA DGTrace Apply 3D kernel for Gauss-Lobatto/Bernstein --template static --void SmemPADGTraceApply3D(const int NF, -- const Array &b, -- const Array &bt, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void SmemPADGTraceApply3D(const int NF, -+ const Array &b, -+ const Array &bt, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -647,15 +634,15 @@ static void PADGTraceApply(const int dim, - } - - // PA DGTrace Apply 2D kernel for Gauss-Lobatto/Bernstein --template static --void PADGTraceApplyTranspose2D(const int NF, -- const Array &b, -- const Array &bt, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PADGTraceApplyTranspose2D(const int NF, -+ const Array &b, -+ const Array &bt, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int VDIM = 1; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -743,15 +730,15 @@ void PADGTraceApplyTranspose2D(const int NF, - } - - // PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein --template static --void PADGTraceApplyTranspose3D(const int NF, -- const Array &b, -- const Array &bt, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void PADGTraceApplyTranspose3D(const int NF, -+ const Array &b, -+ const Array &bt, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int VDIM = 1; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -899,15 +886,15 @@ void PADGTraceApplyTranspose3D(const int NF, - } - - // Optimized PA DGTrace Apply Transpose 3D kernel for Gauss-Lobatto/Bernstein --template static --void SmemPADGTraceApplyTranspose3D(const int NF, -- const Array &b, -- const Array &bt, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+template -+static void SmemPADGTraceApplyTranspose3D(const int NF, -+ const Array &b, -+ const Array &bt, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -1076,7 +1063,6 @@ static void PADGTraceApplyTranspose(const int dim, - MFEM_ABORT("Unknown kernel."); - } - --// PA DGTraceIntegrator Apply kernel - void DGTraceIntegrator::AddMultPA(const Vector &x, Vector &y) const - { - PADGTraceApply(dim, dofs1D, quad1D, nf, -diff --git a/fem/bilininteg_diffusion_ea.cpp b/fem/integ/bilininteg_diffusion_ea.cpp -similarity index 87% -rename from fem/bilininteg_diffusion_ea.cpp -rename to fem/integ/bilininteg_diffusion_ea.cpp -index c6b43053c..aa36233c4 100644 ---- a/fem/bilininteg_diffusion_ea.cpp -+++ b/fem/integ/bilininteg_diffusion_ea.cpp -@@ -9,9 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" - - namespace mfem - { -@@ -22,7 +22,6 @@ static void EADiffusionAssemble1D(const int NE, - const Array &g, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -54,14 +53,7 @@ static void EADiffusionAssemble1D(const int NE, - { - val += r_Gj[k1] * D(k1, e) * r_Gi[k1]; - } -- if (add) -- { -- A(i1, j1, e) += val; -- } -- else -- { -- A(i1, j1, e) = val; -- } -+ A(i1, j1, e) += val; - } - } - }); -@@ -73,7 +65,6 @@ static void EADiffusionAssemble2D(const int NE, - const Array &g, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -129,14 +120,7 @@ static void EADiffusionAssemble2D(const int NE, - + gbi * D11 * gbj; - } - } -- if (add) -- { -- A(i1, i2, j1, j2, e) += val; -- } -- else -- { -- A(i1, i2, j1, j2, e) = val; -- } -+ A(i1, i2, j1, j2, e) += val; - } - } - } -@@ -150,7 +134,6 @@ static void EADiffusionAssemble3D(const int NE, - const Array &g, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -225,14 +208,7 @@ static void EADiffusionAssemble3D(const int NE, - } - } - } -- if (add) -- { -- A(i1, i2, i3, j1, j2, j3, e) += val; -- } -- else -- { -- A(i1, i2, i3, j1, j2, j3, e) = val; -- } -+ A(i1, i2, i3, j1, j2, j3, e) += val; - } - } - } -@@ -243,8 +219,7 @@ static void EADiffusionAssemble3D(const int NE, - } - - void DiffusionIntegrator::AssembleEA(const FiniteElementSpace &fes, -- Vector &ea_data, -- const bool add) -+ Vector &ea_data) - { - AssemblePA(fes); - ne = fes.GetMesh()->GetNE(); -@@ -254,15 +229,15 @@ void DiffusionIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EADiffusionAssemble1D<2,2>(ne,B,G,pa_data,ea_data,add); -- case 0x33: return EADiffusionAssemble1D<3,3>(ne,B,G,pa_data,ea_data,add); -- case 0x44: return EADiffusionAssemble1D<4,4>(ne,B,G,pa_data,ea_data,add); -- case 0x55: return EADiffusionAssemble1D<5,5>(ne,B,G,pa_data,ea_data,add); -- case 0x66: return EADiffusionAssemble1D<6,6>(ne,B,G,pa_data,ea_data,add); -- case 0x77: return EADiffusionAssemble1D<7,7>(ne,B,G,pa_data,ea_data,add); -- case 0x88: return EADiffusionAssemble1D<8,8>(ne,B,G,pa_data,ea_data,add); -- case 0x99: return EADiffusionAssemble1D<9,9>(ne,B,G,pa_data,ea_data,add); -- default: return EADiffusionAssemble1D(ne,B,G,pa_data,ea_data,add, -+ case 0x22: return EADiffusionAssemble1D<2,2>(ne,B,G,pa_data,ea_data); -+ case 0x33: return EADiffusionAssemble1D<3,3>(ne,B,G,pa_data,ea_data); -+ case 0x44: return EADiffusionAssemble1D<4,4>(ne,B,G,pa_data,ea_data); -+ case 0x55: return EADiffusionAssemble1D<5,5>(ne,B,G,pa_data,ea_data); -+ case 0x66: return EADiffusionAssemble1D<6,6>(ne,B,G,pa_data,ea_data); -+ case 0x77: return EADiffusionAssemble1D<7,7>(ne,B,G,pa_data,ea_data); -+ case 0x88: return EADiffusionAssemble1D<8,8>(ne,B,G,pa_data,ea_data); -+ case 0x99: return EADiffusionAssemble1D<9,9>(ne,B,G,pa_data,ea_data); -+ default: return EADiffusionAssemble1D(ne,B,G,pa_data,ea_data, - dofs1D,quad1D); - } - } -@@ -270,15 +245,15 @@ void DiffusionIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EADiffusionAssemble2D<2,2>(ne,B,G,pa_data,ea_data,add); -- case 0x33: return EADiffusionAssemble2D<3,3>(ne,B,G,pa_data,ea_data,add); -- case 0x44: return EADiffusionAssemble2D<4,4>(ne,B,G,pa_data,ea_data,add); -- case 0x55: return EADiffusionAssemble2D<5,5>(ne,B,G,pa_data,ea_data,add); -- case 0x66: return EADiffusionAssemble2D<6,6>(ne,B,G,pa_data,ea_data,add); -- case 0x77: return EADiffusionAssemble2D<7,7>(ne,B,G,pa_data,ea_data,add); -- case 0x88: return EADiffusionAssemble2D<8,8>(ne,B,G,pa_data,ea_data,add); -- case 0x99: return EADiffusionAssemble2D<9,9>(ne,B,G,pa_data,ea_data,add); -- default: return EADiffusionAssemble2D(ne,B,G,pa_data,ea_data,add, -+ case 0x22: return EADiffusionAssemble2D<2,2>(ne,B,G,pa_data,ea_data); -+ case 0x33: return EADiffusionAssemble2D<3,3>(ne,B,G,pa_data,ea_data); -+ case 0x44: return EADiffusionAssemble2D<4,4>(ne,B,G,pa_data,ea_data); -+ case 0x55: return EADiffusionAssemble2D<5,5>(ne,B,G,pa_data,ea_data); -+ case 0x66: return EADiffusionAssemble2D<6,6>(ne,B,G,pa_data,ea_data); -+ case 0x77: return EADiffusionAssemble2D<7,7>(ne,B,G,pa_data,ea_data); -+ case 0x88: return EADiffusionAssemble2D<8,8>(ne,B,G,pa_data,ea_data); -+ case 0x99: return EADiffusionAssemble2D<9,9>(ne,B,G,pa_data,ea_data); -+ default: return EADiffusionAssemble2D(ne,B,G,pa_data,ea_data, - dofs1D,quad1D); - } - } -@@ -286,14 +261,14 @@ void DiffusionIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x23: return EADiffusionAssemble3D<2,3>(ne,B,G,pa_data,ea_data,add); -- case 0x34: return EADiffusionAssemble3D<3,4>(ne,B,G,pa_data,ea_data,add); -- case 0x45: return EADiffusionAssemble3D<4,5>(ne,B,G,pa_data,ea_data,add); -- case 0x56: return EADiffusionAssemble3D<5,6>(ne,B,G,pa_data,ea_data,add); -- case 0x67: return EADiffusionAssemble3D<6,7>(ne,B,G,pa_data,ea_data,add); -- case 0x78: return EADiffusionAssemble3D<7,8>(ne,B,G,pa_data,ea_data,add); -- case 0x89: return EADiffusionAssemble3D<8,9>(ne,B,G,pa_data,ea_data,add); -- default: return EADiffusionAssemble3D(ne,B,G,pa_data,ea_data,add, -+ case 0x23: return EADiffusionAssemble3D<2,3>(ne,B,G,pa_data,ea_data); -+ case 0x34: return EADiffusionAssemble3D<3,4>(ne,B,G,pa_data,ea_data); -+ case 0x45: return EADiffusionAssemble3D<4,5>(ne,B,G,pa_data,ea_data); -+ case 0x56: return EADiffusionAssemble3D<5,6>(ne,B,G,pa_data,ea_data); -+ case 0x67: return EADiffusionAssemble3D<6,7>(ne,B,G,pa_data,ea_data); -+ case 0x78: return EADiffusionAssemble3D<7,8>(ne,B,G,pa_data,ea_data); -+ case 0x89: return EADiffusionAssemble3D<8,9>(ne,B,G,pa_data,ea_data); -+ default: return EADiffusionAssemble3D(ne,B,G,pa_data,ea_data, - dofs1D,quad1D); - } - } -diff --git a/fem/bilininteg_diffusion_pa.cpp b/fem/integ/bilininteg_diffusion_kernels.hpp -similarity index 85% -rename from fem/bilininteg_diffusion_pa.cpp -rename to fem/integ/bilininteg_diffusion_kernels.hpp -index 2d953952e..63bc52bd8 100644 ---- a/fem/bilininteg_diffusion_pa.cpp -+++ b/fem/integ/bilininteg_diffusion_kernels.hpp -@@ -9,28 +9,29 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qfunction.hpp" --#include "ceed/integrators/diffusion/diffusion.hpp" -+#ifndef MFEM_BILININTEG_DIFFUSION_KERNELS_HPP -+#define MFEM_BILININTEG_DIFFUSION_KERNELS_HPP - --using namespace std; -+#include "../../config/config.hpp" -+#include "../../general/forall.hpp" -+#include "../../linalg/dtensor.hpp" - - namespace mfem - { - --// PA Diffusion Integrator -+namespace internal -+{ - - // OCCA 2D Assemble kernel - #ifdef MFEM_USE_OCCA --static void OccaPADiffusionSetup2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &W, -- const Vector &J, -- const Vector &C, -- Vector &op) -+MFEM_HOST_DEVICE inline -+void OccaPADiffusionSetup2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &W, -+ const Vector &J, -+ const Vector &C, -+ Vector &op) - { - occa::properties props; - props["defines/D1D"] = D1D; -@@ -52,13 +53,14 @@ static void OccaPADiffusionSetup2D(const int D1D, - OccaDiffSetup2D_ker.at(id)(NE, o_W, o_J, o_C, o_op, const_c); - } - --static void OccaPADiffusionSetup3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &W, -- const Vector &J, -- const Vector &C, -- Vector &op) -+MFEM_HOST_DEVICE inline -+void OccaPADiffusionSetup3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &W, -+ const Vector &J, -+ const Vector &C, -+ Vector &op) - { - occa::properties props; - props["defines/D1D"] = D1D; -@@ -81,7 +83,19 @@ static void OccaPADiffusionSetup3D(const int D1D, - } - #endif // MFEM_USE_OCCA - -+// PA Diffusion Assemble 2D kernel -+template -+MFEM_HOST_DEVICE inline -+void PADiffusionSetup2D(const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &w, -+ const Vector &j, -+ const Vector &c, -+ Vector &d); -+ - template<> -+MFEM_HOST_DEVICE inline - void PADiffusionSetup2D<2>(const int Q1D, - const int coeffDim, - const int NE, -@@ -149,6 +163,7 @@ void PADiffusionSetup2D<2>(const int Q1D, - - // PA Diffusion Assemble 2D kernel with 3D node coords - template<> -+MFEM_HOST_DEVICE inline - void PADiffusionSetup2D<3>(const int Q1D, - const int coeffDim, - const int NE, -@@ -194,6 +209,7 @@ void PADiffusionSetup2D<3>(const int Q1D, - } - - // PA Diffusion Assemble 3D kernel -+MFEM_HOST_DEVICE inline - void PADiffusionSetup3D(const int Q1D, - const int coeffDim, - const int NE, -@@ -314,16 +330,17 @@ void PADiffusionSetup3D(const int Q1D, - }); - } - --static void PADiffusionSetup(const int dim, -- const int sdim, -- const int D1D, -- const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &W, -- const Vector &J, -- const Vector &C, -- Vector &D) -+MFEM_HOST_DEVICE inline -+void PADiffusionSetup(const int dim, -+ const int sdim, -+ const int D1D, -+ const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &W, -+ const Vector &J, -+ const Vector &C, -+ Vector &D) - { - if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADiffusionSetup"); } - if (dim == 2) -@@ -353,71 +370,16 @@ static void PADiffusionSetup(const int dim, - } - } - --void DiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) --{ -- const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -- Device::GetDeviceMemoryType() : pa_mt; -- // Assuming the same element type -- fespace = &fes; -- Mesh *mesh = fes.GetMesh(); -- if (mesh->GetNE() == 0) { return; } -- const FiniteElement &el = *fes.GetFE(0); -- const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el); -- if (DeviceCanUseCeed()) -- { -- delete ceedOp; -- MFEM_VERIFY(!VQ && !MQ, -- "Only scalar coefficient supported for DiffusionIntegrator" -- " with libCEED"); -- const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -- fes.IsVariableOrder(); -- if (mixed) -- { -- ceedOp = new ceed::MixedPADiffusionIntegrator(*this, fes, Q); -- } -- else -- { -- ceedOp = new ceed::PADiffusionIntegrator(fes, *ir, Q); -- } -- return; -- } -- const int dims = el.GetDim(); -- const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- ne = fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS, mt); -- const int sdim = mesh->SpaceDimension(); -- maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -- dofs1D = maps->ndof; -- quad1D = maps->nqpt; -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(qs, CoefficientStorage::COMPRESSED); -- -- if (MQ) { coeff.ProjectTranspose(*MQ); } -- else if (VQ) { coeff.Project(*VQ); } -- else if (Q) { coeff.Project(*Q); } -- else { coeff.SetConstant(1.0); } -- -- const int coeff_dim = coeff.GetVDim(); -- symmetric = (coeff_dim != dims*dims); -- const int pa_size = symmetric ? symmDims : dims*dims; -- -- pa_data.SetSize(pa_size * nq * ne, mt); -- PADiffusionSetup(dim, sdim, dofs1D, quad1D, coeff_dim, ne, ir->GetWeights(), -- geom->J, coeff, pa_data); --} -- - template --static void PADiffusionDiagonal2D(const int NE, -- const bool symmetric, -- const Array &b, -- const Array &g, -- const Vector &d, -- Vector &y, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void PADiffusionDiagonal2D(const int NE, -+ const bool symmetric, -+ const Array &b, -+ const Array &g, -+ const Vector &d, -+ Vector &y, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -476,14 +438,15 @@ static void PADiffusionDiagonal2D(const int NE, - - // Shared memory PA Diffusion Diagonal 2D kernel - template --static void SmemPADiffusionDiagonal2D(const int NE, -- const bool symmetric, -- const Array &b_, -- const Array &g_, -- const Vector &d_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void SmemPADiffusionDiagonal2D(const int NE, -+ const bool symmetric, -+ const Array &b_, -+ const Array &g_, -+ const Vector &d_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -570,14 +533,15 @@ static void SmemPADiffusionDiagonal2D(const int NE, - } - - template --static void PADiffusionDiagonal3D(const int NE, -- const bool symmetric, -- const Array &b, -- const Array &g, -- const Vector &d, -- Vector &y, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void PADiffusionDiagonal3D(const int NE, -+ const bool symmetric, -+ const Array &b, -+ const Array &g, -+ const Vector &d, -+ Vector &y, -+ const int d1d = 0, -+ const int q1d = 0) - { - constexpr int DIM = 3; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -671,14 +635,15 @@ static void PADiffusionDiagonal3D(const int NE, - - // Shared memory PA Diffusion Diagonal 3D kernel - template --static void SmemPADiffusionDiagonal3D(const int NE, -- const bool symmetric, -- const Array &b_, -- const Array &g_, -- const Vector &d_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void SmemPADiffusionDiagonal3D(const int NE, -+ const bool symmetric, -+ const Array &b_, -+ const Array &g_, -+ const Vector &d_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - constexpr int DIM = 3; - const int D1D = T_D1D ? T_D1D : d1d; -@@ -788,15 +753,16 @@ static void SmemPADiffusionDiagonal3D(const int NE, - }); - } - --static void PADiffusionAssembleDiagonal(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const bool symm, -- const Array &B, -- const Array &G, -- const Vector &D, -- Vector &Y) -+MFEM_HOST_DEVICE inline -+void PADiffusionAssembleDiagonal(const int dim, -+ const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symm, -+ const Array &B, -+ const Array &G, -+ const Vector &D, -+ Vector &Y) - { - if (dim == 2) - { -@@ -833,33 +799,19 @@ static void PADiffusionAssembleDiagonal(const int dim, - MFEM_ABORT("Unknown kernel."); - } - --void DiffusionIntegrator::AssembleDiagonalPA(Vector &diag) --{ -- if (DeviceCanUseCeed()) -- { -- ceedOp->GetDiagonal(diag); -- } -- else -- { -- if (pa_data.Size()==0) { AssemblePA(*fespace); } -- PADiffusionAssembleDiagonal(dim, dofs1D, quad1D, ne, symmetric, -- maps->B, maps->G, pa_data, diag); -- } --} -- -- - #ifdef MFEM_USE_OCCA - // OCCA PA Diffusion Apply 2D kernel --static void OccaPADiffusionApply2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &G, -- const Array &Bt, -- const Array &Gt, -- const Vector &D, -- const Vector &X, -- Vector &Y) -+MFEM_HOST_DEVICE inline -+void OccaPADiffusionApply2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &B, -+ const Array &G, -+ const Array &Bt, -+ const Array &Gt, -+ const Vector &D, -+ const Vector &X, -+ Vector &Y) - { - occa::properties props; - props["defines/D1D"] = D1D; -@@ -899,16 +851,17 @@ static void OccaPADiffusionApply2D(const int D1D, - } - - // OCCA PA Diffusion Apply 3D kernel --static void OccaPADiffusionApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &G, -- const Array &Bt, -- const Array &Gt, -- const Vector &D, -- const Vector &X, -- Vector &Y) -+MFEM_HOST_DEVICE inline -+void OccaPADiffusionApply3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &B, -+ const Array &G, -+ const Array &Bt, -+ const Array &Gt, -+ const Vector &D, -+ const Vector &X, -+ Vector &Y) - { - occa::properties props; - props["defines/D1D"] = D1D; -@@ -950,17 +903,18 @@ static void OccaPADiffusionApply3D(const int D1D, - - // PA Diffusion Apply 2D kernel - template --static void PADiffusionApply2D(const int NE, -- const bool symmetric, -- const Array &b_, -- const Array &g_, -- const Array &bt_, -- const Array >_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void PADiffusionApply2D(const int NE, -+ const bool symmetric, -+ const Array &b_, -+ const Array &g_, -+ const Array &bt_, -+ const Array >_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -1072,15 +1026,16 @@ static void PADiffusionApply2D(const int NE, - - // Shared memory PA Diffusion Apply 2D kernel - template --static void SmemPADiffusionApply2D(const int NE, -- const bool symmetric, -- const Array &b_, -- const Array &g_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void SmemPADiffusionApply2D(const int NE, -+ const bool symmetric, -+ const Array &b_, -+ const Array &g_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -1230,16 +1185,17 @@ static void SmemPADiffusionApply2D(const int NE, - - // PA Diffusion Apply 3D kernel - template --static void PADiffusionApply3D(const int NE, -- const bool symmetric, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- int d1d = 0, int q1d = 0) -+MFEM_HOST_DEVICE inline -+void PADiffusionApply3D(const int NE, -+ const bool symmetric, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ int d1d = 0, int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -1422,15 +1378,16 @@ static void PADiffusionApply3D(const int NE, - } - - template --static void SmemPADiffusionApply3D(const int NE, -- const bool symmetric, -- const Array &b_, -- const Array &g_, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0) -+MFEM_HOST_DEVICE inline -+void SmemPADiffusionApply3D(const int NE, -+ const bool symmetric, -+ const Array &b_, -+ const Array &g_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -1643,18 +1600,19 @@ static void SmemPADiffusionApply3D(const int NE, - }); - } - --static void PADiffusionApply(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const bool symm, -- const Array &B, -- const Array &G, -- const Array &Bt, -- const Array &Gt, -- const Vector &D, -- const Vector &X, -- Vector &Y) -+MFEM_HOST_DEVICE inline -+void PADiffusionApply(const int dim, -+ const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symm, -+ const Array &B, -+ const Array &G, -+ const Array &Bt, -+ const Array &Gt, -+ const Vector &D, -+ const Vector &X, -+ Vector &Y) - { - #ifdef MFEM_USE_OCCA - if (DeviceCanUseOcca()) -@@ -1710,32 +1668,8 @@ static void PADiffusionApply(const int dim, - MFEM_ABORT("Unknown kernel: 0x"<AddMult(x, y); -- } -- else -- { -- PADiffusionApply(dim, dofs1D, quad1D, ne, symmetric, -- maps->B, maps->G, maps->Bt, maps->Gt, -- pa_data, x, y); -- } --} -- --void DiffusionIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const --{ -- if (symmetric) -- { -- AddMultPA(x, y); -- } -- else -- { -- MFEM_ABORT("DiffusionIntegrator::AddMultTransposePA only implemented in " -- "the symmetric case.") -- } --} -+} // namespace internal - - } // namespace mfem -+ -+#endif -diff --git a/fem/bilininteg_diffusion_mf.cpp b/fem/integ/bilininteg_diffusion_mf.cpp -similarity index 91% -rename from fem/bilininteg_diffusion_mf.cpp -rename to fem/integ/bilininteg_diffusion_mf.cpp -index c6bd5c728..0896b8bf9 100644 ---- a/fem/bilininteg_diffusion_mf.cpp -+++ b/fem/integ/bilininteg_diffusion_mf.cpp -@@ -9,12 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "ceed/integrators/diffusion/diffusion.hpp" -- --using namespace std; -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/diffusion/diffusion.hpp" - - namespace mfem - { -@@ -22,7 +19,6 @@ namespace mfem - void DiffusionIntegrator::AssembleMF(const FiniteElementSpace &fes) - { - // Assuming the same element type -- fespace = &fes; - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } - const FiniteElement &el = *fes.GetFE(0); -diff --git a/fem/integ/bilininteg_diffusion_pa.cpp b/fem/integ/bilininteg_diffusion_pa.cpp -new file mode 100644 -index 000000000..a966c8520 ---- /dev/null -+++ b/fem/integ/bilininteg_diffusion_pa.cpp -@@ -0,0 +1,124 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../ceed/integrators/diffusion/diffusion.hpp" -+#include "bilininteg_diffusion_kernels.hpp" -+ -+namespace mfem -+{ -+ -+void DiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) -+{ -+ const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -+ Device::GetDeviceMemoryType() : pa_mt; -+ // Assuming the same element type -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ const FiniteElement &el = *fes.GetFE(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el); -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ MFEM_VERIFY(!VQ && !MQ, -+ "Only scalar coefficient supported for DiffusionIntegrator" -+ " with libCEED"); -+ const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -+ fes.IsVariableOrder(); -+ if (mixed) -+ { -+ ceedOp = new ceed::MixedPADiffusionIntegrator(*this, fes, Q); -+ } -+ else -+ { -+ ceedOp = new ceed::PADiffusionIntegrator(fes, *ir, Q); -+ } -+ return; -+ } -+ const int dims = el.GetDim(); -+ const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ ne = fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS, mt); -+ const int sdim = mesh->SpaceDimension(); -+ maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -+ dofs1D = maps->ndof; -+ quad1D = maps->nqpt; -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(qs, CoefficientStorage::COMPRESSED); -+ -+ if (MQ) { coeff.ProjectTranspose(*MQ); } -+ else if (VQ) { coeff.Project(*VQ); } -+ else if (Q) { coeff.Project(*Q); } -+ else { coeff.SetConstant(1.0); } -+ -+ const int coeff_dim = coeff.GetVDim(); -+ symmetric = (coeff_dim != dims*dims); -+ const int pa_size = symmetric ? symmDims : dims*dims; -+ -+ pa_data.SetSize(pa_size * nq * ne, mt); -+ internal::PADiffusionSetup(dim, sdim, dofs1D, quad1D, coeff_dim, ne, -+ ir->GetWeights(), geom->J, coeff, pa_data); -+} -+ -+void DiffusionIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ internal::PADiffusionAssembleDiagonal(dim, dofs1D, quad1D, ne, symmetric, -+ maps->B, maps->G, pa_data, diag); -+ } -+} -+ -+void DiffusionIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ internal::PADiffusionApply(dim, dofs1D, quad1D, ne, symmetric, -+ maps->B, maps->G, maps->Bt, maps->Gt, -+ pa_data, x, y); -+ } -+} -+ -+void DiffusionIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ MFEM_ABORT("AddMultTransposePA not yet implemented with libCEED for" -+ " DiffusionIntegrator."); -+ } -+ else -+ { -+ if (symmetric) -+ { -+ AddMultPA(x, y); -+ } -+ else -+ { -+ MFEM_ABORT("DiffusionIntegrator::AddMultTransposePA only implemented in " -+ "the symmetric case.") -+ } -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_divdiv_pa.cpp b/fem/integ/bilininteg_divdiv_pa.cpp -new file mode 100644 -index 000000000..8abf233a7 ---- /dev/null -+++ b/fem/integ/bilininteg_divdiv_pa.cpp -@@ -0,0 +1,99 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "bilininteg_hdiv_kernels.hpp" -+ -+namespace mfem -+{ -+ -+void DivDivIntegrator::AssemblePA(const FiniteElementSpace &fes) -+{ -+ // Assumes tensor-product elements -+ Mesh *mesh = fes.GetMesh(); -+ const FiniteElement *fel = fes.GetFE(0); -+ -+ const VectorTensorFiniteElement *el = -+ dynamic_cast(fel); -+ MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule -+ (*el, *el, *mesh->GetElementTransformation(0)); -+ -+ const int dims = el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ ne = fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ pa_data.SetSize(nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -+ -+ if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3) -+ { -+ internal::PADivDivSetup3D(quad1D, ne, ir->GetWeights(), geom->J, coeff, -+ pa_data); -+ } -+ else if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2) -+ { -+ internal::PADivDivSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, -+ pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void DivDivIntegrator::AssembleDiagonalPA(Vector& diag) -+{ -+ if (dim == 3) -+ { -+ internal::PADivDivAssembleDiagonal3D(dofs1D, quad1D, ne, -+ mapsO->B, mapsC->G, pa_data, diag); -+ } -+ else -+ { -+ internal::PADivDivAssembleDiagonal2D(dofs1D, quad1D, ne, -+ mapsO->B, mapsC->G, pa_data, diag); -+ } -+} -+ -+void DivDivIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ internal::PADivDivApply3D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -+ mapsO->Bt, mapsC->Gt, pa_data, x, y); -+ else if (dim == 2) -+ internal::PADivDivApply2D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -+ mapsO->Bt, mapsC->Gt, pa_data, x, y); -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/bilininteg_gradient.cpp b/fem/integ/bilininteg_gradient_pa.cpp -similarity index 93% -rename from fem/bilininteg_gradient.cpp -rename to fem/integ/bilininteg_gradient_pa.cpp -index 73b9d1859..20ef4684d 100644 ---- a/fem/bilininteg_gradient.cpp -+++ b/fem/integ/bilininteg_gradient_pa.cpp -@@ -9,18 +9,14 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qfunction.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" - - namespace mfem - { - --// PA Gradient Integrator -- - /* Description of the *SetupND functions - Inputs are as follows - \b Q1D number of quadrature points in one dimension. -@@ -162,27 +158,6 @@ static void PAGradientSetup3D(const int Q1D, - }); - } - --static void PAGradientSetup(const int dim, -- const int TR_D1D, -- const int TE_D1D, -- const int Q1D, -- const int NE, -- const Array &W, -- const Vector &J, -- const Vector &COEFF, -- Vector &op) --{ -- if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAGradientSetup"); } -- if (dim == 2) -- { -- PAGradientSetup2D(Q1D, NE, W, J, COEFF, op); -- } -- if (dim == 3) -- { -- PAGradientSetup3D(Q1D, NE, W, J, COEFF, op); -- } --} -- - void GradientIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes) - { -@@ -213,8 +188,18 @@ void GradientIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - QuadratureSpace qs(*mesh, *ir); - CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); - -- PAGradientSetup(dim, trial_dofs1D, test_dofs1D, quad1D, -- ne, ir->GetWeights(), geom->J, coeff, pa_data); -+ if (dim == 1) -+ { -+ MFEM_ABORT("dim==1 not supported in GradientIntegrator::AssemblePA"); -+ } -+ else if (dim == 2) -+ { -+ PAGradientSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -+ } -+ else if (dim == 3) -+ { -+ PAGradientSetup3D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -+ } - } - - // PA Gradient Apply 2D kernel -@@ -791,40 +776,21 @@ static void SmemPAGradientApply3D(const int NE, - }); - } - --static void PAGradientApply(const int dim, -- const int TR_D1D, -- const int TE_D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &G, -- const Array &Bt, -- const Vector &op, -- const Vector &x, -- Vector &y, -- bool transpose=false) -+void GradientIntegrator::AddMultPA(const Vector &x, Vector &y) const - { -- - if (dim == 2) - { -- return PAGradientApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D); -+ return PAGradientApply2D(ne, trial_maps->B, trial_maps->G, test_maps->Bt, -+ pa_data, x, y, trial_dofs1D, test_dofs1D, quad1D); - } - if (dim == 3) - { -- return PAGradientApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D); -+ return PAGradientApply3D(ne, trial_maps->B, trial_maps->G, test_maps->Bt, -+ pa_data, x, y, trial_dofs1D, test_dofs1D, quad1D); - } - MFEM_ABORT("Unknown kernel."); - } - --// PA Gradient Apply kernel --void GradientIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- PAGradientApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne, -- trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y, -- false); --} -- --// PA Gradient Apply kernel - void GradientIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const - { - MFEM_ABORT("PA Gradient AddMultTransposePA not implemented."); -diff --git a/fem/integ/bilininteg_hcurl_kernels.hpp b/fem/integ/bilininteg_hcurl_kernels.hpp -new file mode 100644 -index 000000000..a1545f888 ---- /dev/null -+++ b/fem/integ/bilininteg_hcurl_kernels.hpp -@@ -0,0 +1,3891 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_BILININTEG_HCURL_KERNELS_HPP -+#define MFEM_BILININTEG_HCURL_KERNELS_HPP -+ -+#include "../../config/config.hpp" -+#include "../../general/forall.hpp" -+#include "../../linalg/dtensor.hpp" -+ -+// Piola transformation in H(curl): w = dF^{-T} \hat{w} -+// curl w = (1 / det (dF)) dF \hat{curl} \hat{w} -+ -+namespace mfem -+{ -+ -+namespace internal -+{ -+ -+MFEM_HOST_DEVICE inline -+void PAHcurlMassAssembleDiagonal2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &bo, -+ const Array &bc, -+ const Vector &pa_data, -+ Vector &diag) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE); -+ auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double mass[MAX_Q1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ -+ mass[qx] += wy * wy * ((c == 0) ? op(qx,qy,0,e) : -+ op(qx,qy,symmetric ? 2 : 3, e)); -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ D(dx + (dy * D1Dx) + osc, e) += mass[qx] * wx * wx; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline -+void PAHcurlMassAssembleDiagonal3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &bo, -+ const Array &bc, -+ const Vector &pa_data, -+ Vector &diag) -+{ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -+ auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) : -+ (symmetric ? 5 : 8)); -+ -+ double mass[MAX_Q1D]; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -+ -+ mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e); -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += mass[qx] * wx * wx; -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ }); // end of element loop -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAHcurlMassAssembleDiagonal3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &bo, -+ const Array &bc, -+ const Vector &pa_data, -+ Vector &diag) -+{ -+ MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -+ auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ constexpr int VDIM = 3; -+ -+ MFEM_SHARED double sBo[T_Q1D][T_D1D]; -+ MFEM_SHARED double sBc[T_Q1D][T_D1D]; -+ -+ double op3[3]; -+ MFEM_SHARED double sop[3][T_Q1D][T_Q1D]; -+ -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ op3[0] = op(qx,qy,qz,0,e); -+ op3[1] = op(qx,qy,qz,symmetric ? 3 : 4,e); -+ op3[2] = op(qx,qy,qz,symmetric ? 5 : 8,e); -+ } -+ } -+ } -+ -+ const int tidx = MFEM_THREAD_ID(x); -+ const int tidy = MFEM_THREAD_ID(y); -+ const int tidz = MFEM_THREAD_ID(z); -+ -+ if (tidz == 0) -+ { -+ MFEM_FOREACH_THREAD(d,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(q,x,Q1D) -+ { -+ sBc[q][d] = Bc(q,d); -+ if (d < D1D-1) -+ { -+ sBo[q][d] = Bo(q,d); -+ } -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ -+ int osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double dxyz = 0.0; -+ -+ for (int qz=0; qz < Q1D; ++qz) -+ { -+ if (tidz == qz) -+ { -+ for (int i=0; i<3; ++i) -+ { -+ sop[i][tidx][tidy] = op3[i]; -+ } -+ } -+ -+ MFEM_SYNC_THREAD; -+ -+ MFEM_FOREACH_THREAD(dz,z,D1Dz) -+ { -+ const double wz = ((c == 2) ? sBo[qz][dz] : sBc[qz][dz]); -+ -+ MFEM_FOREACH_THREAD(dy,y,D1Dy) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1Dx) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = ((c == 1) ? sBo[qy][dy] : sBc[qy][dy]); -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = ((c == 0) ? sBo[qx][dx] : sBc[qx][dx]); -+ dxyz += sop[c][qx][qy] * wx * wx * wy * wy * wz * wz; -+ } -+ } -+ } -+ } -+ } -+ -+ MFEM_SYNC_THREAD; -+ } // qz loop -+ -+ MFEM_FOREACH_THREAD(dz,z,D1Dz) -+ { -+ MFEM_FOREACH_THREAD(dy,y,D1Dy) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1Dx) -+ { -+ D(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += dxyz; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // c loop -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline -+void PAHcurlMassApply2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE); -+ auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qy][qx][c] = 0.0; -+ } -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + (dy * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qy][qx][c] += massX[qx] * wy; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,0,e); -+ const double O21 = op(qx,qy,1,e); -+ const double O12 = symmetric ? O21 : op(qx,qy,2,e); -+ const double O22 = symmetric ? op(qx,qy,2,e) : op(qx,qy,3,e); -+ const double massX = mass[qy][qx][0]; -+ const double massY = mass[qy][qx][1]; -+ mass[qy][qx][0] = (O11*massX)+(O12*massY); -+ mass[qy][qx][1] = (O21*massX)+(O22*massY); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double massX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ } // loop qy -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline -+void PAHcurlMassApply3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double massXY[MAX_Q1D][MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massXY[qy][qx] = 0.0; -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ massXY[qy][qx] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,qz,0,e); -+ const double O12 = op(qx,qy,qz,1,e); -+ const double O13 = op(qx,qy,qz,2,e); -+ const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e); -+ const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e); -+ const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e); -+ const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e); -+ const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e); -+ const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e); -+ const double massX = mass[qz][qy][qx][0]; -+ const double massY = mass[qz][qy][qx][1]; -+ const double massZ = mass[qz][qy][qx][2]; -+ mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -+ mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ); -+ mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ); -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double massXY[MAX_D1D][MAX_D1D]; -+ -+ osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] += massX[dx] * wy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ } // loop qz -+ }); // end of element loop -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAHcurlMassApply3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= HCURL_MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= HCURL_MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ const int dataSize = symmetric ? 6 : 9; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, dataSize, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ constexpr int VDIM = 3; -+ -+ MFEM_SHARED double sBo[T_Q1D][T_D1D]; -+ MFEM_SHARED double sBc[T_Q1D][T_D1D]; -+ -+ double op9[9]; -+ MFEM_SHARED double sop[9*T_Q1D*T_Q1D]; -+ MFEM_SHARED double mass[T_Q1D][T_Q1D][3]; -+ -+ MFEM_SHARED double sX[T_D1D][T_D1D][T_D1D]; -+ -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ for (int i=0; i &w, -+ const Vector &j, -+ Vector &coeff, -+ Vector &op) -+{ -+ const int NQ = Q1D*Q1D; -+ auto W = w.Read(); -+ auto J = Reshape(j.Read(), NQ, 2, 2, NE); -+ auto C = Reshape(coeff.Read(), NQ, NE); -+ auto y = Reshape(op.Write(), NQ, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ for (int q = 0; q < NQ; ++q) -+ { -+ const double J11 = J(q,0,0,e); -+ const double J21 = J(q,1,0,e); -+ const double J12 = J(q,0,1,e); -+ const double J22 = J(q,1,1,e); -+ const double detJ = (J11*J22)-(J21*J12); -+ y(q,e) = W[q] * C(q,e) / detJ; -+ } -+ }); -+} -+ -+// PA H(curl) curl-curl assemble 3D kernel -+MFEM_HOST_DEVICE inline -+void PACurlCurlSetup3D(const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &w, -+ const Vector &j, -+ Vector &coeff, -+ Vector &op) -+{ -+ const int NQ = Q1D*Q1D*Q1D; -+ const bool symmetric = (coeffDim != 9); -+ auto W = w.Read(); -+ auto J = Reshape(j.Read(), NQ, 3, 3, NE); -+ auto C = Reshape(coeff.Read(), coeffDim, NQ, NE); -+ auto y = Reshape(op.Write(), NQ, symmetric ? 6 : 9, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ for (int q = 0; q < NQ; ++q) -+ { -+ const double J11 = J(q,0,0,e); -+ const double J21 = J(q,1,0,e); -+ const double J31 = J(q,2,0,e); -+ const double J12 = J(q,0,1,e); -+ const double J22 = J(q,1,1,e); -+ const double J32 = J(q,2,1,e); -+ const double J13 = J(q,0,2,e); -+ const double J23 = J(q,1,2,e); -+ const double J33 = J(q,2,2,e); -+ const double detJ = J11 * (J22 * J33 - J32 * J23) - -+ J21 * (J12 * J33 - J32 * J13) + -+ J31 * (J12 * J23 - J22 * J13); -+ -+ const double c_detJ = W[q] / detJ; -+ -+ if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version -+ { -+ // Set y to the 6 or 9 entries of J^T M J / det -+ const double M11 = C(0, q, e); -+ const double M12 = C(1, q, e); -+ const double M13 = C(2, q, e); -+ const double M21 = (!symmetric) ? C(3, q, e) : M12; -+ const double M22 = (!symmetric) ? C(4, q, e) : C(3, q, e); -+ const double M23 = (!symmetric) ? C(5, q, e) : C(4, q, e); -+ const double M31 = (!symmetric) ? C(6, q, e) : M13; -+ const double M32 = (!symmetric) ? C(7, q, e) : M23; -+ const double M33 = (!symmetric) ? C(8, q, e) : C(5, q, e); -+ -+ // First compute R = MJ -+ const double R11 = M11*J11 + M12*J21 + M13*J31; -+ const double R12 = M11*J12 + M12*J22 + M13*J32; -+ const double R13 = M11*J13 + M12*J23 + M13*J33; -+ const double R21 = M21*J11 + M22*J21 + M23*J31; -+ const double R22 = M21*J12 + M22*J22 + M23*J32; -+ const double R23 = M21*J13 + M22*J23 + M23*J33; -+ const double R31 = M31*J11 + M32*J21 + M33*J31; -+ const double R32 = M31*J12 + M32*J22 + M33*J32; -+ const double R33 = M31*J13 + M32*J23 + M33*J33; -+ -+ // Now set y to J^T R / det -+ y(q,0,e) = c_detJ * (J11*R11 + J21*R21 + J31*R31); // 1,1 -+ const double Y12 = c_detJ * (J11*R12 + J21*R22 + J31*R32); -+ y(q,1,e) = Y12; // 1,2 -+ y(q,2,e) = c_detJ * (J11*R13 + J21*R23 + J31*R33); // 1,3 -+ -+ const double Y21 = c_detJ * (J12*R11 + J22*R21 + J32*R31); -+ const double Y22 = c_detJ * (J12*R12 + J22*R22 + J32*R32); -+ const double Y23 = c_detJ * (J12*R13 + J22*R23 + J32*R33); -+ -+ const double Y33 = c_detJ * (J13*R13 + J23*R23 + J33*R33); -+ -+ y(q,3,e) = symmetric ? Y22 : Y21; // 2,2 or 2,1 -+ y(q,4,e) = symmetric ? Y23 : Y22; // 2,3 or 2,2 -+ y(q,5,e) = symmetric ? Y33 : Y23; // 3,3 or 2,3 -+ -+ if (!symmetric) -+ { -+ y(q,6,e) = c_detJ * (J13*R11 + J23*R21 + J33*R31); // 3,1 -+ y(q,7,e) = c_detJ * (J13*R12 + J23*R22 + J33*R32); // 3,2 -+ y(q,8,e) = Y33; // 3,3 -+ } -+ } -+ else // Vector or scalar coefficient version -+ { -+ // Set y to the 6 entries of J^T D J / det^2 -+ const double D1 = C(0, q, e); -+ const double D2 = coeffDim == 3 ? C(1, q, e) : D1; -+ const double D3 = coeffDim == 3 ? C(2, q, e) : D1; -+ -+ y(q,0,e) = c_detJ * (D1*J11*J11 + D2*J21*J21 + D3*J31*J31); // 1,1 -+ y(q,1,e) = c_detJ * (D1*J11*J12 + D2*J21*J22 + D3*J31*J32); // 1,2 -+ y(q,2,e) = c_detJ * (D1*J11*J13 + D2*J21*J23 + D3*J31*J33); // 1,3 -+ y(q,3,e) = c_detJ * (D1*J12*J12 + D2*J22*J22 + D3*J32*J32); // 2,2 -+ y(q,4,e) = c_detJ * (D1*J12*J13 + D2*J22*J23 + D3*J32*J33); // 2,3 -+ y(q,5,e) = c_detJ * (D1*J13*J13 + D2*J23*J23 + D3*J33*J33); // 3,3 -+ } -+ } -+ }); -+} -+ -+MFEM_HOST_DEVICE inline -+void PACurlCurlAssembleDiagonal2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &bo, -+ const Array &gc, -+ const Vector &pa_data, -+ Vector &diag) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -+ auto D = Reshape(diag.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double t[MAX_Q1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ t[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : -Gc(qy,dy); -+ t[qx] += wy * wy * op(qx,qy,e); -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = ((c == 0) ? Bo(qx,dx) : Gc(qx,dx)); -+ D(dx + (dy * D1Dx) + osc, e) += t[qx] * wx * wx; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ }); // end of element loop -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PACurlCurlAssembleDiagonal3D(const int D1D, -+ const int Q1D, -+ const bool symmetric, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &go, -+ const Array &gc, -+ const Vector &pa_data, -+ Vector &diag) -+{ -+ constexpr static int VDIM = 3; -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Go = Reshape(go.Read(), Q1D, D1D-1); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE); -+ auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ const int s = symmetric ? 6 : 9; -+ const int i11 = 0; -+ const int i12 = 1; -+ const int i13 = 2; -+ const int i21 = symmetric ? i12 : 3; -+ const int i22 = symmetric ? 3 : 4; -+ const int i23 = symmetric ? 4 : 5; -+ const int i31 = symmetric ? i13 : 6; -+ const int i32 = symmetric ? i23 : 7; -+ const int i33 = symmetric ? 5 : 8; -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -+ // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ // For each c, we will keep 9 arrays for derivatives multiplied by the 9 entries of the 3x3 matrix (dF^T C dF), -+ // which may be non-symmetric depending on a possibly non-symmetric matrix coefficient. -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double zt[MAX_Q1D][MAX_Q1D][MAX_D1D][9][3]; -+ -+ // z contraction -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int i=0; i -+MFEM_HOST_DEVICE inline -+void SmemPACurlCurlAssembleDiagonal3D(const int D1D, -+ const int Q1D, -+ const bool symmetric, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &go, -+ const Array &gc, -+ const Vector &pa_data, -+ Vector &diag) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Go = Reshape(go.Read(), Q1D, D1D-1); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE); -+ auto D = Reshape(diag.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ const int s = symmetric ? 6 : 9; -+ const int i11 = 0; -+ const int i12 = 1; -+ const int i13 = 2; -+ const int i21 = symmetric ? i12 : 3; -+ const int i22 = symmetric ? 3 : 4; -+ const int i23 = symmetric ? 4 : 5; -+ const int i31 = symmetric ? i13 : 6; -+ const int i32 = symmetric ? i23 : 7; -+ const int i33 = symmetric ? 5 : 8; -+ -+ mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -+ // (\nabla\times u) \cdot (\nabla\times u) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{u} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ constexpr int VDIM = 3; -+ -+ MFEM_SHARED double sBo[MAX_Q1D][MAX_D1D]; -+ MFEM_SHARED double sBc[MAX_Q1D][MAX_D1D]; -+ MFEM_SHARED double sGo[MAX_Q1D][MAX_D1D]; -+ MFEM_SHARED double sGc[MAX_Q1D][MAX_D1D]; -+ -+ double ope[9]; -+ MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D]; -+ -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ for (int i=0; i &bo, -+ const Array &bot, -+ const Array &gc, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto Gct = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double curl[MAX_Q1D][MAX_Q1D]; -+ -+ // curl[qy][qx] will be computed as du_y/dx - du_x/dy -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ curl[qy][qx] = 0.0; -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double gradX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx] = 0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + (dy * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ curl[qy][qx] += gradX[qx] * wy; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ curl[qy][qx] *= op(qx,qy,e); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double gradX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradX[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradX[dx] += curl[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ } // loop qy -+ }); // end of element loop -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PACurlCurlApply3D(const int D1D, -+ const int Q1D, -+ const bool symmetric, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Array &gc, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), -+ // we get: -+ // (\nabla\times u) \cdot (\nabla\times v) -+ // = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto Gct = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, (symmetric ? 6 : 9), NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point. -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ curl[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ // We treat x, y, z components separately for optimization specific to each. -+ -+ int osc = 0; -+ -+ { -+ // x component -+ const int D1Dz = D1D; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D - 1; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][2]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradXY[qy][qx][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * Bo(qx,dx); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ gradXY[qy][qx][0] += wx * wDy; -+ gradXY[qy][qx][1] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2} -+ curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1} -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ { -+ // y component -+ const int D1Dz = D1D; -+ const int D1Dy = D1D - 1; -+ const int D1Dx = D1D; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][2]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradXY[qy][qx][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double massY[MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ massY[qy] = 0.0; -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ massY[qy] += t * Bo(qy,dy); -+ } -+ } -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = Bc(qx,dx); -+ const double wDx = Gc(qx,dx); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = massY[qy]; -+ gradXY[qy][qx][0] += wDx * wy; -+ gradXY[qy][qx][1] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2} -+ curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0} -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ { -+ // z component -+ const int D1Dz = D1D - 1; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D; -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double gradYZ[MAX_Q1D][MAX_Q1D][2]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradYZ[qz][qy][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massZ[MAX_Q1D]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ massZ[qz] = 0.0; -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ massZ[qz] += t * Bo(qz,dz); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = massZ[qz]; -+ gradYZ[qz][qy][0] += wz * wy; -+ gradYZ[qz][qy][1] += wz * wDy; -+ } -+ } -+ } -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = Bc(qx,dx); -+ const double wDx = Gc(qx,dx); -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1} -+ curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0} -+ } -+ } -+ } -+ } -+ } -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,qz,0,e); -+ const double O12 = op(qx,qy,qz,1,e); -+ const double O13 = op(qx,qy,qz,2,e); -+ const double O21 = symmetric ? O12 : op(qx,qy,qz,3,e); -+ const double O22 = symmetric ? op(qx,qy,qz,3,e) : op(qx,qy,qz,4,e); -+ const double O23 = symmetric ? op(qx,qy,qz,4,e) : op(qx,qy,qz,5,e); -+ const double O31 = symmetric ? O13 : op(qx,qy,qz,6,e); -+ const double O32 = symmetric ? O23 : op(qx,qy,qz,7,e); -+ const double O33 = symmetric ? op(qx,qy,qz,5,e) : op(qx,qy,qz,8,e); -+ -+ const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) + -+ (O13 * curl[qz][qy][qx][2]); -+ const double c2 = (O21 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) + -+ (O23 * curl[qz][qy][qx][2]); -+ const double c3 = (O31 * curl[qz][qy][qx][0]) + (O32 * curl[qz][qy][qx][1]) + -+ (O33 * curl[qz][qy][qx][2]); -+ -+ curl[qz][qy][qx][0] = c1; -+ curl[qz][qy][qx][1] = c2; -+ curl[qz][qy][qx][2] = c3; -+ } -+ } -+ } -+ -+ // x component -+ osc = 0; -+ { -+ const int D1Dz = D1D; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D - 1; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY12[MAX_D1D][MAX_D1D]; -+ double gradXY21[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY12[dy][dx] = 0.0; -+ gradXY21[dy][dx] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[MAX_D1D][2]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ for (int n = 0; n < 2; ++n) -+ { -+ massX[dx][n] = 0.0; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bot(dx,qx); -+ -+ massX[dx][0] += wx * curl[qz][qy][qx][1]; -+ massX[dx][1] += wx * curl[qz][qy][qx][2]; -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bct(dy,qy); -+ const double wDy = Gct(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY21[dy][dx] += massX[dx][0] * wy; -+ gradXY12[dy][dx] += massX[dx][1] * wDy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bct(dz,qz); -+ const double wDz = Gct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz); -+ } -+ } -+ } -+ } // loop qz -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ // y component -+ { -+ const int D1Dz = D1D; -+ const int D1Dy = D1D - 1; -+ const int D1Dx = D1D; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY02[MAX_D1D][MAX_D1D]; -+ double gradXY20[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY02[dy][dx] = 0.0; -+ gradXY20[dy][dx] = 0.0; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ double massY[MAX_D1D][2]; -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ massY[dy][0] = 0.0; -+ massY[dy][1] = 0.0; -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bot(dy,qy); -+ -+ massY[dy][0] += wy * curl[qz][qy][qx][2]; -+ massY[dy][1] += wy * curl[qz][qy][qx][0]; -+ } -+ } -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bct(dx,qx); -+ const double wDx = Gct(dx,qx); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ gradXY02[dy][dx] += massY[dy][0] * wDx; -+ gradXY20[dy][dx] += massY[dy][1] * wx; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bct(dz,qz); -+ const double wDz = Gct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz); -+ } -+ } -+ } -+ } // loop qz -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ // z component -+ { -+ const int D1Dz = D1D - 1; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D; -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ double gradYZ01[MAX_D1D][MAX_D1D]; -+ double gradYZ10[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ gradYZ01[dz][dy] = 0.0; -+ gradYZ10[dz][dy] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massZ[MAX_D1D][2]; -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int n = 0; n < 2; ++n) -+ { -+ massZ[dz][n] = 0.0; -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bot(dz,qz); -+ -+ massZ[dz][0] += wz * curl[qz][qy][qx][0]; -+ massZ[dz][1] += wz * curl[qz][qy][qx][1]; -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bct(dy,qy); -+ const double wDy = Gct(dy,qy); -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ gradYZ01[dz][dy] += wy * massZ[dz][1]; -+ gradYZ10[dz][dy] += wDy * massZ[dz][0]; -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bct(dx,qx); -+ const double wDx = Gct(dx,qx); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx); -+ } -+ } -+ } -+ } // loop qx -+ } -+ }); // end of element loop -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPACurlCurlApply3D(const int D1D, -+ const int Q1D, -+ const bool symmetric, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Array &gc, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ // Using (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get -+ // (\nabla\times u) \cdot (\nabla\times v) = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{\nabla}\times\hat{v} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ const int s = symmetric ? 6 : 9; -+ -+ auto device_kernel = [=] MFEM_DEVICE (int e) -+ { -+ constexpr int VDIM = 3; -+ -+ MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D]; -+ MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D]; -+ MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D]; -+ -+ double ope[9]; -+ MFEM_SHARED double sop[9][MAX_Q1D][MAX_Q1D]; -+ MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3]; -+ -+ MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ for (int i=0; i(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D); -+} -+ -+// PA H(curl)-L2 assemble 2D kernel -+MFEM_HOST_DEVICE inline -+void PAHcurlL2Setup2D(const int Q1D, -+ const int NE, -+ const Array &w, -+ Vector &coeff, -+ Vector &op) -+{ -+ const int NQ = Q1D*Q1D; -+ auto W = w.Read(); -+ auto C = Reshape(coeff.Read(), NQ, NE); -+ auto y = Reshape(op.Write(), NQ, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ for (int q = 0; q < NQ; ++q) -+ { -+ y(q,e) = W[q] * C(q,e); -+ } -+ }); -+} -+ -+// PA H(curl) Mass Assemble 3D kernel -+MFEM_HOST_DEVICE inline -+void PAHcurlL2Setup3D(const int NQ, -+ const int coeffDim, -+ const int NE, -+ const Array &w, -+ Vector &coeff, -+ Vector &op) -+{ -+ auto W = w.Read(); -+ auto C = Reshape(coeff.Read(), coeffDim, NQ, NE); -+ auto y = Reshape(op.Write(), coeffDim, NQ, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ for (int q = 0; q < NQ; ++q) -+ { -+ for (int c=0; c &bo, -+ const Array &bot, -+ const Array &bt, -+ const Array &gc, -+ const Vector &pa_data, -+ const Vector &x, // trial = H(curl) -+ Vector &y) // test = L2 or H1 -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ const int H1 = (D1Dtest == D1D); -+ -+ MFEM_VERIFY(y.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bt = Reshape(bt.Read(), D1D, Q1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), D1Dtest, D1Dtest, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double curl[MAX_Q1D][MAX_Q1D]; -+ -+ // curl[qy][qx] will be computed as du_y/dx - du_x/dy -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ curl[qy][qx] = 0.0; -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double gradX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx] = 0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + (dy * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx] += t * ((c == 0) ? Bo(qx,dx) : Gc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 0) ? -Gc(qy,dy) : Bo(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ curl[qy][qx] += gradX[qx] * wy; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ curl[qy][qx] *= op(qx,qy,e); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double sol_x[MAX_D1D]; -+ for (int dx = 0; dx < D1Dtest; ++dx) -+ { -+ sol_x[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double s = curl[qy][qx]; -+ for (int dx = 0; dx < D1Dtest; ++dx) -+ { -+ sol_x[dx] += s * ((H1 == 1) ? Bt(dx,qx) : Bot(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dtest; ++dy) -+ { -+ const double wy = (H1 == 1) ? Bt(dy,qy) : Bot(dy,qy); -+ -+ for (int dx = 0; dx < D1Dtest; ++dx) -+ { -+ Y(dx,dy,e) += sol_x[dx] * wy; -+ } -+ } -+ } // loop qy -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline -+void PAHcurlL2ApplyTranspose2D(const int D1D, -+ const int D1Dtest, -+ const int Q1D, -+ const int NE, -+ const Array &bo, -+ const Array &bot, -+ const Array &b, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, // trial = H(curl) -+ Vector &y) // test = L2 or H1 -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ const int H1 = (D1Dtest == D1D); -+ -+ MFEM_VERIFY(x.Size() == NE*D1Dtest*D1Dtest, "Test vector of wrong dimension"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto B = Reshape(b.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Gct = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), D1Dtest, D1Dtest, NE); -+ auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D]; -+ -+ // Zero-order term in L2 or H1 test space -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qy][qx] = 0.0; -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dtest; ++dy) -+ { -+ double sol_x[MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ sol_x[qy] = 0.0; -+ } -+ for (int dx = 0; dx < D1Dtest; ++dx) -+ { -+ const double s = X(dx,dy,e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_x[qx] += s * ((H1 == 1) ? B(qx,dx) : Bo(qx,dx)); -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double d2q = (H1 == 1) ? B(qy,dy) : Bo(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qy][qx] += d2q * sol_x[qx]; -+ } -+ } -+ } -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qy][qx] *= op(qx,qy,e); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double gradX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradX[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradX[dx] += mass[qy][qx] * ((c == 0) ? Bot(dx,qx) : Gct(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 0) ? -Gct(dy,qy) : Bot(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + (dy * D1Dx) + osc, e) += gradX[dx] * wy; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ } // loop qy -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H(curl) (trial), whose curl is -+// integrated against H(curl) test functions corresponding to y. -+template -+MFEM_HOST_DEVICE inline -+void PAHcurlL2Apply3D(const int D1D, -+ const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Array &gc, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ // Using u = dF^{-T} \hat{u} and (\nabla\times u) F = -+ // 1/det(dF) dF \hat{\nabla}\times\hat{u} (p. 78 of Monk), we get: -+ // (\nabla\times u) \cdot v -+ // = 1/det(dF) \hat{\nabla}\times\hat{u}^T dF^T dF^{-T} \hat{v} -+ // = 1/det(dF) \hat{\nabla}\times\hat{u}^T \hat{v} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point. -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ curl[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ // We treat x, y, z components separately for optimization specific to each. -+ -+ int osc = 0; -+ -+ { -+ // x component -+ const int D1Dz = D1D; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D - 1; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][2]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradXY[qy][qx][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * Bo(qx,dx); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ gradXY[qy][qx][0] += wx * wDy; -+ gradXY[qy][qx][1] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2} -+ curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1} -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ { -+ // y component -+ const int D1Dz = D1D; -+ const int D1Dy = D1D - 1; -+ const int D1Dx = D1D; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][2]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradXY[qy][qx][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double massY[MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ massY[qy] = 0.0; -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ massY[qy] += t * Bo(qy,dy); -+ } -+ } -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = Bc(qx,dx); -+ const double wDx = Gc(qx,dx); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = massY[qy]; -+ gradXY[qy][qx][0] += wDx * wy; -+ gradXY[qy][qx][1] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2} -+ curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0} -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ { -+ // z component -+ const int D1Dz = D1D - 1; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D; -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double gradYZ[MAX_Q1D][MAX_Q1D][2]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradYZ[qz][qy][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massZ[MAX_Q1D]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ massZ[qz] = 0.0; -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ massZ[qz] += t * Bo(qz,dz); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = massZ[qz]; -+ gradYZ[qz][qy][0] += wz * wy; -+ gradYZ[qz][qy][1] += wz * wDy; -+ } -+ } -+ } -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = Bc(qx,dx); -+ const double wDx = Gc(qx,dx); -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1} -+ curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0} -+ } -+ } -+ } -+ } -+ } -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(0,qx,qy,qz,e); -+ if (coeffDim == 1) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ curl[qz][qy][qx][c] *= O11; -+ } -+ } -+ else -+ { -+ const double O21 = op(1,qx,qy,qz,e); -+ const double O31 = op(2,qx,qy,qz,e); -+ const double O12 = op(3,qx,qy,qz,e); -+ const double O22 = op(4,qx,qy,qz,e); -+ const double O32 = op(5,qx,qy,qz,e); -+ const double O13 = op(6,qx,qy,qz,e); -+ const double O23 = op(7,qx,qy,qz,e); -+ const double O33 = op(8,qx,qy,qz,e); -+ const double curlX = curl[qz][qy][qx][0]; -+ const double curlY = curl[qz][qy][qx][1]; -+ const double curlZ = curl[qz][qy][qx][2]; -+ curl[qz][qy][qx][0] = (O11*curlX)+(O12*curlY)+(O13*curlZ); -+ curl[qz][qy][qx][1] = (O21*curlX)+(O22*curlY)+(O23*curlZ); -+ curl[qz][qy][qx][2] = (O31*curlX)+(O32*curlY)+(O33*curlZ); -+ } -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double massXY[MAX_D1D][MAX_D1D]; -+ -+ osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] = 0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += curl[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] += massX[dx] * wy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ } // loop qz -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H(curl) (trial), whose curl is -+// integrated against H(curl) test functions corresponding to y. -+template -+MFEM_HOST_DEVICE inline -+void SmemPAHcurlL2Apply3D(const int D1D, -+ const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &gc, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ auto device_kernel = [=] MFEM_DEVICE (int e) -+ { -+ constexpr int VDIM = 3; -+ constexpr int maxCoeffDim = 9; -+ -+ MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D]; -+ MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D]; -+ MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D]; -+ -+ double opc[maxCoeffDim]; -+ MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D]; -+ MFEM_SHARED double curl[MAX_Q1D][MAX_Q1D][3]; -+ -+ MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ for (int i=0; i(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D); -+} -+ -+// Apply to x corresponding to DOFs in H(curl) (trial), integrated against curl -+// of H(curl) test functions corresponding to y. -+template -+MFEM_HOST_DEVICE inline -+void PAHcurlL2Apply3DTranspose(const int D1D, -+ const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ // See PAHcurlL2Apply3D for comments. -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto Gct = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double massXY[MAX_Q1D][MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massXY[qy][qx] = 0.0; -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ massXY[qy][qx] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(0,qx,qy,qz,e); -+ if (coeffDim == 1) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] *= O11; -+ } -+ } -+ else -+ { -+ const double O12 = op(1,qx,qy,qz,e); -+ const double O13 = op(2,qx,qy,qz,e); -+ const double O21 = op(3,qx,qy,qz,e); -+ const double O22 = op(4,qx,qy,qz,e); -+ const double O23 = op(5,qx,qy,qz,e); -+ const double O31 = op(6,qx,qy,qz,e); -+ const double O32 = op(7,qx,qy,qz,e); -+ const double O33 = op(8,qx,qy,qz,e); -+ const double massX = mass[qz][qy][qx][0]; -+ const double massY = mass[qz][qy][qx][1]; -+ const double massZ = mass[qz][qy][qx][2]; -+ mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -+ mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ); -+ mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ); -+ } -+ } -+ } -+ } -+ -+ // x component -+ osc = 0; -+ { -+ const int D1Dz = D1D; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D - 1; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY12[MAX_D1D][MAX_D1D]; -+ double gradXY21[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY12[dy][dx] = 0.0; -+ gradXY21[dy][dx] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[MAX_D1D][2]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ for (int n = 0; n < 2; ++n) -+ { -+ massX[dx][n] = 0.0; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bot(dx,qx); -+ -+ massX[dx][0] += wx * mass[qz][qy][qx][1]; -+ massX[dx][1] += wx * mass[qz][qy][qx][2]; -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bct(dy,qy); -+ const double wDy = Gct(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY21[dy][dx] += massX[dx][0] * wy; -+ gradXY12[dy][dx] += massX[dx][1] * wDy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bct(dz,qz); -+ const double wDz = Gct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz); -+ } -+ } -+ } -+ } // loop qz -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ // y component -+ { -+ const int D1Dz = D1D; -+ const int D1Dy = D1D - 1; -+ const int D1Dx = D1D; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY02[MAX_D1D][MAX_D1D]; -+ double gradXY20[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY02[dy][dx] = 0.0; -+ gradXY20[dy][dx] = 0.0; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ double massY[MAX_D1D][2]; -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ massY[dy][0] = 0.0; -+ massY[dy][1] = 0.0; -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bot(dy,qy); -+ -+ massY[dy][0] += wy * mass[qz][qy][qx][2]; -+ massY[dy][1] += wy * mass[qz][qy][qx][0]; -+ } -+ } -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bct(dx,qx); -+ const double wDx = Gct(dx,qx); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ gradXY02[dy][dx] += massY[dy][0] * wDx; -+ gradXY20[dy][dx] += massY[dy][1] * wx; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bct(dz,qz); -+ const double wDz = Gct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz); -+ } -+ } -+ } -+ } // loop qz -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ // z component -+ { -+ const int D1Dz = D1D - 1; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D; -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ double gradYZ01[MAX_D1D][MAX_D1D]; -+ double gradYZ10[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ gradYZ01[dz][dy] = 0.0; -+ gradYZ10[dz][dy] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massZ[MAX_D1D][2]; -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int n = 0; n < 2; ++n) -+ { -+ massZ[dz][n] = 0.0; -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bot(dz,qz); -+ -+ massZ[dz][0] += wz * mass[qz][qy][qx][0]; -+ massZ[dz][1] += wz * mass[qz][qy][qx][1]; -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bct(dy,qy); -+ const double wDy = Gct(dy,qy); -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ gradYZ01[dz][dy] += wy * massZ[dz][1]; -+ gradYZ10[dz][dy] += wDy * massZ[dz][0]; -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bct(dx,qx); -+ const double wDx = Gct(dx,qx); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx); -+ } -+ } -+ } -+ } // loop qx -+ } -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAHcurlL2Apply3DTranspose(const int D1D, -+ const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &gc, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ auto device_kernel = [=] MFEM_DEVICE (int e) -+ { -+ constexpr int VDIM = 3; -+ constexpr int maxCoeffDim = 9; -+ -+ MFEM_SHARED double sBo[MAX_D1D][MAX_Q1D]; -+ MFEM_SHARED double sBc[MAX_D1D][MAX_Q1D]; -+ MFEM_SHARED double sGc[MAX_D1D][MAX_Q1D]; -+ -+ double opc[maxCoeffDim]; -+ MFEM_SHARED double sop[maxCoeffDim][MAX_Q1D][MAX_Q1D]; -+ MFEM_SHARED double mass[MAX_Q1D][MAX_Q1D][3]; -+ -+ MFEM_SHARED double sX[MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ for (int i=0; i(true, NE, device_kernel, host_kernel, Q1D, Q1D, Q1D); -+} -+ -+} // namespace internal -+ -+} // namespace mfem -+ -+#endif -diff --git a/fem/integ/bilininteg_hcurlhdiv_kernels.hpp b/fem/integ/bilininteg_hcurlhdiv_kernels.hpp -new file mode 100644 -index 000000000..c7165654a ---- /dev/null -+++ b/fem/integ/bilininteg_hcurlhdiv_kernels.hpp -@@ -0,0 +1,1303 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_BILININTEG_HCURLHDIV_KERNELS_HPP -+#define MFEM_BILININTEG_HCURLHDIV_KERNELS_HPP -+ -+#include "../../config/config.hpp" -+#include "../../general/forall.hpp" -+#include "../../linalg/dtensor.hpp" -+ -+namespace mfem -+{ -+ -+namespace internal -+{ -+ -+// PA H(curl) x H(div) mass assemble 2D kernel, with factor -+// dF^{-1} C dF for a vector or matrix coefficient C. -+// If transpose, use dF^T C dF^{-T} for H(div) x H(curl). -+MFEM_HOST_DEVICE inline -+void PAHcurlHdivMassSetup2D(const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const bool transpose, -+ const Array &w_, -+ const Vector &j, -+ Vector &coeff_, -+ Vector &op) -+{ -+ const bool symmetric = (coeffDim != 4); -+ auto W = Reshape(w_.Read(), Q1D, Q1D); -+ auto J = Reshape(j.Read(), Q1D, Q1D, 2, 2, NE); -+ auto coeff = Reshape(coeff_.Read(), coeffDim, Q1D, Q1D, NE); -+ auto y = Reshape(op.Write(), 4, Q1D, Q1D, NE); -+ -+ const int i11 = 0; -+ const int i12 = transpose ? 2 : 1; -+ const int i21 = transpose ? 1 : 2; -+ const int i22 = 3; -+ -+ mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ const double J11 = J(qx,qy,0,0,e); -+ const double J21 = J(qx,qy,1,0,e); -+ const double J12 = J(qx,qy,0,1,e); -+ const double J22 = J(qx,qy,1,1,e); -+ const double w_detJ = W(qx,qy) / ((J11*J22) - (J21*J12)); -+ -+ if (coeffDim == 3 || coeffDim == 4) // Matrix coefficient version -+ { -+ // First compute entries of R = MJ -+ const double M11 = coeff(i11,qx,qy,e); -+ const double M12 = (!symmetric) ? coeff(i12,qx,qy,e) : coeff(1,qx,qy,e); -+ const double M21 = (!symmetric) ? coeff(i21,qx,qy,e) : M12; -+ const double M22 = (!symmetric) ? coeff(i22,qx,qy,e) : coeff(2,qx,qy,e); -+ -+ // J^{-1} M^T -+ const double R11 = ( J22*M11 - J12*M12); // 1,1 -+ const double R12 = ( J22*M21 - J12*M22); // 1,2 -+ const double R21 = (-J21*M11 + J11*M12); // 2,1 -+ const double R22 = (-J21*M21 + J11*M22); // 2,2 -+ -+ // (RJ)^T -+ y(i11,qx,qy,e) = w_detJ * (R11*J11 + R12*J21); // 1,1 -+ y(i21,qx,qy,e) = w_detJ * (R11*J12 + R12*J22); // 1,2 (transpose) -+ y(i12,qx,qy,e) = w_detJ * (R21*J11 + R22*J21); // 2,1 (transpose) -+ y(i22,qx,qy,e) = w_detJ * (R21*J12 + R22*J22); // 2,2 -+ } -+ else if (coeffDim == 2) // Vector coefficient version -+ { -+ const double D1 = coeff(0,qx,qy,e); -+ const double D2 = coeff(1,qx,qy,e); -+ const double R11 = D1*J11; -+ const double R12 = D1*J12; -+ const double R21 = D2*J21; -+ const double R22 = D2*J22; -+ y(i11,qx,qy,e) = w_detJ * ( J22*R11 - J12*R21); // 1,1 -+ y(i21,qx,qy,e) = w_detJ * ( J22*R12 - J12*R22); // 1,2 (transpose) -+ y(i12,qx,qy,e) = w_detJ * (-J21*R11 + J11*R21); // 2,1 (transpose) -+ y(i22,qx,qy,e) = w_detJ * (-J21*R12 + J11*R22); // 2,2 -+ } -+ } -+ } -+ }); -+} -+ -+// PA H(curl) x H(div) mass assemble 3D kernel, with factor -+// dF^{-1} C dF for a vector or matrix coefficient C. -+// If transpose, use dF^T C dF^{-T} for H(div) x H(curl). -+MFEM_HOST_DEVICE inline -+void PAHcurlHdivMassSetup3D(const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const bool transpose, -+ const Array &w_, -+ const Vector &j, -+ Vector &coeff_, -+ Vector &op) -+{ -+ const bool symmetric = (coeffDim != 9); -+ auto W = Reshape(w_.Read(), Q1D, Q1D, Q1D); -+ auto J = Reshape(j.Read(), Q1D, Q1D, Q1D, 3, 3, NE); -+ auto coeff = Reshape(coeff_.Read(), coeffDim, Q1D, Q1D, Q1D, NE); -+ auto y = Reshape(op.Write(), 9, Q1D, Q1D, Q1D, NE); -+ -+ const int i11 = 0; -+ const int i12 = transpose ? 3 : 1; -+ const int i13 = transpose ? 6 : 2; -+ const int i21 = transpose ? 1 : 3; -+ const int i22 = 4; -+ const int i23 = transpose ? 7 : 5; -+ const int i31 = transpose ? 2 : 6; -+ const int i32 = transpose ? 5 : 7; -+ const int i33 = 8; -+ -+ mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ const double J11 = J(qx,qy,qz,0,0,e); -+ const double J21 = J(qx,qy,qz,1,0,e); -+ const double J31 = J(qx,qy,qz,2,0,e); -+ const double J12 = J(qx,qy,qz,0,1,e); -+ const double J22 = J(qx,qy,qz,1,1,e); -+ const double J32 = J(qx,qy,qz,2,1,e); -+ const double J13 = J(qx,qy,qz,0,2,e); -+ const double J23 = J(qx,qy,qz,1,2,e); -+ const double J33 = J(qx,qy,qz,2,2,e); -+ const double detJ = J11 * (J22 * J33 - J32 * J23) - -+ J21 * (J12 * J33 - J32 * J13) + -+ J31 * (J12 * J23 - J22 * J13); -+ const double w_detJ = W(qx,qy,qz) / detJ; -+ // adj(J) -+ const double A11 = (J22 * J33) - (J23 * J32); -+ const double A12 = (J32 * J13) - (J12 * J33); -+ const double A13 = (J12 * J23) - (J22 * J13); -+ const double A21 = (J31 * J23) - (J21 * J33); -+ const double A22 = (J11 * J33) - (J13 * J31); -+ const double A23 = (J21 * J13) - (J11 * J23); -+ const double A31 = (J21 * J32) - (J31 * J22); -+ const double A32 = (J31 * J12) - (J11 * J32); -+ const double A33 = (J11 * J22) - (J12 * J21); -+ -+ if (coeffDim == 6 || coeffDim == 9) // Matrix coefficient version -+ { -+ // First compute entries of R = M^T J -+ const double M11 = (!symmetric) ? coeff(i11,qx,qy,qz,e) : coeff(0,qx,qy,qz,e); -+ const double M12 = (!symmetric) ? coeff(i12,qx,qy,qz,e) : coeff(1,qx,qy,qz,e); -+ const double M13 = (!symmetric) ? coeff(i13,qx,qy,qz,e) : coeff(2,qx,qy,qz,e); -+ const double M21 = (!symmetric) ? coeff(i21,qx,qy,qz,e) : M12; -+ const double M22 = (!symmetric) ? coeff(i22,qx,qy,qz,e) : coeff(3,qx,qy,qz,e); -+ const double M23 = (!symmetric) ? coeff(i23,qx,qy,qz,e) : coeff(4,qx,qy,qz,e); -+ const double M31 = (!symmetric) ? coeff(i31,qx,qy,qz,e) : M13; -+ const double M32 = (!symmetric) ? coeff(i32,qx,qy,qz,e) : M23; -+ const double M33 = (!symmetric) ? coeff(i33,qx,qy,qz,e) : coeff(5,qx,qy,qz,e); -+ -+ const double R11 = M11*J11 + M21*J21 + M31*J31; -+ const double R12 = M11*J12 + M21*J22 + M31*J32; -+ const double R13 = M11*J13 + M21*J23 + M31*J33; -+ const double R21 = M12*J11 + M22*J21 + M32*J31; -+ const double R22 = M12*J12 + M22*J22 + M32*J32; -+ const double R23 = M12*J13 + M22*J23 + M32*J33; -+ const double R31 = M13*J11 + M23*J21 + M33*J31; -+ const double R32 = M13*J12 + M23*J22 + M33*J32; -+ const double R33 = M13*J13 + M23*J23 + M33*J33; -+ -+ // y = (J^{-1} M^T J)^T -+ y(i11,qx,qy,qz,e) = w_detJ * (A11*R11 + A12*R21 + A13*R31); // 1,1 -+ y(i21,qx,qy,qz,e) = w_detJ * (A11*R12 + A12*R22 + A13*R32); // 1,2 -+ y(i31,qx,qy,qz,e) = w_detJ * (A11*R13 + A12*R23 + A13*R33); // 1,3 -+ y(i12,qx,qy,qz,e) = w_detJ * (A21*R11 + A22*R21 + A23*R31); // 2,1 -+ y(i22,qx,qy,qz,e) = w_detJ * (A21*R12 + A22*R22 + A23*R32); // 2,2 -+ y(i32,qx,qy,qz,e) = w_detJ * (A21*R13 + A22*R23 + A23*R33); // 2,3 -+ y(i13,qx,qy,qz,e) = w_detJ * (A31*R11 + A32*R21 + A33*R31); // 3,1 -+ y(i23,qx,qy,qz,e) = w_detJ * (A31*R12 + A32*R22 + A33*R32); // 3,2 -+ y(i33,qx,qy,qz,e) = w_detJ * (A31*R13 + A32*R23 + A33*R33); // 3,3 -+ } -+ else if (coeffDim == 3) // Vector coefficient version -+ { -+ const double D1 = coeff(0,qx,qy,qz,e); -+ const double D2 = coeff(1,qx,qy,qz,e); -+ const double D3 = coeff(2,qx,qy,qz,e); -+ // detJ J^{-1} DJ = adj(J) DJ -+ // transpose -+ y(i11,qx,qy,qz,e) = w_detJ * (D1*A11*J11 + D2*A12*J21 + D3*A13*J31); // 1,1 -+ y(i21,qx,qy,qz,e) = w_detJ * (D1*A11*J12 + D2*A12*J22 + D3*A13*J32); // 1,2 -+ y(i31,qx,qy,qz,e) = w_detJ * (D1*A11*J13 + D2*A12*J23 + D3*A13*J33); // 1,3 -+ y(i12,qx,qy,qz,e) = w_detJ * (D1*A21*J11 + D2*A22*J21 + D3*A23*J31); // 2,1 -+ y(i22,qx,qy,qz,e) = w_detJ * (D1*A21*J12 + D2*A22*J22 + D3*A23*J32); // 2,2 -+ y(i32,qx,qy,qz,e) = w_detJ * (D1*A21*J13 + D2*A22*J23 + D3*A23*J33); // 2,3 -+ y(i13,qx,qy,qz,e) = w_detJ * (D1*A31*J11 + D2*A32*J21 + D3*A33*J31); // 3,1 -+ y(i23,qx,qy,qz,e) = w_detJ * (D1*A31*J12 + D2*A32*J22 + D3*A33*J32); // 3,2 -+ y(i33,qx,qy,qz,e) = w_detJ * (D1*A31*J13 + D2*A32*J23 + D3*A33*J33); // 3,3 -+ } -+ } -+ } -+ } -+ }); -+} -+ -+// Mass operator for H(curl) and H(div) functions, using Piola transformations -+// u = dF^{-T} \hat{u} in H(curl), v = (1 / det dF) dF \hat{v} in H(div). -+MFEM_HOST_DEVICE inline -+void PAHcurlHdivMassApply2D(const int D1D, -+ const int D1Dtest, -+ const int Q1D, -+ const int NE, -+ const bool scalarCoeff, -+ const bool trialHcurl, -+ const bool transpose, -+ const Array &Bo_, -+ const Array &Bc_, -+ const Array &Bot_, -+ const Array &Bct_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) -+{ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ constexpr static int VDIM = 2; -+ -+ auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -+ auto Bot = Reshape(Bot_.Read(), D1Dtest-1, Q1D); -+ auto Bct = Reshape(Bct_.Read(), D1Dtest, Q1D); -+ auto op = Reshape(op_.Read(), scalarCoeff ? 1 : 4, Q1D, Q1D, NE); -+ auto x = Reshape(x_.Read(), 2*(D1D-1)*D1D, NE); -+ auto y = Reshape(y_.ReadWrite(), 2*(D1Dtest-1)*D1Dtest, NE); -+ -+ const int i12 = transpose ? 2 : 1; -+ const int i21 = transpose ? 1 : 2; -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qy][qx][c] = 0.0; -+ } -+ } -+ } -+ -+ int osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y trial components -+ { -+ const int D1Dy = trialHcurl ? ((c == 1) ? D1D - 1 : D1D) : -+ ((c == 1) ? D1D : D1D - 1); -+ const int D1Dx = trialHcurl ? ((c == 0) ? D1D - 1 : D1D) : -+ ((c == 0) ? D1D : D1D - 1); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = x(dx + (dy * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * (trialHcurl ? ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)) : -+ ((c == 0) ? Bc(qx,dx) : Bo(qx,dx))); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = trialHcurl ? ((c == 1) ? Bo(qy,dy) : Bc(qy,dy)) : -+ ((c == 1) ? Bc(qy,dy) : Bo(qy,dy)); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qy][qx][c] += massX[qx] * wy; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(0,qx,qy,e); -+ const double O12 = scalarCoeff ? 0.0 : op(i12,qx,qy,e); -+ const double O21 = scalarCoeff ? 0.0 : op(i21,qx,qy,e); -+ const double O22 = scalarCoeff ? O11 : op(3,qx,qy,e); -+ const double massX = mass[qy][qx][0]; -+ const double massY = mass[qy][qx][1]; -+ mass[qy][qx][0] = (O11*massX)+(O12*massY); -+ mass[qy][qx][1] = (O21*massX)+(O22*massY); -+ } -+ } -+ -+ osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y test components -+ { -+ const int D1Dy = trialHcurl ? ((c == 1) ? D1Dtest : D1Dtest - 1) : -+ ((c == 1) ? D1Dtest - 1 : D1Dtest); -+ const int D1Dx = trialHcurl ? ((c == 0) ? D1Dtest : D1Dtest - 1) : -+ ((c == 0) ? D1Dtest - 1 : D1Dtest); -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[HDIV_MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += mass[qy][qx][c] * (trialHcurl ? -+ ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)) : -+ ((c == 0) ? Bot(dx,qx) : Bct(dx,qx))); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = trialHcurl ? ((c == 1) ? Bct(dy,qy) : Bot(dy,qy)) : -+ ((c == 1) ? Bot(dy,qy) : Bct(dy,qy)); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ }); // end of element loop -+} -+ -+// Mass operator for H(curl) and H(div) functions, using Piola transformations -+// u = dF^{-T} \hat{u} in H(curl), v = (1 / det dF) dF \hat{v} in H(div). -+MFEM_HOST_DEVICE inline -+void PAHcurlHdivMassApply3D(const int D1D, -+ const int D1Dtest, -+ const int Q1D, -+ const int NE, -+ const bool scalarCoeff, -+ const bool trialHcurl, -+ const bool transpose, -+ const Array &Bo_, -+ const Array &Bc_, -+ const Array &Bot_, -+ const Array &Bct_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) -+{ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -+ auto Bot = Reshape(Bot_.Read(), D1Dtest-1, Q1D); -+ auto Bct = Reshape(Bct_.Read(), D1Dtest, Q1D); -+ auto op = Reshape(op_.Read(), scalarCoeff ? 1 : 9, Q1D, Q1D, Q1D, NE); -+ auto x = Reshape(x_.Read(), 3*(D1D-1)*D1D*(trialHcurl ? D1D : D1D-1), NE); -+ auto y = Reshape(y_.ReadWrite(), 3*(D1Dtest-1)*D1Dtest* -+ (trialHcurl ? D1Dtest-1 : D1Dtest), NE); -+ -+ const int i12 = transpose ? 3 : 1; -+ const int i13 = transpose ? 6 : 2; -+ const int i21 = transpose ? 1 : 3; -+ const int i23 = transpose ? 7 : 5; -+ const int i31 = transpose ? 2 : 6; -+ const int i32 = transpose ? 5 : 7; -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ int osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z trial components -+ { -+ const int D1Dz = trialHcurl ? ((c == 2) ? D1D - 1 : D1D) : -+ ((c == 2) ? D1D : D1D - 1); -+ const int D1Dy = trialHcurl ? ((c == 1) ? D1D - 1 : D1D) : -+ ((c == 1) ? D1D : D1D - 1); -+ const int D1Dx = trialHcurl ? ((c == 0) ? D1D - 1 : D1D) : -+ ((c == 0) ? D1D : D1D - 1); -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double massXY[MAX_Q1D][MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massXY[qy][qx] = 0.0; -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = x(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * (trialHcurl ? ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)) : -+ ((c == 0) ? Bc(qx,dx) : Bo(qx,dx))); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = trialHcurl ? ((c == 1) ? Bo(qy,dy) : Bc(qy,dy)) : -+ ((c == 1) ? Bc(qy,dy) : Bo(qy,dy)); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ massXY[qy][qx] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = trialHcurl ? ((c == 2) ? Bo(qz,dz) : Bc(qz,dz)) : -+ ((c == 2) ? Bc(qz,dz) : Bo(qz,dz)); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(0,qx,qy,qz,e); -+ const double O12 = scalarCoeff ? 0.0 : op(i12,qx,qy,qz,e); -+ const double O13 = scalarCoeff ? 0.0 : op(i13,qx,qy,qz,e); -+ const double O21 = scalarCoeff ? 0.0 : op(i21,qx,qy,qz,e); -+ const double O22 = scalarCoeff ? O11 : op(4,qx,qy,qz,e); -+ const double O23 = scalarCoeff ? 0.0 : op(i23,qx,qy,qz,e); -+ const double O31 = scalarCoeff ? 0.0 : op(i31,qx,qy,qz,e); -+ const double O32 = scalarCoeff ? 0.0 : op(i32,qx,qy,qz,e); -+ const double O33 = scalarCoeff ? O11 : op(8,qx,qy,qz,e); -+ const double massX = mass[qz][qy][qx][0]; -+ const double massY = mass[qz][qy][qx][1]; -+ const double massZ = mass[qz][qy][qx][2]; -+ mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -+ mass[qz][qy][qx][1] = (O21*massX)+(O22*massY)+(O23*massZ); -+ mass[qz][qy][qx][2] = (O31*massX)+(O32*massY)+(O33*massZ); -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double massXY[HDIV_MAX_D1D][HDIV_MAX_D1D]; -+ -+ osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z test components -+ { -+ const int D1Dz = trialHcurl ? ((c == 2) ? D1Dtest : D1Dtest - 1) : -+ ((c == 2) ? D1Dtest - 1 : D1Dtest); -+ const int D1Dy = trialHcurl ? ((c == 1) ? D1Dtest : D1Dtest - 1) : -+ ((c == 1) ? D1Dtest - 1 : D1Dtest); -+ const int D1Dx = trialHcurl ? ((c == 0) ? D1Dtest : D1Dtest - 1) : -+ ((c == 0) ? D1Dtest - 1 : D1Dtest); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[HDIV_MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += mass[qz][qy][qx][c] * (trialHcurl ? -+ ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)) : -+ ((c == 0) ? Bot(dx,qx) : Bct(dx,qx))); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = trialHcurl ? ((c == 1) ? Bct(dy,qy) : Bot(dy,qy)) : -+ ((c == 1) ? Bot(dy,qy) : Bct(dy,qy)); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] += massX[dx] * wy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = trialHcurl ? ((c == 2) ? Bct(dz,qz) : Bot(dz,qz)) : -+ ((c == 2) ? Bot(dz,qz) : Bct(dz,qz)); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += -+ massXY[dy][dx] * wz; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ } // loop qz -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H(curl) (trial), whose curl is -+// integrated against H(div) test functions corresponding to y. -+template -+MFEM_HOST_DEVICE inline -+void PAHcurlHdivApply3D(const int D1D, -+ const int D1Dtest, -+ const int Q1D, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Array &gc, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} -+ // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get -+ // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1Dtest, Q1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double curl[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ // curl[qz][qy][qx] will be computed as the vector curl at each quadrature point. -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ curl[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ // We treat x, y, z components separately for optimization specific to each. -+ -+ int osc = 0; -+ -+ { -+ // x component -+ const int D1Dz = D1D; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D - 1; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][2]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradXY[qy][qx][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * Bo(qx,dx); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ gradXY[qy][qx][0] += wx * wDy; -+ gradXY[qy][qx][1] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ curl[qz][qy][qx][1] += gradXY[qy][qx][1] * wDz; // (u_0)_{x_2} -+ curl[qz][qy][qx][2] -= gradXY[qy][qx][0] * wz; // -(u_0)_{x_1} -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ { -+ // y component -+ const int D1Dz = D1D; -+ const int D1Dy = D1D - 1; -+ const int D1Dx = D1D; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][2]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradXY[qy][qx][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double massY[MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ massY[qy] = 0.0; -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ massY[qy] += t * Bo(qy,dy); -+ } -+ } -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = Bc(qx,dx); -+ const double wDx = Gc(qx,dx); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = massY[qy]; -+ gradXY[qy][qx][0] += wDx * wy; -+ gradXY[qy][qx][1] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ curl[qz][qy][qx][0] -= gradXY[qy][qx][1] * wDz; // -(u_1)_{x_2} -+ curl[qz][qy][qx][2] += gradXY[qy][qx][0] * wz; // (u_1)_{x_0} -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ { -+ // z component -+ const int D1Dz = D1D - 1; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D; -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double gradYZ[MAX_Q1D][MAX_Q1D][2]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int d = 0; d < 2; ++d) -+ { -+ gradYZ[qz][qy][d] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massZ[MAX_Q1D]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ massZ[qz] = 0.0; -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ massZ[qz] += t * Bo(qz,dz); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = massZ[qz]; -+ gradYZ[qz][qy][0] += wz * wy; -+ gradYZ[qz][qy][1] += wz * wDy; -+ } -+ } -+ } -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = Bc(qx,dx); -+ const double wDx = Gc(qx,dx); -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ curl[qz][qy][qx][0] += gradYZ[qz][qy][1] * wx; // (u_2)_{x_1} -+ curl[qz][qy][qx][1] -= gradYZ[qz][qy][0] * wDx; // -(u_2)_{x_0} -+ } -+ } -+ } -+ } -+ } -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,qz,0,e); -+ const double O12 = op(qx,qy,qz,1,e); -+ const double O13 = op(qx,qy,qz,2,e); -+ const double O22 = op(qx,qy,qz,3,e); -+ const double O23 = op(qx,qy,qz,4,e); -+ const double O33 = op(qx,qy,qz,5,e); -+ -+ const double c1 = (O11 * curl[qz][qy][qx][0]) + (O12 * curl[qz][qy][qx][1]) + -+ (O13 * curl[qz][qy][qx][2]); -+ const double c2 = (O12 * curl[qz][qy][qx][0]) + (O22 * curl[qz][qy][qx][1]) + -+ (O23 * curl[qz][qy][qx][2]); -+ const double c3 = (O13 * curl[qz][qy][qx][0]) + (O23 * curl[qz][qy][qx][1]) + -+ (O33 * curl[qz][qy][qx][2]); -+ -+ curl[qz][qy][qx][0] = c1; -+ curl[qz][qy][qx][1] = c2; -+ curl[qz][qy][qx][2] = c3; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double massXY[HCURL_MAX_D1D][HCURL_MAX_D1D]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D -+ -+ osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1Dtest : D1Dtest - 1; -+ const int D1Dy = (c == 1) ? D1Dtest : D1Dtest - 1; -+ const int D1Dx = (c == 0) ? D1Dtest : D1Dtest - 1; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] = 0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[HCURL_MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += curl[qz][qy][qx][c] * -+ ((c == 0) ? Bct(dx,qx) : Bot(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Bct(dy,qy) : Bot(dy,qy); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] += massX[dx] * wy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = (c == 2) ? Bct(dz,qz) : Bot(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += -+ massXY[dy][dx] * wz; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ } // loop qz -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H(div) (test), integrated against the -+// curl of H(curl) trial functions corresponding to y. -+template -+MFEM_HOST_DEVICE inline -+void PAHcurlHdivApply3DTranspose(const int D1D, -+ const int D1Dtest, -+ const int Q1D, -+ const int NE, -+ const Array &bo, -+ const Array &bc, -+ const Array &bot, -+ const Array &bct, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ // Using Piola transformations (\nabla\times u) F = 1/det(dF) dF \hat{\nabla}\times\hat{u} -+ // for u in H(curl) and w = (1 / det (dF)) dF \hat{w} for w in H(div), we get -+ // (\nabla\times u) \cdot w = 1/det(dF)^2 \hat{\nabla}\times\hat{u}^T dF^T dF \hat{w} -+ // If c = 0, \hat{\nabla}\times\hat{u} reduces to [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // If c = 1, \hat{\nabla}\times\hat{u} reduces to [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // If c = 2, \hat{\nabla}\times\hat{u} reduces to [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1Dtest-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1Dtest, Q1D); -+ auto Gct = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -+ auto X = Reshape(x.Read(), 3*(D1Dtest-1)*(D1Dtest-1)*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; // Assuming HDIV_MAX_D1D <= HCURL_MAX_D1D -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D : D1D - 1; -+ const int D1Dy = (c == 1) ? D1D : D1D - 1; -+ const int D1Dx = (c == 0) ? D1D : D1D - 1; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double massXY[HDIV_MAX_Q1D][HDIV_MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massXY[qy][qx] = 0.0; -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[HDIV_MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * ((c == 0) ? Bc(qx,dx) : Bo(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ massXY[qy][qx] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,qz,0,e); -+ const double O12 = op(qx,qy,qz,1,e); -+ const double O13 = op(qx,qy,qz,2,e); -+ const double O22 = op(qx,qy,qz,3,e); -+ const double O23 = op(qx,qy,qz,4,e); -+ const double O33 = op(qx,qy,qz,5,e); -+ const double massX = mass[qz][qy][qx][0]; -+ const double massY = mass[qz][qy][qx][1]; -+ const double massZ = mass[qz][qy][qx][2]; -+ mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -+ mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ); -+ mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ); -+ } -+ } -+ } -+ -+ // x component -+ osc = 0; -+ { -+ const int D1Dz = D1D; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D - 1; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY12[MAX_D1D][MAX_D1D]; -+ double gradXY21[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY12[dy][dx] = 0.0; -+ gradXY21[dy][dx] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[MAX_D1D][2]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ for (int n = 0; n < 2; ++n) -+ { -+ massX[dx][n] = 0.0; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bot(dx,qx); -+ -+ massX[dx][0] += wx * mass[qz][qy][qx][1]; -+ massX[dx][1] += wx * mass[qz][qy][qx][2]; -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bct(dy,qy); -+ const double wDy = Gct(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY21[dy][dx] += massX[dx][0] * wy; -+ gradXY12[dy][dx] += massX[dx][1] * wDy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bct(dz,qz); -+ const double wDz = Gct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ // \hat{\nabla}\times\hat{u} is [0, (u_0)_{x_2}, -(u_0)_{x_1}] -+ // (u_0)_{x_2} * (op * curl)_1 - (u_0)_{x_1} * (op * curl)_2 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (gradXY21[dy][dx] * wDz) - (gradXY12[dy][dx] * wz); -+ } -+ } -+ } -+ } // loop qz -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ // y component -+ { -+ const int D1Dz = D1D; -+ const int D1Dy = D1D - 1; -+ const int D1Dx = D1D; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY02[MAX_D1D][MAX_D1D]; -+ double gradXY20[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ gradXY02[dy][dx] = 0.0; -+ gradXY20[dy][dx] = 0.0; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ double massY[MAX_D1D][2]; -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ massY[dy][0] = 0.0; -+ massY[dy][1] = 0.0; -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bot(dy,qy); -+ -+ massY[dy][0] += wy * mass[qz][qy][qx][2]; -+ massY[dy][1] += wy * mass[qz][qy][qx][0]; -+ } -+ } -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bct(dx,qx); -+ const double wDx = Gct(dx,qx); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ gradXY02[dy][dx] += massY[dy][0] * wDx; -+ gradXY20[dy][dx] += massY[dy][1] * wx; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bct(dz,qz); -+ const double wDz = Gct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ // \hat{\nabla}\times\hat{u} is [-(u_1)_{x_2}, 0, (u_1)_{x_0}] -+ // -(u_1)_{x_2} * (op * curl)_0 + (u_1)_{x_0} * (op * curl)_2 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (-gradXY20[dy][dx] * wDz) + (gradXY02[dy][dx] * wz); -+ } -+ } -+ } -+ } // loop qz -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } -+ -+ // z component -+ { -+ const int D1Dz = D1D - 1; -+ const int D1Dy = D1D; -+ const int D1Dx = D1D; -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ double gradYZ01[MAX_D1D][MAX_D1D]; -+ double gradYZ10[MAX_D1D][MAX_D1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ gradYZ01[dz][dy] = 0.0; -+ gradYZ10[dz][dy] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massZ[MAX_D1D][2]; -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int n = 0; n < 2; ++n) -+ { -+ massZ[dz][n] = 0.0; -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = Bot(dz,qz); -+ -+ massZ[dz][0] += wz * mass[qz][qy][qx][0]; -+ massZ[dz][1] += wz * mass[qz][qy][qx][1]; -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = Bct(dy,qy); -+ const double wDy = Gct(dy,qy); -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ gradYZ01[dz][dy] += wy * massZ[dz][1]; -+ gradYZ10[dz][dy] += wDy * massZ[dz][0]; -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double wx = Bct(dx,qx); -+ const double wDx = Gct(dx,qx); -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ // \hat{\nabla}\times\hat{u} is [(u_2)_{x_1}, -(u_2)_{x_0}, 0] -+ // (u_2)_{x_1} * (op * curl)_0 - (u_2)_{x_0} * (op * curl)_1 -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, -+ e) += (gradYZ10[dz][dy] * wx) - (gradYZ01[dz][dy] * wDx); -+ } -+ } -+ } -+ } // loop qx -+ } -+ }); // end of element loop -+} -+ -+} // namespace internal -+ -+} // namespace mfem -+ -+#endif -diff --git a/fem/bilininteg_hdiv.cpp b/fem/integ/bilininteg_hdiv_kernels.hpp -similarity index 84% -rename from fem/bilininteg_hdiv.cpp -rename to fem/integ/bilininteg_hdiv_kernels.hpp -index 26e0ed973..cf083a2c6 100644 ---- a/fem/bilininteg_hdiv.cpp -+++ b/fem/integ/bilininteg_hdiv_kernels.hpp -@@ -9,13 +9,12 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qspace.hpp" -- --using namespace std; -+#ifndef MFEM_BILININTEG_HDIV_KERNELS_HPP -+#define MFEM_BILININTEG_HDIV_KERNELS_HPP - -+#include "../../config/config.hpp" -+#include "../../general/forall.hpp" -+#include "../../linalg/dtensor.hpp" - - // Piola transformation in H(div): w = (1 / det (dF)) dF \hat{w} - // div w = (1 / det (dF)) \hat{div} \hat{w} -@@ -23,14 +22,18 @@ using namespace std; - namespace mfem - { - -+namespace internal -+{ -+ - // PA H(div) Mass Assemble 2D kernel --void PAHdivSetup2D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff_, -- Vector &op) -+MFEM_HOST_DEVICE inline -+void PAHdivMassSetup2D(const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &w, -+ const Vector &j, -+ Vector &coeff_, -+ Vector &op) - { - const bool symmetric = (coeffDim != 4); - const int NQ = Q1D*Q1D; -@@ -88,13 +91,14 @@ void PAHdivSetup2D(const int Q1D, - } - - // PA H(div) Mass Assemble 3D kernel --void PAHdivSetup3D(const int Q1D, -- const int coeffDim, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff_, -- Vector &op) -+MFEM_HOST_DEVICE inline -+void PAHdivMassSetup3D(const int Q1D, -+ const int coeffDim, -+ const int NE, -+ const Array &w, -+ const Vector &j, -+ Vector &coeff_, -+ Vector &op) - { - const bool symmetric = (coeffDim != 9); - const int NQ = Q1D*Q1D*Q1D; -@@ -175,6 +179,134 @@ void PAHdivSetup3D(const int Q1D, - }); - } - -+MFEM_HOST_DEVICE inline -+void PAHdivMassAssembleDiagonal2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &Bo_, -+ const Array &Bc_, -+ const Vector &op_, -+ Vector &diag_) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_Q1D = HDIV_MAX_Q1D; -+ -+ auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -+ auto op = Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE); -+ auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dx = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double mass[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy); -+ mass[qx] += wy*wy*((c == 0) ? op(qx,qy,0,e) : op(qx,qy,symmetric ? 2 : 3,e)); -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double val = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx); -+ val += mass[qx] * wx * wx; -+ } -+ diag(dx + (dy * D1Dx) + osc, e) += val; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop (c) over components -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline -+void PAHdivMassAssembleDiagonal3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const bool symmetric, -+ const Array &Bo_, -+ const Array &Bc_, -+ const Vector &op_, -+ Vector &diag_) -+{ -+ MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); -+ MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -+ auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -+ auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -+ auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D : D1D - 1; -+ const int D1Dy = (c == 1) ? D1D : D1D - 1; -+ const int D1Dx = (c == 0) ? D1D : D1D - 1; -+ -+ const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) : -+ (symmetric ? 5 : 8)); -+ -+ double mass[HDIV_MAX_Q1D]; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy); -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz); -+ mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e); -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double val = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx); -+ val += mass[qx] * wx * wx; -+ } -+ diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline - void PAHdivMassApply2D(const int D1D, - const int Q1D, - const int NE, -@@ -307,6 +439,7 @@ void PAHdivMassApply2D(const int D1D, - } - - template -+MFEM_HOST_DEVICE inline - void SmemPAHdivMassApply2D(const int NE, - const bool symmetric, - const Array &Bo_, -@@ -475,131 +608,7 @@ void SmemPAHdivMassApply2D(const int NE, - }); - } - --void PAHdivMassAssembleDiagonal2D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &Bo_, -- const Array &Bc_, -- const Vector &op_, -- Vector &diag_) --{ -- constexpr static int VDIM = 2; -- constexpr static int MAX_Q1D = HDIV_MAX_Q1D; -- -- auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -- auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -- auto op = Reshape(op_.Read(), Q1D, Q1D, symmetric ? 3 : 4, NE); -- auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -- { -- const int D1Dx = (c == 1) ? D1D - 1 : D1D; -- const int D1Dy = (c == 0) ? D1D - 1 : D1D; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- double mass[MAX_Q1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qx] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy); -- mass[qx] += wy*wy*((c == 0) ? op(qx,qy,0,e) : op(qx,qy,symmetric ? 2 : 3,e)); -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double val = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx); -- val += mass[qx] * wx * wx; -- } -- diag(dx + (dy * D1Dx) + osc, e) += val; -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop (c) over components -- }); // end of element loop --} -- --void PAHdivMassAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const bool symmetric, -- const Array &Bo_, -- const Array &Bc_, -- const Vector &op_, -- Vector &diag_) --{ -- MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); -- MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -- constexpr static int VDIM = 3; -- -- auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -- auto Bc = Reshape(Bc_.Read(), Q1D, D1D); -- auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, symmetric ? 6 : 9, NE); -- auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D : D1D - 1; -- const int D1Dy = (c == 1) ? D1D : D1D - 1; -- const int D1Dx = (c == 0) ? D1D : D1D - 1; -- -- const int opc = (c == 0) ? 0 : ((c == 1) ? (symmetric ? 3 : 4) : -- (symmetric ? 5 : 8)); -- -- double mass[HDIV_MAX_Q1D]; -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- mass[qx] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 1) ? Bc(qy,dy) : Bo(qy,dy); -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const double wz = (c == 2) ? Bc(qz,dz) : Bo(qz,dz); -- mass[qx] += wy * wy * wz * wz * op(qx,qy,qz,opc,e); -- } -- } -- } -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- double val = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = (c == 0) ? Bc(qx,dx) : Bo(qx,dx); -- val += mass[qx] * wx * wx; -- } -- diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- }); // end of element loop --} -- -+MFEM_HOST_DEVICE inline - void PAHdivMassApply3D(const int D1D, - const int Q1D, - const int NE, -@@ -796,6 +805,7 @@ void PAHdivMassApply3D(const int D1D, - } - - template -+MFEM_HOST_DEVICE inline - void SmemPAHdivMassApply3D(const int NE, - const bool symmetric, - const Array &Bo_, -@@ -1083,6 +1093,7 @@ void SmemPAHdivMassApply3D(const int NE, - }); - } - -+MFEM_HOST_DEVICE inline - void PAHdivMassApply(const int dim, - const int D1D, - const int Q1D, -@@ -1127,13 +1138,14 @@ void PAHdivMassApply(const int dim, - } - - // PA H(div) div-div assemble 2D kernel --// NOTE: this is identical to PACurlCurlSetup3D --static void PADivDivSetup2D(const int Q1D, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff_, -- Vector &op) -+// NOTE: this is identical to PACurlCurlSetup2D -+MFEM_HOST_DEVICE inline -+void PADivDivSetup2D(const int Q1D, -+ const int NE, -+ const Array &w, -+ const Vector &j, -+ Vector &coeff_, -+ Vector &op) - { - const int NQ = Q1D*Q1D; - auto W = w.Read(); -@@ -1154,12 +1166,13 @@ static void PADivDivSetup2D(const int Q1D, - }); - } - --static void PADivDivSetup3D(const int Q1D, -- const int NE, -- const Array &w, -- const Vector &j, -- Vector &coeff_, -- Vector &op) -+MFEM_HOST_DEVICE inline -+void PADivDivSetup3D(const int Q1D, -+ const int NE, -+ const Array &w, -+ const Vector &j, -+ Vector &coeff_, -+ Vector &op) - { - const int NQ = Q1D*Q1D*Q1D; - auto W = w.Read(); -@@ -1188,16 +1201,141 @@ static void PADivDivSetup3D(const int Q1D, - }); - } - --static void PADivDivApply2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &Bo_, -- const Array &Gc_, -- const Array &Bot_, -- const Array &Gct_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) -+MFEM_HOST_DEVICE inline -+void PADivDivAssembleDiagonal2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &Bo_, -+ const Array &Gc_, -+ const Vector &op_, -+ Vector &diag_) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_Q1D = HDIV_MAX_Q1D; -+ -+ auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -+ auto Gc = Reshape(Gc_.Read(), Q1D, D1D); -+ auto op = Reshape(op_.Read(), Q1D, Q1D, NE); -+ auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dx = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 0) ? D1D - 1 : D1D; -+ -+ double div[MAX_Q1D]; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ div[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy); -+ div[qx] += wy * wy * op(qx,qy,e); -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double val = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx); -+ val += div[qx] * wx * wx; -+ } -+ diag(dx + (dy * D1Dx) + osc, e) += val; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ }); -+} -+ -+MFEM_HOST_DEVICE inline -+void PADivDivAssembleDiagonal3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &Bo_, -+ const Array &Gc_, -+ const Vector &op_, -+ Vector &diag_) -+{ -+ MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); -+ MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -+ constexpr static int VDIM = 3; -+ -+ auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -+ auto Gc = Reshape(Gc_.Read(), Q1D, D1D); -+ auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE); -+ auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D : D1D - 1; -+ const int D1Dy = (c == 1) ? D1D : D1D - 1; -+ const int D1Dx = (c == 0) ? D1D : D1D - 1; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double a[HDIV_MAX_Q1D]; -+ -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ a[qx] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy); -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz); -+ a[qx] += wy * wy * wz * wz * op(qx,qy,qz,e); -+ } -+ } -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ double val = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx); -+ val += a[qx] * wx * wx; -+ } -+ diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ }); // end of element loop -+} -+ -+MFEM_HOST_DEVICE inline -+void PADivDivApply2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &Bo_, -+ const Array &Gc_, -+ const Array &Bot_, -+ const Array &Gct_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) - { - constexpr static int VDIM = 2; - constexpr static int MAX_D1D = HDIV_MAX_D1D; -@@ -1307,16 +1445,17 @@ static void PADivDivApply2D(const int D1D, - }); // end of element loop - } - --static void PADivDivApply3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &Bo_, -- const Array &Gc_, -- const Array &Bot_, -- const Array &Gct_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) -+MFEM_HOST_DEVICE inline -+void PADivDivApply3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &Bo_, -+ const Array &Gc_, -+ const Array &Bot_, -+ const Array &Gct_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) - { - MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); - MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -@@ -1483,332 +1622,280 @@ static void PADivDivApply3D(const int D1D, - }); // end of element loop - } - --void DivDivIntegrator::AssemblePA(const FiniteElementSpace &fes) -+// PA H(div)-L2 (div u, p) assemble 2D kernel -+MFEM_HOST_DEVICE inline -+void PAHdivL2Setup2D(const int Q1D, -+ const int NE, -+ const Array &w, -+ Vector &coeff_, -+ Vector &op) - { -- // Assumes tensor-product elements -- Mesh *mesh = fes.GetMesh(); -- const FiniteElement *fel = fes.GetFE(0); -- -- const VectorTensorFiniteElement *el = -- dynamic_cast(fel); -- MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule -- (*el, *el, *mesh->GetElementTransformation(0)); -- -- const int dims = el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- ne = fes.GetNE(); -- geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -- mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- -- pa_data.SetSize(nq * ne, Device::GetMemoryType()); -- -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -- -- if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3) -- { -- PADivDivSetup3D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -- } -- else if (el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2) -- { -- PADivDivSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -- } -- else -+ const int NQ = Q1D*Q1D; -+ auto W = w.Read(); -+ auto coeff = Reshape(coeff_.Read(), NQ, NE); -+ auto y = Reshape(op.Write(), NQ, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) - { -- MFEM_ABORT("Unknown kernel."); -- } -+ for (int q = 0; q < NQ; ++q) -+ { -+ y(q,e) = W[q] * coeff(q,e); -+ } -+ }); - } - --void DivDivIntegrator::AddMultPA(const Vector &x, Vector &y) const -+MFEM_HOST_DEVICE inline -+void PAHdivL2Setup3D(const int Q1D, -+ const int NE, -+ const Array &w, -+ Vector &coeff_, -+ Vector &op) - { -- if (dim == 3) -- PADivDivApply3D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -- mapsO->Bt, mapsC->Gt, pa_data, x, y); -- else if (dim == 2) -- PADivDivApply2D(dofs1D, quad1D, ne, mapsO->B, mapsC->G, -- mapsO->Bt, mapsC->Gt, pa_data, x, y); -- else -+ const int NQ = Q1D*Q1D*Q1D; -+ auto W = w.Read(); -+ auto coeff = Reshape(coeff_.Read(), NQ, NE); -+ auto y = Reshape(op.Write(), NQ, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) - { -- MFEM_ABORT("Unsupported dimension!"); -- } -+ for (int q = 0; q < NQ; ++q) -+ { -+ y(q,e) = W[q] * coeff(q, e); -+ } -+ }); - } - --static void PADivDivAssembleDiagonal2D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &Bo_, -- const Array &Gc_, -- const Vector &op_, -- Vector &diag_) -+MFEM_HOST_DEVICE inline -+void PAHdivL2AssembleDiagonal_ADAt_2D(const int D1D, -+ const int Q1D, -+ const int L2D1D, -+ const int NE, -+ const Array &L2Bo_, -+ const Array &Gct_, -+ const Array &Bot_, -+ const Vector &op_, -+ const Vector &D_, -+ Vector &diag_) - { - constexpr static int VDIM = 2; -- constexpr static int MAX_Q1D = HDIV_MAX_Q1D; - -- auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -- auto Gc = Reshape(Gc_.Read(), Q1D, D1D); -+ auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D); -+ auto Gct = Reshape(Gct_.Read(), D1D, Q1D); -+ auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D); - auto op = Reshape(op_.Read(), Q1D, Q1D, NE); -- auto diag = Reshape(diag_.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ auto D = Reshape(D_.Read(), 2*(D1D-1)*D1D, NE); -+ auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, NE); - - mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) - { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ for (int ry = 0; ry < L2D1D; ++ry) - { -- const int D1Dx = (c == 1) ? D1D - 1 : D1D; -- const int D1Dy = (c == 0) ? D1D - 1 : D1D; -+ for (int rx = 0; rx < L2D1D; ++rx) -+ { -+ // Compute row (rx,ry), assuming all contributions are from -+ // a single element. - -- double div[MAX_Q1D]; -+ double row[2*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)]; -+ double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D]; - -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -+ for (int i=0; i<2*D1D*(D1D - 1); ++i) - { -- div[qx] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double wy = (c == 0) ? Bo(qy,dy) : Gc(qy,dy); -- div[qx] += wy * wy * op(qx,qy,e); -- } -+ row[i] = 0; - } - -- for (int dx = 0; dx < D1Dx; ++dx) -+ for (int qy = 0; qy < Q1D; ++qy) - { -- double val = 0.0; - for (int qx = 0; qx < Q1D; ++qx) - { -- const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx); -- val += div[qx] * wx * wx; -+ div[qy][qx] = op(qx,qy,e) * L2Bo(qx,rx) * L2Bo(qy,ry); - } -- diag(dx + (dy * D1Dx) + osc, e) += val; - } -- } - -- osc += D1Dx * D1Dy; -- } // loop c -- }); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ int osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dy = (c == 1) ? D1D : D1D - 1; -+ const int D1Dx = (c == 0) ? D1D : D1D - 1; -+ -+ double aX[HDIV_MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ aX[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) : -+ Bot(dx,qx)); -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ row[dx + (dy * D1Dx) + osc] += aX[dx] * wy; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ } // loop qy -+ -+ double val = 0.0; -+ for (int i=0; i<2*D1D*(D1D - 1); ++i) -+ { -+ val += row[i] * row[i] * D(i,e); -+ } -+ diag(rx,ry,e) += val; -+ } // loop rx -+ } // loop ry -+ }); // end of element loop - } - --static void PADivDivAssembleDiagonal3D(const int D1D, -- const int Q1D, -- const int NE, -- const Array &Bo_, -- const Array &Gc_, -- const Vector &op_, -- Vector &diag_) -+MFEM_HOST_DEVICE inline -+void PAHdivL2AssembleDiagonal_ADAt_3D(const int D1D, -+ const int Q1D, -+ const int L2D1D, -+ const int NE, -+ const Array &L2Bo_, -+ const Array &Gct_, -+ const Array &Bot_, -+ const Vector &op_, -+ const Vector &D_, -+ Vector &diag_) - { - MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); - MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); - constexpr static int VDIM = 3; - -- auto Bo = Reshape(Bo_.Read(), Q1D, D1D-1); -- auto Gc = Reshape(Gc_.Read(), Q1D, D1D); -+ auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D); -+ auto Gct = Reshape(Gct_.Read(), D1D, Q1D); -+ auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D); - auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE); -- auto diag = Reshape(diag_.ReadWrite(), 3*(D1D-1)*(D1D-1)*D1D, NE); -+ auto D = Reshape(D_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE); -+ auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE); - - mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) - { -- int osc = 0; -- -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ for (int rz = 0; rz < L2D1D; ++rz) - { -- const int D1Dz = (c == 2) ? D1D : D1D - 1; -- const int D1Dy = (c == 1) ? D1D : D1D - 1; -- const int D1Dx = (c == 0) ? D1D : D1D - 1; -- -- for (int dz = 0; dz < D1Dz; ++dz) -+ for (int ry = 0; ry < L2D1D; ++ry) - { -- for (int dy = 0; dy < D1Dy; ++dy) -+ for (int rx = 0; rx < L2D1D; ++rx) - { -- double a[HDIV_MAX_Q1D]; -+ // Compute row (rx,ry,rz), assuming all contributions are from -+ // a single element. - -- for (int qx = 0; qx < Q1D; ++qx) -+ double row[3*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)*(HDIV_MAX_D1D-1)]; -+ double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D]; -+ -+ for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i) -+ { -+ row[i] = 0; -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) - { -- a[qx] = 0.0; - for (int qy = 0; qy < Q1D; ++qy) - { -- const double wy = (c == 1) ? Gc(qy,dy) : Bo(qy,dy); -- -- for (int qz = 0; qz < Q1D; ++qz) -+ for (int qx = 0; qx < Q1D; ++qx) - { -- const double wz = (c == 2) ? Gc(qz,dz) : Bo(qz,dz); -- a[qx] += wy * wy * wz * wz * op(qx,qy,qz,e); -+ div[qz][qy][qx] = op(qx,qy,qz,e) * L2Bo(qx,rx) * -+ L2Bo(qy,ry) * L2Bo(qz,rz); - } - } - } - -- for (int dx = 0; dx < D1Dx; ++dx) -+ for (int qz = 0; qz < Q1D; ++qz) - { -- double val = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double wx = (c == 0) ? Gc(qx,dx) : Bo(qx,dx); -- val += a[qx] * wx * wx; -- } -- diag(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += val; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- }); // end of element loop --} -- --void DivDivIntegrator::AssembleDiagonalPA(Vector& diag) --{ -- if (dim == 3) -- { -- PADivDivAssembleDiagonal3D(dofs1D, quad1D, ne, -- mapsO->B, mapsC->G, pa_data, diag); -- } -- else -- { -- PADivDivAssembleDiagonal2D(dofs1D, quad1D, ne, -- mapsO->B, mapsC->G, pa_data, diag); -- } --} -- --// PA H(div)-L2 (div u, p) assemble 2D kernel --static void PADivL2Setup2D(const int Q1D, -- const int NE, -- const Array &w, -- Vector &coeff_, -- Vector &op) --{ -- const int NQ = Q1D*Q1D; -- auto W = w.Read(); -- auto coeff = Reshape(coeff_.Read(), NQ, NE); -- auto y = Reshape(op.Write(), NQ, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int q = 0; q < NQ; ++q) -- { -- y(q,e) = W[q] * coeff(q,e); -- } -- }); --} -- --static void PADivL2Setup3D(const int Q1D, -- const int NE, -- const Array &w, -- Vector &coeff_, -- Vector &op) --{ -- const int NQ = Q1D*Q1D*Q1D; -- auto W = w.Read(); -- auto coeff = Reshape(coeff_.Read(), NQ, NE); -- auto y = Reshape(op.Write(), NQ, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int q = 0; q < NQ; ++q) -- { -- y(q,e) = W[q] * coeff(q, e); -- } -- }); --} -- --void --VectorFEDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes) --{ -- // Assumes tensor-product elements, with a vector test space and -- // scalar trial space. -- Mesh *mesh = trial_fes.GetMesh(); -- const FiniteElement *trial_fel = trial_fes.GetFE(0); -- const FiniteElement *test_fel = test_fes.GetFE(0); -- -- const VectorTensorFiniteElement *trial_el = -- dynamic_cast(trial_fel); -- MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -- -- const NodalTensorFiniteElement *test_el = -- dynamic_cast(test_fel); -- MFEM_VERIFY(test_el != NULL, "Only NodalTensorFiniteElement is supported!"); -- -- const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule( -- *trial_el, *trial_el, -- *mesh->GetElementTransformation(0)); -- -- const int dims = trial_el->GetDim(); -- MFEM_VERIFY(dims == 2 || dims == 3, ""); -- -- const int nq = ir->GetNPoints(); -- dim = mesh->Dimension(); -- MFEM_VERIFY(dim == 2 || dim == 3, ""); -- -- MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder() + 1, ""); -- -- ne = trial_fes.GetNE(); -- mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -- dofs1D = mapsC->ndof; -- quad1D = mapsC->nqpt; -- -- L2mapsO = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -- L2dofs1D = L2mapsO->ndof; -+ double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D]; - -- MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -- if (dim == 2) -- { -- MFEM_VERIFY(nq == quad1D * quad1D, ""); -- } -- else -- { -- MFEM_VERIFY(nq == quad1D * quad1D * quad1D, ""); -- } -+ int osc = 0; -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D : D1D - 1; -+ const int D1Dy = (c == 1) ? D1D : D1D - 1; -+ const int D1Dx = (c == 0) ? D1D : D1D - 1; - -- pa_data.SetSize(nq * ne, Device::GetMemoryType()); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ aXY[dy][dx] = 0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double aX[HDIV_MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ aX[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx) -+ : Bot(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ aXY[dy][dx] += aX[dx] * wy; -+ } -+ } -+ } - -- QuadratureSpace qs(*mesh, *ir); -- CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ row[dx + ((dy + (dz * D1Dy)) * D1Dx) + osc] += -+ aXY[dy][dx] * wz; -+ } -+ } -+ } - -- if (test_el->GetMapType() == FiniteElement::INTEGRAL) -- { -- const GeometricFactors *geom = -- mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS); -- coeff /= geom->detJ; -- } -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ } // loop qz - -- if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3) -- { -- PADivL2Setup3D(quad1D, ne, ir->GetWeights(), coeff, pa_data); -- } -- else if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2) -- { -- PADivL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data); -- } -- else -- { -- MFEM_ABORT("Unknown kernel."); -- } -+ double val = 0.0; -+ for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i) -+ { -+ val += row[i] * row[i] * D(i,e); -+ } -+ diag(rx,ry,rz,e) += val; -+ } // loop rx -+ } // loop ry -+ } // loop rz -+ }); // end of element loop - } - - // Apply to x corresponding to DOFs in H(div) (trial), whose divergence is - // integrated against L_2 test functions corresponding to y. --static void PAHdivL2Apply3D(const int D1D, -- const int Q1D, -- const int L2D1D, -- const int NE, -- const Array &Bo_, -- const Array &Gc_, -- const Array &L2Bot_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) -+MFEM_HOST_DEVICE inline -+void PAHdivL2Apply3D(const int D1D, -+ const int Q1D, -+ const int L2D1D, -+ const int NE, -+ const Array &Bo_, -+ const Array &Gc_, -+ const Array &L2Bot_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) - { - MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); - MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -@@ -1962,16 +2049,17 @@ static void PAHdivL2Apply3D(const int D1D, - - // Apply to x corresponding to DOFs in H(div) (trial), whose divergence is - // integrated against L_2 test functions corresponding to y. --static void PAHdivL2Apply2D(const int D1D, -- const int Q1D, -- const int L2D1D, -- const int NE, -- const Array &Bo_, -- const Array &Gc_, -- const Array &L2Bot_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) -+MFEM_HOST_DEVICE inline -+void PAHdivL2Apply2D(const int D1D, -+ const int Q1D, -+ const int L2D1D, -+ const int NE, -+ const Array &Bo_, -+ const Array &Gc_, -+ const Array &L2Bot_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) - { - constexpr static int VDIM = 2; - constexpr static int MAX_D1D = HDIV_MAX_D1D; -@@ -2068,16 +2156,17 @@ static void PAHdivL2Apply2D(const int D1D, - }); // end of element loop - } - --static void PAHdivL2ApplyTranspose3D(const int D1D, -- const int Q1D, -- const int L2D1D, -- const int NE, -- const Array &L2Bo_, -- const Array &Gct_, -- const Array &Bot_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) -+MFEM_HOST_DEVICE inline -+void PAHdivL2ApplyTranspose3D(const int D1D, -+ const int Q1D, -+ const int L2D1D, -+ const int NE, -+ const Array &L2Bo_, -+ const Array &Gct_, -+ const Array &Bot_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) - { - MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); - MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -@@ -2230,16 +2319,17 @@ static void PAHdivL2ApplyTranspose3D(const int D1D, - }); // end of element loop - } - --static void PAHdivL2ApplyTranspose2D(const int D1D, -- const int Q1D, -- const int L2D1D, -- const int NE, -- const Array &L2Bo_, -- const Array &Gct_, -- const Array &Bot_, -- const Vector &op_, -- const Vector &x_, -- Vector &y_) -+MFEM_HOST_DEVICE inline -+void PAHdivL2ApplyTranspose2D(const int D1D, -+ const int Q1D, -+ const int L2D1D, -+ const int NE, -+ const Array &L2Bo_, -+ const Array &Gct_, -+ const Array &Bot_, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_) - { - constexpr static int VDIM = 2; - constexpr static int MAX_D1D = HDIV_MAX_D1D; -@@ -2336,265 +2426,8 @@ static void PAHdivL2ApplyTranspose2D(const int D1D, - }); // end of element loop - } - --void VectorFEDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- if (dim == 3) -- PAHdivL2Apply3D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G, -- L2mapsO->Bt, pa_data, x, y); -- else if (dim == 2) -- PAHdivL2Apply2D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G, -- L2mapsO->Bt, pa_data, x, y); -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --void VectorFEDivergenceIntegrator::AddMultTransposePA(const Vector &x, -- Vector &y) const --{ -- if (dim == 3) -- PAHdivL2ApplyTranspose3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B, -- mapsC->Gt, mapsO->Bt, pa_data, x, y); -- else if (dim == 2) -- PAHdivL2ApplyTranspose2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B, -- mapsC->Gt, mapsO->Bt, pa_data, x, y); -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -- --static void PAHdivL2AssembleDiagonal_ADAt_3D(const int D1D, -- const int Q1D, -- const int L2D1D, -- const int NE, -- const Array &L2Bo_, -- const Array &Gct_, -- const Array &Bot_, -- const Vector &op_, -- const Vector &D_, -- Vector &diag_) --{ -- MFEM_VERIFY(D1D <= HDIV_MAX_D1D, "Error: D1D > HDIV_MAX_D1D"); -- MFEM_VERIFY(Q1D <= HDIV_MAX_Q1D, "Error: Q1D > HDIV_MAX_Q1D"); -- constexpr static int VDIM = 3; -- -- auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D); -- auto Gct = Reshape(Gct_.Read(), D1D, Q1D); -- auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D); -- auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE); -- auto D = Reshape(D_.Read(), 3*(D1D-1)*(D1D-1)*D1D, NE); -- auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, L2D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int rz = 0; rz < L2D1D; ++rz) -- { -- for (int ry = 0; ry < L2D1D; ++ry) -- { -- for (int rx = 0; rx < L2D1D; ++rx) -- { -- // Compute row (rx,ry,rz), assuming all contributions are from -- // a single element. -- -- double row[3*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)*(HDIV_MAX_D1D-1)]; -- double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D][HDIV_MAX_Q1D]; -- -- for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i) -- { -- row[i] = 0; -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- div[qz][qy][qx] = op(qx,qy,qz,e) * L2Bo(qx,rx) * -- L2Bo(qy,ry) * L2Bo(qz,rz); -- } -- } -- } -- -- for (int qz = 0; qz < Q1D; ++qz) -- { -- double aXY[HDIV_MAX_D1D][HDIV_MAX_D1D]; -- -- int osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dz = (c == 2) ? D1D : D1D - 1; -- const int D1Dy = (c == 1) ? D1D : D1D - 1; -- const int D1Dx = (c == 0) ? D1D : D1D - 1; -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- aXY[dy][dx] = 0; -- } -- } -- for (int qy = 0; qy < Q1D; ++qy) -- { -- double aX[HDIV_MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- aX[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- aX[dx] += div[qz][qy][qx] * ((c == 0) ? Gct(dx,qx) -- : Bot(dx,qx)); -- } -- } -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy); -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- aXY[dy][dx] += aX[dx] * wy; -- } -- } -- } -- -- for (int dz = 0; dz < D1Dz; ++dz) -- { -- const double wz = (c == 2) ? Gct(dz,qz) : Bot(dz,qz); -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- row[dx + ((dy + (dz * D1Dy)) * D1Dx) + osc] += -- aXY[dy][dx] * wz; -- } -- } -- } -- -- osc += D1Dx * D1Dy * D1Dz; -- } // loop c -- } // loop qz -- -- double val = 0.0; -- for (int i=0; i<3*D1D*(D1D - 1)*(D1D - 1); ++i) -- { -- val += row[i] * row[i] * D(i,e); -- } -- diag(rx,ry,rz,e) += val; -- } // loop rx -- } // loop ry -- } // loop rz -- }); // end of element loop --} -- --static void PAHdivL2AssembleDiagonal_ADAt_2D(const int D1D, -- const int Q1D, -- const int L2D1D, -- const int NE, -- const Array &L2Bo_, -- const Array &Gct_, -- const Array &Bot_, -- const Vector &op_, -- const Vector &D_, -- Vector &diag_) --{ -- constexpr static int VDIM = 2; -- -- auto L2Bo = Reshape(L2Bo_.Read(), Q1D, L2D1D); -- auto Gct = Reshape(Gct_.Read(), D1D, Q1D); -- auto Bot = Reshape(Bot_.Read(), D1D-1, Q1D); -- auto op = Reshape(op_.Read(), Q1D, Q1D, NE); -- auto D = Reshape(D_.Read(), 2*(D1D-1)*D1D, NE); -- auto diag = Reshape(diag_.ReadWrite(), L2D1D, L2D1D, NE); -- -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- for (int ry = 0; ry < L2D1D; ++ry) -- { -- for (int rx = 0; rx < L2D1D; ++rx) -- { -- // Compute row (rx,ry), assuming all contributions are from -- // a single element. -- -- double row[2*HDIV_MAX_D1D*(HDIV_MAX_D1D-1)]; -- double div[HDIV_MAX_Q1D][HDIV_MAX_Q1D]; -- -- for (int i=0; i<2*D1D*(D1D - 1); ++i) -- { -- row[i] = 0; -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int qx = 0; qx < Q1D; ++qx) -- { -- div[qy][qx] = op(qx,qy,e) * L2Bo(qx,rx) * L2Bo(qy,ry); -- } -- } -- -- for (int qy = 0; qy < Q1D; ++qy) -- { -- int osc = 0; -- for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -- { -- const int D1Dy = (c == 1) ? D1D : D1D - 1; -- const int D1Dx = (c == 0) ? D1D : D1D - 1; -- -- double aX[HDIV_MAX_D1D]; -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- aX[dx] = 0; -- } -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- aX[dx] += div[qy][qx] * ((c == 0) ? Gct(dx,qx) : -- Bot(dx,qx)); -- } -- } -- -- for (int dy = 0; dy < D1Dy; ++dy) -- { -- const double wy = (c == 1) ? Gct(dy,qy) : Bot(dy,qy); -- -- for (int dx = 0; dx < D1Dx; ++dx) -- { -- row[dx + (dy * D1Dx) + osc] += aX[dx] * wy; -- } -- } -- -- osc += D1Dx * D1Dy; -- } // loop c -- } // loop qy -- -- double val = 0.0; -- for (int i=0; i<2*D1D*(D1D - 1); ++i) -- { -- val += row[i] * row[i] * D(i,e); -- } -- diag(rx,ry,e) += val; -- } // loop rx -- } // loop ry -- }); // end of element loop --} -- --void VectorFEDivergenceIntegrator::AssembleDiagonalPA_ADAt(const Vector &D, -- Vector &diag) --{ -- if (dim == 3) -- PAHdivL2AssembleDiagonal_ADAt_3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B, -- mapsC->Gt, mapsO->Bt, pa_data, D, diag); -- else if (dim == 2) -- PAHdivL2AssembleDiagonal_ADAt_2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B, -- mapsC->Gt, mapsO->Bt, pa_data, D, diag); -- else -- { -- MFEM_ABORT("Unsupported dimension!"); -- } --} -+} // namespace internal - - } // namespace mfem -+ -+#endif -diff --git a/fem/integ/bilininteg_interp_pa.cpp b/fem/integ/bilininteg_interp_pa.cpp -new file mode 100644 -index 000000000..3cac18c65 ---- /dev/null -+++ b/fem/integ/bilininteg_interp_pa.cpp -@@ -0,0 +1,1937 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+ -+namespace mfem -+{ -+ -+// Apply to x corresponding to DOFs in H^1 (domain) the (topological) gradient -+// to get a dof in H(curl) (range). You can think of the range as the "test" space -+// and the domain as the "trial" space, but there's no integration. -+static void PAHcurlApplyGradient2D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &B_, -+ const Array &G_, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE); -+ auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[MAX_D1D][MAX_D1D]; -+ -+ // horizontal part -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w[dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[dx][ey] += B(ey, dy) * x(dx, dy, e); -+ } -+ } -+ } -+ -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += G(ex, dx) * w[dx][ey]; -+ } -+ const int local_index = ey*o_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ -+ // vertical part -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w[dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[dx][ey] += G(ey, dy) * x(dx, dy, e); -+ } -+ } -+ } -+ -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += B(ex, dx) * w[dx][ey]; -+ } -+ const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ }); -+} -+ -+// Specialization of PAHcurlApplyGradient2D to the case where B is identity -+static void PAHcurlApplyGradient2DBId(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &G_, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, NE); -+ auto y = Reshape(y_.ReadWrite(), 2 * c_dofs1D * o_dofs1D, NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[MAX_D1D][MAX_D1D]; -+ -+ // horizontal part -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ const int dy = ey; -+ w[dx][ey] = x(dx, dy, e); -+ } -+ } -+ -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += G(ex, dx) * w[dx][ey]; -+ } -+ const int local_index = ey*o_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ -+ // vertical part -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w[dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[dx][ey] += G(ey, dy) * x(dx, dy, e); -+ } -+ } -+ } -+ -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int dx = ex; -+ const double s = w[dx][ey]; -+ const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ }); -+} -+ -+static void PAHcurlApplyGradientTranspose2D( -+ const int c_dofs1D, const int o_dofs1D, const int NE, -+ const Array &B_, const Array &G_, -+ const Vector &x_, Vector &y_) -+{ -+ auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[MAX_D1D][MAX_D1D]; -+ -+ // horizontal part (open x, closed y) -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ w[dy][ex] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ const int local_index = ey*o_dofs1D + ex; -+ w[dy][ex] += B(ey, dy) * x(local_index, e); -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ s += G(ex, dx) * w[dy][ex]; -+ } -+ y(dx, dy, e) += s; -+ } -+ } -+ -+ // vertical part (open y, closed x) -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ w[dy][ex] = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -+ w[dy][ex] += G(ey, dy) * x(local_index, e); -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ s += B(ex, dx) * w[dy][ex]; -+ } -+ y(dx, dy, e) += s; -+ } -+ } -+ }); -+} -+ -+// Specialization of PAHcurlApplyGradientTranspose2D to the case where -+// B is identity -+static void PAHcurlApplyGradientTranspose2DBId( -+ const int c_dofs1D, const int o_dofs1D, const int NE, -+ const Array &G_, -+ const Vector &x_, Vector &y_) -+{ -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), 2 * c_dofs1D * o_dofs1D, NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[MAX_D1D][MAX_D1D]; -+ -+ // horizontal part (open x, closed y) -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ const int ey = dy; -+ const int local_index = ey*o_dofs1D + ex; -+ w[dy][ex] = x(local_index, e); -+ } -+ } -+ -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ s += G(ex, dx) * w[dy][ex]; -+ } -+ y(dx, dy, e) += s; -+ } -+ } -+ -+ // vertical part (open y, closed x) -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ w[dy][ex] = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ const int local_index = c_dofs1D * o_dofs1D + ey*c_dofs1D + ex; -+ w[dy][ex] += G(ey, dy) * x(local_index, e); -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ const int ex = dx; -+ const double s = w[dy][ex]; -+ y(dx, dy, e) += s; -+ } -+ } -+ }); -+} -+ -+static void PAHcurlApplyGradient3D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &B_, -+ const Array &G_, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -+ auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ // --- -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ // --- -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w1[dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += G(ex, dx) * w2[dx][ey][ez]; -+ } -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ // --- -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w1[dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[dx][dy][ez] += B(ez, dz) * x(dx, dy, dz, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += B(ex, dx) * w2[dx][ey][ez]; -+ } -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ // --- -+ -+ // contract in z -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w1[dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[dx][ey][ez] += B(ey, dy) * w1[dx][dy][ez]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += B(ex, dx) * w2[dx][ey][ez]; -+ } -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ } -+ }); -+} -+ -+// Specialization of PAHcurlApplyGradient3D to the case where -+static void PAHcurlApplyGradient3DBId(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &G_, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -+ auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ // --- -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ // --- -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ const int dz = ez; -+ w1[dx][dy][ez] = x(dx, dy, dz, e); -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ const int dy = ey; -+ w2[dx][ey][ez] = w1[dx][dy][ez]; -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += G(ex, dx) * w2[dx][ey][ez]; -+ } -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ // --- -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ const int dz = ez; -+ w1[dx][dy][ez] = x(dx, dy, dz, e); -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[dx][ey][ez] += G(ey, dy) * w1[dx][dy][ez]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int dx = ex; -+ const double s = w2[dx][ey][ez]; -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ // --- -+ -+ // contract in z -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w1[dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[dx][dy][ez] += G(ez, dz) * x(dx, dy, dz, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ const int dy = ey; -+ w2[dx][ey][ez] = w1[dx][dy][ez]; -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int dx = ex; -+ const double s = w2[dx][ey][ez]; -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s; -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlApplyGradientTranspose3D( -+ const int c_dofs1D, const int o_dofs1D, const int NE, -+ const Array &B_, const Array &G_, -+ const Vector &x_, Vector &y_) -+{ -+ auto B = Reshape(B_.Read(), c_dofs1D, c_dofs1D); -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -+ // --- -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ // --- -+ -+ // contract in z -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[ex][ey][dz] = 0.0; -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ w1[ex][ey][dz] += B(ez, dz) * x(local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ w2[ex][dy][dz] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ s += G(ex, dx) * w2[ex][dy][dz]; -+ } -+ y(dx, dy, dz, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ // --- -+ -+ // contract in z -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w1[ex][ey][dz] = 0.0; -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ w1[ex][ey][dz] += B(ez, dz) * x(local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ w2[ex][dy][dz] = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ s += B(ex, dx) * w2[ex][dy][dz]; -+ } -+ y(dx, dy, dz, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ // --- -+ -+ // contract in z -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[ex][ey][dz] = 0.0; -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ w1[ex][ey][dz] += G(ez, dz) * x(local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ w2[ex][dy][dz] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w2[ex][dy][dz] += B(ey, dy) * w1[ex][ey][dz]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ s += B(ex, dx) * w2[ex][dy][dz]; -+ } -+ y(dx, dy, dz, e) += s; -+ } -+ } -+ } -+ }); -+} -+ -+// Specialization of PAHcurlApplyGradientTranspose3D to the case where -+static void PAHcurlApplyGradientTranspose3DBId( -+ const int c_dofs1D, const int o_dofs1D, const int NE, -+ const Array &G_, -+ const Vector &x_, Vector &y_) -+{ -+ auto G = Reshape(G_.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[MAX_D1D][MAX_D1D][MAX_D1D]; -+ // --- -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ // --- -+ -+ // contract in z -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ const int ez = dz; -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ w1[ex][ey][dz] = x(local_index, e); -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ const int ey = dy; -+ w2[ex][dy][dz] = w1[ex][ey][dz]; -+ } -+ } -+ } -+ -+ // contract in x -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ double s = 0.0; -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ s += G(ex, dx) * w2[ex][dy][dz]; -+ } -+ y(dx, dy, dz, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ // --- -+ -+ // contract in z -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ const int ez = dz; -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ w1[ex][ey][dz] = x(local_index, e); -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ w2[ex][dy][dz] = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w2[ex][dy][dz] += G(ey, dy) * w1[ex][ey][dz]; -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ const int ex = dx; -+ double s = w2[ex][dy][dz]; -+ y(dx, dy, dz, e) += s; -+ } -+ } -+ } -+ -+ // --- -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ // --- -+ -+ // contract in z -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[ex][ey][dz] = 0.0; -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ w1[ex][ey][dz] += G(ez, dz) * x(local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int ey = dy; -+ w2[ex][dy][dz] = w1[ex][ey][dz]; -+ } -+ } -+ } -+ -+ // contract in x -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ const int ex = dx; -+ double s = w2[ex][dy][dz]; -+ y(dx, dy, dz, e) += s; -+ } -+ } -+ } -+ }); -+} -+ -+void GradientInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const NodalTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, "Bad dimension!"); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, "Bad dimension!"); -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), -+ "Orders do not match!"); -+ ne = trial_fes.GetNE(); -+ -+ const int order = trial_el->GetOrder(); -+ dofquad_fe = new H1_SegmentElement(order, trial_el->GetBasisType()); -+ mfem::QuadratureFunctions1D qf1d; -+ mfem::IntegrationRule closed_ir; -+ closed_ir.SetSize(order + 1); -+ qf1d.GaussLobatto(order + 1, &closed_ir); -+ mfem::IntegrationRule open_ir; -+ open_ir.SetSize(order); -+ qf1d.GaussLegendre(order, &open_ir); -+ -+ maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -+ o_dofs1D = maps_O_C->nqpt; -+ if (trial_el->GetBasisType() == BasisType::GaussLobatto) -+ { -+ B_id = true; -+ c_dofs1D = maps_O_C->ndof; -+ } -+ else -+ { -+ B_id = false; -+ maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -+ c_dofs1D = maps_C_C->nqpt; -+ } -+} -+ -+void GradientInterpolator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ if (B_id) -+ { -+ PAHcurlApplyGradient3DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); -+ } -+ else -+ { -+ PAHcurlApplyGradient3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->G, x, y); -+ } -+ } -+ else if (dim == 2) -+ { -+ if (B_id) -+ { -+ PAHcurlApplyGradient2DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); -+ } -+ else -+ { -+ PAHcurlApplyGradient2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->G, -+ x, y); -+ } -+ } -+ else -+ { -+ mfem_error("Bad dimension!"); -+ } -+} -+ -+void GradientInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ if (B_id) -+ { -+ PAHcurlApplyGradientTranspose3DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); -+ } -+ else -+ { -+ PAHcurlApplyGradientTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->G, x, y); -+ } -+ } -+ else if (dim == 2) -+ { -+ if (B_id) -+ { -+ PAHcurlApplyGradientTranspose2DBId(c_dofs1D, o_dofs1D, ne, -+ maps_O_C->G, x, y); -+ } -+ else -+ { -+ PAHcurlApplyGradientTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->G, x, y); -+ } -+ } -+ else -+ { -+ mfem_error("Bad dimension!"); -+ } -+} -+ -+static void PAHcurlVecH1IdentityApply2D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, 2, NE); -+ auto y = Reshape(y_.ReadWrite(), (2 * c_dofs1D * o_dofs1D), NE); -+ -+ auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[2][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y) -+ -+ // contract in y -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[j][dx][ey] += Bc(ey, dy) * x(dx, dy, j, e); -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bo(ex, dx) * w[j][dx][ey]; -+ } -+ const int local_index = ey*o_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x) -+ -+ // contract in y -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w[j][dx][ey] += Bo(ey, dy) * x(dx, dy, j, e); -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bc(ex, dx) * w[j][dx][ey]; -+ } -+ const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlVecH1IdentityApplyTranspose2D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), (2 * c_dofs1D * o_dofs1D), NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, 2, NE); -+ -+ auto vk = Reshape(pa_data.Read(), 2, (2 * c_dofs1D * o_dofs1D), NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ //constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w[2][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y) -+ -+ // contract in x -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -+ } -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ const int local_index = ey*o_dofs1D + ex; -+ const double xd = x(local_index, e); -+ -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] += Bo(ex, dx) * xd * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ s += w[j][dx][ey] * Bc(ey, dy); -+ } -+ y(dx, dy, j, e) += s; -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x) -+ -+ // contract in x -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) { w[j][dx][ey] = 0.0; } -+ } -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int local_index = c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ const double xd = x(local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ w[j][dx][ey] += Bc(ex, dx) * xd * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<2; ++j) -+ { -+ double s = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ s += w[j][dx][ey] * Bo(ey, dy); -+ } -+ y(dx, dy, j, e) += s; -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlVecH1IdentityApply3D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -+ auto y = Reshape(y_.ReadWrite(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ -+ auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -+ NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bo(ex, dx) * w2[j][dx][ey][ez]; -+ } -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ -+ // contract in z -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[j][dx][dy][ez] += Bc(ez, dz) * x(dx, dy, dz, j, e); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[j][dx][ey][ez] += Bo(ey, dy) * w1[j][dx][dy][ez]; -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bc(ex, dx) * w2[j][dx][ey][ez]; -+ } -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ -+ // contract in z -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ w1[j][dx][dy][ez] += Bo(ez, dz) * x(dx, dy, dz, j, e); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ w2[j][dx][ey][ez] += Bc(ey, dy) * w1[j][dx][dy][ez]; -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in x -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ s += Bc(ex, dx) * w2[j][dx][ey][ez]; -+ } -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ y(local_index, e) += s * vk(j, local_index, e); -+ } -+ } -+ } -+ } -+ }); -+} -+ -+static void PAHcurlVecH1IdentityApplyTranspose3D(const int c_dofs1D, -+ const int o_dofs1D, -+ const int NE, -+ const Array &Bclosed, -+ const Array &Bopen, -+ const Vector &pa_data, -+ const Vector &x_, -+ Vector &y_) -+{ -+ auto Bc = Reshape(Bclosed.Read(), c_dofs1D, c_dofs1D); -+ auto Bo = Reshape(Bopen.Read(), o_dofs1D, c_dofs1D); -+ -+ auto x = Reshape(x_.Read(), (3 * c_dofs1D * c_dofs1D * o_dofs1D), NE); -+ auto y = Reshape(y_.ReadWrite(), c_dofs1D, c_dofs1D, c_dofs1D, 3, NE); -+ -+ auto vk = Reshape(pa_data.Read(), 3, (3 * c_dofs1D * c_dofs1D * o_dofs1D), -+ NE); -+ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ -+ MFEM_VERIFY(c_dofs1D <= MAX_D1D && o_dofs1D <= c_dofs1D, ""); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double w1[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ double w2[3][MAX_D1D][MAX_D1D][MAX_D1D]; -+ -+ // dofs that point parallel to x-axis (open in x, closed in y, z) -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ } -+ for (int ex = 0; ex < o_dofs1D; ++ex) -+ { -+ const int local_index = ez*c_dofs1D*o_dofs1D + ey*o_dofs1D + ex; -+ const double xv = x(local_index, e) * vk(j, local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] += xv * Bo(ex, dx); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in z -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ s += w1[j][dx][dy][ez] * Bc(ez, dz); -+ } -+ y(dx, dy, dz, j, e) += s; -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to y-axis (open in y, closed in x, z) -+ -+ // contract in x -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ } -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int local_index = c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*o_dofs1D + ey*c_dofs1D + ex; -+ const double xv = x(local_index, e) * vk(j, local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] += xv * Bc(ex, dx); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int ey = 0; ey < o_dofs1D; ++ey) -+ { -+ w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bo(ey, dy); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in z -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int ez = 0; ez < c_dofs1D; ++ez) -+ { -+ s += w1[j][dx][dy][ez] * Bc(ez, dz); -+ } -+ y(dx, dy, dz, j, e) += s; -+ } -+ } -+ } -+ } -+ -+ // dofs that point parallel to z-axis (open in z, closed in x, y) -+ -+ // contract in x -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] = 0.0; -+ } -+ for (int ex = 0; ex < c_dofs1D; ++ex) -+ { -+ const int local_index = 2*c_dofs1D*c_dofs1D*o_dofs1D + -+ ez*c_dofs1D*c_dofs1D + ey*c_dofs1D + ex; -+ const double xv = x(local_index, e) * vk(j, local_index, e); -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ w2[j][dx][ey][ez] += xv * Bc(ex, dx); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in y -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ w1[j][dx][dy][ez] = 0.0; -+ for (int ey = 0; ey < c_dofs1D; ++ey) -+ { -+ w1[j][dx][dy][ez] += w2[j][dx][ey][ez] * Bc(ey, dy); -+ } -+ } -+ } -+ } -+ } -+ -+ // contract in z -+ for (int dx = 0; dx < c_dofs1D; ++dx) -+ { -+ for (int dy = 0; dy < c_dofs1D; ++dy) -+ { -+ for (int dz = 0; dz < c_dofs1D; ++dz) -+ { -+ for (int j=0; j<3; ++j) -+ { -+ double s = 0.0; -+ for (int ez = 0; ez < o_dofs1D; ++ez) -+ { -+ s += w1[j][dx][dy][ez] * Bo(ez, dz); -+ } -+ y(dx, dy, dz, j, e) += s; -+ } -+ } -+ } -+ } -+ }); -+} -+ -+void IdentityInterpolator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const NodalTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -+ -+ ne = trial_fes.GetNE(); -+ -+ const int order = trial_el->GetOrder(); -+ dofquad_fe = new H1_SegmentElement(order); -+ mfem::QuadratureFunctions1D qf1d; -+ mfem::IntegrationRule closed_ir; -+ closed_ir.SetSize(order + 1); -+ qf1d.GaussLobatto(order + 1, &closed_ir); -+ mfem::IntegrationRule open_ir; -+ open_ir.SetSize(order); -+ qf1d.GaussLegendre(order, &open_ir); -+ -+ maps_C_C = &dofquad_fe->GetDofToQuad(closed_ir, DofToQuad::TENSOR); -+ maps_O_C = &dofquad_fe->GetDofToQuad(open_ir, DofToQuad::TENSOR); -+ -+ o_dofs1D = maps_O_C->nqpt; -+ c_dofs1D = maps_C_C->nqpt; -+ MFEM_VERIFY(maps_O_C->ndof == c_dofs1D && -+ maps_C_C->ndof == c_dofs1D, "Discrepancy in the number of DOFs"); -+ -+ const int ndof_test = (dim == 3) ? 3 * c_dofs1D * c_dofs1D * o_dofs1D -+ : 2 * c_dofs1D * o_dofs1D; -+ -+ const IntegrationRule & Nodes = test_el->GetNodes(); -+ -+ pa_data.SetSize(dim * ndof_test * ne, Device::GetMemoryType()); -+ auto op = Reshape(pa_data.HostWrite(), dim, ndof_test, ne); -+ -+ const Array &dofmap = test_el->GetDofMap(); -+ -+ if (dim == 3) -+ { -+ // Note that ND_HexahedronElement uses 6 vectors in tk rather than 3, with -+ // the last 3 having negative signs. Here the signs are all positive, as -+ // signs are applied in ElementRestriction. -+ -+ const double tk[9] = { 1.,0.,0., 0.,1.,0., 0.,0.,1. }; -+ -+ for (int c=0; c<3; ++c) -+ { -+ for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -+ -+ for (int e=0; eGetElementTransformation(e); -+ tr->SetIntPoint(&Nodes.IntPoint(id)); -+ tr->Jacobian().Mult(tk + dof2tk*dim, v); -+ -+ for (int j=0; j<3; ++j) -+ { -+ op(j,d,e) = v[j]; -+ } -+ } -+ } -+ } -+ } -+ else // 2D case -+ { -+ const double tk[4] = { 1.,0., 0.,1. }; -+ for (int c=0; c<2; ++c) -+ { -+ for (int i=0; i= 0) ? dofmap[d] : -1 - dofmap[d]; -+ -+ for (int e=0; eGetElementTransformation(e); -+ tr->SetIntPoint(&Nodes.IntPoint(id)); -+ tr->Jacobian().Mult(tk + dof2tk*dim, v); -+ -+ for (int j=0; j<2; ++j) -+ { -+ op(j,d,e) = v[j]; -+ } -+ } -+ } -+ } -+ } -+} -+ -+void IdentityInterpolator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ PAHcurlVecH1IdentityApply3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -+ pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlVecH1IdentityApply2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, maps_O_C->B, -+ pa_data, x, y); -+ } -+ else -+ { -+ mfem_error("Bad dimension!"); -+ } -+} -+ -+void IdentityInterpolator::AddMultTransposePA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ PAHcurlVecH1IdentityApplyTranspose3D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->B, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlVecH1IdentityApplyTranspose2D(c_dofs1D, o_dofs1D, ne, maps_C_C->B, -+ maps_O_C->B, pa_data, x, y); -+ } -+ else -+ { -+ mfem_error("Bad dimension!"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/bilininteg_mass_ea.cpp b/fem/integ/bilininteg_mass_ea.cpp -similarity index 88% -rename from fem/bilininteg_mass_ea.cpp -rename to fem/integ/bilininteg_mass_ea.cpp -index cb1e7e064..315b9da8f 100644 ---- a/fem/bilininteg_mass_ea.cpp -+++ b/fem/integ/bilininteg_mass_ea.cpp -@@ -9,9 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" - - namespace mfem - { -@@ -21,7 +21,6 @@ static void EAMassAssemble1D(const int NE, - const Array &basis, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -53,14 +52,7 @@ static void EAMassAssemble1D(const int NE, - { - val += r_Bi[k1] * r_Bj[k1] * D(k1, e); - } -- if (add) -- { -- M(i1, j1, e) += val; -- } -- else -- { -- M(i1, j1, e) = val; -- } -+ M(i1, j1, e) += val; - } - } - }); -@@ -71,7 +63,6 @@ static void EAMassAssemble2D(const int NE, - const Array &basis, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -123,14 +114,7 @@ static void EAMassAssemble2D(const int NE, - * s_D[k1][k2]; - } - } -- if (add) -- { -- M(i1, i2, j1, j2, e) += val; -- } -- else -- { -- M(i1, i2, j1, j2, e) = val; -- } -+ M(i1, i2, j1, j2, e) += val; - } - } - } -@@ -143,7 +127,6 @@ static void EAMassAssemble3D(const int NE, - const Array &basis, - const Vector &padata, - Vector &eadata, -- const bool add, - const int d1d = 0, - const int q1d = 0) - { -@@ -237,14 +220,7 @@ static void EAMassAssemble3D(const int NE, - } - } - } -- if (add) -- { -- M(i1, i2, i3, j1, j2, j3, e) += val; -- } -- else -- { -- M(i1, i2, i3, j1, j2, j3, e) = val; -- } -+ M(i1, i2, i3, j1, j2, j3, e) += val; - } - } - } -@@ -255,8 +231,7 @@ static void EAMassAssemble3D(const int NE, - } - - void MassIntegrator::AssembleEA(const FiniteElementSpace &fes, -- Vector &ea_data, -- const bool add) -+ Vector &ea_data) - { - AssemblePA(fes); - ne = fes.GetMesh()->GetNE(); -@@ -265,15 +240,15 @@ void MassIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EAMassAssemble1D<2,2>(ne,B,pa_data,ea_data,add); -- case 0x33: return EAMassAssemble1D<3,3>(ne,B,pa_data,ea_data,add); -- case 0x44: return EAMassAssemble1D<4,4>(ne,B,pa_data,ea_data,add); -- case 0x55: return EAMassAssemble1D<5,5>(ne,B,pa_data,ea_data,add); -- case 0x66: return EAMassAssemble1D<6,6>(ne,B,pa_data,ea_data,add); -- case 0x77: return EAMassAssemble1D<7,7>(ne,B,pa_data,ea_data,add); -- case 0x88: return EAMassAssemble1D<8,8>(ne,B,pa_data,ea_data,add); -- case 0x99: return EAMassAssemble1D<9,9>(ne,B,pa_data,ea_data,add); -- default: return EAMassAssemble1D(ne,B,pa_data,ea_data,add, -+ case 0x22: return EAMassAssemble1D<2,2>(ne,B,pa_data,ea_data); -+ case 0x33: return EAMassAssemble1D<3,3>(ne,B,pa_data,ea_data); -+ case 0x44: return EAMassAssemble1D<4,4>(ne,B,pa_data,ea_data); -+ case 0x55: return EAMassAssemble1D<5,5>(ne,B,pa_data,ea_data); -+ case 0x66: return EAMassAssemble1D<6,6>(ne,B,pa_data,ea_data); -+ case 0x77: return EAMassAssemble1D<7,7>(ne,B,pa_data,ea_data); -+ case 0x88: return EAMassAssemble1D<8,8>(ne,B,pa_data,ea_data); -+ case 0x99: return EAMassAssemble1D<9,9>(ne,B,pa_data,ea_data); -+ default: return EAMassAssemble1D(ne,B,pa_data,ea_data, - dofs1D,quad1D); - } - } -@@ -281,15 +256,15 @@ void MassIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x22: return EAMassAssemble2D<2,2>(ne,B,pa_data,ea_data,add); -- case 0x33: return EAMassAssemble2D<3,3>(ne,B,pa_data,ea_data,add); -- case 0x44: return EAMassAssemble2D<4,4>(ne,B,pa_data,ea_data,add); -- case 0x55: return EAMassAssemble2D<5,5>(ne,B,pa_data,ea_data,add); -- case 0x66: return EAMassAssemble2D<6,6>(ne,B,pa_data,ea_data,add); -- case 0x77: return EAMassAssemble2D<7,7>(ne,B,pa_data,ea_data,add); -- case 0x88: return EAMassAssemble2D<8,8>(ne,B,pa_data,ea_data,add); -- case 0x99: return EAMassAssemble2D<9,9>(ne,B,pa_data,ea_data,add); -- default: return EAMassAssemble2D(ne,B,pa_data,ea_data,add, -+ case 0x22: return EAMassAssemble2D<2,2>(ne,B,pa_data,ea_data); -+ case 0x33: return EAMassAssemble2D<3,3>(ne,B,pa_data,ea_data); -+ case 0x44: return EAMassAssemble2D<4,4>(ne,B,pa_data,ea_data); -+ case 0x55: return EAMassAssemble2D<5,5>(ne,B,pa_data,ea_data); -+ case 0x66: return EAMassAssemble2D<6,6>(ne,B,pa_data,ea_data); -+ case 0x77: return EAMassAssemble2D<7,7>(ne,B,pa_data,ea_data); -+ case 0x88: return EAMassAssemble2D<8,8>(ne,B,pa_data,ea_data); -+ case 0x99: return EAMassAssemble2D<9,9>(ne,B,pa_data,ea_data); -+ default: return EAMassAssemble2D(ne,B,pa_data,ea_data, - dofs1D,quad1D); - } - } -@@ -297,14 +272,14 @@ void MassIntegrator::AssembleEA(const FiniteElementSpace &fes, - { - switch ((dofs1D << 4 ) | quad1D) - { -- case 0x23: return EAMassAssemble3D<2,3>(ne,B,pa_data,ea_data,add); -- case 0x34: return EAMassAssemble3D<3,4>(ne,B,pa_data,ea_data,add); -- case 0x45: return EAMassAssemble3D<4,5>(ne,B,pa_data,ea_data,add); -- case 0x56: return EAMassAssemble3D<5,6>(ne,B,pa_data,ea_data,add); -- case 0x67: return EAMassAssemble3D<6,7>(ne,B,pa_data,ea_data,add); -- case 0x78: return EAMassAssemble3D<7,8>(ne,B,pa_data,ea_data,add); -- case 0x89: return EAMassAssemble3D<8,9>(ne,B,pa_data,ea_data,add); -- default: return EAMassAssemble3D(ne,B,pa_data,ea_data,add, -+ case 0x23: return EAMassAssemble3D<2,3>(ne,B,pa_data,ea_data); -+ case 0x34: return EAMassAssemble3D<3,4>(ne,B,pa_data,ea_data); -+ case 0x45: return EAMassAssemble3D<4,5>(ne,B,pa_data,ea_data); -+ case 0x56: return EAMassAssemble3D<5,6>(ne,B,pa_data,ea_data); -+ case 0x67: return EAMassAssemble3D<6,7>(ne,B,pa_data,ea_data); -+ case 0x78: return EAMassAssemble3D<7,8>(ne,B,pa_data,ea_data); -+ case 0x89: return EAMassAssemble3D<8,9>(ne,B,pa_data,ea_data); -+ default: return EAMassAssemble3D(ne,B,pa_data,ea_data, - dofs1D,quad1D); - } - } -diff --git a/fem/integ/bilininteg_mass_kernels.hpp b/fem/integ/bilininteg_mass_kernels.hpp -new file mode 100644 -index 000000000..c26f242ef ---- /dev/null -+++ b/fem/integ/bilininteg_mass_kernels.hpp -@@ -0,0 +1,1329 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#ifndef MFEM_BILININTEG_MASS_KERNELS_HPP -+#define MFEM_BILININTEG_MASS_KERNELS_HPP -+ -+#include "../../config/config.hpp" -+#include "../../general/forall.hpp" -+#include "../../linalg/dtensor.hpp" -+ -+namespace mfem -+{ -+ -+namespace internal -+{ -+ -+MFEM_HOST_DEVICE inline -+void PAMassAssembleDiagonal1D(const int NE, -+ const Array &b, -+ const Vector &d, -+ Vector &y, -+ const int D1D, -+ const int Q1D) -+{ -+ MFEM_VERIFY(D1D <= MAX_D1D, ""); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -+ auto B = Reshape(b.Read(), Q1D, D1D); -+ auto D = Reshape(d.Read(), Q1D, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx, e) = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ Y(dx, e) += B(qx, dx) * B(qx, dx) * D(qx, e); -+ } -+ } -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PAMassAssembleDiagonal2D(const int NE, -+ const Array &b, -+ const Vector &d, -+ Vector &y, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ MFEM_VERIFY(D1D <= MAX_D1D, ""); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -+ auto B = Reshape(b.Read(), Q1D, D1D); -+ auto D = Reshape(d.Read(), Q1D, Q1D, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ double QD[MQ1][MD1]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ QD[qx][dy] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ QD[qx][dy] += B(qy, dy) * B(qy, dy) * D(qx, qy, e); -+ } -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ Y(dx,dy,e) += B(qx, dx) * B(qx, dx) * QD[qx][dy]; -+ } -+ } -+ } -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAMassAssembleDiagonal2D(const int NE, -+ const Array &b_, -+ const Vector &d_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int NBZ = T_NBZ ? T_NBZ : 1; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ MFEM_VERIFY(D1D <= MD1, ""); -+ MFEM_VERIFY(Q1D <= MQ1, ""); -+ auto b = Reshape(b_.Read(), Q1D, D1D); -+ auto D = Reshape(d_.Read(), Q1D, Q1D, NE); -+ auto Y = Reshape(y_.ReadWrite(), D1D, D1D, NE); -+ mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int tidz = MFEM_THREAD_ID(z); -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int NBZ = T_NBZ ? T_NBZ : 1; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ MFEM_SHARED double B[MQ1][MD1]; -+ MFEM_SHARED double QDZ[NBZ][MQ1][MD1]; -+ double (*QD)[MD1] = (double (*)[MD1])(QDZ + tidz); -+ if (tidz == 0) -+ { -+ MFEM_FOREACH_THREAD(d,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(q,x,Q1D) -+ { -+ B[q][d] = b(q,d); -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ QD[qx][dy] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ QD[qx][dy] += B[qy][dy] * B[qy][dy] * D(qx, qy, e); -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ // might need absolute values on next line -+ Y(dx,dy,e) += B[qx][dx] * B[qx][dx] * QD[qx][dy]; -+ } -+ } -+ } -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PAMassAssembleDiagonal3D(const int NE, -+ const Array &b, -+ const Vector &d, -+ Vector &y, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ MFEM_VERIFY(D1D <= MAX_D1D, ""); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -+ auto B = Reshape(b.Read(), Q1D, D1D); -+ auto D = Reshape(d.Read(), Q1D, Q1D, Q1D, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ double QQD[MQ1][MQ1][MD1]; -+ double QDD[MQ1][MD1][MD1]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ QQD[qx][qy][dz] = 0.0; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ QQD[qx][qy][dz] += B(qz, dz) * B(qz, dz) * D(qx, qy, qz, e); -+ } -+ } -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ QDD[qx][dy][dz] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ QDD[qx][dy][dz] += B(qy, dy) * B(qy, dy) * QQD[qx][qy][dz]; -+ } -+ } -+ } -+ } -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ double t = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ t += B(qx, dx) * B(qx, dx) * QDD[qx][dy][dz]; -+ } -+ Y(dx, dy, dz, e) += t; -+ } -+ } -+ } -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAMassAssembleDiagonal3D(const int NE, -+ const Array &b_, -+ const Vector &d_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ MFEM_VERIFY(D1D <= MD1, ""); -+ MFEM_VERIFY(Q1D <= MQ1, ""); -+ auto b = Reshape(b_.Read(), Q1D, D1D); -+ auto D = Reshape(d_.Read(), Q1D, Q1D, Q1D, NE); -+ auto Y = Reshape(y_.ReadWrite(), D1D, D1D, D1D, NE); -+ mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int tidz = MFEM_THREAD_ID(z); -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ MFEM_SHARED double B[MQ1][MD1]; -+ MFEM_SHARED double QQD[MQ1][MQ1][MD1]; -+ MFEM_SHARED double QDD[MQ1][MD1][MD1]; -+ if (tidz == 0) -+ { -+ MFEM_FOREACH_THREAD(d,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(q,x,Q1D) -+ { -+ B[q][d] = b(q,d); -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(dz,z,D1D) -+ { -+ QQD[qx][qy][dz] = 0.0; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ QQD[qx][qy][dz] += B[qz][dz] * B[qz][dz] * D(qx, qy, qz, e); -+ } -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(dz,z,D1D) -+ { -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ QDD[qx][dy][dz] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ QDD[qx][dy][dz] += B[qy][dy] * B[qy][dy] * QQD[qx][qy][dz]; -+ } -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dz,z,D1D) -+ { -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ double t = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ t += B[qx][dx] * B[qx][dx] * QDD[qx][dy][dz]; -+ } -+ Y(dx, dy, dz, e) += t; -+ } -+ } -+ } -+ }); -+} -+ -+MFEM_HOST_DEVICE inline -+void PAMassAssembleDiagonal(const int dim, const int D1D, -+ const int Q1D, const int NE, -+ const Array &B, -+ const Vector &D, -+ Vector &Y) -+{ -+ if (dim == 1) -+ { -+ return PAMassAssembleDiagonal1D(NE,B,D,Y,D1D,Q1D); -+ } -+ else if (dim == 2) -+ { -+ switch ((D1D << 4 ) | Q1D) -+ { -+ case 0x22: return SmemPAMassAssembleDiagonal2D<2,2,16>(NE,B,D,Y); -+ case 0x33: return SmemPAMassAssembleDiagonal2D<3,3,16>(NE,B,D,Y); -+ case 0x44: return SmemPAMassAssembleDiagonal2D<4,4,8>(NE,B,D,Y); -+ case 0x55: return SmemPAMassAssembleDiagonal2D<5,5,8>(NE,B,D,Y); -+ case 0x66: return SmemPAMassAssembleDiagonal2D<6,6,4>(NE,B,D,Y); -+ case 0x77: return SmemPAMassAssembleDiagonal2D<7,7,4>(NE,B,D,Y); -+ case 0x88: return SmemPAMassAssembleDiagonal2D<8,8,2>(NE,B,D,Y); -+ case 0x99: return SmemPAMassAssembleDiagonal2D<9,9,2>(NE,B,D,Y); -+ default: return PAMassAssembleDiagonal2D(NE,B,D,Y,D1D,Q1D); -+ } -+ } -+ else if (dim == 3) -+ { -+ switch ((D1D << 4 ) | Q1D) -+ { -+ case 0x23: return SmemPAMassAssembleDiagonal3D<2,3>(NE,B,D,Y); -+ case 0x24: return SmemPAMassAssembleDiagonal3D<2,4>(NE,B,D,Y); -+ case 0x26: return SmemPAMassAssembleDiagonal3D<2,6>(NE,B,D,Y); -+ case 0x34: return SmemPAMassAssembleDiagonal3D<3,4>(NE,B,D,Y); -+ case 0x35: return SmemPAMassAssembleDiagonal3D<3,5>(NE,B,D,Y); -+ case 0x45: return SmemPAMassAssembleDiagonal3D<4,5>(NE,B,D,Y); -+ case 0x48: return SmemPAMassAssembleDiagonal3D<4,8>(NE,B,D,Y); -+ case 0x56: return SmemPAMassAssembleDiagonal3D<5,6>(NE,B,D,Y); -+ case 0x67: return SmemPAMassAssembleDiagonal3D<6,7>(NE,B,D,Y); -+ case 0x78: return SmemPAMassAssembleDiagonal3D<7,8>(NE,B,D,Y); -+ case 0x89: return SmemPAMassAssembleDiagonal3D<8,9>(NE,B,D,Y); -+ default: return PAMassAssembleDiagonal3D(NE,B,D,Y,D1D,Q1D); -+ } -+ } -+ MFEM_ABORT("Unknown kernel."); -+} -+ -+#ifdef MFEM_USE_OCCA -+// OCCA PA Mass Apply 2D kernel -+MFEM_HOST_DEVICE inline -+void OccaPAMassApply2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &B, -+ const Array &Bt, -+ const Vector &D, -+ const Vector &X, -+ Vector &Y) -+{ -+ occa::properties props; -+ props["defines/D1D"] = D1D; -+ props["defines/Q1D"] = Q1D; -+ const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size()); -+ const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size()); -+ const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size()); -+ const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size()); -+ occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size()); -+ const occa_id_t id = std::make_pair(D1D,Q1D); -+ if (!Device::Allows(Backend::OCCA_CUDA)) -+ { -+ static occa_kernel_t OccaMassApply2D_cpu; -+ if (OccaMassApply2D_cpu.find(id) == OccaMassApply2D_cpu.end()) -+ { -+ const occa::kernel MassApply2D_CPU = -+ mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -+ "MassApply2D_CPU", props); -+ OccaMassApply2D_cpu.emplace(id, MassApply2D_CPU); -+ } -+ OccaMassApply2D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -+ } -+ else -+ { -+ static occa_kernel_t OccaMassApply2D_gpu; -+ if (OccaMassApply2D_gpu.find(id) == OccaMassApply2D_gpu.end()) -+ { -+ const occa::kernel MassApply2D_GPU = -+ mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -+ "MassApply2D_GPU", props); -+ OccaMassApply2D_gpu.emplace(id, MassApply2D_GPU); -+ } -+ OccaMassApply2D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -+ } -+} -+ -+// OCCA PA Mass Apply 3D kernel -+MFEM_HOST_DEVICE inline -+void OccaPAMassApply3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &B, -+ const Array &Bt, -+ const Vector &D, -+ const Vector &X, -+ Vector &Y) -+{ -+ occa::properties props; -+ props["defines/D1D"] = D1D; -+ props["defines/Q1D"] = Q1D; -+ const occa::memory o_B = OccaMemoryRead(B.GetMemory(), B.Size()); -+ const occa::memory o_Bt = OccaMemoryRead(Bt.GetMemory(), Bt.Size()); -+ const occa::memory o_D = OccaMemoryRead(D.GetMemory(), D.Size()); -+ const occa::memory o_X = OccaMemoryRead(X.GetMemory(), X.Size()); -+ occa::memory o_Y = OccaMemoryReadWrite(Y.GetMemory(), Y.Size()); -+ const occa_id_t id = std::make_pair(D1D,Q1D); -+ if (!Device::Allows(Backend::OCCA_CUDA)) -+ { -+ static occa_kernel_t OccaMassApply3D_cpu; -+ if (OccaMassApply3D_cpu.find(id) == OccaMassApply3D_cpu.end()) -+ { -+ const occa::kernel MassApply3D_CPU = -+ mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -+ "MassApply3D_CPU", props); -+ OccaMassApply3D_cpu.emplace(id, MassApply3D_CPU); -+ } -+ OccaMassApply3D_cpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -+ } -+ else -+ { -+ static occa_kernel_t OccaMassApply3D_gpu; -+ if (OccaMassApply3D_gpu.find(id) == OccaMassApply3D_gpu.end()) -+ { -+ const occa::kernel MassApply3D_GPU = -+ mfem::OccaDev().buildKernel("occa://mfem/fem/occa.okl", -+ "MassApply3D_GPU", props); -+ OccaMassApply3D_gpu.emplace(id, MassApply3D_GPU); -+ } -+ OccaMassApply3D_gpu.at(id)(NE, o_B, o_Bt, o_D, o_X, o_Y); -+ } -+} -+#endif // MFEM_USE_OCCA -+ -+MFEM_HOST_DEVICE inline -+void PAMassApply1D_Element(const int e, -+ const int NE, -+ const double *b_, -+ const double *bt_, -+ const double *d_, -+ const double *x_, -+ double *y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = d1d; -+ const int Q1D = q1d; -+ auto B = ConstDeviceMatrix(b_, Q1D, D1D); -+ auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D); -+ auto D = ConstDeviceMatrix(d_, Q1D, NE); -+ auto X = ConstDeviceMatrix(x_, D1D, NE); -+ auto Y = DeviceMatrix(y_, D1D, NE); -+ -+ constexpr int max_Q1D = MAX_Q1D; -+ double XQ[max_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ XQ[qx] = 0.0; -+ } -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double s = X(dx,e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ XQ[qx] += B(qx,dx)*s; -+ } -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double q = XQ[qx]*D(qx,e); -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx,e) += Bt(dx,qx) * q; -+ } -+ } -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PAMassApply2D_Element(const int e, -+ const int NE, -+ const double *b_, -+ const double *bt_, -+ const double *d_, -+ const double *x_, -+ double *y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = d1d; -+ const int Q1D = q1d; -+ auto B = ConstDeviceMatrix(b_, Q1D, D1D); -+ auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D); -+ auto D = ConstDeviceCube(d_, Q1D, Q1D, NE); -+ auto X = ConstDeviceCube(x_, D1D, D1D, NE); -+ auto Y = DeviceCube(y_, D1D, D1D, NE); -+ -+ if (!ACCUMULATE) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx, dy, e) = 0.0; -+ } -+ } -+ } -+ -+ constexpr int max_D1D = MAX_D1D; -+ constexpr int max_Q1D = MAX_Q1D; -+ double sol_xy[max_Q1D][max_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xy[qy][qx] = 0.0; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ double sol_x[max_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ sol_x[qy] = 0.0; -+ } -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double s = X(dx,dy,e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_x[qx] += B(qx,dx)* s; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double d2q = B(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xy[qy][qx] += d2q * sol_x[qx]; -+ } -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xy[qy][qx] *= D(qx,qy,e); -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double sol_x[max_D1D]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ sol_x[dx] = 0.0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double s = sol_xy[qy][qx]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ sol_x[dx] += Bt(dx,qx) * s; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ const double q2d = Bt(dy,qy); -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx,dy,e) += q2d * sol_x[dx]; -+ } -+ } -+ } -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAMassApply2D_Element(const int e, -+ const int NE, -+ const double *b_, -+ const double *d_, -+ const double *x_, -+ double *y_, -+ int d1d = 0, -+ int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int NBZ = T_NBZ ? T_NBZ : 1; -+ -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1; -+ -+ auto b = ConstDeviceMatrix(b_, Q1D, D1D); -+ auto D = ConstDeviceCube(d_, Q1D, Q1D, NE); -+ auto x = ConstDeviceCube(x_, D1D, D1D, NE); -+ auto Y = DeviceCube(y_, D1D, D1D, NE); -+ -+ const int tidz = MFEM_THREAD_ID(z); -+ -+ MFEM_SHARED double BBt[MQ1*MD1]; -+ double (*B)[MD1] = (double (*)[MD1]) BBt; -+ double (*Bt)[MQ1] = (double (*)[MQ1]) BBt; -+ MFEM_SHARED double sm0[NBZ][MDQ*MDQ]; -+ MFEM_SHARED double sm1[NBZ][MDQ*MDQ]; -+ double (*X)[MD1] = (double (*)[MD1]) (sm0 + tidz); -+ double (*DQ)[MQ1] = (double (*)[MQ1]) (sm1 + tidz); -+ double (*QQ)[MQ1] = (double (*)[MQ1]) (sm0 + tidz); -+ double (*QD)[MD1] = (double (*)[MD1]) (sm1 + tidz); -+ -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ X[dy][dx] = x(dx,dy,e); -+ } -+ } -+ if (tidz == 0) -+ { -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(q,x,Q1D) -+ { -+ B[q][dy] = b(q,dy); -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ double dq = 0.0; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ dq += X[dy][dx] * B[qx][dx]; -+ } -+ DQ[dy][qx] = dq; -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ double qq = 0.0; -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ qq += DQ[dy][qx] * B[qy][dy]; -+ } -+ QQ[qy][qx] = qq * D(qx, qy, e); -+ } -+ } -+ MFEM_SYNC_THREAD; -+ if (tidz == 0) -+ { -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(q,x,Q1D) -+ { -+ Bt[dy][q] = b(q,dy); -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ double dq = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ dq += QQ[qy][qx] * Bt[dx][qx]; -+ } -+ QD[qy][dx] = dq; -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ double dd = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ dd += (QD[qy][dx] * Bt[dy][qy]); -+ } -+ if (ACCUMULATE) -+ { -+ Y(dx, dy, e) += dd; -+ } -+ else -+ { -+ Y(dx, dy, e) = dd; -+ } -+ } -+ } -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PAMassApply3D_Element(const int e, -+ const int NE, -+ const double *b_, -+ const double *bt_, -+ const double *d_, -+ const double *x_, -+ double *y_, -+ const int d1d, -+ const int q1d) -+{ -+ const int D1D = d1d; -+ const int Q1D = q1d; -+ auto B = ConstDeviceMatrix(b_, Q1D, D1D); -+ auto Bt = ConstDeviceMatrix(bt_, D1D, Q1D); -+ auto D = DeviceTensor<4,const double>(d_, Q1D, Q1D, Q1D, NE); -+ auto X = DeviceTensor<4,const double>(x_, D1D, D1D, D1D, NE); -+ auto Y = DeviceTensor<4,double>(y_, D1D, D1D, D1D, NE); -+ -+ if (!ACCUMULATE) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx, dy, dz, e) = 0.0; -+ } -+ } -+ } -+ } -+ -+ constexpr int max_D1D = MAX_D1D; -+ constexpr int max_Q1D = MAX_Q1D; -+ double sol_xyz[max_Q1D][max_Q1D][max_Q1D]; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xyz[qz][qy][qx] = 0.0; -+ } -+ } -+ } -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ double sol_xy[max_Q1D][max_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xy[qy][qx] = 0.0; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ double sol_x[max_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_x[qx] = 0; -+ } -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double s = X(dx,dy,dz,e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_x[qx] += B(qx,dx) * s; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = B(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xy[qy][qx] += wy * sol_x[qx]; -+ } -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = B(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xyz[qz][qy][qx] += wz * sol_xy[qy][qx]; -+ } -+ } -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ sol_xyz[qz][qy][qx] *= D(qx,qy,qz,e); -+ } -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double sol_xy[max_D1D][max_D1D]; -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ sol_xy[dy][dx] = 0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double sol_x[max_D1D]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ sol_x[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double s = sol_xyz[qz][qy][qx]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ sol_x[dx] += Bt(dx,qx) * s; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ const double wy = Bt(dy,qy); -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ sol_xy[dy][dx] += wy * sol_x[dx]; -+ } -+ } -+ } -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ const double wz = Bt(dz,qz); -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx,dy,dz,e) += wz * sol_xy[dy][dx]; -+ } -+ } -+ } -+ } -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAMassApply3D_Element(const int e, -+ const int NE, -+ const double *b_, -+ const double *d_, -+ const double *x_, -+ double *y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ constexpr int D1D = T_D1D ? T_D1D : d1d; -+ constexpr int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MDQ = (MQ1 > MD1) ? MQ1 : MD1; -+ -+ auto b = ConstDeviceMatrix(b_, Q1D, D1D); -+ auto d = DeviceTensor<4,const double>(d_, Q1D, Q1D, Q1D, NE); -+ auto x = DeviceTensor<4,const double>(x_, D1D, D1D, D1D, NE); -+ auto y = DeviceTensor<4,double>(y_, D1D, D1D, D1D, NE); -+ -+ MFEM_SHARED double sDQ[MQ1*MD1]; -+ double (*B)[MD1] = (double (*)[MD1]) sDQ; -+ double (*Bt)[MQ1] = (double (*)[MQ1]) sDQ; -+ MFEM_SHARED double sm0[MDQ*MDQ*MDQ]; -+ MFEM_SHARED double sm1[MDQ*MDQ*MDQ]; -+ double (*X)[MD1][MD1] = (double (*)[MD1][MD1]) sm0; -+ double (*DDQ)[MD1][MQ1] = (double (*)[MD1][MQ1]) sm1; -+ double (*DQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm0; -+ double (*QQQ)[MQ1][MQ1] = (double (*)[MQ1][MQ1]) sm1; -+ double (*QQD)[MQ1][MD1] = (double (*)[MQ1][MD1]) sm0; -+ double (*QDD)[MD1][MD1] = (double (*)[MD1][MD1]) sm1; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ X[dz][dy][dx] = x(dx,dy,dz,e); -+ } -+ } -+ MFEM_FOREACH_THREAD(dx,x,Q1D) -+ { -+ B[dx][dy] = b(dx,dy); -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ double u[D1D]; -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; dz++) -+ { -+ u[dz] = 0; -+ } -+ MFEM_UNROLL(MD1) -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ u[dz] += X[dz][dy][dx] * B[qx][dx]; -+ } -+ } -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ DDQ[dz][dy][qx] = u[dz]; -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ double u[D1D]; -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; dz++) -+ { -+ u[dz] = 0; -+ } -+ MFEM_UNROLL(MD1) -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; dz++) -+ { -+ u[dz] += DDQ[dz][dy][qx] * B[qy][dy]; -+ } -+ } -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; dz++) -+ { -+ DQQ[dz][qy][qx] = u[dz]; -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ double u[Q1D]; -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; qz++) -+ { -+ u[qz] = 0; -+ } -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; qz++) -+ { -+ u[qz] += DQQ[dz][qy][qx] * B[qz][dz]; -+ } -+ } -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; qz++) -+ { -+ QQQ[qz][qy][qx] = u[qz] * d(qx,qy,qz,e); -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(di,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(q,x,Q1D) -+ { -+ Bt[di][q] = b(q,di); -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ double u[Q1D]; -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ u[qz] = 0; -+ } -+ MFEM_UNROLL(MQ1) -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ u[qz] += QQQ[qz][qy][qx] * Bt[dx][qx]; -+ } -+ } -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ QQD[qz][qy][dx] = u[qz]; -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ double u[Q1D]; -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ u[qz] = 0; -+ } -+ MFEM_UNROLL(MQ1) -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ u[qz] += QQD[qz][qy][dx] * Bt[dy][qy]; -+ } -+ } -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ QDD[qz][dy][dx] = u[qz]; -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+ MFEM_FOREACH_THREAD(dy,y,D1D) -+ { -+ MFEM_FOREACH_THREAD(dx,x,D1D) -+ { -+ double u[D1D]; -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ u[dz] = 0; -+ } -+ MFEM_UNROLL(MQ1) -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ u[dz] += QDD[qz][dy][dx] * Bt[dz][qz]; -+ } -+ } -+ MFEM_UNROLL(MD1) -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ if (ACCUMULATE) -+ { -+ y(dx,dy,dz,e) += u[dz]; -+ } -+ else -+ { -+ y(dx,dy,dz,e) = u[dz]; -+ } -+ } -+ } -+ } -+ MFEM_SYNC_THREAD; -+} -+ -+MFEM_HOST_DEVICE inline -+void PAMassApply1D(const int NE, -+ const Array &b_, -+ const Array &bt_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ MFEM_VERIFY(d1d <= MAX_D1D, ""); -+ MFEM_VERIFY(q1d <= MAX_Q1D, ""); -+ -+ const auto B = b_.Read(); -+ const auto Bt = bt_.Read(); -+ const auto D = d_.Read(); -+ const auto X = x_.Read(); -+ auto Y = y_.ReadWrite(); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ internal::PAMassApply1D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d); -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PAMassApply2D(const int NE, -+ const Array &b_, -+ const Array &bt_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, ""); -+ MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, ""); -+ -+ const auto B = b_.Read(); -+ const auto Bt = bt_.Read(); -+ const auto D = d_.Read(); -+ const auto X = x_.Read(); -+ auto Y = y_.ReadWrite(); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ internal::PAMassApply2D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d); -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAMassApply2D(const int NE, -+ const Array &b_, -+ const Array &bt_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ MFEM_CONTRACT_VAR(bt_); -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int NBZ = T_NBZ ? T_NBZ : 1; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ MFEM_VERIFY(D1D <= MD1, ""); -+ MFEM_VERIFY(Q1D <= MQ1, ""); -+ const auto b = b_.Read(); -+ const auto D = d_.Read(); -+ const auto x = x_.Read(); -+ auto Y = y_.ReadWrite(); -+ mfem::forall_2D_batch(NE, Q1D, Q1D, NBZ, [=] MFEM_HOST_DEVICE (int e) -+ { -+ internal::SmemPAMassApply2D_Element(e, NE, b, D, x, Y, d1d, -+ q1d); -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void PAMassApply3D(const int NE, -+ const Array &b_, -+ const Array &bt_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ MFEM_VERIFY(T_D1D ? T_D1D : d1d <= MAX_D1D, ""); -+ MFEM_VERIFY(T_Q1D ? T_Q1D : q1d <= MAX_Q1D, ""); -+ -+ const auto B = b_.Read(); -+ const auto Bt = bt_.Read(); -+ const auto D = d_.Read(); -+ const auto X = x_.Read(); -+ auto Y = y_.ReadWrite(); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ internal::PAMassApply3D_Element(e, NE, B, Bt, D, X, Y, d1d, q1d); -+ }); -+} -+ -+template -+MFEM_HOST_DEVICE inline -+void SmemPAMassApply3D(const int NE, -+ const Array &b_, -+ const Array &bt_, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ MFEM_CONTRACT_VAR(bt_); -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int M1Q = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int M1D = T_D1D ? T_D1D : MAX_D1D; -+ MFEM_VERIFY(D1D <= M1D, ""); -+ MFEM_VERIFY(Q1D <= M1Q, ""); -+ auto b = b_.Read(); -+ auto d = d_.Read(); -+ auto x = x_.Read(); -+ auto y = y_.ReadWrite(); -+ mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ internal::SmemPAMassApply3D_Element(e, NE, b, d, x, y, d1d, q1d); -+ }); -+} -+ -+MFEM_HOST_DEVICE inline -+void PAMassApply(const int dim, -+ const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &B, -+ const Array &Bt, -+ const Vector &D, -+ const Vector &X, -+ Vector &Y) -+{ -+#ifdef MFEM_USE_OCCA -+ if (DeviceCanUseOcca()) -+ { -+ if (dim == 2) -+ { -+ return OccaPAMassApply2D(D1D,Q1D,NE,B,Bt,D,X,Y); -+ } -+ if (dim == 3) -+ { -+ return OccaPAMassApply3D(D1D,Q1D,NE,B,Bt,D,X,Y); -+ } -+ MFEM_ABORT("OCCA PA Mass Apply unknown kernel!"); -+ } -+#endif // MFEM_USE_OCCA -+ const int id = (D1D << 4) | Q1D; -+ -+ if (dim == 1) -+ { -+ return PAMassApply1D(NE,B,Bt,D,X,Y,D1D,Q1D); -+ } -+ else if (dim == 2) -+ { -+ switch (id) -+ { -+ case 0x22: return SmemPAMassApply2D<2,2,16>(NE,B,Bt,D,X,Y); -+ case 0x24: return SmemPAMassApply2D<2,4,16>(NE,B,Bt,D,X,Y); -+ case 0x33: return SmemPAMassApply2D<3,3,16>(NE,B,Bt,D,X,Y); -+ case 0x34: return SmemPAMassApply2D<3,4,16>(NE,B,Bt,D,X,Y); -+ case 0x35: return SmemPAMassApply2D<3,5,16>(NE,B,Bt,D,X,Y); -+ case 0x36: return SmemPAMassApply2D<3,6,16>(NE,B,Bt,D,X,Y); -+ case 0x44: return SmemPAMassApply2D<4,4,8>(NE,B,Bt,D,X,Y); -+ case 0x46: return SmemPAMassApply2D<4,6,8>(NE,B,Bt,D,X,Y); -+ case 0x48: return SmemPAMassApply2D<4,8,4>(NE,B,Bt,D,X,Y); -+ case 0x55: return SmemPAMassApply2D<5,5,8>(NE,B,Bt,D,X,Y); -+ case 0x57: return SmemPAMassApply2D<5,7,8>(NE,B,Bt,D,X,Y); -+ case 0x58: return SmemPAMassApply2D<5,8,2>(NE,B,Bt,D,X,Y); -+ case 0x66: return SmemPAMassApply2D<6,6,4>(NE,B,Bt,D,X,Y); -+ case 0x77: return SmemPAMassApply2D<7,7,4>(NE,B,Bt,D,X,Y); -+ case 0x88: return SmemPAMassApply2D<8,8,2>(NE,B,Bt,D,X,Y); -+ case 0x99: return SmemPAMassApply2D<9,9,2>(NE,B,Bt,D,X,Y); -+ default: return PAMassApply2D(NE,B,Bt,D,X,Y,D1D,Q1D); -+ } -+ } -+ else if (dim == 3) -+ { -+ switch (id) -+ { -+ case 0x22: return SmemPAMassApply3D<2,2>(NE,B,Bt,D,X,Y); -+ case 0x23: return SmemPAMassApply3D<2,3>(NE,B,Bt,D,X,Y); -+ case 0x24: return SmemPAMassApply3D<2,4>(NE,B,Bt,D,X,Y); -+ case 0x26: return SmemPAMassApply3D<2,6>(NE,B,Bt,D,X,Y); -+ case 0x34: return SmemPAMassApply3D<3,4>(NE,B,Bt,D,X,Y); -+ case 0x35: return SmemPAMassApply3D<3,5>(NE,B,Bt,D,X,Y); -+ case 0x36: return SmemPAMassApply3D<3,6>(NE,B,Bt,D,X,Y); -+ case 0x37: return SmemPAMassApply3D<3,7>(NE,B,Bt,D,X,Y); -+ case 0x45: return SmemPAMassApply3D<4,5>(NE,B,Bt,D,X,Y); -+ case 0x46: return SmemPAMassApply3D<4,6>(NE,B,Bt,D,X,Y); -+ case 0x48: return SmemPAMassApply3D<4,8>(NE,B,Bt,D,X,Y); -+ case 0x56: return SmemPAMassApply3D<5,6>(NE,B,Bt,D,X,Y); -+ case 0x58: return SmemPAMassApply3D<5,8>(NE,B,Bt,D,X,Y); -+ case 0x67: return SmemPAMassApply3D<6,7>(NE,B,Bt,D,X,Y); -+ case 0x78: return SmemPAMassApply3D<7,8>(NE,B,Bt,D,X,Y); -+ case 0x89: return SmemPAMassApply3D<8,9>(NE,B,Bt,D,X,Y); -+ case 0x9A: return SmemPAMassApply3D<9,10>(NE,B,Bt,D,X,Y); -+ default: return PAMassApply3D(NE,B,Bt,D,X,Y,D1D,Q1D); -+ } -+ } -+ mfem::out << "Unknown kernel 0x" << std::hex << id << std::endl; -+ MFEM_ABORT("Unknown kernel."); -+} -+ -+} // namespace internal -+ -+} // namespace mfem -+ -+#endif -diff --git a/fem/bilininteg_mass_mf.cpp b/fem/integ/bilininteg_mass_mf.cpp -similarity index 92% -rename from fem/bilininteg_mass_mf.cpp -rename to fem/integ/bilininteg_mass_mf.cpp -index 2a89c0ce6..34a118b6d 100644 ---- a/fem/bilininteg_mass_mf.cpp -+++ b/fem/integ/bilininteg_mass_mf.cpp -@@ -9,12 +9,10 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "ceed/integrators/mass/mass.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/mass/mass.hpp" - - namespace mfem - { -@@ -22,7 +20,6 @@ namespace mfem - void MassIntegrator::AssembleMF(const FiniteElementSpace &fes) - { - // Assuming the same element type -- fespace = &fes; - Mesh *mesh = fes.GetMesh(); - if (mesh->GetNE() == 0) { return; } - const FiniteElement &el = *fes.GetFE(0); -diff --git a/fem/integ/bilininteg_mass_pa.cpp b/fem/integ/bilininteg_mass_pa.cpp -new file mode 100644 -index 000000000..ffdec1edb ---- /dev/null -+++ b/fem/integ/bilininteg_mass_pa.cpp -@@ -0,0 +1,220 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../ceed/integrators/mass/mass.hpp" -+#include "bilininteg_mass_kernels.hpp" -+ -+namespace mfem -+{ -+ -+void MassIntegrator::AssemblePA(const FiniteElementSpace &fes) -+{ -+ const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -+ Device::GetDeviceMemoryType() : pa_mt; -+ -+ // Assuming the same element type -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNE() == 0) { return; } -+ const FiniteElement &el = *fes.GetFE(0); -+ ElementTransformation *T0 = mesh->GetElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0); -+ if (DeviceCanUseCeed()) -+ { -+ delete ceedOp; -+ const bool mixed = mesh->GetNumGeometries(mesh->Dimension()) > 1 || -+ fes.IsVariableOrder(); -+ if (mixed) -+ { -+ ceedOp = new ceed::MixedPAMassIntegrator(*this, fes, Q); -+ } -+ else -+ { -+ ceedOp = new ceed::PAMassIntegrator(fes, *ir, Q); -+ } -+ return; -+ } -+ int map_type = el.GetMapType(); -+ dim = mesh->Dimension(); -+ ne = fes.GetMesh()->GetNE(); -+ nq = ir->GetNPoints(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS, mt); -+ maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -+ dofs1D = maps->ndof; -+ quad1D = maps->nqpt; -+ pa_data.SetSize(ne*nq, mt); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -+ -+ if (dim==1) { MFEM_ABORT("Not supported yet... stay tuned!"); } -+ if (dim==2) -+ { -+ const int NE = ne; -+ const int Q1D = quad1D; -+ const bool const_c = coeff.Size() == 1; -+ const bool by_val = map_type == FiniteElement::VALUE; -+ const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D); -+ const auto J = Reshape(geom->detJ.Read(), Q1D,Q1D,NE); -+ const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) : -+ Reshape(coeff.Read(), Q1D,Q1D,NE); -+ auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE); -+ mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ const double detJ = J(qx,qy,e); -+ const double coeff = const_c ? C(0,0,0) : C(qx,qy,e); -+ v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ); -+ } -+ } -+ }); -+ } -+ if (dim==3) -+ { -+ const int NE = ne; -+ const int Q1D = quad1D; -+ const bool const_c = coeff.Size() == 1; -+ const bool by_val = map_type == FiniteElement::VALUE; -+ const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D,Q1D); -+ const auto J = Reshape(geom->detJ.Read(), Q1D,Q1D,Q1D,NE); -+ const auto C = const_c ? Reshape(coeff.Read(), 1,1,1,1) : -+ Reshape(coeff.Read(), Q1D,Q1D,Q1D,NE); -+ auto v = Reshape(pa_data.Write(), Q1D,Q1D,Q1D,NE); -+ mfem::forall_3D(NE, Q1D, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qz,z,Q1D) -+ { -+ const double detJ = J(qx,qy,qz,e); -+ const double coeff = const_c ? C(0,0,0,0) : C(qx,qy,qz,e); -+ v(qx,qy,qz,e) = W(qx,qy,qz) * coeff * (by_val ? detJ : 1.0/detJ); -+ } -+ } -+ } -+ }); -+ } -+} -+ -+void MassIntegrator::AssemblePABoundary(const FiniteElementSpace &fes) -+{ -+ const MemoryType mt = (pa_mt == MemoryType::DEFAULT) ? -+ Device::GetDeviceMemoryType() : pa_mt; -+ -+ // Assuming the same element type -+ Mesh *mesh = fes.GetMesh(); -+ if (mesh->GetNBE() == 0) { return; } -+ const FiniteElement &el = *fes.GetBE(0); -+ ElementTransformation *T0 = mesh->GetBdrElementTransformation(0); -+ const IntegrationRule *ir = IntRule ? IntRule : &GetRule(el, el, *T0); -+ -+ int map_type = el.GetMapType(); -+ dim = el.GetDim(); // Dimension of the boundary element, *not* the mesh -+ ne = fes.GetMesh()->GetNBE(); -+ nq = ir->GetNPoints(); -+ face_geom = mesh->GetFaceGeometricFactors(*ir, GeometricFactors::DETERMINANTS, -+ FaceType::Boundary, mt); -+ maps = &el.GetDofToQuad(*ir, DofToQuad::TENSOR); -+ dofs1D = maps->ndof; -+ quad1D = maps->nqpt; -+ pa_data.SetSize(ne*nq, mt); -+ -+ FaceQuadratureSpace qs(*mesh, *ir, FaceType::Boundary); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::COMPRESSED); -+ -+ const int NE = ne; -+ const int Q1D = quad1D; -+ const bool const_c = coeff.Size() == 1; -+ const bool by_val = map_type == FiniteElement::VALUE; -+ if (dim==1) -+ { -+ const auto W = Reshape(ir->GetWeights().Read(), Q1D); -+ const auto J = Reshape(face_geom->detJ.Read(), Q1D, NE); -+ const auto C = const_c ? Reshape(coeff.Read(), 1, 1) : -+ Reshape(coeff.Read(), Q1D, NE); -+ auto v = Reshape(pa_data.Write(), Q1D, NE); -+ mfem::forall_2D(NE, Q1D, 1, [=] MFEM_HOST_DEVICE (int e) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ const double detJ = J(qx,e); -+ const double coeff = const_c ? C(0,0) : C(qx,e); -+ v(qx,e) = W(qx) * coeff * (by_val ? detJ : 1.0/detJ); -+ } -+ }); -+ } -+ else if (dim==2) -+ { -+ const auto W = Reshape(ir->GetWeights().Read(), Q1D,Q1D); -+ const auto J = Reshape(face_geom->detJ.Read(), Q1D,Q1D,NE); -+ const auto C = const_c ? Reshape(coeff.Read(), 1,1,1) : -+ Reshape(coeff.Read(), Q1D,Q1D,NE); -+ auto v = Reshape(pa_data.Write(), Q1D,Q1D, NE); -+ mfem::forall_2D(NE, Q1D, Q1D, [=] MFEM_HOST_DEVICE (int e) -+ { -+ MFEM_FOREACH_THREAD(qx,x,Q1D) -+ { -+ MFEM_FOREACH_THREAD(qy,y,Q1D) -+ { -+ const double detJ = J(qx,qy,e); -+ const double coeff = const_c ? C(0,0,0) : C(qx,qy,e); -+ v(qx,qy,e) = W(qx,qy) * coeff * (by_val ? detJ : 1.0/detJ); -+ } -+ } -+ }); -+ } -+ else -+ { -+ MFEM_ABORT("Not supported."); -+ } -+} -+ -+void MassIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ internal::PAMassAssembleDiagonal(dim, dofs1D, quad1D, ne, maps->B, pa_data, -+ diag); -+ } -+} -+ -+void MassIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->AddMult(x, y); -+ } -+ else -+ { -+ internal::PAMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, -+ y); -+ } -+} -+ -+void MassIntegrator::AddMultTransposePA(const Vector &x, Vector &y) const -+{ -+ // Mass integrator is symmetric -+ AddMultPA(x, y); -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_mixedcurl_pa.cpp b/fem/integ/bilininteg_mixedcurl_pa.cpp -new file mode 100644 -index 000000000..3d70bc4c9 ---- /dev/null -+++ b/fem/integ/bilininteg_mixedcurl_pa.cpp -@@ -0,0 +1,424 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "bilininteg_hcurl_kernels.hpp" -+#include "bilininteg_hcurlhdiv_kernels.hpp" -+ -+namespace mfem -+{ -+ -+void MixedScalarCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *fel = trial_fes.GetFE(0); // In H(curl) -+ const FiniteElement *eltest = test_fes.GetFE(0); // In scalar space -+ -+ const VectorTensorFiniteElement *el = -+ dynamic_cast(fel); -+ MFEM_VERIFY(el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ if (el->GetDerivType() != mfem::FiniteElement::CURL) -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+ -+ const IntegrationRule *ir -+ = IntRule ? IntRule : &MassIntegrator::GetRule(*eltest, *eltest, -+ *mesh->GetElementTransformation(0)); -+ -+ const int dims = el->GetDim(); -+ MFEM_VERIFY(dims == 2, ""); -+ -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2, ""); -+ -+ ne = test_fes.GetNE(); -+ mapsC = &el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ if (el->GetOrder() == eltest->GetOrder()) -+ { -+ dofs1Dtest = dofs1D; -+ } -+ else -+ { -+ dofs1Dtest = dofs1D - 1; -+ } -+ -+ pa_data.SetSize(nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -+ -+ if (dim == 2) -+ { -+ internal::PAHcurlL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void MixedScalarCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 2) -+ { -+ internal::PAHcurlL2Apply2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -+ mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, -+ x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void MixedScalarCurlIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const -+{ -+ if (dim == 2) -+ { -+ internal::PAHcurlL2ApplyTranspose2D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -+ mapsO->Bt, mapsC->B, mapsC->Gt, pa_data, -+ x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void MixedVectorCurlIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements, with vector test and trial spaces. -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const VectorTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir -+ = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -+ *mesh->GetElementTransformation(0)); -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 3, ""); -+ -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -+ -+ ne = trial_fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ dofs1Dtest = mapsCtest->ndof; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ testType = test_el->GetDerivType(); -+ trialType = trial_el->GetDerivType(); -+ -+ const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ coeffDim = (DQ ? 3 : 1); -+ -+ const bool curlSpaces = (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL); -+ -+ const int ndata = curlSpaces ? (coeffDim == 1 ? 1 : 9) : symmDims; -+ pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(qs, CoefficientStorage::FULL); -+ if (Q) { coeff.Project(*Q); } -+ else if (DQ) { coeff.Project(*DQ); } -+ else { coeff.SetConstant(1.0); } -+ -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ if (coeffDim == 1) -+ { -+ internal::PAHcurlL2Setup3D(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data); -+ } -+ else -+ { -+ internal::PAHcurlHdivMassSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), -+ geom->J, coeff, pa_data); -+ } -+ } -+ else if (testType == mfem::FiniteElement::DIV && -+ trialType == mfem::FiniteElement::CURL && dim == 3 && -+ test_fel->GetOrder() == trial_fel->GetOrder()) -+ { -+ internal::PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void MixedVectorCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ const int ndata = coeffDim == 1 ? 1 : 9; -+ -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlL2Apply3D<2,3>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ case 0x34: -+ return internal::SmemPAHcurlL2Apply3D<3,4>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ case 0x45: -+ return internal::SmemPAHcurlL2Apply3D<4,5>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ case 0x56: -+ return internal::SmemPAHcurlL2Apply3D<5,6>( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ default: -+ return internal::SmemPAHcurlL2Apply3D( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, mapsC->G, -+ pa_data, x, y); -+ } -+ } -+ else -+ { -+ internal::PAHcurlL2Apply3D(dofs1D, quad1D, ndata, ne, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, mapsC->G, pa_data, x, y); -+ } -+ } -+ else if (testType == mfem::FiniteElement::DIV && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ internal::PAHcurlHdivApply3D(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -+ mapsC->B, mapsOtest->Bt, mapsCtest->Bt, mapsC->G, -+ pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension or space!"); -+ } -+} -+ -+void MixedVectorCurlIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const -+{ -+ if (testType == mfem::FiniteElement::DIV && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ internal::PAHcurlHdivApply3DTranspose(dofs1D, dofs1Dtest, quad1D, ne, mapsO->B, -+ mapsC->B, mapsOtest->Bt, mapsCtest->Bt, -+ mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension or space!"); -+ } -+} -+ -+void MixedVectorWeakCurlIntegrator::AssemblePA( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements, with vector test and trial spaces. -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const VectorTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir -+ = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -+ *mesh->GetElementTransformation(0)); -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 3, ""); -+ -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -+ -+ ne = trial_fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ testType = test_el->GetDerivType(); -+ trialType = trial_el->GetDerivType(); -+ -+ const bool curlSpaces = (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL); -+ -+ const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ -+ coeffDim = DQ ? 3 : 1; -+ const int ndata = curlSpaces ? (DQ ? 9 : 1) : symmDims; -+ -+ pa_data.SetSize(ndata * nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(qs, CoefficientStorage::FULL); -+ if (Q) { coeff.Project(*Q); } -+ else if (DQ) { coeff.Project(*DQ); } -+ else { coeff.SetConstant(1.0); } -+ -+ if (trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ if (coeffDim == 1) -+ { -+ internal::PAHcurlL2Setup3D(nq, coeffDim, ne, ir->GetWeights(), coeff, pa_data); -+ } -+ else -+ { -+ internal::PAHcurlHdivMassSetup3D(quad1D, coeffDim, ne, false, ir->GetWeights(), -+ geom->J, coeff, pa_data); -+ } -+ } -+ else if (trialType == mfem::FiniteElement::DIV && dim == 3 && -+ test_el->GetOrder() == trial_el->GetOrder()) -+ { -+ internal::PACurlCurlSetup3D(quad1D, coeffDim, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void MixedVectorWeakCurlIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::CURL && dim == 3) -+ { -+ const int ndata = coeffDim == 1 ? 1 : 9; -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlL2Apply3DTranspose<2,3>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ case 0x34: -+ return internal::SmemPAHcurlL2Apply3DTranspose<3,4>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ case 0x45: -+ return internal::SmemPAHcurlL2Apply3DTranspose<4,5>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ case 0x56: -+ return internal::SmemPAHcurlL2Apply3DTranspose<5,6>( -+ dofs1D, quad1D, ndata, -+ ne, mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ default: -+ return internal::SmemPAHcurlL2Apply3DTranspose( -+ dofs1D, quad1D, ndata, ne, -+ mapsO->B, mapsC->B, -+ mapsC->G, pa_data, x, y); -+ } -+ } -+ else -+ { -+ internal::PAHcurlL2Apply3DTranspose(dofs1D, quad1D, ndata, ne, mapsO->B, -+ mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->Gt, -+ pa_data, x, y); -+ } -+ } -+ else if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::DIV && dim == 3) -+ { -+ internal::PAHcurlHdivApply3DTranspose(dofs1D, dofs1D, quad1D, ne, mapsO->B, -+ mapsC->B, mapsO->Bt, mapsC->Bt, -+ mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension or space!"); -+ } -+} -+ -+void MixedVectorWeakCurlIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const -+{ -+ if (testType == mfem::FiniteElement::CURL && -+ trialType == mfem::FiniteElement::DIV && dim == 3) -+ { -+ internal::PAHcurlHdivApply3D(dofs1D, dofs1D, quad1D, ne, mapsO->B, -+ mapsC->B, mapsO->Bt, mapsC->Bt, mapsC->G, -+ pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension or space!"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_mixedvecgrad_pa.cpp b/fem/integ/bilininteg_mixedvecgrad_pa.cpp -new file mode 100644 -index 000000000..f9e6d3ee8 ---- /dev/null -+++ b/fem/integ/bilininteg_mixedvecgrad_pa.cpp -@@ -0,0 +1,757 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "bilininteg_diffusion_kernels.hpp" -+ -+namespace mfem -+{ -+ -+// Apply to x corresponding to DOFs in H^1 (trial), whose gradients are -+// integrated against H(curl) test functions corresponding to y. -+static void PAHcurlH1Apply2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &bc, -+ const Array &gc, -+ const Array &bot, -+ const Array &bct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE); -+ auto X = Reshape(x.Read(), D1D, D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 2*(D1D-1)*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qy][qx][c] = 0.0; -+ } -+ } -+ } -+ -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ double gradX[MAX_Q1D][2]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx][0] = 0.0; -+ gradX[qx][1] = 0.0; -+ } -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double s = X(dx,dy,e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx][0] += s * Bc(qx,dx); -+ gradX[qx][1] += s * Gc(qx,dx); -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = gradX[qx][0]; -+ const double wDx = gradX[qx][1]; -+ mass[qy][qx][0] += wDx * wy; -+ mass[qy][qx][1] += wx * wDy; -+ } -+ } -+ } -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,0,e); -+ const double O12 = op(qx,qy,1,e); -+ const double O22 = op(qx,qy,2,e); -+ const double massX = mass[qy][qx][0]; -+ const double massY = mass[qy][qx][1]; -+ mass[qy][qx][0] = (O11*massX)+(O12*massY); -+ mass[qy][qx][1] = (O12*massX)+(O22*massY); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ double massX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += mass[qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + (dy * D1Dx) + osc, e) += massX[dx] * wy; -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop c -+ } -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H(curl), integrated -+// against gradients of H^1 functions corresponding to y. -+static void PAHcurlH1ApplyTranspose2D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &bc, -+ const Array &bo, -+ const Array &bct, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int VDIM = 2; -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bt = Reshape(bct.Read(), D1D, Q1D); -+ auto Gt = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, 3, NE); -+ auto X = Reshape(x.Read(), 2*(D1D-1)*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qy][qx][c] = 0.0; -+ } -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y components -+ { -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + (dy * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qy][qx][c] += massX[qx] * wy; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,0,e); -+ const double O12 = op(qx,qy,1,e); -+ const double O22 = op(qx,qy,2,e); -+ const double massX = mass[qy][qx][0]; -+ const double massY = mass[qy][qx][1]; -+ mass[qy][qx][0] = (O11*massX)+(O12*massY); -+ mass[qy][qx][1] = (O12*massX)+(O22*massY); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double gradX[MAX_D1D][2]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ gradX[dx][0] = 0; -+ gradX[dx][1] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double gX = mass[qy][qx][0]; -+ const double gY = mass[qy][qx][1]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double wx = Bt(dx,qx); -+ const double wDx = Gt(dx,qx); -+ gradX[dx][0] += gX * wDx; -+ gradX[dx][1] += gY * wx; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ const double wy = Bt(dy,qy); -+ const double wDy = Gt(dy,qy); -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx,dy,e) += ((gradX[dx][0] * wy) + (gradX[dx][1] * wDy)); -+ } -+ } -+ } -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H^1 (trial), whose gradients are -+// integrated against H(curl) test functions corresponding to y. -+static void PAHcurlH1Apply3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &bc, -+ const Array &gc, -+ const Array &bot, -+ const Array &bct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Gc = Reshape(gc.Read(), Q1D, D1D); -+ auto Bot = Reshape(bot.Read(), D1D-1, Q1D); -+ auto Bct = Reshape(bct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -+ auto X = Reshape(x.Read(), D1D, D1D, D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), 3*(D1D-1)*D1D*D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ double gradXY[MAX_Q1D][MAX_Q1D][3]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradXY[qy][qx][0] = 0.0; -+ gradXY[qy][qx][1] = 0.0; -+ gradXY[qy][qx][2] = 0.0; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ double gradX[MAX_Q1D][2]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx][0] = 0.0; -+ gradX[qx][1] = 0.0; -+ } -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double s = X(dx,dy,dz,e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ gradX[qx][0] += s * Bc(qx,dx); -+ gradX[qx][1] += s * Gc(qx,dx); -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = Bc(qy,dy); -+ const double wDy = Gc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = gradX[qx][0]; -+ const double wDx = gradX[qx][1]; -+ gradXY[qy][qx][0] += wDx * wy; -+ gradXY[qy][qx][1] += wx * wDy; -+ gradXY[qy][qx][2] += wx * wy; -+ } -+ } -+ } -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = Bc(qz,dz); -+ const double wDz = Gc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qz][qy][qx][0] += gradXY[qy][qx][0] * wz; -+ mass[qz][qy][qx][1] += gradXY[qy][qx][1] * wz; -+ mass[qz][qy][qx][2] += gradXY[qy][qx][2] * wDz; -+ } -+ } -+ } -+ } -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,qz,0,e); -+ const double O12 = op(qx,qy,qz,1,e); -+ const double O13 = op(qx,qy,qz,2,e); -+ const double O22 = op(qx,qy,qz,3,e); -+ const double O23 = op(qx,qy,qz,4,e); -+ const double O33 = op(qx,qy,qz,5,e); -+ const double massX = mass[qz][qy][qx][0]; -+ const double massY = mass[qz][qy][qx][1]; -+ const double massZ = mass[qz][qy][qx][2]; -+ mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -+ mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ); -+ mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ); -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double massXY[MAX_D1D][MAX_D1D]; -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] = 0.0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double massX[MAX_D1D]; -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massX[dx] += mass[qz][qy][qx][c] * ((c == 0) ? Bot(dx,qx) : Bct(dx,qx)); -+ } -+ } -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ const double wy = (c == 1) ? Bot(dy,qy) : Bct(dy,qy); -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ massXY[dy][dx] += massX[dx] * wy; -+ } -+ } -+ } -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ const double wz = (c == 2) ? Bot(dz,qz) : Bct(dz,qz); -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ Y(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e) += massXY[dy][dx] * wz; -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop c -+ } // loop qz -+ }); // end of element loop -+} -+ -+// Apply to x corresponding to DOFs in H(curl), integrated -+// against gradients of H^1 functions corresponding to y. -+static void PAHcurlH1ApplyTranspose3D(const int D1D, -+ const int Q1D, -+ const int NE, -+ const Array &bc, -+ const Array &bo, -+ const Array &bct, -+ const Array &gct, -+ const Vector &pa_data, -+ const Vector &x, -+ Vector &y) -+{ -+ constexpr static int MAX_D1D = HCURL_MAX_D1D; -+ constexpr static int MAX_Q1D = HCURL_MAX_Q1D; -+ -+ MFEM_VERIFY(D1D <= MAX_D1D, "Error: D1D > MAX_D1D"); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, "Error: Q1D > MAX_Q1D"); -+ -+ constexpr static int VDIM = 3; -+ -+ auto Bc = Reshape(bc.Read(), Q1D, D1D); -+ auto Bo = Reshape(bo.Read(), Q1D, D1D-1); -+ auto Bt = Reshape(bct.Read(), D1D, Q1D); -+ auto Gt = Reshape(gct.Read(), D1D, Q1D); -+ auto op = Reshape(pa_data.Read(), Q1D, Q1D, Q1D, 6, NE); -+ auto X = Reshape(x.Read(), 3*(D1D-1)*D1D*D1D, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, NE); -+ -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ double mass[MAX_Q1D][MAX_Q1D][MAX_Q1D][VDIM]; -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int c = 0; c < VDIM; ++c) -+ { -+ mass[qz][qy][qx][c] = 0.0; -+ } -+ } -+ } -+ } -+ -+ int osc = 0; -+ -+ for (int c = 0; c < VDIM; ++c) // loop over x, y, z components -+ { -+ const int D1Dz = (c == 2) ? D1D - 1 : D1D; -+ const int D1Dy = (c == 1) ? D1D - 1 : D1D; -+ const int D1Dx = (c == 0) ? D1D - 1 : D1D; -+ -+ for (int dz = 0; dz < D1Dz; ++dz) -+ { -+ double massXY[MAX_Q1D][MAX_Q1D]; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massXY[qy][qx] = 0.0; -+ } -+ } -+ -+ for (int dy = 0; dy < D1Dy; ++dy) -+ { -+ double massX[MAX_Q1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] = 0.0; -+ } -+ -+ for (int dx = 0; dx < D1Dx; ++dx) -+ { -+ const double t = X(dx + ((dy + (dz * D1Dy)) * D1Dx) + osc, e); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ massX[qx] += t * ((c == 0) ? Bo(qx,dx) : Bc(qx,dx)); -+ } -+ } -+ -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double wy = (c == 1) ? Bo(qy,dy) : Bc(qy,dy); -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double wx = massX[qx]; -+ massXY[qy][qx] += wx * wy; -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const double wz = (c == 2) ? Bo(qz,dz) : Bc(qz,dz); -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ mass[qz][qy][qx][c] += massXY[qy][qx] * wz; -+ } -+ } -+ } -+ } -+ -+ osc += D1Dx * D1Dy * D1Dz; -+ } // loop (c) over components -+ -+ // Apply D operator. -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double O11 = op(qx,qy,qz,0,e); -+ const double O12 = op(qx,qy,qz,1,e); -+ const double O13 = op(qx,qy,qz,2,e); -+ const double O22 = op(qx,qy,qz,3,e); -+ const double O23 = op(qx,qy,qz,4,e); -+ const double O33 = op(qx,qy,qz,5,e); -+ const double massX = mass[qz][qy][qx][0]; -+ const double massY = mass[qz][qy][qx][1]; -+ const double massZ = mass[qz][qy][qx][2]; -+ mass[qz][qy][qx][0] = (O11*massX)+(O12*massY)+(O13*massZ); -+ mass[qz][qy][qx][1] = (O12*massX)+(O22*massY)+(O23*massZ); -+ mass[qz][qy][qx][2] = (O13*massX)+(O23*massY)+(O33*massZ); -+ } -+ } -+ } -+ -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ double gradXY[MAX_D1D][MAX_D1D][3]; -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ gradXY[dy][dx][0] = 0; -+ gradXY[dy][dx][1] = 0; -+ gradXY[dy][dx][2] = 0; -+ } -+ } -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ double gradX[MAX_D1D][3]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ gradX[dx][0] = 0; -+ gradX[dx][1] = 0; -+ gradX[dx][2] = 0; -+ } -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double gX = mass[qz][qy][qx][0]; -+ const double gY = mass[qz][qy][qx][1]; -+ const double gZ = mass[qz][qy][qx][2]; -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ const double wx = Bt(dx,qx); -+ const double wDx = Gt(dx,qx); -+ gradX[dx][0] += gX * wDx; -+ gradX[dx][1] += gY * wx; -+ gradX[dx][2] += gZ * wx; -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ const double wy = Bt(dy,qy); -+ const double wDy = Gt(dy,qy); -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ gradXY[dy][dx][0] += gradX[dx][0] * wy; -+ gradXY[dy][dx][1] += gradX[dx][1] * wDy; -+ gradXY[dy][dx][2] += gradX[dx][2] * wy; -+ } -+ } -+ } -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ const double wz = Bt(dz,qz); -+ const double wDz = Gt(dz,qz); -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ Y(dx,dy,dz,e) += -+ ((gradXY[dy][dx][0] * wz) + -+ (gradXY[dy][dx][1] * wz) + -+ (gradXY[dy][dx][2] * wDz)); -+ } -+ } -+ } -+ } // loop qz -+ }); // end of element loop -+} -+ -+void MixedVectorGradientIntegrator::AssemblePA( -+ const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements, with a vector test space and H^1 trial space. -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const NodalTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir -+ = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -+ *mesh->GetElementTransformation(0)); -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder(), ""); -+ -+ ne = trial_fes.GetNE(); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ pa_data.SetSize(symmDims * nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -+ -+ // Use the same setup functions as VectorFEMassIntegrator. -+ if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 3) -+ { -+ internal::PADiffusionSetup3D(quad1D, 1, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else if (test_el->GetDerivType() == mfem::FiniteElement::CURL && dim == 2) -+ { -+ internal::PADiffusionSetup2D<2>(quad1D, 1, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void MixedVectorGradientIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ PAHcurlH1Apply3D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlH1Apply2D(dofs1D, quad1D, ne, mapsC->B, mapsC->G, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void MixedVectorGradientIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const -+{ -+ if (dim == 3) -+ { -+ PAHcurlH1ApplyTranspose3D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -+ mapsC->Bt, mapsC->Gt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ PAHcurlH1ApplyTranspose2D(dofs1D, quad1D, ne, mapsC->B, mapsO->B, -+ mapsC->Bt, mapsC->Gt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_transpose_ea.cpp b/fem/integ/bilininteg_transpose_ea.cpp -new file mode 100644 -index 000000000..e1ac154fc ---- /dev/null -+++ b/fem/integ/bilininteg_transpose_ea.cpp -@@ -0,0 +1,106 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+ -+namespace mfem -+{ -+ -+void TransposeIntegrator::AssembleEA(const FiniteElementSpace &fes, -+ Vector &ea_data) -+{ -+ const int ne = fes.GetNE(); -+ if (ne == 0) { return; } -+ -+ const int dofs = fes.GetFE(0)->GetDof(); -+ Vector ea_data_tmp(ea_data.Size()); -+ ea_data_tmp = 0.0; -+ bfi->AssembleEA(fes, ea_data_tmp); -+ auto A = Reshape(ea_data_tmp.Read(), dofs, dofs, ne); -+ auto AT = Reshape(ea_data.ReadWrite(), dofs, dofs, ne); -+ mfem::forall(ne, [=] MFEM_HOST_DEVICE (int e) -+ { -+ for (int i = 0; i < dofs; i++) -+ { -+ for (int j = 0; j < dofs; j++) -+ { -+ const double a = A(i, j, e); -+ AT(j, i, e) += a; -+ } -+ } -+ }); -+} -+ -+void TransposeIntegrator::AssembleEAInteriorFaces(const FiniteElementSpace &fes, -+ Vector &ea_data_int, -+ Vector &ea_data_ext) -+{ -+ const int nf = fes.GetNFbyType(FaceType::Interior); -+ if (nf == 0) { return; } -+ -+ const int face_dofs = fes.GetTraceElement(0, -+ fes.GetMesh()->GetFaceGeometry(0))->GetDof(); -+ Vector ea_data_int_tmp(ea_data_int.Size()); -+ Vector ea_data_ext_tmp(ea_data_ext.Size()); -+ ea_data_int_tmp = 0.0; -+ ea_data_ext_tmp = 0.0; -+ bfi->AssembleEAInteriorFaces(fes, ea_data_int_tmp, ea_data_ext_tmp); -+ auto A_int = Reshape(ea_data_int_tmp.Read(), face_dofs, face_dofs, 2, nf); -+ auto A_ext = Reshape(ea_data_ext_tmp.Read(), face_dofs, face_dofs, 2, nf); -+ auto AT_int = Reshape(ea_data_int.ReadWrite(), face_dofs, face_dofs, 2, nf); -+ auto AT_ext = Reshape(ea_data_ext.ReadWrite(), face_dofs, face_dofs, 2, nf); -+ mfem::forall(nf, [=] MFEM_HOST_DEVICE (int f) -+ { -+ for (int i = 0; i < face_dofs; i++) -+ { -+ for (int j = 0; j < face_dofs; j++) -+ { -+ const double a_int0 = A_int(i, j, 0, f); -+ const double a_int1 = A_int(i, j, 1, f); -+ const double a_ext0 = A_ext(i, j, 0, f); -+ const double a_ext1 = A_ext(i, j, 1, f); -+ AT_int(j, i, 0, f) += a_int0; -+ AT_int(j, i, 1, f) += a_int1; -+ AT_ext(j, i, 0, f) += a_ext1; -+ AT_ext(j, i, 1, f) += a_ext0; -+ } -+ } -+ }); -+} -+ -+void TransposeIntegrator::AssembleEABoundaryFaces(const FiniteElementSpace &fes, -+ Vector &ea_data_bdr) -+{ -+ const int nf = fes.GetNFbyType(FaceType::Boundary); -+ if (nf == 0) { return; } -+ -+ const int face_dofs = fes.GetTraceElement(0, -+ fes.GetMesh()->GetFaceGeometry(0))->GetDof(); -+ Vector ea_data_bdr_tmp(ea_data_bdr.Size()); -+ ea_data_bdr_tmp = 0.0; -+ bfi->AssembleEABoundaryFaces(fes, ea_data_bdr_tmp); -+ auto A_bdr = Reshape(ea_data_bdr_tmp.Read(), face_dofs, face_dofs, nf); -+ auto AT_bdr = Reshape(ea_data_bdr.ReadWrite(), face_dofs, face_dofs, nf); -+ mfem::forall(nf, [=] MFEM_HOST_DEVICE (int f) -+ { -+ for (int i = 0; i < face_dofs; i++) -+ { -+ for (int j = 0; j < face_dofs; j++) -+ { -+ const double a_bdr = A_bdr(i, j, f); -+ AT_bdr(j, i, f) += a_bdr; -+ } -+ } -+ }); -+} -+ -+} -diff --git a/fem/bilininteg_vecdiffusion_mf.cpp b/fem/integ/bilininteg_vecdiffusion_mf.cpp -similarity index 93% -rename from fem/bilininteg_vecdiffusion_mf.cpp -rename to fem/integ/bilininteg_vecdiffusion_mf.cpp -index dae344544..7cad61496 100644 ---- a/fem/bilininteg_vecdiffusion_mf.cpp -+++ b/fem/integ/bilininteg_vecdiffusion_mf.cpp -@@ -9,12 +9,10 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "ceed/integrators/diffusion/diffusion.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/diffusion/diffusion.hpp" - - namespace mfem - { -diff --git a/fem/bilininteg_vecdiffusion.cpp b/fem/integ/bilininteg_vecdiffusion_pa.cpp -similarity index 88% -rename from fem/bilininteg_vecdiffusion.cpp -rename to fem/integ/bilininteg_vecdiffusion_pa.cpp -index 1915fca37..84e4d5b2a 100644 ---- a/fem/bilininteg_vecdiffusion.cpp -+++ b/fem/integ/bilininteg_vecdiffusion_pa.cpp -@@ -9,19 +9,15 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "qfunction.hpp" --#include "ceed/integrators/diffusion/diffusion.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "../ceed/integrators/diffusion/diffusion.hpp" - - namespace mfem - { - --// PA Vector Diffusion Integrator -- - // PA Diffusion Assemble 2D kernel - static void PAVectorDiffusionSetup2D(const int Q1D, - const int NE, -@@ -40,7 +36,6 @@ static void PAVectorDiffusionSetup2D(const int Q1D, - const auto C = const_c ? Reshape(c.Read(), 1,1) : - Reshape(c.Read(), NQ, NE); - -- - mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) - { - for (int q = 0; q < NQ; ++q) -@@ -76,7 +71,6 @@ static void PAVectorDiffusionSetup3D(const int Q1D, - const auto C = const_c ? Reshape(c.Read(), 1,1) : - Reshape(c.Read(), NQ,NE); - -- - mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) - { - for (int q = 0; q < NQ; ++q) -@@ -118,28 +112,6 @@ static void PAVectorDiffusionSetup3D(const int Q1D, - }); - } - --static void PAVectorDiffusionSetup(const int dim, -- const int Q1D, -- const int NE, -- const Array &W, -- const Vector &J, -- const Vector &C, -- Vector &op) --{ -- if (!(dim == 2 || dim == 3)) -- { -- MFEM_ABORT("Dimension not supported."); -- } -- if (dim == 2) -- { -- PAVectorDiffusionSetup2D(Q1D, NE, W, J, C, op); -- } -- if (dim == 3) -- { -- PAVectorDiffusionSetup3D(Q1D, NE, W, J, C, op); -- } --} -- - void VectorDiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - { - // Assumes tensor-product elements -@@ -183,7 +155,10 @@ void VectorDiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - const Array &w = ir->GetWeights(); - const Vector &j = geom->J; - Vector &d = pa_data; -- if (dim == 1) { MFEM_ABORT("dim==1 not supported in PAVectorDiffusionSetup"); } -+ if (dim == 1) -+ { -+ MFEM_ABORT("dim==1 not supported in VectorDiffusionIntegrator::AssemblePA"); -+ } - if (dim == 2 && sdim == 3) - { - constexpr int DIM = 2; -@@ -222,23 +197,224 @@ void VectorDiffusionIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - else - { -- PAVectorDiffusionSetup(dim, quad1D, ne, w, j, coeff, d); -+ if (dim == 2) -+ { -+ return PAVectorDiffusionSetup2D(quad1D, ne, w, j, coeff, d); -+ } -+ if (dim == 3) -+ { -+ return PAVectorDiffusionSetup3D(quad1D, ne, w, j, coeff, d); -+ } -+ MFEM_ABORT("Dimension not supported."); -+ } -+} -+ -+template -+static void PAVectorDiffusionDiagonal2D(const int NE, -+ const Array &b, -+ const Array &g, -+ const Vector &d, -+ Vector &y, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ MFEM_VERIFY(D1D <= MAX_D1D, ""); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -+ auto B = Reshape(b.Read(), Q1D, D1D); -+ auto G = Reshape(g.Read(), Q1D, D1D); -+ // note the different shape for D, this is a (symmetric) matrix so we only -+ // store necessary entries -+ auto D = Reshape(d.Read(), Q1D*Q1D, 3, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, D1D, 2, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ // gradphi \cdot Q \gradphi has four terms -+ double QD0[MQ1][MD1]; -+ double QD1[MQ1][MD1]; -+ double QD2[MQ1][MD1]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ QD0[qx][dy] = 0.0; -+ QD1[qx][dy] = 0.0; -+ QD2[qx][dy] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const int q = qx + qy * Q1D; -+ const double D0 = D(q,0,e); -+ const double D1 = D(q,1,e); -+ const double D2 = D(q,2,e); -+ QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D0; -+ QD1[qx][dy] += B(qy, dy) * G(qy, dy) * D1; -+ QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D2; -+ } -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ double temp = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ temp += G(qx, dx) * G(qx, dx) * QD0[qx][dy]; -+ temp += G(qx, dx) * B(qx, dx) * QD1[qx][dy]; -+ temp += B(qx, dx) * G(qx, dx) * QD1[qx][dy]; -+ temp += B(qx, dx) * B(qx, dx) * QD2[qx][dy]; -+ } -+ Y(dx,dy,0,e) += temp; -+ Y(dx,dy,1,e) += temp; -+ } -+ } -+ }); -+} -+ -+template -+static void PAVectorDiffusionDiagonal3D(const int NE, -+ const Array &b, -+ const Array &g, -+ const Vector &d, -+ Vector &y, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ constexpr int DIM = 3; -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ MFEM_VERIFY(D1D <= MD1, ""); -+ MFEM_VERIFY(Q1D <= MQ1, ""); -+ auto B = Reshape(b.Read(), Q1D, D1D); -+ auto G = Reshape(g.Read(), Q1D, D1D); -+ auto Q = Reshape(d.Read(), Q1D*Q1D*Q1D, 6, NE); -+ auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, 3, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -+ double QQD[MQ1][MQ1][MD1]; -+ double QDD[MQ1][MD1][MD1]; -+ for (int i = 0; i < DIM; ++i) -+ { -+ for (int j = 0; j < DIM; ++j) -+ { -+ // first tensor contraction, along z direction -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ QQD[qx][qy][dz] = 0.0; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ const int q = qx + (qy + qz * Q1D) * Q1D; -+ const int k = j >= i ? -+ 3 - (3-i)*(2-i)/2 + j: -+ 3 - (3-j)*(2-j)/2 + i; -+ const double O = Q(q,k,e); -+ const double Bz = B(qz,dz); -+ const double Gz = G(qz,dz); -+ const double L = i==2 ? Gz : Bz; -+ const double R = j==2 ? Gz : Bz; -+ QQD[qx][qy][dz] += L * O * R; -+ } -+ } -+ } -+ } -+ // second tensor contraction, along y direction -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ QDD[qx][dy][dz] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ const double By = B(qy,dy); -+ const double Gy = G(qy,dy); -+ const double L = i==1 ? Gy : By; -+ const double R = j==1 ? Gy : By; -+ QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R; -+ } -+ } -+ } -+ } -+ // third tensor contraction, along x direction -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ double temp = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ const double Bx = B(qx,dx); -+ const double Gx = G(qx,dx); -+ const double L = i==0 ? Gx : Bx; -+ const double R = j==0 ? Gx : Bx; -+ temp += L * QDD[qx][dy][dz] * R; -+ } -+ Y(dx, dy, dz, 0, e) += temp; -+ Y(dx, dy, dz, 1, e) += temp; -+ Y(dx, dy, dz, 2, e) += temp; -+ } -+ } -+ } -+ } -+ } -+ }); -+} -+ -+void VectorDiffusionIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ if (dim == 2) -+ { -+ return PAVectorDiffusionDiagonal2D(ne, maps->B, maps->G, -+ pa_data, diag, -+ dofs1D, quad1D); -+ } -+ else if (dim == 3) -+ { -+ return PAVectorDiffusionDiagonal3D(ne, maps->B, maps->G, -+ pa_data, diag, -+ dofs1D, quad1D); -+ } -+ MFEM_ABORT("Dimension not implemented."); - } - } - - // PA Diffusion Apply 2D kernel --template static --void PAVectorDiffusionApply2D(const int NE, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &d_, -- const Vector &x_, -- Vector &y_, -- const int d1d = 0, -- const int q1d = 0, -- const int vdim = 0) -+template -+static void PAVectorDiffusionApply2D(const int NE, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &d_, -+ const Vector &x_, -+ Vector &y_, -+ const int d1d = 0, -+ const int q1d = 0, -+ const int vdim = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -349,17 +525,16 @@ void PAVectorDiffusionApply2D(const int NE, - } - - // PA Diffusion Apply 3D kernel --template static --void PAVectorDiffusionApply3D(const int NE, -- const Array &b, -- const Array &g, -- const Array &bt, -- const Array >, -- const Vector &op_, -- const Vector &x_, -- Vector &y_, -- int d1d = 0, int q1d = 0) -+template -+static void PAVectorDiffusionApply3D(const int NE, -+ const Array &b, -+ const Array &g, -+ const Array &bt, -+ const Array >, -+ const Vector &op_, -+ const Vector &x_, -+ Vector &y_, -+ int d1d = 0, int q1d = 0) - { - const int D1D = T_D1D ? T_D1D : d1d; - const int Q1D = T_Q1D ? T_Q1D : q1d; -@@ -542,7 +717,6 @@ void PAVectorDiffusionApply3D(const int NE, - }); - } - --// PA Diffusion Apply kernel - void VectorDiffusionIntegrator::AddMultPA(const Vector &x, Vector &y) const - { - if (DeviceCanUseCeed()) -@@ -572,220 +746,14 @@ void VectorDiffusionIntegrator::AddMultPA(const Vector &x, Vector &y) const - } - } - if (dim == 2 && sdim == 2) -- { return PAVectorDiffusionApply2D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D,sdim); } -- -- if (dim == 3 && sdim == 3) -- { return PAVectorDiffusionApply3D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D); } -- -- MFEM_ABORT("Unknown kernel."); -- } --} -- --template --static void PAVectorDiffusionDiagonal2D(const int NE, -- const Array &b, -- const Array &g, -- const Vector &d, -- Vector &y, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- MFEM_VERIFY(D1D <= MAX_D1D, ""); -- MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -- auto B = Reshape(b.Read(), Q1D, D1D); -- auto G = Reshape(g.Read(), Q1D, D1D); -- // note the different shape for D, this is a (symmetric) matrix so we only -- // store necessary entries -- auto D = Reshape(d.Read(), Q1D*Q1D, 3, NE); -- auto Y = Reshape(y.ReadWrite(), D1D, D1D, 2, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- // gradphi \cdot Q \gradphi has four terms -- double QD0[MQ1][MD1]; -- double QD1[MQ1][MD1]; -- double QD2[MQ1][MD1]; -- for (int qx = 0; qx < Q1D; ++qx) - { -- for (int dy = 0; dy < D1D; ++dy) -- { -- QD0[qx][dy] = 0.0; -- QD1[qx][dy] = 0.0; -- QD2[qx][dy] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const int q = qx + qy * Q1D; -- const double D0 = D(q,0,e); -- const double D1 = D(q,1,e); -- const double D2 = D(q,2,e); -- QD0[qx][dy] += B(qy, dy) * B(qy, dy) * D0; -- QD1[qx][dy] += B(qy, dy) * G(qy, dy) * D1; -- QD2[qx][dy] += G(qy, dy) * G(qy, dy) * D2; -- } -- } -+ return PAVectorDiffusionApply2D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D,sdim); - } -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- double temp = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- temp += G(qx, dx) * G(qx, dx) * QD0[qx][dy]; -- temp += G(qx, dx) * B(qx, dx) * QD1[qx][dy]; -- temp += B(qx, dx) * G(qx, dx) * QD1[qx][dy]; -- temp += B(qx, dx) * B(qx, dx) * QD2[qx][dy]; -- } -- Y(dx,dy,0,e) += temp; -- Y(dx,dy,1,e) += temp; -- } -- } -- }); --} -- --template --static void PAVectorDiffusionDiagonal3D(const int NE, -- const Array &b, -- const Array &g, -- const Vector &d, -- Vector &y, -- const int d1d = 0, -- const int q1d = 0) --{ -- constexpr int DIM = 3; -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- MFEM_VERIFY(D1D <= MD1, ""); -- MFEM_VERIFY(Q1D <= MQ1, ""); -- auto B = Reshape(b.Read(), Q1D, D1D); -- auto G = Reshape(g.Read(), Q1D, D1D); -- auto Q = Reshape(d.Read(), Q1D*Q1D*Q1D, 6, NE); -- auto Y = Reshape(y.ReadWrite(), D1D, D1D, D1D, 3, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int MD1 = T_D1D ? T_D1D : MAX_D1D; -- constexpr int MQ1 = T_Q1D ? T_Q1D : MAX_Q1D; -- double QQD[MQ1][MQ1][MD1]; -- double QDD[MQ1][MD1][MD1]; -- for (int i = 0; i < DIM; ++i) -+ if (dim == 3 && sdim == 3) - { -- for (int j = 0; j < DIM; ++j) -- { -- // first tensor contraction, along z direction -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- QQD[qx][qy][dz] = 0.0; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- const int q = qx + (qy + qz * Q1D) * Q1D; -- const int k = j >= i ? -- 3 - (3-i)*(2-i)/2 + j: -- 3 - (3-j)*(2-j)/2 + i; -- const double O = Q(q,k,e); -- const double Bz = B(qz,dz); -- const double Gz = G(qz,dz); -- const double L = i==2 ? Gz : Bz; -- const double R = j==2 ? Gz : Bz; -- QQD[qx][qy][dz] += L * O * R; -- } -- } -- } -- } -- // second tensor contraction, along y direction -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- QDD[qx][dy][dz] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- const double By = B(qy,dy); -- const double Gy = G(qy,dy); -- const double L = i==1 ? Gy : By; -- const double R = j==1 ? Gy : By; -- QDD[qx][dy][dz] += L * QQD[qx][qy][dz] * R; -- } -- } -- } -- } -- // third tensor contraction, along x direction -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- double temp = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- const double Bx = B(qx,dx); -- const double Gx = G(qx,dx); -- const double L = i==0 ? Gx : Bx; -- const double R = j==0 ? Gx : Bx; -- temp += L * QDD[qx][dy][dz] * R; -- } -- Y(dx, dy, dz, 0, e) += temp; -- Y(dx, dy, dz, 1, e) += temp; -- Y(dx, dy, dz, 2, e) += temp; -- } -- } -- } -- } -+ return PAVectorDiffusionApply3D(ne,B,G,Bt,Gt,D,x,y,D1D,Q1D); - } -- }); --} -- --static void PAVectorDiffusionAssembleDiagonal(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &G, -- const Vector &op, -- Vector &y) --{ -- if (dim == 2) -- { -- return PAVectorDiffusionDiagonal2D(NE, B, G, op, y, D1D, Q1D); -- } -- else if (dim == 3) -- { -- return PAVectorDiffusionDiagonal3D(NE, B, G, op, y, D1D, Q1D); -- } -- MFEM_ABORT("Dimension not implemented."); --} -- --void VectorDiffusionIntegrator::AssembleDiagonalPA(Vector &diag) --{ -- if (DeviceCanUseCeed()) -- { -- ceedOp->GetDiagonal(diag); -- } -- else -- { -- PAVectorDiffusionAssembleDiagonal(dim, -- dofs1D, -- quad1D, -- ne, -- maps->B, -- maps->G, -- pa_data, -- diag); -+ MFEM_ABORT("Unknown kernel."); - } - } - -diff --git a/fem/bilininteg_divergence.cpp b/fem/integ/bilininteg_vecdiv_pa.cpp -similarity index 94% -rename from fem/bilininteg_divergence.cpp -rename to fem/integ/bilininteg_vecdiv_pa.cpp -index c0102ff5e..63f7a3308 100644 ---- a/fem/bilininteg_divergence.cpp -+++ b/fem/integ/bilininteg_vecdiv_pa.cpp -@@ -9,17 +9,13 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" - - namespace mfem - { - --// PA Divergence Integrator -- - // PA Divergence Assemble 2D kernel - static void PADivergenceSetup2D(const int Q1D, - const int NE, -@@ -100,27 +96,6 @@ static void PADivergenceSetup3D(const int Q1D, - }); - } - --static void PADivergenceSetup(const int dim, -- const int TR_D1D, -- const int TE_D1D, -- const int Q1D, -- const int NE, -- const Array &W, -- const Vector &J, -- const double COEFF, -- Vector &op) --{ -- if (dim == 1) { MFEM_ABORT("dim==1 not supported in PADivergenceSetup"); } -- if (dim == 2) -- { -- PADivergenceSetup2D(Q1D, NE, W, J, COEFF, op); -- } -- if (dim == 3) -- { -- PADivergenceSetup3D(Q1D, NE, W, J, COEFF, op); -- } --} -- - void VectorDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - const FiniteElementSpace &test_fes) - { -@@ -147,6 +122,7 @@ void VectorDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - MFEM_ASSERT(quad1D == test_maps->nqpt, - "PA requires test and trial space to have same number of quadrature points!"); - pa_data.SetSize(nq * dimsToStore * ne, Device::GetMemoryType()); -+ - double coeff = 1.0; - if (Q) - { -@@ -154,8 +130,19 @@ void VectorDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, - MFEM_VERIFY(cQ != NULL, "only ConstantCoefficient is supported!"); - coeff = cQ->constant; - } -- PADivergenceSetup(dim, trial_dofs1D, test_dofs1D, quad1D, -- ne, ir->GetWeights(), geom->J, coeff, pa_data); -+ -+ if (dim == 1) -+ { -+ MFEM_ABORT("dim==1 not supported in VectorDivergenceIntegrator::AssemblePA"); -+ } -+ else if (dim == 2) -+ { -+ PADivergenceSetup2D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -+ } -+ else if (dim == 3) -+ { -+ PADivergenceSetup3D(quad1D, ne, ir->GetWeights(), geom->J, coeff, pa_data); -+ } - } - - // PA Divergence Apply 2D kernel -@@ -1025,45 +1012,37 @@ static void SmemPADivergenceApply3D(const int NE, - }); - } - --static void PADivergenceApply(const int dim, -- const int TR_D1D, -- const int TE_D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &G, -- const Array &Bt, -- const Vector &op, -- const Vector &x, -- Vector &y, -- bool transpose=false) -+void VectorDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const - { - if (dim == 2) - { -- return PADivergenceApply2D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D); -+ return PADivergenceApply2D(ne, trial_maps->B, trial_maps->G, test_maps->Bt, -+ pa_data, x, y, trial_dofs1D, test_dofs1D, quad1D); - } - if (dim == 3) - { -- return PADivergenceApply3D(NE,B,G,Bt,op,x,y,TR_D1D,TE_D1D,Q1D); -+ return PADivergenceApply3D(ne, trial_maps->B, trial_maps->G, test_maps->Bt, -+ pa_data, x, y, trial_dofs1D, test_dofs1D, quad1D); - } - MFEM_ABORT("Unknown kernel."); - } - --// PA Divergence Apply kernel --void VectorDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const --{ -- PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne, -- trial_maps->B, trial_maps->G, test_maps->Bt, pa_data, x, y, -- false); --} -- --// PA Divergence Apply kernel - void VectorDivergenceIntegrator::AddMultTransposePA(const Vector &x, - Vector &y) const - { -- PADivergenceApply(dim, trial_dofs1D, test_dofs1D, quad1D, ne, -- trial_maps->Bt, trial_maps->Gt, test_maps->B, pa_data, x, y, -- true); -+ if (dim == 2) -+ { -+ return PADivergenceApplyTranspose2D(ne, trial_maps->Bt, trial_maps->Gt, -+ test_maps->B, pa_data, x, y, -+ trial_dofs1D, test_dofs1D, quad1D); -+ } -+ if (dim == 3) -+ { -+ return PADivergenceApplyTranspose3D(ne, trial_maps->Bt, trial_maps->Gt, -+ test_maps->B, pa_data, x, y, -+ trial_dofs1D, test_dofs1D, quad1D); -+ } -+ MFEM_ABORT("Unknown kernel."); - } - - } // namespace mfem -diff --git a/fem/bilininteg_vecmass_mf.cpp b/fem/integ/bilininteg_vecmass_mf.cpp -similarity index 90% -rename from fem/bilininteg_vecmass_mf.cpp -rename to fem/integ/bilininteg_vecmass_mf.cpp -index 2e8d74491..cc2eb0174 100644 ---- a/fem/bilininteg_vecmass_mf.cpp -+++ b/fem/integ/bilininteg_vecmass_mf.cpp -@@ -9,19 +9,14 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "ceed/integrators/mass/mass.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/mass/mass.hpp" - - namespace mfem - { - --// MF Mass Integrator -- --// MF Mass Assemble kernel - void VectorMassIntegrator::AssembleMF(const FiniteElementSpace &fes) - { - // Assuming the same element type -diff --git a/fem/bilininteg_vecmass.cpp b/fem/integ/bilininteg_vecmass_pa.cpp -similarity index 88% -rename from fem/bilininteg_vecmass.cpp -rename to fem/integ/bilininteg_vecmass_pa.cpp -index 512cd42d5..b1c20b4c4 100644 ---- a/fem/bilininteg_vecmass.cpp -+++ b/fem/integ/bilininteg_vecmass_pa.cpp -@@ -9,19 +9,14 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "bilininteg.hpp" --#include "gridfunc.hpp" --#include "ceed/integrators/mass/mass.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../ceed/integrators/mass/mass.hpp" - - namespace mfem - { - --// PA Mass Integrator -- --// PA Mass Assemble kernel - void VectorMassIntegrator::AssemblePA(const FiniteElementSpace &fes) - { - // Assuming the same element type -@@ -83,7 +78,7 @@ void VectorMassIntegrator::AssemblePA(const FiniteElementSpace &fes) - const double J21 = J(q,0,1,e); - const double J22 = J(q,1,1,e); - const double detJ = (J11*J22)-(J21*J12); -- v(q,e) = w[q] * constant * detJ; -+ v(q,e) = w[q] * constant * detJ; - } - }); - } -@@ -111,8 +106,159 @@ void VectorMassIntegrator::AssemblePA(const FiniteElementSpace &fes) - } - } - --template -+template -+static void PAVectorMassAssembleDiagonal2D(const int NE, -+ const Array &B_, -+ const Array &Bt_, -+ const Vector &op_, -+ Vector &diag_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int VDIM = 2; -+ MFEM_VERIFY(D1D <= MAX_D1D, ""); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -+ auto B = Reshape(B_.Read(), Q1D, D1D); -+ auto op = Reshape(op_.Read(), Q1D, Q1D, NE); -+ auto y = Reshape(diag_.ReadWrite(), D1D, D1D, VDIM, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D; -+ -+ double temp[max_Q1D][max_D1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ temp[qx][dy] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ temp[qx][dy] += B(qy, dy) * B(qy, dy) * op(qx, qy, e); -+ } -+ } -+ } -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ double temp1 = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ temp1 += B(qx, dx) * B(qx, dx) * temp[qx][dy]; -+ } -+ y(dx, dy, 0, e) = temp1; -+ y(dx, dy, 1, e) = temp1; -+ } -+ } -+ }); -+} -+ -+template -+static void PAVectorMassAssembleDiagonal3D(const int NE, -+ const Array &B_, -+ const Array &Bt_, -+ const Vector &op_, -+ Vector &diag_, -+ const int d1d = 0, -+ const int q1d = 0) -+{ -+ const int D1D = T_D1D ? T_D1D : d1d; -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ constexpr int VDIM = 3; -+ MFEM_VERIFY(D1D <= MAX_D1D, ""); -+ MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -+ auto B = Reshape(B_.Read(), Q1D, D1D); -+ auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE); -+ auto y = Reshape(diag_.ReadWrite(), D1D, D1D, D1D, VDIM, NE); -+ mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -+ { -+ const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround -+ const int Q1D = T_Q1D ? T_Q1D : q1d; -+ // the following variables are evaluated at compile time -+ constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D; -+ constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D; -+ -+ double temp[max_Q1D][max_Q1D][max_D1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ temp[qx][qy][dz] = 0.0; -+ for (int qz = 0; qz < Q1D; ++qz) -+ { -+ temp[qx][qy][dz] += B(qz, dz) * B(qz, dz) * op(qx, qy, qz, e); -+ } -+ } -+ } -+ } -+ double temp2[max_Q1D][max_D1D][max_D1D]; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ temp2[qx][dy][dz] = 0.0; -+ for (int qy = 0; qy < Q1D; ++qy) -+ { -+ temp2[qx][dy][dz] += B(qy, dy) * B(qy, dy) * temp[qx][qy][dz]; -+ } -+ } -+ } -+ } -+ for (int dz = 0; dz < D1D; ++dz) -+ { -+ for (int dy = 0; dy < D1D; ++dy) -+ { -+ for (int dx = 0; dx < D1D; ++dx) -+ { -+ double temp3 = 0.0; -+ for (int qx = 0; qx < Q1D; ++qx) -+ { -+ temp3 += B(qx, dx) * B(qx, dx) -+ * temp2[qx][dy][dz]; -+ } -+ y(dx, dy, dz, 0, e) = temp3; -+ y(dx, dy, dz, 1, e) = temp3; -+ y(dx, dy, dz, 2, e) = temp3; -+ } -+ } -+ } -+ }); -+} -+ -+void VectorMassIntegrator::AssembleDiagonalPA(Vector &diag) -+{ -+ if (DeviceCanUseCeed()) -+ { -+ ceedOp->GetDiagonal(diag); -+ } -+ else -+ { -+ if (dim == 2) -+ { -+ return PAVectorMassAssembleDiagonal2D(ne, maps->B, maps->Bt, -+ pa_data, diag, -+ dofs1D, quad1D); -+ } -+ else if (dim == 3) -+ { -+ return PAVectorMassAssembleDiagonal3D(ne, maps->B, maps->Bt, -+ pa_data, diag, -+ dofs1D, quad1D); -+ } -+ MFEM_ABORT("Dimension not implemented."); -+ } -+} -+ -+template - static void PAVectorMassApply2D(const int NE, - const Array &B_, - const Array &Bt_, -@@ -208,8 +354,7 @@ static void PAVectorMassApply2D(const int NE, - }); - } - --template -+template - static void PAVectorMassApply3D(const int NE, - const Array &B_, - const Array &Bt_, -@@ -354,27 +499,6 @@ static void PAVectorMassApply3D(const int NE, - }); - } - --static void PAVectorMassApply(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &Bt, -- const Vector &op, -- const Vector &x, -- Vector &y) --{ -- if (dim == 2) -- { -- return PAVectorMassApply2D(NE, B, Bt, op, x, y, D1D, Q1D); -- } -- if (dim == 3) -- { -- return PAVectorMassApply3D(NE, B, Bt, op, x, y, D1D, Q1D); -- } -- MFEM_ABORT("Unknown kernel."); --} -- - void VectorMassIntegrator::AddMultPA(const Vector &x, Vector &y) const - { - if (DeviceCanUseCeed()) -@@ -383,174 +507,17 @@ void VectorMassIntegrator::AddMultPA(const Vector &x, Vector &y) const - } - else - { -- PAVectorMassApply(dim, dofs1D, quad1D, ne, maps->B, maps->Bt, pa_data, x, y); -- } --} -- --template --static void PAVectorMassAssembleDiagonal2D(const int NE, -- const Array &B_, -- const Array &Bt_, -- const Vector &op_, -- Vector &diag_, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int VDIM = 2; -- MFEM_VERIFY(D1D <= MAX_D1D, ""); -- MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -- auto B = Reshape(B_.Read(), Q1D, D1D); -- auto op = Reshape(op_.Read(), Q1D, Q1D, NE); -- auto y = Reshape(diag_.ReadWrite(), D1D, D1D, VDIM, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D; -- constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D; -- -- double temp[max_Q1D][max_D1D]; -- for (int qx = 0; qx < Q1D; ++qx) -+ if (dim == 2) - { -- for (int dy = 0; dy < D1D; ++dy) -- { -- temp[qx][dy] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- temp[qx][dy] += B(qy, dy) * B(qy, dy) * op(qx, qy, e); -- } -- } -+ return PAVectorMassApply2D(ne, maps->B, maps->Bt, pa_data, x, y, -+ dofs1D, quad1D); - } -- for (int dy = 0; dy < D1D; ++dy) -+ if (dim == 3) - { -- for (int dx = 0; dx < D1D; ++dx) -- { -- double temp1 = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- temp1 += B(qx, dx) * B(qx, dx) * temp[qx][dy]; -- } -- y(dx, dy, 0, e) = temp1; -- y(dx, dy, 1, e) = temp1; -- } -+ return PAVectorMassApply3D(ne, maps->B, maps->Bt, pa_data, x, y, -+ dofs1D, quad1D); - } -- }); --} -- --template --static void PAVectorMassAssembleDiagonal3D(const int NE, -- const Array &B_, -- const Array &Bt_, -- const Vector &op_, -- Vector &diag_, -- const int d1d = 0, -- const int q1d = 0) --{ -- const int D1D = T_D1D ? T_D1D : d1d; -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- constexpr int VDIM = 3; -- MFEM_VERIFY(D1D <= MAX_D1D, ""); -- MFEM_VERIFY(Q1D <= MAX_Q1D, ""); -- auto B = Reshape(B_.Read(), Q1D, D1D); -- auto op = Reshape(op_.Read(), Q1D, Q1D, Q1D, NE); -- auto y = Reshape(diag_.ReadWrite(), D1D, D1D, D1D, VDIM, NE); -- mfem::forall(NE, [=] MFEM_HOST_DEVICE (int e) -- { -- const int D1D = T_D1D ? T_D1D : d1d; // nvcc workaround -- const int Q1D = T_Q1D ? T_Q1D : q1d; -- // the following variables are evaluated at compile time -- constexpr int max_D1D = T_D1D ? T_D1D : MAX_D1D; -- constexpr int max_Q1D = T_Q1D ? T_Q1D : MAX_Q1D; -- -- double temp[max_Q1D][max_Q1D][max_D1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int qy = 0; qy < Q1D; ++qy) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- temp[qx][qy][dz] = 0.0; -- for (int qz = 0; qz < Q1D; ++qz) -- { -- temp[qx][qy][dz] += B(qz, dz) * B(qz, dz) * op(qx, qy, qz, e); -- } -- } -- } -- } -- double temp2[max_Q1D][max_D1D][max_D1D]; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- temp2[qx][dy][dz] = 0.0; -- for (int qy = 0; qy < Q1D; ++qy) -- { -- temp2[qx][dy][dz] += B(qy, dy) * B(qy, dy) * temp[qx][qy][dz]; -- } -- } -- } -- } -- for (int dz = 0; dz < D1D; ++dz) -- { -- for (int dy = 0; dy < D1D; ++dy) -- { -- for (int dx = 0; dx < D1D; ++dx) -- { -- double temp3 = 0.0; -- for (int qx = 0; qx < Q1D; ++qx) -- { -- temp3 += B(qx, dx) * B(qx, dx) -- * temp2[qx][dy][dz]; -- } -- y(dx, dy, dz, 0, e) = temp3; -- y(dx, dy, dz, 1, e) = temp3; -- y(dx, dy, dz, 2, e) = temp3; -- } -- } -- } -- }); --} -- --static void PAVectorMassAssembleDiagonal(const int dim, -- const int D1D, -- const int Q1D, -- const int NE, -- const Array &B, -- const Array &Bt, -- const Vector &op, -- Vector &y) --{ -- if (dim == 2) -- { -- return PAVectorMassAssembleDiagonal2D(NE, B, Bt, op, y, D1D, Q1D); -- } -- else if (dim == 3) -- { -- return PAVectorMassAssembleDiagonal3D(NE, B, Bt, op, y, D1D, Q1D); -- } -- MFEM_ABORT("Dimension not implemented."); --} -- --void VectorMassIntegrator::AssembleDiagonalPA(Vector &diag) --{ -- if (DeviceCanUseCeed()) -- { -- ceedOp->GetDiagonal(diag); -- } -- else -- { -- PAVectorMassAssembleDiagonal(dim, -- dofs1D, -- quad1D, -- ne, -- maps->B, -- maps->Bt, -- pa_data, -- diag); -+ MFEM_ABORT("Unknown kernel."); - } - } - -diff --git a/fem/integ/bilininteg_vectorfediv_pa.cpp b/fem/integ/bilininteg_vectorfediv_pa.cpp -new file mode 100644 -index 000000000..2915a253b ---- /dev/null -+++ b/fem/integ/bilininteg_vectorfediv_pa.cpp -@@ -0,0 +1,157 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../../general/forall.hpp" -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "bilininteg_hdiv_kernels.hpp" -+ -+using namespace std; -+ -+namespace mfem -+{ -+ -+void -+VectorFEDivergenceIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements, with a vector test space and -+ // scalar trial space. -+ Mesh *mesh = trial_fes.GetMesh(); -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ -+ const VectorTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const NodalTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only NodalTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir = IntRule ? IntRule : &MassIntegrator::GetRule( -+ *trial_el, *trial_el, -+ *mesh->GetElementTransformation(0)); -+ -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ const int nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ MFEM_VERIFY(trial_el->GetOrder() == test_el->GetOrder() + 1, ""); -+ -+ ne = trial_fes.GetNE(); -+ mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ L2mapsO = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ L2dofs1D = L2mapsO->ndof; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ if (dim == 2) -+ { -+ MFEM_VERIFY(nq == quad1D * quad1D, ""); -+ } -+ else -+ { -+ MFEM_VERIFY(nq == quad1D * quad1D * quad1D, ""); -+ } -+ -+ pa_data.SetSize(nq * ne, Device::GetMemoryType()); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(Q, qs, CoefficientStorage::FULL); -+ -+ if (test_el->GetMapType() == FiniteElement::INTEGRAL) -+ { -+ const GeometricFactors *geom = -+ mesh->GetGeometricFactors(*ir, GeometricFactors::DETERMINANTS); -+ coeff /= geom->detJ; -+ } -+ -+ if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 3) -+ { -+ internal::PAHdivL2Setup3D(quad1D, ne, ir->GetWeights(), coeff, pa_data); -+ } -+ else if (trial_el->GetDerivType() == mfem::FiniteElement::DIV && dim == 2) -+ { -+ internal::PAHdivL2Setup2D(quad1D, ne, ir->GetWeights(), coeff, pa_data); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void VectorFEDivergenceIntegrator::AssembleDiagonalPA_ADAt(const Vector &D, -+ Vector &diag) -+{ -+ if (dim == 3) -+ { -+ internal::PAHdivL2AssembleDiagonal_ADAt_3D(dofs1D, quad1D, L2dofs1D, ne, -+ L2mapsO->B, -+ mapsC->Gt, mapsO->Bt, pa_data, D, diag); -+ } -+ else if (dim == 2) -+ { -+ internal::PAHdivL2AssembleDiagonal_ADAt_2D(dofs1D, quad1D, L2dofs1D, ne, -+ L2mapsO->B, -+ mapsC->Gt, mapsO->Bt, pa_data, D, diag); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void VectorFEDivergenceIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ if (dim == 3) -+ { -+ internal::PAHdivL2Apply3D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G, -+ L2mapsO->Bt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ internal::PAHdivL2Apply2D(dofs1D, quad1D, L2dofs1D, ne, mapsO->B, mapsC->G, -+ L2mapsO->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+void VectorFEDivergenceIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const -+{ -+ if (dim == 3) -+ { -+ internal::PAHdivL2ApplyTranspose3D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B, -+ mapsC->Gt, mapsO->Bt, pa_data, x, y); -+ } -+ else if (dim == 2) -+ { -+ internal::PAHdivL2ApplyTranspose2D(dofs1D, quad1D, L2dofs1D, ne, L2mapsO->B, -+ mapsC->Gt, mapsO->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unsupported dimension!"); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/integ/bilininteg_vectorfemass_pa.cpp b/fem/integ/bilininteg_vectorfemass_pa.cpp -new file mode 100644 -index 000000000..c07e9f816 ---- /dev/null -+++ b/fem/integ/bilininteg_vectorfemass_pa.cpp -@@ -0,0 +1,346 @@ -+// Copyright (c) 2010-2023, Lawrence Livermore National Security, LLC. Produced -+// at the Lawrence Livermore National Laboratory. All Rights reserved. See files -+// LICENSE and NOTICE for details. LLNL-CODE-806117. -+// -+// This file is part of the MFEM library. For more information and source code -+// availability visit https://mfem.org. -+// -+// MFEM is free software; you can redistribute it and/or modify it under the -+// terms of the BSD-3 license. We welcome feedback and contributions, see file -+// CONTRIBUTING.md for details. -+ -+#include "../bilininteg.hpp" -+#include "../gridfunc.hpp" -+#include "../qfunction.hpp" -+#include "bilininteg_diffusion_kernels.hpp" -+#include "bilininteg_hcurl_kernels.hpp" -+#include "bilininteg_hdiv_kernels.hpp" -+#include "bilininteg_hcurlhdiv_kernels.hpp" -+ -+namespace mfem -+{ -+ -+void VectorFEMassIntegrator::AssemblePA(const FiniteElementSpace &trial_fes, -+ const FiniteElementSpace &test_fes) -+{ -+ // Assumes tensor-product elements -+ Mesh *mesh = trial_fes.GetMesh(); -+ -+ const FiniteElement *trial_fel = trial_fes.GetFE(0); -+ const VectorTensorFiniteElement *trial_el = -+ dynamic_cast(trial_fel); -+ MFEM_VERIFY(trial_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const FiniteElement *test_fel = test_fes.GetFE(0); -+ const VectorTensorFiniteElement *test_el = -+ dynamic_cast(test_fel); -+ MFEM_VERIFY(test_el != NULL, "Only VectorTensorFiniteElement is supported!"); -+ -+ const IntegrationRule *ir -+ = IntRule ? IntRule : &MassIntegrator::GetRule(*trial_el, *trial_el, -+ *mesh->GetElementTransformation(0)); -+ const int dims = trial_el->GetDim(); -+ MFEM_VERIFY(dims == 2 || dims == 3, ""); -+ -+ const int symmDims = (dims * (dims + 1)) / 2; // 1x1: 1, 2x2: 3, 3x3: 6 -+ nq = ir->GetNPoints(); -+ dim = mesh->Dimension(); -+ MFEM_VERIFY(dim == 2 || dim == 3, ""); -+ -+ ne = trial_fes.GetNE(); -+ MFEM_VERIFY(ne == test_fes.GetNE(), -+ "Different meshes for test and trial spaces"); -+ geom = mesh->GetGeometricFactors(*ir, GeometricFactors::JACOBIANS); -+ mapsC = &trial_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsO = &trial_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1D = mapsC->ndof; -+ quad1D = mapsC->nqpt; -+ -+ mapsCtest = &test_el->GetDofToQuad(*ir, DofToQuad::TENSOR); -+ mapsOtest = &test_el->GetDofToQuadOpen(*ir, DofToQuad::TENSOR); -+ dofs1Dtest = mapsCtest->ndof; -+ -+ MFEM_VERIFY(dofs1D == mapsO->ndof + 1 && quad1D == mapsO->nqpt, ""); -+ -+ trial_fetype = trial_el->GetDerivType(); -+ test_fetype = test_el->GetDerivType(); -+ -+ const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -+ const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -+ const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -+ const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -+ -+ QuadratureSpace qs(*mesh, *ir); -+ CoefficientVector coeff(qs, CoefficientStorage::SYMMETRIC); -+ if (Q) { coeff.Project(*Q); } -+ else if (MQ) { coeff.ProjectTranspose(*MQ); } -+ else if (DQ) { coeff.Project(*DQ); } -+ else { coeff.SetConstant(1.0); } -+ -+ const int coeff_dim = coeff.GetVDim(); -+ symmetric = (coeff_dim != dim*dim); -+ -+ if ((trial_curl && test_div) || (trial_div && test_curl)) -+ { -+ pa_data.SetSize((coeff_dim == 1 ? 1 : dim*dim) * nq * ne, -+ Device::GetMemoryType()); -+ } -+ else -+ { -+ pa_data.SetSize((symmetric ? symmDims : dims*dims) * nq * ne, -+ Device::GetMemoryType()); -+ } -+ if (trial_curl && test_curl && dim == 3) -+ { -+ internal::PADiffusionSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else if (trial_curl && test_curl && dim == 2) -+ { -+ internal::PADiffusionSetup2D<2>(quad1D, coeff_dim, ne, ir->GetWeights(), -+ geom->J, coeff, pa_data); -+ } -+ else if (trial_div && test_div && dim == 3) -+ { -+ internal::PAHdivMassSetup3D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else if (trial_div && test_div && dim == 2) -+ { -+ internal::PAHdivMassSetup2D(quad1D, coeff_dim, ne, ir->GetWeights(), geom->J, -+ coeff, pa_data); -+ } -+ else if (((trial_curl && test_div) || (trial_div && test_curl)) && -+ test_fel->GetOrder() == trial_fel->GetOrder()) -+ { -+ if (coeff_dim == 1) -+ { -+ internal::PAHcurlL2Setup3D(nq, coeff_dim, ne, ir->GetWeights(), coeff, pa_data); -+ } -+ else -+ { -+ const bool tr = (trial_div && test_curl); -+ if (dim == 3) -+ { -+ internal::PAHcurlHdivMassSetup3D(quad1D, coeff_dim, ne, tr, ir->GetWeights(), -+ geom->J, coeff, pa_data); -+ } -+ else -+ { -+ internal::PAHcurlHdivMassSetup2D(quad1D, coeff_dim, ne, tr, ir->GetWeights(), -+ geom->J, coeff, pa_data); -+ } -+ } -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+} -+ -+void VectorFEMassIntegrator::AssembleDiagonalPA(Vector& diag) -+{ -+ if (dim == 3) -+ { -+ if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -+ { -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<2,3>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ case 0x34: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<3,4>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ case 0x45: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<4,5>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ case 0x56: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D<5,6>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ default: -+ return internal::SmemPAHcurlMassAssembleDiagonal3D( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ } -+ else -+ { -+ internal::PAHcurlMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ } -+ else if (trial_fetype == mfem::FiniteElement::DIV && -+ test_fetype == trial_fetype) -+ { -+ internal::PAHdivMassAssembleDiagonal3D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+ } -+ else // 2D -+ { -+ if (trial_fetype == mfem::FiniteElement::CURL && test_fetype == trial_fetype) -+ { -+ internal::PAHcurlMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ else if (trial_fetype == mfem::FiniteElement::DIV && -+ test_fetype == trial_fetype) -+ { -+ internal::PAHdivMassAssembleDiagonal2D(dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, pa_data, diag); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+ } -+} -+ -+void VectorFEMassIntegrator::AddMultPA(const Vector &x, Vector &y) const -+{ -+ const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -+ const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -+ const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -+ const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -+ -+ if (dim == 3) -+ { -+ if (trial_curl && test_curl) -+ { -+ if (Device::Allows(Backend::DEVICE_MASK)) -+ { -+ const int ID = (dofs1D << 4) | quad1D; -+ switch (ID) -+ { -+ case 0x23: -+ return internal::SmemPAHcurlMassApply3D<2,3>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ case 0x34: -+ return internal::SmemPAHcurlMassApply3D<3,4>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ case 0x45: -+ return internal::SmemPAHcurlMassApply3D<4,5>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ case 0x56: -+ return internal::SmemPAHcurlMassApply3D<5,6>( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ default: -+ return internal::SmemPAHcurlMassApply3D( -+ dofs1D, quad1D, ne, symmetric, -+ mapsO->B, mapsC->B, mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ } -+ } -+ else -+ { -+ internal::PAHcurlMassApply3D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ } -+ else if (trial_div && test_div) -+ { -+ internal::PAHdivMassApply(3, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else if (trial_curl && test_div) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ true, false, mapsO->B, mapsC->B, mapsOtest->Bt, -+ mapsCtest->Bt, pa_data, x, y); -+ } -+ else if (trial_div && test_curl) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ false, false, mapsO->B, mapsC->B, mapsOtest->Bt, -+ mapsCtest->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+ } -+ else // 2D -+ { -+ if (trial_curl && test_curl) -+ { -+ internal::PAHcurlMassApply2D(dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, mapsC->Bt, pa_data, x, y); -+ } -+ else if (trial_div && test_div) -+ { -+ internal::PAHdivMassApply(2, dofs1D, quad1D, ne, symmetric, mapsO->B, mapsC->B, -+ mapsO->Bt, -+ mapsC->Bt, pa_data, x, y); -+ } -+ else if ((trial_curl && test_div) || (trial_div && test_curl)) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ trial_curl, false, mapsO->B, mapsC->B, -+ mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -+ } -+ else -+ { -+ MFEM_ABORT("Unknown kernel."); -+ } -+ } -+} -+ -+void VectorFEMassIntegrator::AddMultTransposePA(const Vector &x, -+ Vector &y) const -+{ -+ const bool trial_curl = (trial_fetype == mfem::FiniteElement::CURL); -+ const bool trial_div = (trial_fetype == mfem::FiniteElement::DIV); -+ const bool test_curl = (test_fetype == mfem::FiniteElement::CURL); -+ const bool test_div = (test_fetype == mfem::FiniteElement::DIV); -+ -+ bool symmetricSpaces = true; -+ if (dim == 3 && ((trial_div && test_curl) || (trial_curl && test_div))) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply3D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ trial_div, true, mapsO->B, mapsC->B, -+ mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -+ symmetricSpaces = false; -+ } -+ else if (dim == 2 && ((trial_curl && test_div) || (trial_div && test_curl))) -+ { -+ const bool scalarCoeff = !(DQ || MQ); -+ internal::PAHcurlHdivMassApply2D(dofs1D, dofs1Dtest, quad1D, ne, scalarCoeff, -+ !trial_curl, true, mapsO->B, mapsC->B, -+ mapsOtest->Bt, mapsCtest->Bt, pa_data, x, y); -+ symmetricSpaces = false; -+ } -+ if (symmetricSpaces) -+ { -+ if (MQ && dynamic_cast(MQ) == NULL) -+ { -+ MFEM_ABORT("VectorFEMassIntegrator transpose not implemented for asymmetric MatrixCoefficient"); -+ } -+ AddMultPA(x, y); -+ } -+} -+ -+} // namespace mfem -diff --git a/fem/lininteg_boundary.cpp b/fem/integ/lininteg_boundary.cpp -similarity index 89% -rename from fem/lininteg_boundary.cpp -rename to fem/integ/lininteg_boundary.cpp -index 68e54dd1b..9b785335c 100644 ---- a/fem/lininteg_boundary.cpp -+++ b/fem/integ/lininteg_boundary.cpp -@@ -9,18 +9,19 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "fem.hpp" --#include "../fem/kernels.hpp" --#include "../general/forall.hpp" -+#include "../../general/forall.hpp" -+#include "../../fem/kernels.hpp" -+#include "../fem.hpp" - - namespace mfem - { - --template static --void BLFEvalAssemble2D(const int vdim, const int nbe, const int d, const int q, -- const bool normals, const int *markers, const double *b, -- const double *detj, const double *n, const double *weights, -- const Vector &coeff, double *y) -+template -+static void BLFEvalAssemble2D(const int vdim, const int nbe, const int d, -+ const int q, -+ const bool normals, const int *markers, const double *b, -+ const double *detj, const double *n, const double *weights, -+ const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, nbe); -@@ -69,11 +70,12 @@ void BLFEvalAssemble2D(const int vdim, const int nbe, const int d, const int q, - }); - } - --template static --void BLFEvalAssemble3D(const int vdim, const int nbe, const int d, const int q, -- const bool normals, const int *markers, const double *b, -- const double *detj, const double *n, const double *weights, -- const Vector &coeff, double *y) -+template -+static void BLFEvalAssemble3D(const int vdim, const int nbe, const int d, -+ const int q, -+ const bool normals, const int *markers, const double *b, -+ const double *detj, const double *n, const double *weights, -+ const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, nbe); -diff --git a/fem/lininteg_boundary_flux.cpp b/fem/integ/lininteg_boundary_flux.cpp -similarity index 89% -rename from fem/lininteg_boundary_flux.cpp -rename to fem/integ/lininteg_boundary_flux.cpp -index 411ba0314..b9f047817 100644 ---- a/fem/lininteg_boundary_flux.cpp -+++ b/fem/integ/lininteg_boundary_flux.cpp -@@ -9,17 +9,17 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "fem.hpp" --#include "../fem/kernels.hpp" --#include "../general/forall.hpp" -+#include "../../general/forall.hpp" -+#include "../../fem/kernels.hpp" -+#include "../fem.hpp" - - namespace mfem - { - --template static --void BFLFEvalAssemble2D(const int nbe, const int d, const int q, -- const int *markers, const double *b, -- const double *weights, const Vector &coeff, double *y) -+template -+static void BFLFEvalAssemble2D(const int nbe, const int d, const int q, -+ const int *markers, const double *b, -+ const double *weights, const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, nbe); -@@ -50,10 +50,10 @@ void BFLFEvalAssemble2D(const int nbe, const int d, const int q, - }); - } - --template static --void BFLFEvalAssemble3D(const int nbe, const int d, const int q, -- const int *markers, const double *b, -- const double *weights, const Vector &coeff, double *y) -+template -+static void BFLFEvalAssemble3D(const int nbe, const int d, const int q, -+ const int *markers, const double *b, -+ const double *weights, const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, nbe); -diff --git a/fem/lininteg_domain.cpp b/fem/integ/lininteg_domain.cpp -similarity index 91% -rename from fem/lininteg_domain.cpp -rename to fem/integ/lininteg_domain.cpp -index 6fa0ec82b..6ff7b090d 100644 ---- a/fem/lininteg_domain.cpp -+++ b/fem/integ/lininteg_domain.cpp -@@ -9,18 +9,19 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "fem.hpp" --#include "../fem/kernels.hpp" --#include "../general/forall.hpp" -+#include "../../general/forall.hpp" -+#include "../../fem/kernels.hpp" -+#include "../fem.hpp" - - namespace mfem - { - --template static --void DLFEvalAssemble2D(const int vdim, const int ne, const int d, const int q, -- const int map_type, const int *markers, const double *b, -- const double *detj, const double *weights, -- const Vector &coeff, double *y) -+template -+static void DLFEvalAssemble2D(const int vdim, const int ne, const int d, -+ const int q, -+ const int map_type, const int *markers, const double *b, -+ const double *detj, const double *weights, -+ const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, ne); -@@ -85,11 +86,12 @@ void DLFEvalAssemble2D(const int vdim, const int ne, const int d, const int q, - }); - } - --template static --void DLFEvalAssemble3D(const int vdim, const int ne, const int d, const int q, -- const int map_type, const int *markers, const double *b, -- const double *detj, const double *weights, -- const Vector &coeff, double *y) -+template -+static void DLFEvalAssemble3D(const int vdim, const int ne, const int d, -+ const int q, -+ const int map_type, const int *markers, const double *b, -+ const double *detj, const double *weights, -+ const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, ne); -diff --git a/fem/lininteg_domain_grad.cpp b/fem/integ/lininteg_domain_grad.cpp -similarity index 93% -rename from fem/lininteg_domain_grad.cpp -rename to fem/integ/lininteg_domain_grad.cpp -index 16131e11c..5cca01a1d 100644 ---- a/fem/lininteg_domain_grad.cpp -+++ b/fem/integ/lininteg_domain_grad.cpp -@@ -9,18 +9,19 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "fem.hpp" --#include "../fem/kernels.hpp" --#include "../general/forall.hpp" -+#include "../../general/forall.hpp" -+#include "../../fem/kernels.hpp" -+#include "../fem.hpp" - - namespace mfem - { - --template static --void DLFGradAssemble2D(const int vdim, const int ne, const int d, const int q, -- const int *markers, const double *b, const double *g, -- const double *jacobians, -- const double *weights, const Vector &coeff, double *y) -+template -+static void DLFGradAssemble2D(const int vdim, const int ne, const int d, -+ const int q, -+ const int *markers, const double *b, const double *g, -+ const double *jacobians, -+ const double *weights, const Vector &coeff, double *y) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, ne); -@@ -108,12 +109,13 @@ void DLFGradAssemble2D(const int vdim, const int ne, const int d, const int q, - }); - } - --template static --void DLFGradAssemble3D(const int vdim, const int ne, const int d, const int q, -- const int *markers, const double *b, const double *g, -- const double *jacobians, -- const double *weights, const Vector &coeff, -- double *output) -+template -+static void DLFGradAssemble3D(const int vdim, const int ne, const int d, -+ const int q, -+ const int *markers, const double *b, const double *g, -+ const double *jacobians, -+ const double *weights, const Vector &coeff, -+ double *output) - { - const auto F = coeff.Read(); - const auto M = Reshape(markers, ne); -diff --git a/fem/lininteg_vectorfe_domain.cpp b/fem/integ/lininteg_domain_vectorfe.cpp -similarity index 99% -rename from fem/lininteg_vectorfe_domain.cpp -rename to fem/integ/lininteg_domain_vectorfe.cpp -index 55a3dda7e..16d9e866c 100644 ---- a/fem/lininteg_vectorfe_domain.cpp -+++ b/fem/integ/lininteg_domain_vectorfe.cpp -@@ -9,9 +9,9 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "fem.hpp" --#include "../fem/kernels.hpp" --#include "../general/forall.hpp" -+#include "../../general/forall.hpp" -+#include "../../fem/kernels.hpp" -+#include "../fem.hpp" - - namespace mfem - { -diff --git a/fem/nonlininteg_vectorconvection_mf.cpp b/fem/integ/nonlininteg_vecconvection_mf.cpp -similarity index 92% -rename from fem/nonlininteg_vectorconvection_mf.cpp -rename to fem/integ/nonlininteg_vecconvection_mf.cpp -index c29f4e920..4005d6836 100644 ---- a/fem/nonlininteg_vectorconvection_mf.cpp -+++ b/fem/integ/nonlininteg_vecconvection_mf.cpp -@@ -9,14 +9,13 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "nonlininteg.hpp" --#include "ceed/integrators/nlconvection/nlconvection.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../nonlininteg.hpp" -+#include "../ceed/integrators/nlconvection/nlconvection.hpp" - - namespace mfem - { -+ - void VectorConvectionNLFIntegrator::AssembleMF(const FiniteElementSpace &fes) - { - MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES, -diff --git a/fem/nonlininteg_vectorconvection.cpp b/fem/integ/nonlininteg_vecconvection_pa.cpp -similarity index 99% -rename from fem/nonlininteg_vectorconvection.cpp -rename to fem/integ/nonlininteg_vecconvection_pa.cpp -index efa7a10a3..7bed31800 100644 ---- a/fem/nonlininteg_vectorconvection.cpp -+++ b/fem/integ/nonlininteg_vecconvection_pa.cpp -@@ -9,14 +9,13 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#include "../general/forall.hpp" --#include "nonlininteg.hpp" --#include "ceed/integrators/nlconvection/nlconvection.hpp" -- --using namespace std; -+#include "../../general/forall.hpp" -+#include "../nonlininteg.hpp" -+#include "../ceed/integrators/nlconvection/nlconvection.hpp" - - namespace mfem - { -+ - void VectorConvectionNLFIntegrator::AssemblePA(const FiniteElementSpace &fes) - { - MFEM_ASSERT(fes.GetOrdering() == Ordering::byNODES, -diff --git a/fem/intrules.cpp b/fem/intrules.cpp -index 67ab66320..1494043d6 100644 ---- a/fem/intrules.cpp -+++ b/fem/intrules.cpp -@@ -946,22 +946,14 @@ const IntegrationRule &IntegrationRules::Get(int GeomType, int Order) - - if (!HaveIntRule(*ir_array, Order)) - { --#ifdef MFEM_USE_LEGACY_OPENMP -- #pragma omp critical --#endif -+ IntegrationRule *ir = GenerateIntegrationRule(GeomType, Order); -+ int RealOrder = Order; -+ while (RealOrder+1 < ir_array->Size() && -+ (*ir_array)[RealOrder+1] == ir) - { -- if (!HaveIntRule(*ir_array, Order)) -- { -- IntegrationRule *ir = GenerateIntegrationRule(GeomType, Order); -- int RealOrder = Order; -- while (RealOrder+1 < ir_array->Size() && -- (*ir_array)[RealOrder+1] == ir) -- { -- RealOrder++; -- } -- ir->SetOrder(RealOrder); -- } -+ RealOrder++; - } -+ ir->SetOrder(RealOrder); - } - - return *(*ir_array)[Order]; -diff --git a/fem/linearform.hpp b/fem/linearform.hpp -index c24118426..29f816db1 100644 ---- a/fem/linearform.hpp -+++ b/fem/linearform.hpp -@@ -121,10 +121,6 @@ public: - LinearForm &operator=(const LinearForm &rhs) - { return operator=((const Vector &)rhs); } - -- /// (DEPRECATED) Return the FE space associated with the LinearForm. -- /** @deprecated Use FESpace() instead. */ -- MFEM_DEPRECATED FiniteElementSpace *GetFES() { return fes; } -- - /// Read+write access to the associated FiniteElementSpace. - FiniteElementSpace *FESpace() { return fes; } - /// Read-only access to the associated FiniteElementSpace. -diff --git a/fem/linearform_ext.cpp b/fem/linearform_ext.cpp -index f70a7b6e3..3475574fd 100644 ---- a/fem/linearform_ext.cpp -+++ b/fem/linearform_ext.cpp -@@ -164,10 +164,8 @@ void LinearFormExtension::Update() - } - } - -- bdr_restrict_lex = -- dynamic_cast( -- fes.GetFaceRestriction(ordering, FaceType::Boundary, -- L2FaceValues::SingleValued)); -+ bdr_restrict_lex = fes.GetFaceRestriction(ordering, FaceType::Boundary, -+ L2FaceValues::SingleValued); - MFEM_VERIFY(bdr_restrict_lex, "Face restriction not available"); - bdr_b.SetSize(bdr_restrict_lex->Height(), Device::GetMemoryType()); - bdr_b.UseDevice(true); -diff --git a/fem/linearform_ext.hpp b/fem/linearform_ext.hpp -index 2cc861cea..46acf637d 100644 ---- a/fem/linearform_ext.hpp -+++ b/fem/linearform_ext.hpp -@@ -34,7 +34,7 @@ class LinearFormExtension - LinearForm *lf; - - /// Operator that converts FiniteElementSpace L-vectors to E-vectors. -- const ElementRestrictionOperator *elem_restrict_lex; // Not owned -+ const ElementRestriction *elem_restrict_lex; // Not owned - - /// Operator that converts L-vectors to boundary E-vectors. - const FaceRestriction *bdr_restrict_lex; // Not owned -diff --git a/fem/lininteg.cpp b/fem/lininteg.cpp -index 52abc3cad..c9b6b4699 100644 ---- a/fem/lininteg.cpp -+++ b/fem/lininteg.cpp -@@ -9,7 +9,6 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - -- - #include "fem.hpp" - #include - -diff --git a/fem/lor/lor_ads.cpp b/fem/lor/lor_ads.cpp -index 3ba4816ee..f962f8469 100644 ---- a/fem/lor/lor_ads.cpp -+++ b/fem/lor/lor_ads.cpp -@@ -103,9 +103,9 @@ void BatchedLOR_ADS::FormCurlMatrix() - Form3DFaceToEdge(face2edge); - - ElementDofOrdering ordering = ElementDofOrdering::LEXICOGRAPHIC; -- const auto *R_f = dynamic_cast( -+ const auto *R_f = dynamic_cast( - face_fes.GetElementRestriction(ordering)); -- const auto *R_e = dynamic_cast( -+ const auto *R_e = dynamic_cast( - edge_fes.GetElementRestriction(ordering)); - MFEM_VERIFY(R_f != NULL && R_e != NULL, ""); - -diff --git a/fem/lor/lor_ams.cpp b/fem/lor/lor_ams.cpp -index 1c37f165b..0d7ac222f 100644 ---- a/fem/lor/lor_ams.cpp -+++ b/fem/lor/lor_ams.cpp -@@ -163,9 +163,9 @@ void BatchedLOR_AMS::FormGradientMatrix() - else { Form3DEdgeToVertex(edge2vertex); } - - ElementDofOrdering ordering = ElementDofOrdering::LEXICOGRAPHIC; -- const auto *R_v = dynamic_cast( -+ const auto *R_v = dynamic_cast( - vert_fes.GetElementRestriction(ordering)); -- const auto *R_e = dynamic_cast( -+ const auto *R_e = dynamic_cast( - edge_fes.GetElementRestriction(ordering)); - MFEM_VERIFY(R_v != NULL && R_e != NULL, ""); - -@@ -268,7 +268,7 @@ void BatchedLOR_AMS::FormCoordinateVectors(const Vector &X_vert) - // Create the H1 vertex space and get the element restriction - ElementDofOrdering ordering = ElementDofOrdering::LEXICOGRAPHIC; - const Operator *op = vert_fes.GetElementRestriction(ordering); -- const auto *el_restr = dynamic_cast(op); -+ const auto *el_restr = dynamic_cast(op); - MFEM_VERIFY(el_restr != NULL, ""); - const SparseMatrix *R = vert_fes.GetRestrictionMatrix(); - -diff --git a/fem/lor/lor_batched.cpp b/fem/lor/lor_batched.cpp -index fe0494880..0050c3de5 100644 ---- a/fem/lor/lor_batched.cpp -+++ b/fem/lor/lor_batched.cpp -@@ -145,8 +145,8 @@ int BatchedLORAssembly::FillI(SparseMatrix &A) const - - const ElementDofOrdering ordering = ElementDofOrdering::LEXICOGRAPHIC; - const Operator *op = fes_ho.GetElementRestriction(ordering); -- const ElementRestriction *el_restr = -- dynamic_cast(op); -+ const auto *el_restr = -+ dynamic_cast(op); - MFEM_VERIFY(el_restr != nullptr, "Bad element restriction"); - - const Array &el_dof_lex_ = el_restr->GatherMap(); -@@ -235,8 +235,8 @@ void BatchedLORAssembly::FillJAndData(SparseMatrix &A) const - - const ElementDofOrdering ordering = ElementDofOrdering::LEXICOGRAPHIC; - const Operator *op = fes_ho.GetElementRestriction(ordering); -- const ElementRestriction *el_restr = -- dynamic_cast(op); -+ const auto *el_restr = -+ dynamic_cast(op); - MFEM_VERIFY(el_restr != nullptr, "Bad element restriction"); - - const Array &el_dof_lex_ = el_restr->GatherMap(); -diff --git a/fem/nonlinearform.hpp b/fem/nonlinearform.hpp -index d15d09e04..60cae2055 100644 ---- a/fem/nonlinearform.hpp -+++ b/fem/nonlinearform.hpp -@@ -330,7 +330,6 @@ public: - virtual ~BlockNonlinearForm(); - }; - -- - } - - #endif -diff --git a/fem/nonlininteg.cpp b/fem/nonlininteg.cpp -index e1558fda4..5ee1febea 100644 ---- a/fem/nonlininteg.cpp -+++ b/fem/nonlininteg.cpp -@@ -15,68 +15,69 @@ - namespace mfem - { - --double NonlinearFormIntegrator::GetLocalStateEnergyPA(const Vector &x) const --{ -- mfem_error ("NonlinearFormIntegrator::GetLocalStateEnergyPA(...)\n" -- " is not implemented for this class."); -- return 0.0; --} -- - void NonlinearFormIntegrator::AssemblePA(const FiniteElementSpace&) - { -- mfem_error ("NonlinearFormIntegrator::AssemblePA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AssemblePA(...)\n" -+ " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssemblePA(const FiniteElementSpace &, -- const FiniteElementSpace &) -+void NonlinearFormIntegrator::AssembleGradPA(const Vector &x, -+ const FiniteElementSpace &fes) - { -- mfem_error ("NonlinearFormIntegrator::AssemblePA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleGradPA(...)\n" -+ " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleGradPA(const Vector &x, -- const FiniteElementSpace &fes) -+void NonlinearFormIntegrator::AssembleGradDiagonalPA(Vector &diag) const - { -- mfem_error ("NonlinearFormIntegrator::AssembleGradPA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleGradDiagonalPA(...)\n" -+ " is not implemented for this class."); - } - - void NonlinearFormIntegrator::AddMultPA(const Vector &, Vector &) const - { -- mfem_error ("NonlinearFormIntegrator::AddMultPA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AddMultPA(...)\n" -+ " is not implemented for this class."); - } - - void NonlinearFormIntegrator::AddMultGradPA(const Vector&, Vector&) const - { -- mfem_error ("NonlinearFormIntegrator::AddMultGradPA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AddMultGradPA(...)\n" -+ " is not implemented for this class."); - } - --void NonlinearFormIntegrator::AssembleGradDiagonalPA(Vector &diag) const -+double NonlinearFormIntegrator::GetLocalStateEnergyPA(const Vector &x) const - { -- mfem_error ("NonlinearFormIntegrator::AssembleGradDiagonalPA(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::GetLocalStateEnergyPA(...)\n" -+ " is not implemented for this class."); -+ return 0.0; - } - - void NonlinearFormIntegrator::AssembleMF(const FiniteElementSpace &fes) - { -- mfem_error ("NonlinearFormIntegrator::AssembleMF(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleMF(...)\n" -+ " is not implemented for this class."); - } - - void NonlinearFormIntegrator::AddMultMF(const Vector &, Vector &) const - { -- mfem_error ("NonlinearFormIntegrator::AddMultMF(...)\n" -- " is not implemented for this class."); -+ MFEM_ABORT("NonlinearFormIntegrator::AddMultMF(...)\n" -+ " is not implemented for this class."); -+} -+ -+double NonlinearFormIntegrator::GetElementEnergy( -+ const FiniteElement &el, ElementTransformation &Tr, const Vector &elfun) -+{ -+ MFEM_ABORT("NonlinearFormIntegrator::GetElementEnergy" -+ " is not overloaded!"); -+ return 0.0; - } - - void NonlinearFormIntegrator::AssembleElementVector( - const FiniteElement &el, ElementTransformation &Tr, - const Vector &elfun, Vector &elvect) - { -- mfem_error("NonlinearFormIntegrator::AssembleElementVector" -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleElementVector" - " is not overloaded!"); - } - -@@ -84,7 +85,7 @@ void NonlinearFormIntegrator::AssembleFaceVector( - const FiniteElement &el1, const FiniteElement &el2, - FaceElementTransformations &Tr, const Vector &elfun, Vector &elvect) - { -- mfem_error("NonlinearFormIntegrator::AssembleFaceVector" -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleFaceVector" - " is not overloaded!"); - } - -@@ -92,7 +93,7 @@ void NonlinearFormIntegrator::AssembleElementGrad( - const FiniteElement &el, ElementTransformation &Tr, const Vector &elfun, - DenseMatrix &elmat) - { -- mfem_error("NonlinearFormIntegrator::AssembleElementGrad" -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleElementGrad" - " is not overloaded!"); - } - -@@ -101,18 +102,10 @@ void NonlinearFormIntegrator::AssembleFaceGrad( - FaceElementTransformations &Tr, const Vector &elfun, - DenseMatrix &elmat) - { -- mfem_error("NonlinearFormIntegrator::AssembleFaceGrad" -+ MFEM_ABORT("NonlinearFormIntegrator::AssembleFaceGrad" - " is not overloaded!"); - } - --double NonlinearFormIntegrator::GetElementEnergy( -- const FiniteElement &el, ElementTransformation &Tr, const Vector &elfun) --{ -- mfem_error("NonlinearFormIntegrator::GetElementEnergy" -- " is not overloaded!"); -- return 0.0; --} -- - - void BlockNonlinearFormIntegrator::AssembleElementVector( - const Array &el, -@@ -120,7 +113,7 @@ void BlockNonlinearFormIntegrator::AssembleElementVector( - const Array &elfun, - const Array &elvec) - { -- mfem_error("BlockNonlinearFormIntegrator::AssembleElementVector" -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleElementVector" - " is not overloaded!"); - } - -@@ -131,7 +124,7 @@ void BlockNonlinearFormIntegrator::AssembleFaceVector( - const Array &elfun, - const Array &elvect) - { -- mfem_error("BlockNonlinearFormIntegrator::AssembleFaceVector" -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleFaceVector" - " is not overloaded!"); - } - -@@ -141,7 +134,7 @@ void BlockNonlinearFormIntegrator::AssembleElementGrad( - const Array &elfun, - const Array2D &elmats) - { -- mfem_error("BlockNonlinearFormIntegrator::AssembleElementGrad" -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleElementGrad" - " is not overloaded!"); - } - -@@ -152,7 +145,7 @@ void BlockNonlinearFormIntegrator::AssembleFaceGrad( - const Array &elfun, - const Array2D &elmats) - { -- mfem_error("BlockNonlinearFormIntegrator::AssembleFaceGrad" -+ MFEM_ABORT("BlockNonlinearFormIntegrator::AssembleFaceGrad" - " is not overloaded!"); - } - -@@ -161,7 +154,7 @@ double BlockNonlinearFormIntegrator::GetElementEnergy( - ElementTransformation &Tr, - const Array&elfun) - { -- mfem_error("BlockNonlinearFormIntegrator::GetElementEnergy" -+ MFEM_ABORT("BlockNonlinearFormIntegrator::GetElementEnergy" - " is not overloaded!"); - return 0.0; - } -@@ -497,6 +490,7 @@ void HyperelasticNLFIntegrator::AssembleElementGrad(const FiniteElement &el, - } - } - -+ - double IncompressibleNeoHookeanIntegrator::GetElementEnergy( - const Array&el, - ElementTransformation &Tr, -@@ -504,7 +498,7 @@ double IncompressibleNeoHookeanIntegrator::GetElementEnergy( - { - if (el.Size() != 2) - { -- mfem_error("IncompressibleNeoHookeanIntegrator::GetElementEnergy" -+ MFEM_ABORT("IncompressibleNeoHookeanIntegrator::GetElementEnergy" - " has incorrect block finite element space size!"); - } - -@@ -549,7 +543,7 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementVector( - { - if (el.Size() != 2) - { -- mfem_error("IncompressibleNeoHookeanIntegrator::AssembleElementVector" -+ MFEM_ABORT("IncompressibleNeoHookeanIntegrator::AssembleElementVector" - " has finite element space of incorrect block number"); - } - -@@ -561,11 +555,10 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementVector( - - if (dim != spaceDim) - { -- mfem_error("IncompressibleNeoHookeanIntegrator::AssembleElementVector" -+ MFEM_ABORT("IncompressibleNeoHookeanIntegrator::AssembleElementVector" - " is not defined on manifold meshes"); - } - -- - DSh_u.SetSize(dof_u, dim); - DS_u.SetSize(dof_u, dim); - J0i.SetSize(dim); -@@ -731,6 +724,7 @@ void IncompressibleNeoHookeanIntegrator::AssembleElementGrad( - - } - -+ - const IntegrationRule& - VectorConvectionNLFIntegrator::GetRule(const FiniteElement &fe, - ElementTransformation &T) -diff --git a/fem/nonlininteg.hpp b/fem/nonlininteg.hpp -index 54f342b85..38b133244 100644 ---- a/fem/nonlininteg.hpp -+++ b/fem/nonlininteg.hpp -@@ -29,13 +29,12 @@ class NonlinearFormIntegrator - protected: - const IntegrationRule *IntRule; - -- // CEED extension -- ceed::Operator* ceedOp; -+ ceed::Operator *ceedOp; // libCEED extension - - MemoryType pa_mt = MemoryType::DEFAULT; - - NonlinearFormIntegrator(const IntegrationRule *ir = NULL) -- : IntRule(ir), ceedOp(NULL) { } -+ : IntRule(ir), ceedOp(NULL) {} - - public: - /** @brief Prescribe a fixed IntegrationRule to use (when @a ir != NULL) or -@@ -52,46 +51,11 @@ public: - /// Get the integration rule of the integrator (possibly NULL). - const IntegrationRule *GetIntegrationRule() const { return IntRule; } - -- /// Perform the local action of the NonlinearFormIntegrator -- virtual void AssembleElementVector(const FiniteElement &el, -- ElementTransformation &Tr, -- const Vector &elfun, Vector &elvect); -- -- /// @brief Perform the local action of the NonlinearFormIntegrator resulting -- /// from a face integral term. -- virtual void AssembleFaceVector(const FiniteElement &el1, -- const FiniteElement &el2, -- FaceElementTransformations &Tr, -- const Vector &elfun, Vector &elvect); -- -- /// Assemble the local gradient matrix -- virtual void AssembleElementGrad(const FiniteElement &el, -- ElementTransformation &Tr, -- const Vector &elfun, DenseMatrix &elmat); -- -- /// @brief Assemble the local action of the gradient of the -- /// NonlinearFormIntegrator resulting from a face integral term. -- virtual void AssembleFaceGrad(const FiniteElement &el1, -- const FiniteElement &el2, -- FaceElementTransformations &Tr, -- const Vector &elfun, DenseMatrix &elmat); -- -- /// Compute the local energy -- virtual double GetElementEnergy(const FiniteElement &el, -- ElementTransformation &Tr, -- const Vector &elfun); -- - /// Method defining partial assembly. - /** The result of the partial assembly is stored internally so that it can be - used later in the methods AddMultPA(). */ - virtual void AssemblePA(const FiniteElementSpace &fes); - -- /** The result of the partial assembly is stored internally so that it can be -- used later in the methods AddMultPA(). -- Used with BilinearFormIntegrators that have different spaces. */ -- virtual void AssemblePA(const FiniteElementSpace &trial_fes, -- const FiniteElementSpace &test_fes); -- - /** @brief Prepare the integrator for partial assembly (PA) gradient - evaluations on the given FE space @a fes at the state @a x. */ - /** The result of the partial assembly is stored internally so that it can be -@@ -99,10 +63,12 @@ public: - The state Vector @a x is an E-vector. */ - virtual void AssembleGradPA(const Vector &x, const FiniteElementSpace &fes); - -- /// Compute the local (to the MPI rank) energy with partial assembly. -- /** Here the state @a x is an E-vector. This method can be called only after -- the method AssemblePA() has been called. */ -- virtual double GetLocalStateEnergyPA(const Vector &x) const; -+ /// Method for computing the diagonal of the gradient with partial assembly. -+ /** The result Vector @a diag is an E-Vector. This method can be called only -+ after the method AssembleGradPA() has been called. -+ -+ @param[in,out] diag The result Vector: @f$ diag += diag(G) @f$. */ -+ virtual void AssembleGradDiagonalPA(Vector &diag) const; - - /// Method for partially assembled action. - /** Perform the action of integrator on the input @a x and add the result to -@@ -121,15 +87,10 @@ public: - @param[in,out] y The result Vector: @f$ y += G x @f$. */ - virtual void AddMultGradPA(const Vector &x, Vector &y) const; - -- /// Method for computing the diagonal of the gradient with partial assembly. -- /** The result Vector @a diag is an E-Vector. This method can be called only -- after the method AssembleGradPA() has been called. -- -- @param[in,out] diag The result Vector: @f$ diag += diag(G) @f$. */ -- virtual void AssembleGradDiagonalPA(Vector &diag) const; -- -- /// Indicates whether this integrator can use a Ceed backend. -- virtual bool SupportsCeed() const { return false; } -+ /// Compute the local (to the MPI rank) energy with partial assembly. -+ /** Here the state @a x is an E-vector. This method can be called only after -+ the method AssemblePA() has been called. */ -+ virtual double GetLocalStateEnergyPA(const Vector &x) const; - - /// Method defining fully unassembled operator. - virtual void AssembleMF(const FiniteElementSpace &fes); -@@ -142,7 +103,39 @@ public: - called. */ - virtual void AddMultMF(const Vector &x, Vector &y) const; - -- ceed::Operator& GetCeedOp() { return *ceedOp; } -+ /// Compute the local energy -+ virtual double GetElementEnergy(const FiniteElement &el, -+ ElementTransformation &Tr, -+ const Vector &elfun); -+ -+ /// Perform the local action of the NonlinearFormIntegrator -+ virtual void AssembleElementVector(const FiniteElement &el, -+ ElementTransformation &Tr, -+ const Vector &elfun, Vector &elvect); -+ -+ /// @brief Perform the local action of the NonlinearFormIntegrator resulting -+ /// from a face integral term. -+ virtual void AssembleFaceVector(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Tr, -+ const Vector &elfun, Vector &elvect); -+ -+ /// Assemble the local gradient matrix -+ virtual void AssembleElementGrad(const FiniteElement &el, -+ ElementTransformation &Tr, -+ const Vector &elfun, DenseMatrix &elmat); -+ -+ /// @brief Assemble the local action of the gradient of the -+ /// NonlinearFormIntegrator resulting from a face integral term. -+ virtual void AssembleFaceGrad(const FiniteElement &el1, -+ const FiniteElement &el2, -+ FaceElementTransformations &Tr, -+ const Vector &elfun, DenseMatrix &elmat); -+ -+ /// Indicates whether this integrator can use a Ceed backend. -+ virtual bool SupportsCeed() const { return false; } -+ -+ ceed::Operator &GetCeedOp() { return *ceedOp; } - - virtual ~NonlinearFormIntegrator() - { -@@ -150,6 +143,7 @@ public: - } - }; - -+ - /** The abstract base class BlockNonlinearFormIntegrator is - a generalization of the NonlinearFormIntegrator class suitable - for block state vectors. */ -@@ -185,7 +179,7 @@ public: - const Array &elfun, - const Array2D &elmats); - -- virtual ~BlockNonlinearFormIntegrator() { } -+ virtual ~BlockNonlinearFormIntegrator() {} - }; - - -@@ -197,8 +191,8 @@ protected: - transformation. */ - - public: -- HyperelasticModel() : Ttr(NULL) { } -- virtual ~HyperelasticModel() { } -+ HyperelasticModel() : Ttr(NULL) {} -+ virtual ~HyperelasticModel() {} - - /// A reference-element to target-element transformation that can be used to - /// evaluate Coefficient%s. -@@ -277,7 +271,7 @@ public: - - NeoHookeanModel(Coefficient &mu_, Coefficient &K_, Coefficient *g_ = NULL) - : mu(0.0), K(0.0), g(1.0), c_mu(&mu_), c_K(&K_), c_g(g_), -- have_coeffs(true) { } -+ have_coeffs(true) {} - - virtual double EvalW(const DenseMatrix &J) const; - -@@ -314,7 +308,7 @@ private: - - public: - /** @param[in] m HyperelasticModel that will be integrated. */ -- HyperelasticNLFIntegrator(HyperelasticModel *m) : model(m) { } -+ HyperelasticNLFIntegrator(HyperelasticModel *m) : model(m) {} - - /** @brief Computes the integral of W(Jacobian(Trt)) over a target zone - @param[in] el Type of FiniteElement. -@@ -333,6 +327,7 @@ public: - const Vector &elfun, DenseMatrix &elmat); - }; - -+ - /** Hyperelastic incompressible Neo-Hookean integrator with the PK1 stress - \f$P = \mu F - p F^{-T}\f$ where \f$\mu\f$ is the shear modulus, - \f$p\f$ is the pressure, and \f$F\f$ is the deformation gradient */ -@@ -345,7 +340,7 @@ private: - Vector Sh_p; - - public: -- IncompressibleNeoHookeanIntegrator(Coefficient &mu_) : c_mu(&mu_) { } -+ IncompressibleNeoHookeanIntegrator(Coefficient &mu_) : c_mu(&mu_) {} - - virtual double GetElementEnergy(const Array&el, - ElementTransformation &Tr, -@@ -371,6 +366,7 @@ private: - Coefficient *Q{}; - DenseMatrix dshape, dshapex, EF, gradEF, ELV, elmat_comp; - Vector shape; -+ - // PA extension - Vector pa_data; - const DofToQuad *maps; ///< Not owned -@@ -378,7 +374,7 @@ private: - int dim, ne, nq; - - public: -- VectorConvectionNLFIntegrator(Coefficient &q): Q(&q) { } -+ VectorConvectionNLFIntegrator(Coefficient &q): Q(&q) {} - - VectorConvectionNLFIntegrator() = default; - -@@ -395,14 +391,12 @@ public: - const Vector &elfun, - DenseMatrix &elmat); - -- using NonlinearFormIntegrator::AssemblePA; -- - virtual void AssemblePA(const FiniteElementSpace &fes); - -- virtual void AssembleMF(const FiniteElementSpace &fes); -- - virtual void AddMultPA(const Vector &x, Vector &y) const; - -+ virtual void AssembleMF(const FiniteElementSpace &fes); -+ - virtual void AddMultMF(const Vector &x, Vector &y) const; - }; - -@@ -418,7 +412,7 @@ private: - Vector shape; - - public: -- ConvectiveVectorConvectionNLFIntegrator(Coefficient &q): Q(&q) { } -+ ConvectiveVectorConvectionNLFIntegrator(Coefficient &q): Q(&q) {} - - ConvectiveVectorConvectionNLFIntegrator() = default; - -@@ -441,7 +435,7 @@ private: - Vector shape; - - public: -- SkewSymmetricVectorConvectionNLFIntegrator(Coefficient &q): Q(&q) { } -+ SkewSymmetricVectorConvectionNLFIntegrator(Coefficient &q): Q(&q) {} - - SkewSymmetricVectorConvectionNLFIntegrator() = default; - -diff --git a/fem/pbilinearform.cpp b/fem/pbilinearform.cpp -index ee1030c48..707473f87 100644 ---- a/fem/pbilinearform.cpp -+++ b/fem/pbilinearform.cpp -@@ -19,107 +19,109 @@ - namespace mfem - { - --void ParBilinearForm::pAllocMat() --{ -- int nbr_size = pfes->GetFaceNbrVSize(); - -- if (precompute_sparsity == 0 || fes->GetVDim() > 1) -- { -- if (keep_nbr_block) -- { -- mat = new SparseMatrix(height + nbr_size, width + nbr_size); -- } -- else -- { -- mat = new SparseMatrix(height, width + nbr_size); -- } -- return; -- } -- -- // the sparsity pattern is defined from the map: face->element->dof -- const Table &lelem_ldof = fes->GetElementToDofTable(); // <-- dofs -- const Table &nelem_ndof = pfes->face_nbr_element_dof; // <-- vdofs -- Table elem_dof; // element + nbr-element <---> dof -- if (nbr_size > 0) -- { -- // merge lelem_ldof and nelem_ndof into elem_dof -- int s1 = lelem_ldof.Size(), s2 = nelem_ndof.Size(); -- const int *I1 = lelem_ldof.GetI(), *J1 = lelem_ldof.GetJ(); -- const int *I2 = nelem_ndof.GetI(), *J2 = nelem_ndof.GetJ(); -- const int nnz1 = I1[s1], nnz2 = I2[s2]; -- -- elem_dof.SetDims(s1 + s2, nnz1 + nnz2); -- -- int *I = elem_dof.GetI(), *J = elem_dof.GetJ(); -- for (int i = 0; i <= s1; i++) -- { -- I[i] = I1[i]; -- } -- for (int j = 0; j < nnz1; j++) -- { -- J[j] = J1[j]; -- } -- for (int i = 0; i <= s2; i++) -- { -- I[s1+i] = I2[i] + nnz1; -- } -- for (int j = 0; j < nnz2; j++) -- { -- J[nnz1+j] = J2[j] + height; -- } -- } -- // dof_elem x elem_face x face_elem x elem_dof (keep_nbr_block = true) -- // ldof_lelem x lelem_face x face_elem x elem_dof (keep_nbr_block = false) -- Table dof_dof; -- { -- Table face_dof; // face_elem x elem_dof -- { -- Table *face_elem = pfes->GetParMesh()->GetFaceToAllElementTable(); -- if (nbr_size > 0) -- { -- mfem::Mult(*face_elem, elem_dof, face_dof); -- } -- else -- { -- mfem::Mult(*face_elem, lelem_ldof, face_dof); -- } -- delete face_elem; -- if (nbr_size > 0) -- { -- elem_dof.Clear(); -- } -- } -- -- if (keep_nbr_block) -- { -- Table dof_face; -- Transpose(face_dof, dof_face, height + nbr_size); -- mfem::Mult(dof_face, face_dof, dof_dof); -- } -- else -- { -- Table ldof_face; -- { -- Table face_ldof; -- Table *face_lelem = fes->GetMesh()->GetFaceToElementTable(); -- mfem::Mult(*face_lelem, lelem_ldof, face_ldof); -- delete face_lelem; -- Transpose(face_ldof, ldof_face, height); -- } -- mfem::Mult(ldof_face, face_dof, dof_dof); -- } -- } -- -- int *I = dof_dof.GetI(); -- int *J = dof_dof.GetJ(); -- int nrows = dof_dof.Size(); -- double *data = Memory(I[nrows]); -- -- mat = new SparseMatrix(I, J, data, nrows, height + nbr_size); -- *mat = 0.0; -- -- dof_dof.LoseData(); --} -+//XX TODO -+// void ParBilinearForm::pAllocMat() -+// { -+// int nbr_size = pfes->GetFaceNbrVSize(); -+ -+// if (precompute_sparsity == 0 || fes->GetVDim() > 1) -+// { -+// if (keep_nbr_block) -+// { -+// mat = new SparseMatrix(height + nbr_size, width + nbr_size); -+// } -+// else -+// { -+// mat = new SparseMatrix(height, width + nbr_size); -+// } -+// return; -+// } -+ -+// // the sparsity pattern is defined from the map: face->element->dof -+// const Table &lelem_ldof = fes->GetElementToDofTable(); // <-- dofs -+// const Table &nelem_ndof = pfes->face_nbr_element_dof; // <-- vdofs -+// Table elem_dof; // element + nbr-element <---> dof -+// if (nbr_size > 0) -+// { -+// // merge lelem_ldof and nelem_ndof into elem_dof -+// int s1 = lelem_ldof.Size(), s2 = nelem_ndof.Size(); -+// const int *I1 = lelem_ldof.GetI(), *J1 = lelem_ldof.GetJ(); -+// const int *I2 = nelem_ndof.GetI(), *J2 = nelem_ndof.GetJ(); -+// const int nnz1 = I1[s1], nnz2 = I2[s2]; -+ -+// elem_dof.SetDims(s1 + s2, nnz1 + nnz2); -+ -+// int *I = elem_dof.GetI(), *J = elem_dof.GetJ(); -+// for (int i = 0; i <= s1; i++) -+// { -+// I[i] = I1[i]; -+// } -+// for (int j = 0; j < nnz1; j++) -+// { -+// J[j] = J1[j]; -+// } -+// for (int i = 0; i <= s2; i++) -+// { -+// I[s1+i] = I2[i] + nnz1; -+// } -+// for (int j = 0; j < nnz2; j++) -+// { -+// J[nnz1+j] = J2[j] + height; -+// } -+// } -+// // dof_elem x elem_face x face_elem x elem_dof (keep_nbr_block = true) -+// // ldof_lelem x lelem_face x face_elem x elem_dof (keep_nbr_block = false) -+// Table dof_dof; -+// { -+// Table face_dof; // face_elem x elem_dof -+// { -+// Table *face_elem = pfes->GetParMesh()->GetFaceToAllElementTable(); -+// if (nbr_size > 0) -+// { -+// mfem::Mult(*face_elem, elem_dof, face_dof); -+// } -+// else -+// { -+// mfem::Mult(*face_elem, lelem_ldof, face_dof); -+// } -+// delete face_elem; -+// if (nbr_size > 0) -+// { -+// elem_dof.Clear(); -+// } -+// } -+ -+// if (keep_nbr_block) -+// { -+// Table dof_face; -+// Transpose(face_dof, dof_face, height + nbr_size); -+// mfem::Mult(dof_face, face_dof, dof_dof); -+// } -+// else -+// { -+// Table ldof_face; -+// { -+// Table face_ldof; -+// Table *face_lelem = fes->GetMesh()->GetFaceToElementTable(); -+// mfem::Mult(*face_lelem, lelem_ldof, face_ldof); -+// delete face_lelem; -+// Transpose(face_ldof, ldof_face, height); -+// } -+// mfem::Mult(ldof_face, face_dof, dof_dof); -+// } -+// } -+ -+// int *I = dof_dof.GetI(); -+// int *J = dof_dof.GetJ(); -+// int nrows = dof_dof.Size(); -+// double *data = Memory(I[nrows]); -+ -+// mat = new SparseMatrix(I, J, data, nrows, height + nbr_size); -+// *mat = 0.0; -+ -+// dof_dof.LoseData(); -+// } - - void ParBilinearForm::ParallelRAP(SparseMatrix &loc_A, OperatorHandle &A, - bool steal_loc_A) -@@ -151,7 +153,8 @@ void ParBilinearForm::ParallelRAP(SparseMatrix &loc_A, OperatorHandle &A, - } - } - --void ParBilinearForm::ParallelAssemble(OperatorHandle &A, SparseMatrix *A_local) -+void ParBilinearForm::ParallelAssemble(OperatorHandle &A, -+ SparseMatrix *A_local) const - { - A.Clear(); - -@@ -201,6 +204,7 @@ void ParBilinearForm::ParallelAssemble(OperatorHandle &A, SparseMatrix *A_local) - - // TODO - assemble the Dof_TrueDof_Matrix directly in the required format? - Ph.ConvertFrom(pfes->Dof_TrueDof_Matrix()); -+ - // TODO: When Ph.Type() == Operator::ANY_TYPE we want to use the Operator - // returned by pfes->GetProlongationMatrix(), however that Operator is a - // const Operator, so we cannot store it in OperatorHandle. We need a const -@@ -209,7 +213,7 @@ void ParBilinearForm::ParallelAssemble(OperatorHandle &A, SparseMatrix *A_local) - A.MakePtAP(dA, Ph); - } - --HypreParMatrix *ParBilinearForm::ParallelAssemble(SparseMatrix *m) -+HypreParMatrix *ParBilinearForm::ParallelAssemble(SparseMatrix *m) const - { - OperatorHandle Mh(Operator::Hypre_ParCSR); - ParallelAssemble(Mh, m); -@@ -246,10 +250,9 @@ void ParBilinearForm::AssembleSharedFaces(int skip_zeros) - vdofs_all.Append(vdofs2); - for (int k = 0; k < interior_face_integs.Size(); k++) - { -- interior_face_integs[k]-> -- AssembleFaceMatrix(*pfes->GetFE(T->Elem1No), -- *pfes->GetFaceNbrFE(Elem2NbrNo), -- *T, elemmat); -+ interior_face_integs[k]->AssembleFaceMatrix(*pfes->GetFE(T->Elem1No), -+ *pfes->GetFaceNbrFE(Elem2NbrNo), -+ *T, elemmat); - if (keep_nbr_block) - { - mat->AddSubMatrix(vdofs_all, vdofs_all, elemmat, skip_zeros); -@@ -269,7 +272,15 @@ void ParBilinearForm::Assemble(int skip_zeros) - pfes->ExchangeFaceNbrData(); - if (!ext && mat == NULL) - { -- pAllocMat(); -+ int nbr_size = pfes->GetFaceNbrVSize(); -+ if (keep_nbr_block) -+ { -+ mat = new SparseMatrix(height + nbr_size, width + nbr_size); -+ } -+ else -+ { -+ mat = new SparseMatrix(height, width + nbr_size); -+ } - } - } - -@@ -301,29 +312,23 @@ void ParBilinearForm::AssembleDiagonal(Vector &diag) const - // Here, we have extension, ext, and parallel/conforming prolongation, P. - Vector local_diag(P->Height()); - ext->AssembleDiagonal(local_diag); -- if (fes->Conforming()) -+ const HypreParMatrix *HP = dynamic_cast(P); -+ if (!HP) - { -+ // This is a parallel prolongation - P->MultTranspose(local_diag, diag); - return; - } - // For an AMR mesh, a convergent diagonal is assembled with |P^T| d_l, - // where |P^T| has the entry-wise absolute values of the conforming - // prolongation transpose operator. -- const HypreParMatrix *HP = dynamic_cast(P); -- if (HP) -- { -- HP->AbsMultTranspose(1.0, local_diag, 0.0, diag); -- } -- else -- { -- MFEM_ABORT("unsupported prolongation matrix type."); -- } -+ HP->AbsMultTranspose(1.0, local_diag, 0.0, diag); - } - --void ParBilinearForm --::ParallelEliminateEssentialBC(const Array &bdr_attr_is_ess, -- HypreParMatrix &A, const HypreParVector &X, -- HypreParVector &B) const -+void ParBilinearForm::ParallelEliminateEssentialBC( -+ const Array &bdr_attr_is_ess, -+ HypreParMatrix &A, const HypreParVector &X, -+ HypreParVector &B) const - { - Array dof_list; - -@@ -333,9 +338,9 @@ void ParBilinearForm - A.EliminateRowsCols(dof_list, X, B); - } - --HypreParMatrix *ParBilinearForm:: --ParallelEliminateEssentialBC(const Array &bdr_attr_is_ess, -- HypreParMatrix &A) const -+HypreParMatrix *ParBilinearForm::ParallelEliminateEssentialBC( -+ const Array &bdr_attr_is_ess, -+ HypreParMatrix &A) const - { - Array dof_list; - -@@ -374,7 +379,17 @@ void ParBilinearForm::FormLinearSystem( - { - if (ext) - { -- ext->FormLinearSystem(ess_tdof_list, x, b, A, X, B, copy_interior); -+ Operator *oper; -+ ext->FormLinearSystem(ess_tdof_list, x, b, oper, X, B, copy_interior); -+ if (assembly == AssemblyLevel::FULL) -+ { -+ delete oper; -+ FormSystemMatrix(ess_tdof_list, A); -+ } -+ else -+ { -+ A.Reset(oper); -+ } - return; - } - -@@ -418,18 +433,28 @@ void ParBilinearForm::FormLinearSystem( - } - } - --void ParBilinearForm::EliminateVDofsInRHS( -- const Array &vdofs, const Vector &x, Vector &b) --{ -- p_mat.EliminateBC(p_mat_e, vdofs, x, b); --} -- - void ParBilinearForm::FormSystemMatrix(const Array &ess_tdof_list, - OperatorHandle &A) - { - if (ext) - { -- ext->FormSystemMatrix(ess_tdof_list, A); -+ if (assembly == AssemblyLevel::FULL) -+ { -+ // Always does `DIAG_ONE` policy to be consistent with -+ // `Operator::FormConstrainedSystemOperator`. -+ MFEM_VERIFY(diag_policy == DiagonalPolicy::DIAG_ONE, -+ "Only DiagonalPolicy::DIAG_ONE supported with" -+ " FABilinearFormExtension."); -+ ParallelRAP(*mat, A); -+ A.As()->EliminateBC(ess_tdof_list, -+ DiagonalPolicy::DIAG_ONE); -+ } -+ else -+ { -+ Operator *oper; -+ ext->FormSystemOperator(ess_tdof_list, oper); -+ A.Reset(oper); -+ } - return; - } - -@@ -460,6 +485,7 @@ void ParBilinearForm::FormSystemMatrix(const Array &ess_tdof_list, - delete mat_e; - mat_e = NULL; - p_mat_e.EliminateRowsCols(p_mat, ess_tdof_list); -+ A = p_mat; - } - if (hybridization) - { -@@ -521,30 +547,22 @@ void ParBilinearForm::Update(FiniteElementSpace *nfes) - p_mat_e.Clear(); - } - -- --HypreParMatrix *ParMixedBilinearForm::ParallelAssemble() -+void ParBilinearForm::EliminateVDofsInRHS( -+ const Array &vdofs, const Vector &x, Vector &b) - { -- // construct the block-diagonal matrix A -- HypreParMatrix *A = -- new HypreParMatrix(trial_pfes->GetComm(), -- test_pfes->GlobalVSize(), -- trial_pfes->GlobalVSize(), -- test_pfes->GetDofOffsets(), -- trial_pfes->GetDofOffsets(), -- mat); -+ p_mat.EliminateBC(p_mat_e, vdofs, x, b); -+} - -- HypreParMatrix *rap = RAP(test_pfes->Dof_TrueDof_Matrix(), A, -- trial_pfes->Dof_TrueDof_Matrix()); -+void ParMixedBilinearForm::ParallelAssemble(OperatorHandle &A) const -+{ -+ A.Clear(); - -- delete A; -+ if (mat == NULL) { return; } -+ MFEM_VERIFY(mat->Finalized(), "the local matrix must be finalized"); - -- return rap; --} -+ OperatorHandle dA(A.Type()), P_test(A.Type()), P_trial(A.Type()); - --void ParMixedBilinearForm::ParallelAssemble(OperatorHandle &A) --{ - // construct the rectangular block-diagonal matrix dA -- OperatorHandle dA(A.Type()); - dA.MakeRectangularBlockDiag(trial_pfes->GetComm(), - test_pfes->GlobalVSize(), - trial_pfes->GlobalVSize(), -@@ -552,8 +570,6 @@ void ParMixedBilinearForm::ParallelAssemble(OperatorHandle &A) - trial_pfes->GetDofOffsets(), - mat); - -- OperatorHandle P_test(A.Type()), P_trial(A.Type()); -- - // TODO - construct the Dof_TrueDof_Matrix directly in the required format. - P_test.ConvertFrom(test_pfes->Dof_TrueDof_Matrix()); - P_trial.ConvertFrom(trial_pfes->Dof_TrueDof_Matrix()); -@@ -561,6 +577,14 @@ void ParMixedBilinearForm::ParallelAssemble(OperatorHandle &A) - A.MakeRAP(P_test, dA, P_trial); - } - -+HypreParMatrix *ParMixedBilinearForm::ParallelAssemble() const -+{ -+ OperatorHandle Mh(Operator::Hypre_ParCSR); -+ ParallelAssemble(Mh); -+ Mh.SetOperatorOwner(false); -+ return Mh.As(); -+} -+ - /// Compute y += a (P^t A P) x, where x and y are vectors on the true dofs - void ParMixedBilinearForm::TrueAddMult(const Vector &x, Vector &y, - const double a) const -@@ -576,21 +600,55 @@ void ParMixedBilinearForm::TrueAddMult(const Vector &x, Vector &y, - test_pfes->Dof_TrueDof_Matrix()->MultTranspose(a, Yaux, 1.0, y); - } - -+void ParMixedBilinearForm::FormRectangularLinearSystem( -+ const Array &trial_tdof_list, -+ const Array &test_tdof_list, Vector &x, -+ Vector &b, OperatorHandle &A, Vector &X, -+ Vector &B) -+{ -+ if (ext) -+ { -+ Operator *oper; -+ ext->FormRectangularLinearSystem(trial_tdof_list, test_tdof_list, -+ x, b, oper, X, B); -+ A.Reset(oper); -+ return; -+ } -+ -+ FormRectangularSystemMatrix(trial_tdof_list, test_tdof_list, A); -+ -+ const Operator *test_P = test_pfes->GetProlongationMatrix(); -+ const SparseMatrix *trial_R = trial_pfes->GetRestrictionMatrix(); -+ -+ X.SetSize(trial_pfes->TrueVSize()); -+ B.SetSize(test_pfes->TrueVSize()); -+ test_P->MultTranspose(b, B); -+ trial_R->Mult(x, X); -+ -+ p_mat_e.As()->Mult(-1.0, X, 1.0, B); -+ B.SetSubVector(test_tdof_list, 0.0); -+} -+ - void ParMixedBilinearForm::FormRectangularSystemMatrix( -- const Array -- &trial_tdof_list, -+ const Array &trial_tdof_list, - const Array &test_tdof_list, - OperatorHandle &A) - { - if (ext) - { -- ext->FormRectangularSystemOperator(trial_tdof_list, test_tdof_list, A); -+ Operator *oper; -+ ext->FormRectangularSystemOperator(trial_tdof_list, test_tdof_list, oper); -+ A.Reset(oper); - return; - } - - if (mat) - { -- Finalize(); -+ const int remove_zeros = 0; -+ Finalize(remove_zeros); -+ MFEM_VERIFY(p_mat.Ptr() == NULL && p_mat_e.Ptr() == NULL, -+ "The ParMixedBilinearFormBilinearForm must be updated " -+ "with Update() before re-assembling the ParMixedBilinearFormBilinearForm."); - ParallelAssemble(p_mat); - delete mat; - mat = NULL; -@@ -600,97 +658,89 @@ void ParMixedBilinearForm::FormRectangularSystemMatrix( - p_mat.As()->EliminateCols(trial_tdof_list); - p_mat.As()->EliminateRows(test_tdof_list); - p_mat_e.Reset(temp, true); -+ A = p_mat; - } -- -- A = p_mat; - } - --void ParMixedBilinearForm::FormRectangularLinearSystem( -- const Array -- &trial_tdof_list, -- const Array &test_tdof_list, Vector &x, -- Vector &b, OperatorHandle &A, Vector &X, -- Vector &B) -+void ParDiscreteLinearOperator::ParallelAssemble(OperatorHandle &A) const - { -- if (ext) -+ A.Clear(); -+ -+ if (mat == NULL) { return; } -+ MFEM_VERIFY(mat->Finalized(), "the local matrix must be finalized"); -+ -+ if (A.Type() == Operator::Hypre_ParCSR) - { -- ext->FormRectangularLinearSystem(trial_tdof_list, test_tdof_list, -- x, b, A, X, B); -- return; -+ const SparseMatrix *R = range_fes->GetRestrictionMatrix(); -+ const HypreParMatrix *P = domain_fes->Dof_TrueDof_Matrix(); -+ SparseMatrix *RA = mfem::Mult(*R, *mat); -+ A.Reset(P->LeftDiagMult(*RA, range_fes->GetTrueDofOffsets())); -+ delete RA; - } -+ else -+ { -+ OperatorHandle dA(A.Type()), P_trial(A.Type()), Rt_test(A.Type()); - -- FormRectangularSystemMatrix(trial_tdof_list, test_tdof_list, A); -- -- const Operator *test_P = test_pfes->GetProlongationMatrix(); -- const SparseMatrix *trial_R = trial_pfes->GetRestrictionMatrix(); -+ // construct the rectangular block-diagonal matrix dA -+ dA.MakeRectangularBlockDiag(domain_fes->GetComm(), -+ range_fes->GlobalVSize(), -+ domain_fes->GlobalVSize(), -+ range_fes->GetDofOffsets(), -+ domain_fes->GetDofOffsets(), -+ mat); - -- X.SetSize(trial_pfes->TrueVSize()); -- B.SetSize(test_pfes->TrueVSize()); -- test_P->MultTranspose(b, B); -- trial_R->Mult(x, X); -+ SparseMatrix *Rt = Transpose(*range_fes->GetRestrictionMatrix()); -+ Rt_test.MakeRectangularBlockDiag(range_fes->GetComm(), -+ range_fes->GlobalVSize(), -+ range_fes->GlobalTrueVSize(), -+ range_fes->GetDofOffsets(), -+ range_fes->GetTrueDofOffsets(), -+ Rt); - -- p_mat_e.As()->Mult(-1.0, X, 1.0, B); -- B.SetSubVector(test_tdof_list, 0.0); --} -+ // TODO - construct the Dof_TrueDof_Matrix directly in the required format. -+ P_trial.ConvertFrom(domain_fes->Dof_TrueDof_Matrix()); - --HypreParMatrix* ParDiscreteLinearOperator::ParallelAssemble() const --{ -- MFEM_ASSERT(mat, "Matrix is not assembled"); -- MFEM_ASSERT(mat->Finalized(), "Matrix is not finalized"); -- SparseMatrix* RA = mfem::Mult(*range_fes->GetRestrictionMatrix(), *mat); -- HypreParMatrix* P = domain_fes->Dof_TrueDof_Matrix(); -- HypreParMatrix* RAP = P->LeftDiagMult(*RA, range_fes->GetTrueDofOffsets()); -- delete RA; -- return RAP; -+ A.MakeRAP(Rt_test, dA, P_trial); -+ delete Rt; -+ } - } - --void ParDiscreteLinearOperator::ParallelAssemble(OperatorHandle &A) -+HypreParMatrix *ParDiscreteLinearOperator::ParallelAssemble() const - { -- // construct the rectangular block-diagonal matrix dA -- OperatorHandle dA(A.Type()); -- dA.MakeRectangularBlockDiag(domain_fes->GetComm(), -- range_fes->GlobalVSize(), -- domain_fes->GlobalVSize(), -- range_fes->GetDofOffsets(), -- domain_fes->GetDofOffsets(), -- mat); -- -- SparseMatrix *Rt = Transpose(*range_fes->GetRestrictionMatrix()); -- OperatorHandle R_test_transpose(A.Type()); -- R_test_transpose.MakeRectangularBlockDiag(range_fes->GetComm(), -- range_fes->GlobalVSize(), -- range_fes->GlobalTrueVSize(), -- range_fes->GetDofOffsets(), -- range_fes->GetTrueDofOffsets(), -- Rt); -- -- // TODO - construct the Dof_TrueDof_Matrix directly in the required format. -- OperatorHandle P_trial(A.Type()); -- P_trial.ConvertFrom(domain_fes->Dof_TrueDof_Matrix()); -- -- A.MakeRAP(R_test_transpose, dA, P_trial); -- delete Rt; -+ OperatorHandle Mh(Operator::Hypre_ParCSR); -+ ParallelAssemble(Mh); -+ Mh.SetOperatorOwner(false); -+ return Mh.As(); - } - --void ParDiscreteLinearOperator::FormRectangularSystemMatrix(OperatorHandle &A) -+void ParDiscreteLinearOperator::FormDiscreteOperatorMatrix(OperatorHandle &A) - { - if (ext) - { -- Array empty; -- ext->FormRectangularSystemOperator(empty, empty, A); -+ Operator *oper; -+ ext->FormDiscreteOperator(oper); -+ A.Reset(oper); - return; - } - -- mfem_error("not implemented!"); -+ if (mat) -+ { -+ Finalize(); -+ ParallelAssemble(A); -+ delete mat; -+ mat = NULL; -+ delete mat_e; -+ mat_e = NULL; -+ } - } - --void ParDiscreteLinearOperator::GetParBlocks(Array2D &blocks) --const -+void ParDiscreteLinearOperator::GetParBlocks( -+ Array2D &blocks) - { - MFEM_VERIFY(mat->Finalized(), "Local matrix needs to be finalized for " - "GetParBlocks"); - -- HypreParMatrix* RLP = ParallelAssemble(); -+ HypreParMatrix *RLP = ParallelAssemble(); - - blocks.SetSize(range_fes->GetVDim(), domain_fes->GetVDim()); - -diff --git a/fem/pbilinearform.hpp b/fem/pbilinearform.hpp -index c8fef567b..be8e50ca3 100644 ---- a/fem/pbilinearform.hpp -+++ b/fem/pbilinearform.hpp -@@ -28,27 +28,31 @@ namespace mfem - class ParBilinearForm : public BilinearForm - { - friend FABilinearFormExtension; -+ - protected: -- ParFiniteElementSpace *pfes; ///< Points to the same object as #fes -+ ///< Points to the same object as #fes -+ ParFiniteElementSpace *pfes; - - /// Auxiliary vectors used in TrueAddMult(): L-, L-, and T-vector, resp. - mutable Vector Xaux, Yaux, Ytmp; - -+ /// Matrix and eliminated matrix - OperatorHandle p_mat, p_mat_e; - - bool keep_nbr_block; - -- // Allocate mat - called when (mat == NULL && fbfi.Size() > 0) -- void pAllocMat(); -+ //XX TODO -+ // // Allocate mat - called when (mat == NULL && fbfi.Size() > 0) -+ // void pAllocMat(); - - void AssembleSharedFaces(int skip_zeros = 1); - - private: -- /// Copy construction is not supported; body is undefined. -- ParBilinearForm(const ParBilinearForm &); -+ /// Copy construction is not supported. -+ ParBilinearForm(const ParBilinearForm &) = delete; - -- /// Copy assignment is not supported; body is undefined. -- ParBilinearForm &operator=(const ParBilinearForm &); -+ /// Copy assignment is not supported. -+ ParBilinearForm &operator=(const ParBilinearForm &) = delete; - - public: - /// Creates parallel bilinear form associated with the FE space @a *pf. -@@ -103,40 +107,40 @@ public: - - /// Returns the matrix assembled on the true dofs, i.e. P^t A P. - /** The returned matrix has to be deleted by the caller. */ -- HypreParMatrix *ParallelAssemble() { return ParallelAssemble(mat); } -+ HypreParMatrix *ParallelAssemble() const { return ParallelAssemble(mat); } - - /// Returns the eliminated matrix assembled on the true dofs, i.e. P^t A_e P. - /** The returned matrix has to be deleted by the caller. */ -- HypreParMatrix *ParallelAssembleElim() { return ParallelAssemble(mat_e); } -+ HypreParMatrix *ParallelAssembleElim() const { return ParallelAssemble(mat_e); } - - /// Return the matrix @a m assembled on the true dofs, i.e. P^t A P. - /** The returned matrix has to be deleted by the caller. */ -- HypreParMatrix *ParallelAssemble(SparseMatrix *m); -- -- /** @brief Compute parallel RAP operator and store it in @a A as a HypreParMatrix. -- -- @param[in] loc_A The rank-local `SparseMatrix`. -- @param[out] A The `OperatorHandle` containing the global `HypreParMatrix`. -- @param[in] steal_loc_A Have the `HypreParMatrix` in @a A take ownership of -- the memory objects in @a loc_A. -- */ -- void ParallelRAP(SparseMatrix &loc_A, -- OperatorHandle &A, -- bool steal_loc_A = false); -+ HypreParMatrix *ParallelAssemble(SparseMatrix *m) const; - - /** @brief Returns the matrix assembled on the true dofs, i.e. - @a A = P^t A_local P, in the format (type id) specified by @a A. */ -- void ParallelAssemble(OperatorHandle &A) { ParallelAssemble(A, mat); } -+ void ParallelAssemble(OperatorHandle &A) const { ParallelAssemble(A, mat); } - - /** Returns the eliminated matrix assembled on the true dofs, i.e. - @a A_elim = P^t A_elim_local P in the format (type id) specified by @a A. - */ -- void ParallelAssembleElim(OperatorHandle &A_elim) -+ void ParallelAssembleElim(OperatorHandle &A_elim) const - { ParallelAssemble(A_elim, mat_e); } - - /** Returns the matrix @a A_local assembled on the true dofs, i.e. - @a A = P^t A_local P in the format (type id) specified by @a A. */ -- void ParallelAssemble(OperatorHandle &A, SparseMatrix *A_local); -+ void ParallelAssemble(OperatorHandle &A, SparseMatrix *A_local) const; -+ -+ /** @brief Compute parallel RAP operator and store it in @a A as a HypreParMatrix. -+ -+ @param[in] loc_A The rank-local `SparseMatrix`. -+ @param[out] A The `OperatorHandle` containing the global `HypreParMatrix`. -+ @param[in] steal_loc_A Have the `HypreParMatrix` in @a A take ownership of -+ the memory objects in @a loc_A. -+ */ -+ void ParallelRAP(SparseMatrix &loc_A, -+ OperatorHandle &A, -+ bool steal_loc_A = false); - - /// Eliminate essential boundary DOFs from a parallel assembled system. - /** The array @a bdr_attr_is_ess marks boundary attributes that constitute -@@ -183,9 +187,7 @@ public: - /// Get the parallel finite element space prolongation matrix - virtual const Operator *GetProlongation() const - { return pfes->GetProlongationMatrix(); } -- /// Get the transpose of GetRestriction, useful for matrix-free RAP -- virtual const Operator *GetRestrictionTranspose() const -- { return pfes->GetRestrictionTransposeOperator(); } -+ - /// Get the parallel finite element space restriction matrix - virtual const Operator *GetRestriction() const - { return pfes->GetRestrictionMatrix(); } -@@ -209,7 +211,7 @@ public: - - void EliminateVDofsInRHS(const Array &vdofs, const Vector &x, Vector &b); - -- virtual ~ParBilinearForm() { } -+ virtual ~ParBilinearForm() {} - }; - - /// Class for parallel bilinear form using different test and trial FE spaces. -@@ -220,6 +222,7 @@ protected: - ParFiniteElementSpace *trial_pfes; - /// Points to the same object as #test_fes - ParFiniteElementSpace *test_pfes; -+ - /// Auxiliary objects used in TrueAddMult(). - mutable ParGridFunction Xaux, Yaux; - -@@ -227,11 +230,11 @@ protected: - OperatorHandle p_mat, p_mat_e; - - private: -- /// Copy construction is not supported; body is undefined. -- ParMixedBilinearForm(const ParMixedBilinearForm &); -+ /// Copy construction is not supported. -+ ParMixedBilinearForm(const ParMixedBilinearForm &) = delete; - -- /// Copy assignment is not supported; body is undefined. -- ParMixedBilinearForm &operator=(const ParMixedBilinearForm &); -+ /// Copy assignment is not supported. -+ ParMixedBilinearForm &operator=(const ParMixedBilinearForm &) = delete; - - public: - /** @brief Construct a ParMixedBilinearForm on the given FiniteElementSpace%s -@@ -258,7 +261,7 @@ public: - by the newly constructed ParMixedBilinearForm. */ - ParMixedBilinearForm(ParFiniteElementSpace *trial_fes, - ParFiniteElementSpace *test_fes, -- ParMixedBilinearForm * mbf) -+ ParMixedBilinearForm *mbf) - : MixedBilinearForm(trial_fes, test_fes, mbf), - p_mat(Operator::Hypre_ParCSR), p_mat_e(Operator::Hypre_ParCSR) - { -@@ -267,24 +270,16 @@ public: - } - - /// Returns the matrix assembled on the true dofs, i.e. P_test^t A P_trial. -- HypreParMatrix *ParallelAssemble(); -+ HypreParMatrix *ParallelAssemble() const; - - /** @brief Returns the matrix assembled on the true dofs, i.e. - @a A = P_test^t A_local P_trial, in the format (type id) specified by - @a A. */ -- void ParallelAssemble(OperatorHandle &A); -+ void ParallelAssemble(OperatorHandle &A) const; - - using MixedBilinearForm::FormRectangularSystemMatrix; - using MixedBilinearForm::FormRectangularLinearSystem; - -- /** @brief Return in @a A a parallel (on truedofs) version of this operator. -- -- This returns the same operator as FormRectangularLinearSystem(), but does -- without the transformations of the right-hand side. */ -- virtual void FormRectangularSystemMatrix(const Array &trial_tdof_list, -- const Array &test_tdof_list, -- OperatorHandle &A); -- - /** @brief Form the parallel linear system A X = B, corresponding to this mixed - bilinear form and the linear form @a b(.). - -@@ -296,10 +291,18 @@ public: - Vector &b, OperatorHandle &A, Vector &X, - Vector &B); - -+ /** @brief Return in @a A a parallel (on truedofs) version of this operator. -+ -+ This returns the same operator as FormRectangularLinearSystem(), but does -+ without the transformations of the right-hand side. */ -+ virtual void FormRectangularSystemMatrix(const Array &trial_tdof_list, -+ const Array &test_tdof_list, -+ OperatorHandle &A); -+ - /// Compute y += a (P^t A P) x, where x and y are vectors on the true dofs - void TrueAddMult(const Vector &x, Vector &y, const double a = 1.0) const; - -- virtual ~ParMixedBilinearForm() { } -+ virtual ~ParMixedBilinearForm() {} - }; - - /** The parallel matrix representation a linear operator between parallel finite -@@ -313,11 +316,12 @@ protected: - ParFiniteElementSpace *range_fes; - - private: -- /// Copy construction is not supported; body is undefined. -- ParDiscreteLinearOperator(const ParDiscreteLinearOperator &); -+ /// Copy construction is not supported. -+ ParDiscreteLinearOperator(const ParDiscreteLinearOperator &) = delete; - -- /// Copy assignment is not supported; body is undefined. -- ParDiscreteLinearOperator &operator=(const ParDiscreteLinearOperator &); -+ /// Copy assignment is not supported. -+ ParDiscreteLinearOperator &operator=(const ParDiscreteLinearOperator &) = -+ delete; - - public: - /** @brief Construct a ParDiscreteLinearOperator on the given -@@ -327,7 +331,11 @@ public: - object. */ - ParDiscreteLinearOperator(ParFiniteElementSpace *dfes, - ParFiniteElementSpace *rfes) -- : DiscreteLinearOperator(dfes, rfes) { domain_fes=dfes; range_fes=rfes; } -+ : DiscreteLinearOperator(dfes, rfes) -+ { -+ domain_fes = dfes; -+ range_fes = rfes; -+ } - - /// Returns the matrix "assembled" on the true dofs - HypreParMatrix *ParallelAssemble() const; -@@ -335,18 +343,18 @@ public: - /** @brief Returns the matrix assembled on the true dofs, i.e. - @a A = R_test A_local P_trial, in the format (type id) specified by - @a A. */ -- void ParallelAssemble(OperatorHandle &A); -+ void ParallelAssemble(OperatorHandle &A) const; - -- /** Extract the parallel blocks corresponding to the vector dimensions of the -- domain and range parallel finite element spaces */ -- void GetParBlocks(Array2D &blocks) const; -- -- using MixedBilinearForm::FormRectangularSystemMatrix; -+ using DiscreteLinearOperator::FormDiscreteOperatorMatrix; - - /** @brief Return in @a A a parallel (on truedofs) version of this operator. */ -- virtual void FormRectangularSystemMatrix(OperatorHandle &A); -+ virtual void FormDiscreteOperatorMatrix(OperatorHandle &A); -+ -+ /** Extract the parallel blocks corresponding to the vector dimensions of the -+ domain and range parallel finite element spaces */ -+ void GetParBlocks(Array2D &blocks); - -- virtual ~ParDiscreteLinearOperator() { } -+ virtual ~ParDiscreteLinearOperator() {} - }; - - } -diff --git a/fem/pfespace.cpp b/fem/pfespace.cpp -index 6f0af11c6..d74e08580 100644 ---- a/fem/pfespace.cpp -+++ b/fem/pfespace.cpp -@@ -101,8 +101,6 @@ void ParFiniteElementSpace::ParInit(ParMesh *pm) - P = NULL; - Pconf = NULL; - nonconf_P = false; -- Rconf = NULL; -- R_transpose = NULL; - R = NULL; - - num_face_nbr_dofs = -1; -@@ -961,6 +959,34 @@ void ParFiniteElementSpace::Build_Dof_TrueDof_Matrix() const // matrix P - R = Transpose(Pdiag); - } - -+const Operator *ParFiniteElementSpace::GetProlongationMatrix() const -+{ -+ if (Conforming() && !nd_strias) -+ { -+ if (Pconf) { return Pconf; } -+ if (NRanks == 1) -+ { -+ Pconf = new IdentityOperator(GetTrueVSize()); -+ } -+ else -+ { -+ if (!Device::Allows(Backend::DEVICE_MASK)) -+ { -+ Pconf = new ConformingProlongationOperator(*this); -+ } -+ else -+ { -+ Pconf = new DeviceConformingProlongationOperator(*this); -+ } -+ } -+ return Pconf; -+ } -+ else -+ { -+ return Dof_TrueDof_Matrix(); -+ } -+} -+ - HypreParMatrix *ParFiniteElementSpace::GetPartialConformingInterpolation() - { - HypreParMatrix *P_pc; -@@ -1150,76 +1176,6 @@ HYPRE_BigInt ParFiniteElementSpace::GetMyTDofOffset() const - return HYPRE_AssumedPartitionCheck()? tdof_offsets[0] : tdof_offsets[MyRank]; - } - --const Operator *ParFiniteElementSpace::GetProlongationMatrix() const --{ -- if (Conforming()) -- { -- if (Pconf) { return Pconf; } -- -- if (nd_strias) { return Dof_TrueDof_Matrix(); } -- -- if (NRanks == 1) -- { -- Pconf = new IdentityOperator(GetTrueVSize()); -- } -- else -- { -- if (!Device::Allows(Backend::DEVICE_MASK)) -- { -- Pconf = new ConformingProlongationOperator(*this); -- } -- else -- { -- Pconf = new DeviceConformingProlongationOperator(*this); -- } -- } -- return Pconf; -- } -- else -- { -- return Dof_TrueDof_Matrix(); -- } --} -- --const Operator *ParFiniteElementSpace::GetRestrictionOperator() const --{ -- if (Conforming()) -- { -- if (Rconf) { return Rconf; } -- -- if (NRanks == 1) -- { -- R_transpose = new IdentityOperator(GetTrueVSize()); -- } -- else -- { -- if (!Device::Allows(Backend::DEVICE_MASK)) -- { -- R_transpose = new ConformingProlongationOperator(*this, true); -- } -- else -- { -- R_transpose = -- new DeviceConformingProlongationOperator(*this, true); -- } -- } -- Rconf = new TransposeOperator(R_transpose); -- return Rconf; -- } -- else -- { -- Dof_TrueDof_Matrix(); -- R_transpose = new TransposeOperator(R); -- return R; -- } --} -- --const Operator *ParFiniteElementSpace::GetRestrictionTransposeOperator() const --{ -- GetRestrictionOperator(); -- return R_transpose; --} -- - void ParFiniteElementSpace::ExchangeFaceNbrData() - { - if (num_face_nbr_dofs >= 0) { return; } -@@ -3186,8 +3142,6 @@ void ParFiniteElementSpace::Destroy() - - delete P; P = NULL; - delete Pconf; Pconf = NULL; -- delete Rconf; Rconf = NULL; -- delete R_transpose; R_transpose = NULL; - delete R; R = NULL; - - delete gcomm; gcomm = NULL; -@@ -3393,8 +3347,8 @@ void ParFiniteElementSpace::UpdateMeshPointer(Mesh *new_mesh) - } - - ConformingProlongationOperator::ConformingProlongationOperator( -- int lsize, const GroupCommunicator &gc_, bool local_) -- : gc(gc_), local(local_) -+ int lsize, const GroupCommunicator &gc_) -+ : gc(gc_) - { - const Table &group_ldof = gc.GroupLDofTable(); - -@@ -3429,11 +3383,10 @@ const - } - - ConformingProlongationOperator::ConformingProlongationOperator( -- const ParFiniteElementSpace &pfes, bool local_) -+ const ParFiniteElementSpace &pfes) - : Operator(pfes.GetVSize(), pfes.GetTrueVSize()), - external_ldofs(), -- gc(pfes.GroupComm()), -- local(local_) -+ gc(pfes.GroupComm()) - { - MFEM_VERIFY(pfes.Conforming(), ""); - const Table &group_ldof = gc.GroupLDofTable(); -@@ -3482,14 +3435,7 @@ void ConformingProlongationOperator::Mult(const Vector &x, Vector &y) const - const int m = external_ldofs.Size(); - - const int in_layout = 2; // 2 - input is ltdofs array -- if (local) -- { -- y = 0.0; -- } -- else -- { -- gc.BcastBegin(const_cast(xdata), in_layout); -- } -+ gc.BcastBegin(const_cast(xdata), in_layout); - - int j = 0; - for (int i = 0; i < m; i++) -@@ -3501,10 +3447,7 @@ void ConformingProlongationOperator::Mult(const Vector &x, Vector &y) const - std::copy(xdata+j-m, xdata+Width(), ydata+j); - - const int out_layout = 0; // 0 - output is ldofs array -- if (!local) -- { -- gc.BcastEnd(ydata, out_layout); -- } -+ gc.BcastEnd(ydata, out_layout); - } - - void ConformingProlongationOperator::MultTranspose( -@@ -3517,10 +3460,7 @@ void ConformingProlongationOperator::MultTranspose( - double *ydata = y.HostWrite(); - const int m = external_ldofs.Size(); - -- if (!local) -- { -- gc.ReduceBegin(xdata); -- } -+ gc.ReduceBegin(xdata); - - int j = 0; - for (int i = 0; i < m; i++) -@@ -3532,15 +3472,12 @@ void ConformingProlongationOperator::MultTranspose( - std::copy(xdata+j, xdata+Height(), ydata+j-m); - - const int out_layout = 2; // 2 - output is an array on all ltdofs -- if (!local) -- { -- gc.ReduceEnd(ydata, out_layout, GroupCommunicator::Sum); -- } -+ gc.ReduceEnd(ydata, out_layout, GroupCommunicator::Sum); - } - - DeviceConformingProlongationOperator::DeviceConformingProlongationOperator( -- const GroupCommunicator &gc_, const SparseMatrix *R, bool local_) -- : ConformingProlongationOperator(R->Width(), gc_, local_), -+ const GroupCommunicator &gc_, const SparseMatrix *R) -+ : ConformingProlongationOperator(R->Width(), gc_), - mpi_gpu_aware(Device::GetGPUAwareMPI()) - { - MFEM_ASSERT(R->Finalized(), ""); -@@ -3605,10 +3542,9 @@ DeviceConformingProlongationOperator::DeviceConformingProlongationOperator( - } - - DeviceConformingProlongationOperator::DeviceConformingProlongationOperator( -- const ParFiniteElementSpace &pfes, bool local_) -+ const ParFiniteElementSpace &pfes) - : DeviceConformingProlongationOperator(pfes.GroupComm(), -- pfes.GetRestrictionMatrix(), -- local_) -+ pfes.GetRestrictionMatrix()) - { - MFEM_ASSERT(pfes.Conforming(), "internal error"); - MFEM_ASSERT(pfes.GetRestrictionMatrix()->Height() == pfes.GetTrueVSize(), ""); -@@ -3672,48 +3608,36 @@ void DeviceConformingProlongationOperator::Mult(const Vector &x, - Vector &y) const - { - const GroupTopology >opo = gc.GetGroupTopology(); -- int req_counter = 0; - // Make sure 'y' is marked as valid on device and for use on device. - // This ensures that there is no unnecessary host to device copy when the -- // input 'y' is valid on host (in 'y.SetSubVector(ext_ldof, 0.0)' when local -- // is true) or BcastLocalCopy (when local is false). -+ // input 'y' is valid on host. - y.Write(); -- if (local) -- { -- // done on device since we've marked ext_ldof for use on device: -- y.SetSubVector(ext_ldof, 0.0); -- } -- else -+ BcastBeginCopy(x); // copy to 'shr_buf' -+ int req_counter = 0; -+ for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++) - { -- BcastBeginCopy(x); // copy to 'shr_buf' -- for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++) -+ const int send_offset = shr_buf_offsets[nbr]; -+ const int send_size = shr_buf_offsets[nbr+1] - send_offset; -+ if (send_size > 0) - { -- const int send_offset = shr_buf_offsets[nbr]; -- const int send_size = shr_buf_offsets[nbr+1] - send_offset; -- if (send_size > 0) -- { -- auto send_buf = mpi_gpu_aware ? shr_buf.Read() : shr_buf.HostRead(); -- MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE, -- gtopo.GetNeighborRank(nbr), 41822, -- gtopo.GetComm(), &requests[req_counter++]); -- } -- const int recv_offset = ext_buf_offsets[nbr]; -- const int recv_size = ext_buf_offsets[nbr+1] - recv_offset; -- if (recv_size > 0) -- { -- auto recv_buf = mpi_gpu_aware ? ext_buf.Write() : ext_buf.HostWrite(); -- MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE, -- gtopo.GetNeighborRank(nbr), 41822, -- gtopo.GetComm(), &requests[req_counter++]); -- } -+ auto send_buf = mpi_gpu_aware ? shr_buf.Read() : shr_buf.HostRead(); -+ MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE, -+ gtopo.GetNeighborRank(nbr), 41822, -+ gtopo.GetComm(), &requests[req_counter++]); -+ } -+ const int recv_offset = ext_buf_offsets[nbr]; -+ const int recv_size = ext_buf_offsets[nbr+1] - recv_offset; -+ if (recv_size > 0) -+ { -+ auto recv_buf = mpi_gpu_aware ? ext_buf.Write() : ext_buf.HostWrite(); -+ MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE, -+ gtopo.GetNeighborRank(nbr), 41822, -+ gtopo.GetComm(), &requests[req_counter++]); - } - } - BcastLocalCopy(x, y); -- if (!local) -- { -- MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE); -- BcastEndCopy(y); // copy from 'ext_buf' -- } -+ MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE); -+ BcastEndCopy(y); // copy from 'ext_buf' - } - - DeviceConformingProlongationOperator::~DeviceConformingProlongationOperator() -@@ -3774,38 +3698,32 @@ void DeviceConformingProlongationOperator::MultTranspose(const Vector &x, - Vector &y) const - { - const GroupTopology >opo = gc.GetGroupTopology(); -+ ReduceBeginCopy(x); // copy to 'ext_buf' - int req_counter = 0; -- if (!local) -+ for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++) - { -- ReduceBeginCopy(x); // copy to 'ext_buf' -- for (int nbr = 1; nbr < gtopo.GetNumNeighbors(); nbr++) -+ const int send_offset = ext_buf_offsets[nbr]; -+ const int send_size = ext_buf_offsets[nbr+1] - send_offset; -+ if (send_size > 0) - { -- const int send_offset = ext_buf_offsets[nbr]; -- const int send_size = ext_buf_offsets[nbr+1] - send_offset; -- if (send_size > 0) -- { -- auto send_buf = mpi_gpu_aware ? ext_buf.Read() : ext_buf.HostRead(); -- MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE, -- gtopo.GetNeighborRank(nbr), 41823, -- gtopo.GetComm(), &requests[req_counter++]); -- } -- const int recv_offset = shr_buf_offsets[nbr]; -- const int recv_size = shr_buf_offsets[nbr+1] - recv_offset; -- if (recv_size > 0) -- { -- auto recv_buf = mpi_gpu_aware ? shr_buf.Write() : shr_buf.HostWrite(); -- MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE, -- gtopo.GetNeighborRank(nbr), 41823, -- gtopo.GetComm(), &requests[req_counter++]); -- } -+ auto send_buf = mpi_gpu_aware ? ext_buf.Read() : ext_buf.HostRead(); -+ MPI_Isend(send_buf + send_offset, send_size, MPI_DOUBLE, -+ gtopo.GetNeighborRank(nbr), 41823, -+ gtopo.GetComm(), &requests[req_counter++]); -+ } -+ const int recv_offset = shr_buf_offsets[nbr]; -+ const int recv_size = shr_buf_offsets[nbr+1] - recv_offset; -+ if (recv_size > 0) -+ { -+ auto recv_buf = mpi_gpu_aware ? shr_buf.Write() : shr_buf.HostWrite(); -+ MPI_Irecv(recv_buf + recv_offset, recv_size, MPI_DOUBLE, -+ gtopo.GetNeighborRank(nbr), 41823, -+ gtopo.GetComm(), &requests[req_counter++]); - } - } - ReduceLocalCopy(x, y); -- if (!local) -- { -- MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE); -- ReduceEndAssemble(y); // assemble from 'shr_buf' -- } -+ MPI_Waitall(req_counter, requests, MPI_STATUSES_IGNORE); -+ ReduceEndAssemble(y); // assemble from 'shr_buf' - } - - } // namespace mfem -diff --git a/fem/pfespace.hpp b/fem/pfespace.hpp -index 8f574670b..c4a95a365 100644 ---- a/fem/pfespace.hpp -+++ b/fem/pfespace.hpp -@@ -70,6 +70,7 @@ private: - - /// The matrix P (interpolation from true dof to dof). Owned. - mutable HypreParMatrix *P; -+ - /// Optimized action-only prolongation operator for conforming meshes. Owned. - mutable Operator *Pconf; - -@@ -80,12 +81,6 @@ private: - - /// The (block-diagonal) matrix R (restriction of dof to true dof). Owned. - mutable SparseMatrix *R; -- /// Optimized action-only restriction operator for conforming meshes. Owned. -- mutable Operator *Rconf; -- /** Transpose of R or Rconf. For conforming mesh, this is a matrix-free -- (Device)ConformingProlongationOperator, for a non-conforming mesh -- this is a TransposeOperator wrapping R. */ -- mutable Operator *R_transpose; - - /// Flag indicating the existence of shared triangles with interior ND dofs - bool nd_strias; -@@ -321,6 +316,13 @@ public: - HypreParMatrix *Dof_TrueDof_Matrix() const - { if (!P) { Build_Dof_TrueDof_Matrix(); } return P; } - -+ /// Get the P matrix which prolongates a true dof vector to local dof vector. -+ virtual const Operator *GetProlongationMatrix() const; -+ -+ /// Get the R matrix which restricts a local dof vector to true dof vector. -+ virtual const SparseMatrix *GetRestrictionMatrix() const -+ { Dof_TrueDof_Matrix(); return R; } -+ - /** @brief For a non-conforming mesh, construct and return the interpolation - matrix from the partially conforming true dofs to the local dofs. */ - /** @note The returned pointer must be deleted by the caller. */ -@@ -374,21 +376,6 @@ public: - HYPRE_BigInt GetMyDofOffset() const; - HYPRE_BigInt GetMyTDofOffset() const; - -- virtual const Operator *GetProlongationMatrix() const; -- /** @brief Return logical transpose of restriction matrix, but in -- non-assembled optimized matrix-free form. -- -- The implementation is like GetProlongationMatrix, but it sets local -- DOFs to the true DOF values if owned locally, otherwise zero. */ -- virtual const Operator *GetRestrictionTransposeOperator() const; -- /** Get an Operator that performs the action of GetRestrictionMatrix(), -- but potentially with a non-assembled optimized matrix-free -- implementation. */ -- virtual const Operator *GetRestrictionOperator() const; -- /// Get the R matrix which restricts a local dof vector to true dof vector. -- virtual const SparseMatrix *GetRestrictionMatrix() const -- { Dof_TrueDof_Matrix(); return R; } -- - // Face-neighbor functions - void ExchangeFaceNbrData(); - int GetFaceNbrVSize() const { return num_face_nbr_dofs; } -@@ -434,21 +421,17 @@ public: - int TrueVSize() const { return ltdof_size; } - }; - -- - /// Auxiliary class used by ParFiniteElementSpace. - class ConformingProlongationOperator : public Operator - { - protected: - Array external_ldofs; - const GroupCommunicator &gc; -- bool local; - - public: -- ConformingProlongationOperator(int lsize, const GroupCommunicator &gc_, -- bool local_=false); -+ ConformingProlongationOperator(int lsize, const GroupCommunicator &gc_); - -- ConformingProlongationOperator(const ParFiniteElementSpace &pfes, -- bool local_=false); -+ ConformingProlongationOperator(const ParFiniteElementSpace &pfes); - - const GroupCommunicator &GetGroupCommunicator() const; - -@@ -458,8 +441,8 @@ public: - }; - - /// Auxiliary device class used by ParFiniteElementSpace. --class DeviceConformingProlongationOperator: public -- ConformingProlongationOperator -+class DeviceConformingProlongationOperator : -+ public ConformingProlongationOperator - { - protected: - bool mpi_gpu_aware; -@@ -495,11 +478,10 @@ protected: - void ReduceEndAssemble(Vector &dst) const; - - public: -- DeviceConformingProlongationOperator( -- const GroupCommunicator &gc_, const SparseMatrix *R, bool local_=false); -+ DeviceConformingProlongationOperator(const GroupCommunicator &gc_, -+ const SparseMatrix *R); - -- DeviceConformingProlongationOperator(const ParFiniteElementSpace &pfes, -- bool local_=false); -+ DeviceConformingProlongationOperator(const ParFiniteElementSpace &pfes); - - virtual ~DeviceConformingProlongationOperator(); - -diff --git a/fem/prestriction.cpp b/fem/prestriction.cpp -index 37dee3c8d..89ce189db 100644 ---- a/fem/prestriction.cpp -+++ b/fem/prestriction.cpp -@@ -325,7 +325,7 @@ void ParL2FaceRestriction::DoubleValuedConformingMult( - auto d_x_shared = Reshape(x_gf.FaceNbrData().Read(), - t?vd:nsdofs, t?nsdofs:vd); - auto d_y = Reshape(y.Write(), nface_dofs, vd, 2, nf); -- mfem::forall(nfdofs, [=] MFEM_HOST_DEVICE (int i) -+ mfem::forall(face_dofs*nf, [=] MFEM_HOST_DEVICE (int i) - { - const int dof = i % nface_dofs; - const int face = i / nface_dofs; -diff --git a/fem/restriction.cpp b/fem/restriction.cpp -index c7343d7c7..e0877606b 100644 ---- a/fem/restriction.cpp -+++ b/fem/restriction.cpp -@@ -12,27 +12,24 @@ - #include "restriction.hpp" - #include "gridfunc.hpp" - #include "fespace.hpp" --#include "../general/forall.hpp" --#include -- - #ifdef MFEM_USE_MPI -- - #include "pfespace.hpp" -- - #endif -+#include "../general/forall.hpp" -+#include - - namespace mfem - { - --ElementRestriction::ElementRestriction(const FiniteElementSpace &f, -- ElementDofOrdering e_ordering) -+ConformingElementRestriction::ConformingElementRestriction( -+ const FiniteElementSpace &f, -+ ElementDofOrdering e_ordering) - : fes(f), - ne(fes.GetNE()), - vdim(fes.GetVDim()), - byvdim(fes.GetOrdering() == Ordering::byVDIM), - ndofs(fes.GetNDofs()), - dof(ne > 0 ? fes.GetFE(0)->GetDof() : 0), -- nedofs(ne*dof), - offsets(ndofs+1), - indices(ne*dof), - gather_map(ne*dof) -@@ -104,7 +101,7 @@ ElementRestriction::ElementRestriction(const FiniteElementSpace &f, - offsets[0] = 0; - } - --void ElementRestriction::Mult(const Vector& x, Vector& y) const -+void ConformingElementRestriction::Mult(const Vector& x, Vector& y) const - { - // Assumes all elements have the same number of dofs - const int nd = dof; -@@ -126,7 +123,8 @@ void ElementRestriction::Mult(const Vector& x, Vector& y) const - }); - } - --void ElementRestriction::MultUnsigned(const Vector& x, Vector& y) const -+void ConformingElementRestriction::MultUnsigned(const Vector& x, -+ Vector& y) const - { - // Assumes all elements have the same number of dofs - const int nd = dof; -@@ -148,12 +146,13 @@ void ElementRestriction::MultUnsigned(const Vector& x, Vector& y) const - } - - template --void ElementRestriction::TAddMultTranspose(const Vector& x, Vector& y) const -+static void TAddMultTranspose(const int nd, const int vd, const bool t, -+ const int ndofs, const int ne, -+ const Array& offsets, -+ const Array& indices, -+ const Vector& x, Vector& y) - { - // Assumes all elements have the same number of dofs -- const int nd = dof; -- const int vd = vdim; -- const bool t = byvdim; - auto d_offsets = offsets.Read(); - auto d_indices = indices.Read(); - auto d_x = Reshape(x.Read(), nd, vd, ne); -@@ -177,21 +176,23 @@ void ElementRestriction::TAddMultTranspose(const Vector& x, Vector& y) const - }); - } - --void ElementRestriction::MultTranspose(const Vector& x, Vector& y) const -+void ConformingElementRestriction::MultTranspose(const Vector& x, -+ Vector& y) const - { - constexpr bool ADD = false; -- TAddMultTranspose(x, y); -+ TAddMultTranspose(dof, vdim, byvdim, ndofs, ne, offsets, indices, x, y); - } - --void ElementRestriction::AddMultTranspose(const Vector& x, Vector& y, -- const double a) const -+void ConformingElementRestriction::AddMultTranspose(const Vector& x, Vector& y, -+ const double a) const - { - MFEM_VERIFY(a == 1.0, "General coefficient case is not yet supported!"); - constexpr bool ADD = true; -- TAddMultTranspose(x, y); -+ TAddMultTranspose(dof, vdim, byvdim, ndofs, ne, offsets, indices, x, y); - } - --void ElementRestriction::MultTransposeUnsigned(const Vector& x, Vector& y) const -+void ConformingElementRestriction::MultTransposeUnsigned(const Vector& x, -+ Vector& y) const - { - // Assumes all elements have the same number of dofs - const int nd = dof; -@@ -218,32 +219,7 @@ void ElementRestriction::MultTransposeUnsigned(const Vector& x, Vector& y) const - }); - } - --void ElementRestriction::MultLeftInverse(const Vector& x, Vector& y) const --{ -- // Assumes all elements have the same number of dofs -- const int nd = dof; -- const int vd = vdim; -- const bool t = byvdim; -- auto d_offsets = offsets.Read(); -- auto d_indices = indices.Read(); -- auto d_x = Reshape(x.Read(), nd, vd, ne); -- auto d_y = Reshape(y.Write(), t?vd:ndofs, t?ndofs:vd); -- mfem::forall(ndofs, [=] MFEM_HOST_DEVICE (int i) -- { -- const int next_offset = d_offsets[i + 1]; -- for (int c = 0; c < vd; ++c) -- { -- double dof_value = 0; -- const int j = next_offset - 1; -- const int idx_j = (d_indices[j] >= 0) ? d_indices[j] : -1 - d_indices[j]; -- dof_value = (d_indices[j] >= 0) ? d_x(idx_j % nd, c, idx_j / nd) : -- -d_x(idx_j % nd, c, idx_j / nd); -- d_y(t?c:i,t?i:c) = dof_value; -- } -- }); --} -- --void ElementRestriction::BooleanMask(Vector& y) const -+void ConformingElementRestriction::BooleanMask(Vector& y) const - { - // Assumes all elements have the same number of dofs - const int nd = dof; -@@ -280,8 +256,8 @@ void ElementRestriction::BooleanMask(Vector& y) const - } - } - --void ElementRestriction::FillSparseMatrix(const Vector &mat_ea, -- SparseMatrix &mat) const -+void ConformingElementRestriction::FillSparseMatrix(const Vector &mat_ea, -+ SparseMatrix &mat) const - { - mat.GetMemoryI().New(mat.Height()+1, mat.GetMemoryI().GetMemoryType()); - const int nnz = FillI(mat); -@@ -319,7 +295,7 @@ static MFEM_HOST_DEVICE int GetAndIncrementNnzIndex(const int i_L, int* I) - return ind; - } - --int ElementRestriction::FillI(SparseMatrix &mat) const -+int ConformingElementRestriction::FillI(SparseMatrix &mat) const - { - static constexpr int Max = MaxNbNbr; - const int all_dofs = ndofs; -@@ -396,8 +372,8 @@ int ElementRestriction::FillI(SparseMatrix &mat) const - return h_I[nTdofs]; - } - --void ElementRestriction::FillJAndData(const Vector &ea_data, -- SparseMatrix &mat) const -+void ConformingElementRestriction::FillJAndData(const Vector &ea_data, -+ SparseMatrix &mat) const - { - static constexpr int Max = MaxNbNbr; - const int all_dofs = ndofs; -@@ -523,11 +499,10 @@ void L2ElementRestriction::Mult(const Vector &x, Vector &y) const - } - - template --void L2ElementRestriction::TAddMultTranspose(const Vector &x, Vector &y) const -+static void L2TAddMultTranspose(const int nd, const int vd, const bool t, -+ const int ndofs, const int ne, -+ const Vector &x, Vector &y) - { -- const int nd = ndof; -- const int vd = vdim; -- const bool t = byvdim; - auto d_x = Reshape(x.Read(), nd, vd, ne); - auto d_y = Reshape(ADD ? y.ReadWrite() : y.Write(), t?vd:ndofs, t?ndofs:vd); - mfem::forall(ndofs, [=] MFEM_HOST_DEVICE (int i) -@@ -546,7 +521,7 @@ void L2ElementRestriction::TAddMultTranspose(const Vector &x, Vector &y) const - void L2ElementRestriction::MultTranspose(const Vector &x, Vector &y) const - { - constexpr bool ADD = false; -- TAddMultTranspose(x, y); -+ L2TAddMultTranspose(ndof, vdim, byvdim, ndofs, ne, x, y); - } - - void L2ElementRestriction::AddMultTranspose(const Vector &x, Vector &y, -@@ -554,7 +529,7 @@ void L2ElementRestriction::AddMultTranspose(const Vector &x, Vector &y, - { - MFEM_VERIFY(a == 1.0, "General coefficient case is not yet supported!"); - constexpr bool ADD = true; -- TAddMultTranspose(x, y); -+ L2TAddMultTranspose(ndof, vdim, byvdim, ndofs, ne, x, y); - } - - void L2ElementRestriction::FillI(SparseMatrix &mat) const -@@ -609,7 +584,6 @@ ConformingFaceRestriction::ConformingFaceRestriction( - byvdim(fes.GetOrdering() == Ordering::byVDIM), - face_dofs(nf > 0 ? fes.GetFaceElement(0)->GetDof() : 0), - elem_dofs(fes.GetFE(0)->GetDof()), -- nfdofs(nf*face_dofs), - ndofs(fes.GetNDofs()), - scatter_indices(nf*face_dofs), - gather_offsets(ndofs+1), -@@ -651,62 +625,102 @@ ConformingFaceRestriction::ConformingFaceRestriction( - : ConformingFaceRestriction(fes, f_ordering, type, true) - { } - --void ConformingFaceRestriction::Mult(const Vector& x, Vector& y) const -+static void ConformingFaceRestriction_Mult( -+ const int ndofs, -+ const int face_dofs, -+ const int nf, -+ const int vdim, -+ const bool by_vdim, -+ const Array &scatter_indices, -+ const Vector &x, -+ Vector &y, -+ bool use_signs) - { - if (nf==0) { return; } - // Assumes all elements have the same number of dofs -- const int nface_dofs = face_dofs; -- const int vd = vdim; -- const bool t = byvdim; - auto d_indices = scatter_indices.Read(); -- auto d_x = Reshape(x.Read(), t?vd:ndofs, t?ndofs:vd); -- auto d_y = Reshape(y.Write(), nface_dofs, vd, nf); -- mfem::forall(nfdofs, [=] MFEM_HOST_DEVICE (int i) -- { -- const int s_idx = d_indices[i]; -- const int sgn = (s_idx >= 0) ? 1 : -1; -- const int idx = (s_idx >= 0) ? s_idx : -1 - s_idx; -- const int dof = i % nface_dofs; -- const int face = i / nface_dofs; -- for (int c = 0; c < vd; ++c) -+ auto d_x = Reshape(x.Read(), by_vdim?vdim:ndofs, by_vdim?ndofs:vdim); -+ auto d_y = Reshape(y.Write(), face_dofs, vdim, nf); -+ mfem::forall(face_dofs*nf, [=] MFEM_HOST_DEVICE (int i) -+ { -+ const int s_idx_j = d_indices[i]; -+ const double sgn = (s_idx_j >= 0 || !use_signs) ? 1.0 : -1.0; -+ const int idx_j = (s_idx_j >= 0) ? s_idx_j : -1 - s_idx_j; -+ for (int c = 0; c < vdim; ++c) - { -- d_y(dof, c, face) = sgn*d_x(t?c:idx, t?idx:c); -+ d_y(i % face_dofs, c, i / face_dofs) = -+ sgn*d_x(by_vdim?c:idx_j, by_vdim?idx_j:c); - } - }); - } - --void ConformingFaceRestriction::AddMultTranspose( -- const Vector& x, Vector& y, const double a) const -+void ConformingFaceRestriction::Mult(const Vector& x, Vector& y) const -+{ -+ ConformingFaceRestriction_Mult( -+ ndofs, face_dofs, nf, vdim, byvdim, scatter_indices, x, y, true); -+} -+ -+void ConformingFaceRestriction::MultUnsigned(const Vector& x, Vector& y) const -+{ -+ ConformingFaceRestriction_Mult( -+ ndofs, face_dofs, nf, vdim, byvdim, scatter_indices, x, y, false); -+} -+ -+static void ConformingFaceRestriction_AddMultTranspose( -+ const int ndofs, -+ const int face_dofs, -+ const int nf, -+ const int vdim, -+ const bool by_vdim, -+ const Array &gather_offsets, -+ const Array &gather_indices, -+ const Vector &x, -+ Vector &y, -+ bool use_signs, -+ const double a) - { - MFEM_VERIFY(a == 1.0, "General coefficient case is not yet supported!"); - if (nf==0) { return; } - // Assumes all elements have the same number of dofs -- const int nface_dofs = face_dofs; -- const int vd = vdim; -- const bool t = byvdim; - auto d_offsets = gather_offsets.Read(); - auto d_indices = gather_indices.Read(); -- auto d_x = Reshape(x.Read(), nface_dofs, vd, nf); -- auto d_y = Reshape(y.ReadWrite(), t?vd:ndofs, t?ndofs:vd); -+ auto d_x = Reshape(x.Read(), face_dofs, vdim, nf); -+ auto d_y = Reshape(y.ReadWrite(), by_vdim?vdim:ndofs, by_vdim?ndofs:vdim); - mfem::forall(ndofs, [=] MFEM_HOST_DEVICE (int i) - { - const int offset = d_offsets[i]; - const int next_offset = d_offsets[i + 1]; -- for (int c = 0; c < vd; ++c) -+ for (int c = 0; c < vdim; ++c) - { - double dof_value = 0; - for (int j = offset; j < next_offset; ++j) - { - const int s_idx_j = d_indices[j]; -- const int sgn = (s_idx_j >= 0) ? 1 : -1; -+ const double sgn = (s_idx_j >= 0 || !use_signs) ? 1.0 : -1.0; - const int idx_j = (s_idx_j >= 0) ? s_idx_j : -1 - s_idx_j; -- dof_value += sgn*d_x(idx_j % nface_dofs, c, idx_j / nface_dofs); -+ dof_value += sgn*d_x(idx_j % face_dofs, c, idx_j / face_dofs); - } -- d_y(t?c:i,t?i:c) += dof_value; -+ d_y(by_vdim?c:i,by_vdim?i:c) += dof_value; - } - }); - } - -+void ConformingFaceRestriction::AddMultTranspose( -+ const Vector& x, Vector& y, const double a) const -+{ -+ ConformingFaceRestriction_AddMultTranspose( -+ ndofs, face_dofs, nf, vdim, byvdim, gather_offsets, gather_indices, x, y, -+ true, a); -+} -+ -+void ConformingFaceRestriction::AddMultTransposeUnsigned( -+ const Vector& x, Vector& y, const double a) const -+{ -+ ConformingFaceRestriction_AddMultTranspose( -+ ndofs, face_dofs, nf, vdim, byvdim, gather_offsets, gather_indices, x, y, -+ false, a); -+} -+ - void ConformingFaceRestriction::CheckFESpace(const ElementDofOrdering - f_ordering) - { -@@ -1019,7 +1033,6 @@ L2FaceRestriction::L2FaceRestriction(const FiniteElementSpace &fes, - fes.GetTraceElement(0, fes.GetMesh()->GetFaceGeometry(0))->GetDof() - : 0), - elem_dofs(fes.GetFE(0)->GetDof()), -- nfdofs(nf*face_dofs), - ndofs(fes.GetNDofs()), - type(type), - m(m), -@@ -1060,7 +1073,7 @@ void L2FaceRestriction::SingleValuedConformingMult(const Vector& x, - auto d_indices1 = scatter_indices1.Read(); - auto d_x = Reshape(x.Read(), t?vd:ndofs, t?ndofs:vd); - auto d_y = Reshape(y.Write(), nface_dofs, vd, nf); -- mfem::forall(nfdofs, [=] MFEM_HOST_DEVICE (int i) -+ mfem::forall(face_dofs*nf, [=] MFEM_HOST_DEVICE (int i) - { - const int dof = i % nface_dofs; - const int face = i / nface_dofs; -@@ -1086,7 +1099,7 @@ void L2FaceRestriction::DoubleValuedConformingMult(const Vector& x, - auto d_indices2 = scatter_indices2.Read(); - auto d_x = Reshape(x.Read(), t?vd:ndofs, t?ndofs:vd); - auto d_y = Reshape(y.Write(), nface_dofs, vd, 2, nf); -- mfem::forall(nfdofs, [=] MFEM_HOST_DEVICE (int i) -+ mfem::forall(face_dofs*nf, [=] MFEM_HOST_DEVICE (int i) - { - const int dof = i % nface_dofs; - const int face = i / nface_dofs; -@@ -1137,7 +1150,7 @@ void L2FaceRestriction::SingleValuedConformingAddMultTranspose( - for (int j = offset; j < next_offset; ++j) - { - int idx_j = d_indices[j]; -- dof_value += d_x(idx_j % nface_dofs, c, idx_j / nface_dofs); -+ dof_value += d_x(idx_j % nface_dofs, c, idx_j / nface_dofs); - } - d_y(t?c:i,t?i:c) += dof_value; - } -@@ -1151,7 +1164,7 @@ void L2FaceRestriction::DoubleValuedConformingAddMultTranspose( - const int nface_dofs = face_dofs; - const int vd = vdim; - const bool t = byvdim; -- const int dofs = nfdofs; -+ const int dofs = face_dofs*nf; - auto d_offsets = gather_offsets.Read(); - auto d_indices = gather_indices.Read(); - auto d_x = Reshape(x.Read(), nface_dofs, vd, 2, nf); -@@ -1168,9 +1181,8 @@ void L2FaceRestriction::DoubleValuedConformingAddMultTranspose( - int idx_j = d_indices[j]; - bool isE1 = idx_j < dofs; - idx_j = isE1 ? idx_j : idx_j - dofs; -- dof_value += isE1 ? -- d_x(idx_j % nface_dofs, c, 0, idx_j / nface_dofs) -- :d_x(idx_j % nface_dofs, c, 1, idx_j / nface_dofs); -+ dof_value += (isE1 ? d_x(idx_j % nface_dofs, c, 0, idx_j / nface_dofs) -+ : d_x(idx_j % nface_dofs, c, 1, idx_j / nface_dofs)); - } - d_y(t?c:i,t?i:c) += dof_value; - } -@@ -1565,7 +1577,7 @@ void L2FaceRestriction::PermuteAndSetFaceDofsGatherIndices2( - const int global_dof_elem2 = elem_map[elem_index*elem_dofs + volume_dof_elem2]; - const int restriction_dof_elem2 = face_dofs*face_index + face_dof_elem1; - // We shift restriction_dof_elem2 to express that it's elem2 of the face -- gather_indices[gather_offsets[global_dof_elem2]++] = nfdofs + -+ gather_indices[gather_offsets[global_dof_elem2]++] = face_dofs*nf + - restriction_dof_elem2; - } - } -diff --git a/fem/restriction.hpp b/fem/restriction.hpp -index 617305541..7eb6eaede 100644 ---- a/fem/restriction.hpp -+++ b/fem/restriction.hpp -@@ -22,19 +22,44 @@ class FiniteElementSpace; - enum class ElementDofOrdering; - - /// Abstract base class that defines an interface for element restrictions. --class ElementRestrictionOperator : public Operator -+class ElementRestriction : public Operator - { - public: -- /// @brief Add the E-vector degrees of freedom @a x to the L-vector degrees -- /// of freedom @a y. -+ /** @brief Extract the degrees of freedom from @a x into @a y. */ -+ void Mult(const Vector &x, Vector &y) const override = 0; -+ -+ /** @brief Set the degrees of freedom in the element degrees of freedom -+ @a y to the values given in @a x. */ -+ void MultTranspose(const Vector &x, Vector &y) const override -+ { -+ y = 0.0; -+ AddMultTranspose(x, y); -+ } -+ -+ /** @brief Add the degrees of freedom @a x to the element degrees of -+ freedom @a y. */ - void AddMultTranspose(const Vector &x, Vector &y, - const double a = 1.0) const override = 0; -+ -+ /** @brief Add the degrees of freedom @a x to the element degrees of -+ freedom @a y ignoring the signs from DOF orientation. */ -+ virtual void MultUnsigned(const Vector &x, Vector &y) const -+ { -+ Mult(x, y); -+ } -+ -+ /** @brief Add the degrees of freedom @a x to the element degrees of -+ freedom @a y ignoring the signs from DOF orientation. */ -+ virtual void MultTransposeUnsigned(const Vector &x, Vector &y) const -+ { -+ MultTranspose(x, y); -+ } - }; - - /// Operator that converts FiniteElementSpace L-vectors to E-vectors. - /** Objects of this type are typically created and owned by FiniteElementSpace - objects, see FiniteElementSpace::GetElementRestriction(). */ --class ElementRestriction : public ElementRestrictionOperator -+class ConformingElementRestriction : public ElementRestriction - { - private: - /** This number defines the maximum number of elements any dof can belong to -@@ -48,7 +73,6 @@ protected: - const bool byvdim; - const int ndofs; - const int dof; -- const int nedofs; - Array offsets; - Array indices; - Array gather_map; -@@ -65,20 +89,18 @@ protected: - ///@} - - public: -- ElementRestriction(const FiniteElementSpace&, ElementDofOrdering); -+ ConformingElementRestriction(const FiniteElementSpace&, ElementDofOrdering); -+ - void Mult(const Vector &x, Vector &y) const override; -+ - void MultTranspose(const Vector &x, Vector &y) const override; -+ - void AddMultTranspose(const Vector &x, Vector &y, - const double a = 1.0) const override; - -- /// Compute Mult without applying signs based on DOF orientations. -- void MultUnsigned(const Vector &x, Vector &y) const; -- /// Compute MultTranspose without applying signs based on DOF orientations. -- void MultTransposeUnsigned(const Vector &x, Vector &y) const; -+ void MultUnsigned(const Vector &x, Vector &y) const override; - -- /// Compute MultTranspose by setting (rather than adding) element -- /// contributions; this is a left inverse of the Mult() operation -- void MultLeftInverse(const Vector &x, Vector &y) const; -+ void MultTransposeUnsigned(const Vector &x, Vector &y) const override; - - /// @brief Fills the E-vector y with `boolean` values 0.0 and 1.0 such that each - /// each entry of the L-vector is uniquely represented in `y`. -@@ -92,16 +114,13 @@ public: - void FillSparseMatrix(const Vector &mat_ea, SparseMatrix &mat) const; - - /** Fill the I array of SparseMatrix corresponding to the sparsity pattern -- given by this ElementRestriction. */ -+ given by this ConformingElementRestriction. */ - int FillI(SparseMatrix &mat) const; -+ - /** Fill the J and Data arrays of SparseMatrix corresponding to the sparsity -- pattern given by this ElementRestriction, and the values of ea_data. */ -+ pattern given by this ConformingElementRestriction, and the values of -+ ea_data. */ - void FillJAndData(const Vector &ea_data, SparseMatrix &mat) const; -- /// @private Not part of the public interface (device kernel limitation). -- /// -- /// Performs either MultTranspose or AddMultTranspose depending on the -- /// boolean template parameter @a ADD. -- template void TAddMultTranspose(const Vector &x, Vector &y) const; - }; - - /// Operator that converts L2 FiniteElementSpace L-vectors to E-vectors. -@@ -109,37 +128,39 @@ public: - objects, see FiniteElementSpace::GetElementRestriction(). L-vectors - corresponding to grid functions in L2 finite element spaces differ from - E-vectors only in the ordering of the degrees of freedom. */ --class L2ElementRestriction : public ElementRestrictionOperator -+class L2ElementRestriction : public ElementRestriction - { -+private: - const int ne; - const int vdim; - const bool byvdim; - const int ndof; - const int ndofs; -+ - public: - L2ElementRestriction(const FiniteElementSpace&); -+ - void Mult(const Vector &x, Vector &y) const override; -+ - void MultTranspose(const Vector &x, Vector &y) const override; -+ - void AddMultTranspose(const Vector &x, Vector &y, - const double a = 1.0) const override; -+ - /** Fill the I array of SparseMatrix corresponding to the sparsity pattern - given by this ElementRestriction. */ - void FillI(SparseMatrix &mat) const; -+ - /** Fill the J and Data arrays of SparseMatrix corresponding to the sparsity - pattern given by this L2FaceRestriction, and the values of ea_data. */ - void FillJAndData(const Vector &ea_data, SparseMatrix &mat) const; -- /// @private Not part of the public interface (device kernel limitation). -- /// -- /// Performs either MultTranspose or AddMultTranspose depending on the -- /// boolean template parameter @a ADD. -- template void TAddMultTranspose(const Vector &x, Vector &y) const; - }; - - /** An enum type to specify if only e1 value is requested (SingleValued) or both - e1 and e2 (DoubleValued). */ - enum class L2FaceValues : bool {SingleValued, DoubleValued}; - --/** @brief Base class for operators that extracts Face degrees of freedom. -+/** @brief Abstract base class for operators that extracts Face degrees of freedom. - - In order to compute quantities on the faces of a mesh, it is often useful to - extract the degrees of freedom on the faces of the elements. This class -@@ -177,6 +198,19 @@ public: - */ - void Mult(const Vector &x, Vector &y) const override = 0; - -+ /** @brief Set the face degrees of freedom in the element degrees of freedom -+ @a y to the values given in @a x. -+ -+ @param[in] x The face degrees of freedom on the face. -+ @param[in,out] y The L-vector of degrees of freedom to which we add the -+ face degrees of freedom. -+ */ -+ void MultTranspose(const Vector &x, Vector &y) const override -+ { -+ y = 0.0; -+ AddMultTranspose(x, y); -+ } -+ - /** @brief Add the face degrees of freedom @a x to the element degrees of - freedom @a y. - -@@ -185,14 +219,29 @@ public: - face degrees of freedom. - @param[in] a Scalar coefficient for addition. - */ -- virtual void AddMultTranspose(const Vector &x, Vector &y, -- const double a = 1.0) const override = 0; -+ void AddMultTranspose(const Vector &x, Vector &y, -+ const double a = 1.0) const override = 0; -+ -+ /** @brief Extract the face degrees of freedom from @a x into @a y ignoring -+ the signs from DOF orientation. */ -+ virtual void MultUnsigned(const Vector &x, Vector &y) const -+ { -+ Mult(x, y); -+ } -+ -+ /** @brief Add the face degrees of freedom @a x to the element degrees of -+ freedom @a y ignoring the signs from DOF orientation. */ -+ virtual void AddMultTransposeUnsigned(const Vector &x, Vector &y, -+ const double a = 1.0) const -+ { -+ AddMultTranspose(x, y, a); -+ } - - /** @brief Add the face degrees of freedom @a x to the element degrees of - freedom @a y. Perform the same computation as AddMultTranspose, but - @a x is invalid after calling this method. - -- @param[in,out] x The face degrees of freedom on the face. -+ @param[in,out] x The face degrees of freedom on the face. - @param[in,out] y The L-vector of degrees of freedom to which we add the - face degrees of freedom. - -@@ -203,19 +252,6 @@ public: - { - AddMultTranspose(x, y); - } -- -- /** @brief Set the face degrees of freedom in the element degrees of freedom -- @a y to the values given in @a x. -- -- @param[in] x The face degrees of freedom on the face. -- @param[in,out] y The L-vector of degrees of freedom to which we add the -- face degrees of freedom. -- */ -- void MultTranspose(const Vector &x, Vector &y) const override -- { -- y = 0.0; -- AddMultTranspose(x, y); -- } - }; - - /// @brief Operator that extracts face degrees of freedom for H1, ND, or RT -@@ -232,7 +268,6 @@ protected: - const bool byvdim; - const int face_dofs; // Number of dofs on each face - const int elem_dofs; // Number of dofs in each element -- const int nfdofs; // Total number of face E-vector dofs - const int ndofs; // Total number of dofs - Array scatter_indices; // Scattering indices for element 1 on each face - Array gather_offsets; // offsets for the gathering indices of each dof -@@ -252,6 +287,7 @@ protected: - const ElementDofOrdering f_ordering, - const FaceType type, - bool build); -+ - public: - /** @brief Construct a ConformingFaceRestriction. - -@@ -274,22 +310,33 @@ public: - ElementDofOrdering. */ - void Mult(const Vector &x, Vector &y) const override; - -- using FaceRestriction::AddMultTransposeInPlace; -+ /** @brief Extract the face degrees of freedom from @a x into @a y ignoring -+ the signs from DOF orientation. -+ -+ @sa Mult(). */ -+ void MultUnsigned(const Vector &x, Vector &y) const override; - - /** @brief Gather the degrees of freedom, i.e. goes from face E-Vector to - L-Vector. - -- @param[in] x The face E-Vector degrees of freedom with the given format: -- face_dofs x vdim x nf -- where nf is the number of interior or boundary faces -- requested by @a type in the constructor. -- The face_dofs should be ordered according to the given -- ElementDofOrdering -+ @param[in] x The face E-Vector degrees of freedom with the given format: -+ face_dofs x vdim x nf -+ where nf is the number of interior or boundary faces -+ requested by @a type in the constructor. -+ The face_dofs should be ordered according to the given -+ ElementDofOrdering - @param[in,out] y The L-vector degrees of freedom. -- @param[in] a Scalar coefficient for addition. */ -+ @param[in] a Scalar coefficient for addition. */ - void AddMultTranspose(const Vector &x, Vector &y, - const double a = 1.0) const override; - -+ /** @brief Gather the degrees of freedom, i.e. goes from face E-Vector to -+ L-Vector @b not taking into account signs from DOF orientations. -+ -+ @sa AddMultTranspose(). */ -+ void AddMultTransposeUnsigned(const Vector &x, Vector &y, -+ const double a = 1.0) const override; -+ - private: - /** @brief Compute the scatter indices: L-vector to E-vector, and the offsets - for the gathering: E-vector to L-vector. -@@ -360,7 +407,6 @@ protected: - const bool byvdim; - const int face_dofs; // Number of dofs on each face - const int elem_dofs; // Number of dofs in each element -- const int nfdofs; // Total number of dofs on the faces - const int ndofs; // Total number of dofs - const FaceType type; - const L2FaceValues m; -@@ -412,8 +458,6 @@ public: - ElementDofOrdering. */ - void Mult(const Vector &x, Vector &y) const override; - -- using FaceRestriction::AddMultTranspose; -- - /** @brief Gather the degrees of freedom, i.e. goes from face E-Vector to - L-Vector. - -@@ -810,6 +854,7 @@ protected: - const FaceType type, - const L2FaceValues m, - bool build); -+ - public: - /** @brief Constructs an NCL2FaceRestriction, this is a specialization of a - L2FaceRestriction for nonconforming meshes. -diff --git a/fem/transfer.cpp b/fem/transfer.cpp -index 68dab72d5..7f95ca9fe 100644 ---- a/fem/transfer.cpp -+++ b/fem/transfer.cpp -@@ -92,9 +92,9 @@ const Operator &GridTransfer::MakeTrueOperator( - else // Parallel() == true - { - #ifdef MFEM_USE_MPI -+ const SparseMatrix *out_R = fes_out.GetRestrictionMatrix(); - if (oper_type == Operator::Hypre_ParCSR) - { -- const SparseMatrix *out_R = fes_out.GetRestrictionMatrix(); - const ParFiniteElementSpace *pfes_in = - dynamic_cast(&fes_in); - const ParFiniteElementSpace *pfes_out = -@@ -122,7 +122,6 @@ const Operator &GridTransfer::MakeTrueOperator( - } - else if (oper_type == Operator::ANY_TYPE) - { -- const Operator *out_R = fes_out.GetRestrictionOperator(); - t_oper.Reset(new TripleProductOperator( - out_R, &oper, fes_in.GetProlongationMatrix(), - false, false, false)); -@@ -1159,12 +1158,12 @@ TensorProductPRefinementTransferOperator( - localL.UseDevice(true); - localH.UseDevice(true); - -- MFEM_VERIFY(dynamic_cast(elem_restrict_lex_h), -+ const auto *elem_restrict = -+ dynamic_cast(elem_restrict_lex_h); -+ MFEM_VERIFY(elem_restrict, - "High order element restriction is of unsupported type"); -- - mask.SetSize(localH.Size(), Device::GetMemoryType()); -- static_cast(elem_restrict_lex_h) -- ->BooleanMask(mask); -+ elem_restrict->BooleanMask(mask); - mask.UseDevice(true); - } - -diff --git a/general/communication.hpp b/general/communication.hpp -index 474486f1b..c7d00f1e4 100644 ---- a/general/communication.hpp -+++ b/general/communication.hpp -@@ -217,7 +217,6 @@ protected: - int group_buf_size; - mutable Array group_buf; - MPI_Request *requests; -- // MPI_Status *statuses; - // comm_lock: 0 - no lock, 1 - locked for Bcast, 2 - locked for Reduce - mutable int comm_lock; - mutable int num_requests; -diff --git a/general/version.cpp b/general/version.cpp -index d2b05f8ed..f4c402d51 100644 ---- a/general/version.cpp -+++ b/general/version.cpp -@@ -103,9 +103,6 @@ const char *GetConfigStr() - #ifdef MFEM_USE_LAPACK - "MFEM_USE_LAPACK\n" - #endif --#ifdef MFEM_USE_LEGACY_OPENMP -- "MFEM_USE_LEGACY_OPENMP\n" --#endif - #ifdef MFEM_USE_LIBUNWIND - "MFEM_USE_LIBUNWIND\n" - #endif -diff --git a/linalg/auxiliary.cpp b/linalg/auxiliary.cpp -index 003e52695..bcb55ca3e 100644 ---- a/linalg/auxiliary.cpp -+++ b/linalg/auxiliary.cpp -@@ -171,7 +171,6 @@ MatrixFreeAuxiliarySpace::MatrixFreeAuxiliarySpace( - { - a_lor.AddDomainIntegrator(new VectorMassIntegrator); - } -- a_lor.UsePrecomputedSparsity(); - a_lor.Assemble(); - a_lor.EliminateEssentialBC(ess_bdr, policy); - a_lor.Finalize(); -@@ -244,7 +243,6 @@ MatrixFreeAuxiliarySpace::MatrixFreeAuxiliarySpace( - { - a_lor.AddDomainIntegrator(new DiffusionIntegrator); - } -- a_lor.UsePrecomputedSparsity(); - a_lor.Assemble(); - if (ess_bdr.Size()) - { -@@ -460,14 +458,14 @@ MatrixFreeAMS::MatrixFreeAMS( - pa_grad->SetAssemblyLevel(AssemblyLevel::PARTIAL); - pa_grad->AddDomainInterpolator(new GradientInterpolator); - pa_grad->Assemble(); -- pa_grad->FormRectangularSystemMatrix(Gradient); -+ pa_grad->FormDiscreteOperatorMatrix(Gradient); - - // build Pi operator - pa_interp = new ParDiscreteLinearOperator(h1_fespace_d, &nd_fespace); - pa_interp->SetAssemblyLevel(AssemblyLevel::PARTIAL); - pa_interp->AddDomainInterpolator(new IdentityInterpolator); - pa_interp->Assemble(); -- pa_interp->FormRectangularSystemMatrix(Pi); -+ pa_interp->FormDiscreteOperatorMatrix(Pi); - - // build LOR space - ParMesh mesh_lor = ParMesh::MakeRefined(*mesh, order, BasisType::GaussLobatto); -diff --git a/linalg/handle.hpp b/linalg/handle.hpp -index 818294985..96f658d6a 100644 ---- a/linalg/handle.hpp -+++ b/linalg/handle.hpp -@@ -207,7 +207,6 @@ public: - const Vector &X, Vector &B) const; - }; - -- - /// Add an alternative name for OperatorHandle -- OperatorPtr. - typedef OperatorHandle OperatorPtr; - -diff --git a/linalg/hypre.hpp b/linalg/hypre.hpp -index 913bdb2d0..3d5b4b2fe 100644 ---- a/linalg/hypre.hpp -+++ b/linalg/hypre.hpp -@@ -720,7 +720,7 @@ public: - - /** @brief The "Boolean" analog of y = alpha * A * x + beta * y, where - elements in the sparsity pattern of the matrix are treated as "true". */ -- void BooleanMult(int alpha, const int *x, int beta, int *y) -+ void BooleanMult(int alpha, const int *x, int beta, int *y) const - { - HostRead(); - internal::hypre_ParCSRMatrixBooleanMatvec(A, alpha, const_cast(x), -@@ -730,7 +730,7 @@ public: - - /** @brief The "Boolean" analog of y = alpha * A^T * x + beta * y, where - elements in the sparsity pattern of the matrix are treated as "true". */ -- void BooleanMultTranspose(int alpha, const int *x, int beta, int *y) -+ void BooleanMultTranspose(int alpha, const int *x, int beta, int *y) const - { - HostRead(); - internal::hypre_ParCSRMatrixBooleanMatvecT(A, alpha, const_cast(x), -diff --git a/linalg/operator.cpp b/linalg/operator.cpp -index 1f214ece7..64f75c8a4 100644 ---- a/linalg/operator.cpp -+++ b/linalg/operator.cpp -@@ -165,7 +165,7 @@ void Operator::RecoverFEMSolution(const Vector &X, const Vector &b, Vector &x) - } - } - --Operator * Operator::SetupRAP(const Operator *Pi, const Operator *Po) -+Operator *Operator::SetupRAP(const Operator *Pi, const Operator *Po) - { - Operator *rap; - if (!IsIdentityProlongation(Pi)) -@@ -176,15 +176,15 @@ Operator * Operator::SetupRAP(const Operator *Pi, const Operator *Po) - } - else - { -- rap = new ProductOperator(this, Pi, false,false); -+ rap = new ProductOperator(this, Pi, false, false); - } - } - else - { - if (!IsIdentityProlongation(Po)) - { -- TransposeOperator * PoT = new TransposeOperator(Po); -- rap = new ProductOperator(PoT, this, true,false); -+ TransposeOperator *PoT = new TransposeOperator(Po); -+ rap = new ProductOperator(PoT, this, true, false); - } - else - { -@@ -245,10 +245,10 @@ void Operator::FormDiscreteOperator(Operator* &Aout) - { - const Operator *Pin = this->GetProlongation(); - const Operator *Rout = this->GetOutputRestriction(); -- Aout = new TripleProductOperator(Rout, this, Pin,false, false, false); -+ Aout = new TripleProductOperator(Rout, this, Pin, false, false, false); - } - --void Operator::PrintMatlab(std::ostream & os, int n, int m) const -+void Operator::PrintMatlab(std::ostream &os, int n, int m) const - { - using namespace std; - if (n == 0) { n = width; } -diff --git a/linalg/operator.hpp b/linalg/operator.hpp -index baa9bf767..cdf700e17 100644 ---- a/linalg/operator.hpp -+++ b/linalg/operator.hpp -@@ -29,8 +29,8 @@ protected: - - /// see FormSystemOperator() - /** @note Uses DiagonalPolicy::DIAG_ONE. */ -- void FormConstrainedSystemOperator( -- const Array &ess_tdof_list, ConstrainedOperator* &Aout); -+ void FormConstrainedSystemOperator(const Array &ess_tdof_list, -+ ConstrainedOperator* &Aout); - - /// see FormRectangularSystemOperator() - void FormRectangularConstrainedSystemOperator( -@@ -38,10 +38,6 @@ protected: - const Array &test_tdof_list, - RectangularConstrainedOperator* &Aout); - -- /** @brief Returns RAP Operator of this, using input/output Prolongation matrices -- @a Pi corresponds to "P", @a Po corresponds to "Rt" */ -- Operator *SetupRAP(const Operator *Pi, const Operator *Po); -- - public: - /// Defines operator diagonal policy upon elimination of rows and/or columns. - enum DiagonalPolicy -@@ -149,12 +145,6 @@ public: - return GetProlongation(); // Assume square unless specialized - } - -- /** @brief Transpose of GetOutputRestriction, directly available in this -- form to facilitate matrix-free RAP-type operators. -- -- `NULL` means identity. */ -- virtual const Operator *GetOutputRestrictionTranspose() const { return NULL; } -- - /** @brief Restriction operator from output vectors for the operator to linear - algebra (linear system) vectors. `NULL` means identity. */ - virtual const Operator *GetOutputRestriction() const -@@ -239,6 +229,10 @@ public: - forms, though currently @a b is not used in the implementation. */ - virtual void RecoverFEMSolution(const Vector &X, const Vector &b, Vector &x); - -+ /** @brief Returns RAP Operator of this, using input/output Prolongation matrices -+ @a Pi corresponds to "P", @a Po corresponds to "Rt" */ -+ Operator *SetupRAP(const Operator *Pi, const Operator *Po); -+ - /** @brief Return in @a A a parallel (on truedofs) version of this square - operator. - -@@ -270,10 +264,10 @@ public: - void FormDiscreteOperator(Operator* &A); - - /// Prints operator with input size n and output size m in Matlab format. -- void PrintMatlab(std::ostream & out, int n, int m = 0) const; -+ void PrintMatlab(std::ostream &out, int n, int m = 0) const; - - /// Prints operator in Matlab format. -- virtual void PrintMatlab(std::ostream & out) const; -+ virtual void PrintMatlab(std::ostream &out) const; - - /// Virtual destructor. - virtual ~Operator() { } -@@ -722,6 +716,7 @@ inline bool IsIdentityProlongation(const Operator *P) - return !P || dynamic_cast(P); - } - -+ - /// Scaled Operator B: x -> a A(x). - class ScaledOperator : public Operator - { -@@ -928,6 +923,7 @@ public: - virtual ~ConstrainedOperator() { if (own_A) { delete A; } } - }; - -+ - /** @brief Rectangular Operator for imposing essential boundary conditions on - the input space using only the action, Mult(), of a given unconstrained - Operator. -@@ -981,6 +977,7 @@ public: - virtual ~RectangularConstrainedOperator() { if (own_A) { delete A; } } - }; - -+ - /** @brief PowerMethod helper class to estimate the largest eigenvalue of an - operator using the iterative power method. */ - class PowerMethod -diff --git a/linalg/solvers.hpp b/linalg/solvers.hpp -index 085cea616..434864042 100644 ---- a/linalg/solvers.hpp -+++ b/linalg/solvers.hpp -@@ -276,8 +276,11 @@ public: - - ///@} - -- /// This should be called before SetOperator -+ /// This should be called before SetOperator if you want SetOperator to -+ /// set both the solver and preconditioner operators together - virtual void SetPreconditioner(Solver &pr); -+ void SetPreconditioner(Solver *pr) -+ { if (pr) { SetPreconditioner(*pr); } else { prec = nullptr; } } - - /// Also calls SetOperator for the preconditioner if there is one - virtual void SetOperator(const Operator &op) override; -diff --git a/linalg/sparsemat.cpp b/linalg/sparsemat.cpp -index 145379c00..e55ea47b4 100644 ---- a/linalg/sparsemat.cpp -+++ b/linalg/sparsemat.cpp -@@ -764,7 +764,6 @@ void SparseMatrix::AddMult(const Vector &x, Vector &y, const double a) const - return; - } - --#ifndef MFEM_USE_LEGACY_OPENMP - const int height = this->height; - const int nnz = J.Capacity(); - auto d_I = Read(I, height+1); -@@ -885,24 +884,6 @@ void SparseMatrix::AddMult(const Vector &x, Vector &y, const double a) const - }); - - } -- --#else // MFEM_USE_LEGACY_OPENMP -- const double *Ap = A, *xp = x.GetData(); -- double *yp = y.GetData(); -- const int *Jp = J, *Ip = I; -- -- #pragma omp parallel for -- for (int i = 0; i < height; i++) -- { -- double d = 0.0; -- const int end = Ip[i+1]; -- for (int j = Ip[i]; j < end; j++) -- { -- d += Ap[j] * xp[Jp[j]]; -- } -- yp[i] += a * d; -- } --#endif // MFEM_USE_LEGACY_OPENMP - } - - void SparseMatrix::MultTranspose(const Vector &x, Vector &y) const -diff --git a/linalg/vector.cpp b/linalg/vector.cpp -index 4951ed914..db1a93666 100644 ---- a/linalg/vector.cpp -+++ b/linalg/vector.cpp -@@ -113,9 +113,6 @@ const double &Vector::Elem(int i) const - double Vector::operator*(const double *v) const - { - double dot = 0.0; --#ifdef MFEM_USE_LEGACY_OPENMP -- #pragma omp parallel for reduction(+:dot) --#endif - for (int i = 0; i < size; i++) - { - dot += data[i] * v[i]; -@@ -313,12 +310,19 @@ void Vector::Neg() - mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = -y[i]; }); - } - -+void Vector::Reciprocal() -+{ -+ const bool use_dev = UseDevice(); -+ const int N = size; -+ auto y = ReadWrite(use_dev); -+ mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = 1.0/y[i]; }); -+} -+ - void add(const Vector &v1, const Vector &v2, Vector &v) - { - MFEM_ASSERT(v.size == v1.size && v.size == v2.size, - "incompatible Vectors!"); - --#if !defined(MFEM_USE_LEGACY_OPENMP) - const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice(); - const int N = v.size; - // Note: get read access first, in case v is the same as v1/v2. -@@ -326,13 +330,6 @@ void add(const Vector &v1, const Vector &v2, Vector &v) - auto x2 = v2.Read(use_dev); - auto y = v.Write(use_dev); - mfem::forall_switch(use_dev, N, [=] MFEM_HOST_DEVICE (int i) { y[i] = x1[i] + x2[i]; }); --#else -- #pragma omp parallel for -- for (int i = 0; i < v.size; i++) -- { -- v.data[i] = v1.data[i] + v2.data[i]; -- } --#endif - } - - void add(const Vector &v1, double alpha, const Vector &v2, Vector &v) -@@ -350,7 +347,6 @@ void add(const Vector &v1, double alpha, const Vector &v2, Vector &v) - } - else - { --#if !defined(MFEM_USE_LEGACY_OPENMP) - const bool use_dev = v1.UseDevice() || v2.UseDevice() || v.UseDevice(); - const int N = v.size; - // Note: get read access first, in case v is the same as v1/v2. -@@ -361,16 +357,6 @@ void add(const Vector &v1, double alpha, const Vector &v2, Vector &v) - { - d_z[i] = d_x[i] + alpha * d_y[i]; - }); --#else -- const double *v1p = v1.data, *v2p = v2.data; -- double *vp = v.data; -- const int s = v.size; -- #pragma omp parallel for -- for (int i = 0; i < s; i++) -- { -- vp[i] = v1p[i] + alpha*v2p[i]; -- } --#endif - } - } - -@@ -389,7 +375,6 @@ void add(const double a, const Vector &x, const Vector &y, Vector &z) - } - else - { --#if !defined(MFEM_USE_LEGACY_OPENMP) - const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice(); - const int N = x.size; - // Note: get read access first, in case z is the same as x/y. -@@ -400,17 +385,6 @@ void add(const double a, const Vector &x, const Vector &y, Vector &z) - { - zd[i] = a * (xd[i] + yd[i]); - }); --#else -- const double *xp = x.data; -- const double *yp = y.data; -- double *zp = z.data; -- const int s = x.size; -- #pragma omp parallel for -- for (int i = 0; i < s; i++) -- { -- zp[i] = a * (xp[i] + yp[i]); -- } --#endif - } - } - -@@ -444,7 +418,6 @@ void add(const double a, const Vector &x, - #endif - else - { --#if !defined(MFEM_USE_LEGACY_OPENMP) - const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice(); - const int N = x.size; - // Note: get read access first, in case z is the same as x/y. -@@ -455,17 +428,6 @@ void add(const double a, const Vector &x, - { - zd[i] = a * xd[i] + b * yd[i]; - }); --#else -- const double *xp = x.data; -- const double *yp = y.data; -- double *zp = z.data; -- const int s = x.size; -- #pragma omp parallel for -- for (int i = 0; i < s; i++) -- { -- zp[i] = a * xp[i] + b * yp[i]; -- } --#endif - } - } - -@@ -474,7 +436,6 @@ void subtract(const Vector &x, const Vector &y, Vector &z) - MFEM_ASSERT(x.size == y.size && x.size == z.size, - "incompatible Vectors!"); - --#if !defined(MFEM_USE_LEGACY_OPENMP) - const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice(); - const int N = x.size; - // Note: get read access first, in case z is the same as x/y. -@@ -485,17 +446,6 @@ void subtract(const Vector &x, const Vector &y, Vector &z) - { - zd[i] = xd[i] - yd[i]; - }); --#else -- const double *xp = x.data; -- const double *yp = y.data; -- double *zp = z.data; -- const int s = x.size; -- #pragma omp parallel for -- for (int i = 0; i < s; i++) -- { -- zp[i] = xp[i] - yp[i]; -- } --#endif - } - - void subtract(const double a, const Vector &x, const Vector &y, Vector &z) -@@ -513,7 +463,6 @@ void subtract(const double a, const Vector &x, const Vector &y, Vector &z) - } - else - { --#if !defined(MFEM_USE_LEGACY_OPENMP) - const bool use_dev = x.UseDevice() || y.UseDevice() || z.UseDevice(); - const int N = x.size; - // Note: get read access first, in case z is the same as x/y. -@@ -524,17 +473,6 @@ void subtract(const double a, const Vector &x, const Vector &y, Vector &z) - { - zd[i] = a * (xd[i] - yd[i]); - }); --#else -- const double *xp = x.data; -- const double *yp = y.data; -- double *zp = z.data; -- const int s = x.size; -- #pragma omp parallel for -- for (int i = 0; i < s; i++) -- { -- zp[i] = a * (xp[i] - yp[i]); -- } --#endif - } - } - -diff --git a/linalg/vector.hpp b/linalg/vector.hpp -index 4d2dda36d..04605d2f9 100644 ---- a/linalg/vector.hpp -+++ b/linalg/vector.hpp -@@ -323,6 +323,9 @@ public: - /// (*this) = -(*this) - void Neg(); - -+ /// (*this)(i) = 1.0 / (*this)(i) -+ void Reciprocal(); -+ - /// Swap the contents of two Vectors - inline void Swap(Vector &other); - -diff --git a/makefile b/makefile -index 00d139b28..a606f6dfe 100644 ---- a/makefile -+++ b/makefile -@@ -265,16 +265,6 @@ endif - - DEP_CXX ?= $(MFEM_CXX) - --# Check legacy OpenMP configuration --ifeq ($(MFEM_USE_LEGACY_OPENMP),YES) -- MFEM_THREAD_SAFE ?= YES -- ifneq ($(MFEM_THREAD_SAFE),YES) -- $(error Incompatible config: MFEM_USE_LEGACY_OPENMP requires MFEM_THREAD_SAFE) -- endif -- # NOTE: MFEM_USE_LEGACY_OPENMP cannot be combined with any of: -- # MFEM_USE_OPENMP, MFEM_USE_CUDA, MFEM_USE_RAJA, MFEM_USE_OCCA --endif -- - # List of MFEM dependencies, that require the *_LIB variable to be non-empty - MFEM_REQ_LIB_DEPS = ENZYME SUPERLU MUMPS METIS FMS CONDUIT SIDRE LAPACK SUNDIALS\ - SUITESPARSE STRUMPACK GINKGO GNUTLS NETCDF PETSC SLEPC MPFR PUMI HIOP\ -@@ -339,16 +329,16 @@ endif - MFEM_DEFINES = MFEM_VERSION MFEM_VERSION_STRING MFEM_GIT_STRING MFEM_USE_MPI\ - MFEM_USE_METIS MFEM_USE_METIS_5 MFEM_DEBUG MFEM_USE_EXCEPTIONS MFEM_USE_ZLIB\ - MFEM_USE_LIBUNWIND MFEM_USE_LAPACK MFEM_THREAD_SAFE MFEM_USE_OPENMP\ -- MFEM_USE_LEGACY_OPENMP MFEM_USE_MEMALLOC MFEM_TIMER_TYPE MFEM_USE_SUNDIALS\ -- MFEM_USE_SUITESPARSE MFEM_USE_GINKGO MFEM_USE_SUPERLU MFEM_USE_SUPERLU5\ -- MFEM_USE_STRUMPACK MFEM_USE_GNUTLS MFEM_USE_NETCDF MFEM_USE_PETSC\ -- MFEM_USE_SLEPC MFEM_USE_MPFR MFEM_USE_SIDRE MFEM_USE_FMS MFEM_USE_CONDUIT\ -- MFEM_USE_PUMI MFEM_USE_HIOP MFEM_USE_GSLIB MFEM_USE_CUDA MFEM_USE_HIP\ -- MFEM_USE_OCCA MFEM_USE_MOONOLITH MFEM_USE_CEED MFEM_USE_RAJA MFEM_USE_UMPIRE\ -- MFEM_USE_SIMD MFEM_USE_ADIOS2 MFEM_USE_MKL_CPARDISO MFEM_USE_AMGX\ -- MFEM_USE_MUMPS MFEM_USE_ADFORWARD MFEM_USE_CODIPACK MFEM_USE_CALIPER\ -- MFEM_USE_BENCHMARK MFEM_USE_PARELAG MFEM_USE_ALGOIM MFEM_USE_ENZYME\ -- MFEM_SOURCE_DIR MFEM_INSTALL_DIR MFEM_SHARED_BUILD -+ MFEM_USE_MEMALLOC MFEM_TIMER_TYPE MFEM_USE_SUNDIALS MFEM_USE_SUITESPARSE\ -+ MFEM_USE_GINKGO MFEM_USE_SUPERLU MFEM_USE_SUPERLU5 MFEM_USE_STRUMPACK\ -+ MFEM_USE_GNUTLS MFEM_USE_NETCDF MFEM_USE_PETSC MFEM_USE_SLEPC MFEM_USE_MPFR\ -+ MFEM_USE_SIDRE MFEM_USE_FMS MFEM_USE_CONDUIT MFEM_USE_PUMI MFEM_USE_HIOP\ -+ MFEM_USE_GSLIB MFEM_USE_CUDA MFEM_USE_HIP MFEM_USE_OCCA MFEM_USE_MOONOLITH\ -+ MFEM_USE_CEED MFEM_USE_RAJA MFEM_USE_UMPIRE MFEM_USE_SIMD MFEM_USE_ADIOS2\ -+ MFEM_USE_MKL_CPARDISO MFEM_USE_AMGX MFEM_USE_MUMPS MFEM_USE_ADFORWARD\ -+ MFEM_USE_CODIPACK MFEM_USE_CALIPER MFEM_USE_BENCHMARK MFEM_USE_PARELAG\ -+ MFEM_USE_ALGOIM MFEM_USE_ENZYME MFEM_SOURCE_DIR MFEM_INSTALL_DIR\ -+ MFEM_SHARED_BUILD - - # List of makefile variables that will be written to config.mk: - MFEM_CONFIG_VARS = MFEM_CXX MFEM_HOST_CXX MFEM_CPPFLAGS MFEM_CXXFLAGS\ -@@ -419,7 +409,7 @@ endif - DIRS = general linalg linalg/simd mesh mesh/submesh fem fem/ceed/interface \ - fem/ceed/integrators/mass fem/ceed/integrators/convection \ - fem/ceed/integrators/diffusion fem/ceed/integrators/nlconvection \ -- fem/ceed/solvers fem/fe fem/lor fem/qinterp fem/tmop -+ fem/ceed/solvers fem/fe fem/lor fem/qinterp fem/integ fem/tmop - - ifeq ($(MFEM_USE_MOONOLITH),YES) - MFEM_CXXFLAGS += $(MOONOLITH_CXX_FLAGS) -@@ -672,7 +662,6 @@ status info: - $(info MFEM_USE_LAPACK = $(MFEM_USE_LAPACK)) - $(info MFEM_THREAD_SAFE = $(MFEM_THREAD_SAFE)) - $(info MFEM_USE_OPENMP = $(MFEM_USE_OPENMP)) -- $(info MFEM_USE_LEGACY_OPENMP = $(MFEM_USE_LEGACY_OPENMP)) - $(info MFEM_USE_MEMALLOC = $(MFEM_USE_MEMALLOC)) - $(info MFEM_TIMER_TYPE = $(MFEM_TIMER_TYPE)) - $(info MFEM_USE_SUNDIALS = $(MFEM_USE_SUNDIALS)) -@@ -756,10 +745,6 @@ deprecation-warnings: - @if [ -t 1 ]; then\ - red="\033[0;31m"; yellow="\033[0;33m"; end="\033[0m";\ - fi;\ -- if [ $(MFEM_USE_LEGACY_OPENMP) = YES ]; then\ -- printf $$red"[MFEM_USE_LEGACY_OPENMP]"$$end": "$$yellow"%s"$$end"\n"\ -- $(DEPRECATION_WARNING);\ -- fi - - # $(call mfem_check_command, command-to-execute, success_msg, failed_msg) - mfem_check_command = \ -diff --git a/miniapps/performance/ex1.cpp b/miniapps/performance/ex1.cpp -index e2271585c..916e7022e 100644 ---- a/miniapps/performance/ex1.cpp -+++ b/miniapps/performance/ex1.cpp -@@ -317,8 +317,6 @@ int ex1_t::run(Mesh *mesh, int ref_levels, int order, int basis, - cout << "Assembling the bilinear form ..." << flush; - tic_toc.Clear(); - tic_toc.Start(); -- // Pre-allocate sparsity assuming dense element matrices -- a->UsePrecomputedSparsity(); - - HPCBilinearForm *a_hpc = NULL; - Operator *a_oper = NULL; -@@ -373,7 +371,6 @@ int ex1_t::run(Mesh *mesh, int ref_levels, int order, int basis, - { - // TODO: assemble the LOR matrix using the performance code - a_pc->AddDomainIntegrator(new DiffusionIntegrator(one)); -- a_pc->UsePrecomputedSparsity(); - a_pc->Assemble(); - a_pc->FormSystemMatrix(ess_tdof_list, A_pc); - } -@@ -385,7 +382,6 @@ int ex1_t::run(Mesh *mesh, int ref_levels, int order, int basis, - } - else - { -- a_pc->UsePrecomputedSparsity(); - a_hpc->AssembleBilinearForm(*a_pc); - a_pc->FormSystemMatrix(ess_tdof_list, A_pc); - } -diff --git a/miniapps/performance/ex1p.cpp b/miniapps/performance/ex1p.cpp -index 79735c7ff..1e80576bb 100644 ---- a/miniapps/performance/ex1p.cpp -+++ b/miniapps/performance/ex1p.cpp -@@ -390,8 +390,6 @@ int ex1_t::run(Mesh *mesh, int ser_ref_levels, int par_ref_levels, - } - tic_toc.Clear(); - tic_toc.Start(); -- // Pre-allocate sparsity assuming dense element matrices -- a->UsePrecomputedSparsity(); - - HPCBilinearForm *a_hpc = NULL; - Operator *a_oper = NULL; -@@ -460,7 +458,6 @@ int ex1_t::run(Mesh *mesh, int ser_ref_levels, int par_ref_levels, - { - // TODO: assemble the LOR matrix using the performance code - a_pc->AddDomainIntegrator(new DiffusionIntegrator(one)); -- a_pc->UsePrecomputedSparsity(); - a_pc->Assemble(); - a_pc->FormSystemMatrix(ess_tdof_list, A_pc); - } -@@ -472,7 +469,6 @@ int ex1_t::run(Mesh *mesh, int ser_ref_levels, int par_ref_levels, - } - else - { -- a_pc->UsePrecomputedSparsity(); - a_hpc->AssembleBilinearForm(*a_pc); - a_pc->FormSystemMatrix(ess_tdof_list, A_pc); - } -diff --git a/tests/unit/fem/test_assemblediagonalpa.cpp b/tests/unit/fem/test_assemblediagonalpa.cpp -index ebbb8e224..050561e1d 100644 ---- a/tests/unit/fem/test_assemblediagonalpa.cpp -+++ b/tests/unit/fem/test_assemblediagonalpa.cpp -@@ -17,11 +17,9 @@ using namespace mfem; - namespace assemblediagonalpa - { - --int dimension; -- - double coeffFunction(const Vector& x) - { -- if (dimension == 2) -+ if (x.Size() == 2) - { - return sin(8.0 * M_PI * x[0]) * cos(6.0 * M_PI * x[1]) + 2.0; - } -@@ -36,12 +34,12 @@ double coeffFunction(const Vector& x) - void vectorCoeffFunction(const Vector & x, Vector & f) - { - f = 0.0; -- if (dimension > 1) -+ if (x.Size() > 1) - { - f[0] = sin(M_PI * x[1]); - f[1] = sin(2.5 * M_PI * x[0]); - } -- if (dimension == 3) -+ if (x.Size() == 3) - { - f[2] = sin(6.1 * M_PI * x[2]); - } -@@ -50,14 +48,14 @@ void vectorCoeffFunction(const Vector & x, Vector & f) - void asymmetricMatrixCoeffFunction(const Vector & x, DenseMatrix & f) - { - f = 0.0; -- if (dimension == 2) -+ if (x.Size() == 2) - { - f(0,0) = 1.1 + sin(M_PI * x[1]); // 1,1 - f(1,0) = cos(1.3 * M_PI * x[1]); // 2,1 - f(0,1) = cos(2.5 * M_PI * x[0]); // 1,2 - f(1,1) = 1.1 + sin(4.9 * M_PI * x[0]); // 2,2 - } -- else if (dimension == 3) -+ else if (x.Size() == 3) - { - f(0,0) = 1.1 + sin(M_PI * x[1]); // 1,1 - f(0,1) = cos(2.5 * M_PI * x[0]); // 1,2 -@@ -74,13 +72,13 @@ void asymmetricMatrixCoeffFunction(const Vector & x, DenseMatrix & f) - void symmetricMatrixCoeffFunction(const Vector & x, DenseSymmetricMatrix & f) - { - f = 0.0; -- if (dimension == 2) -+ if (x.Size() == 2) - { - f(0,0) = 1.1 + sin(M_PI * x[1]); // 1,1 - f(0,1) = cos(2.5 * M_PI * x[0]); // 1,2 - f(1,1) = 1.1 + sin(4.9 * M_PI * x[0]); // 2,2 - } -- else if (dimension == 3) -+ else if (x.Size() == 3) - { - f(0,0) = sin(M_PI * x[1]); // 1,1 - f(0,1) = cos(2.5 * M_PI * x[0]); // 1,2 -@@ -93,7 +91,7 @@ void symmetricMatrixCoeffFunction(const Vector & x, DenseSymmetricMatrix & f) - - TEST_CASE("Mass Diagonal PA", "[PartialAssembly][AssembleDiagonal]") - { -- for (dimension = 2; dimension < 4; ++dimension) -+ for (int dimension = 2; dimension < 4; ++dimension) - { - for (int ne = 1; ne < 3; ++ne) - { -@@ -140,9 +138,45 @@ TEST_CASE("Mass Diagonal PA", "[PartialAssembly][AssembleDiagonal]") - } - } - -+TEST_CASE("Mass Boundary Diagonal PA", "[PartialAssembly][AssembleDiagonal]") -+{ -+ const bool all_tests = launch_all_non_regression_tests; -+ -+ auto fname = GENERATE("../../data/star.mesh", "../../data/star-q3.mesh", -+ "../../data/fichera.mesh", "../../data/fichera-q3.mesh"); -+ auto order = !all_tests ? 2 : GENERATE(1, 2, 3); -+ -+ CAPTURE(fname, order); -+ -+ Mesh mesh(fname); -+ int dim = mesh.Dimension(); -+ RT_FECollection fec(order, dim); -+ FiniteElementSpace fes(&mesh, &fec); -+ -+ FunctionCoefficient coeff(coeffFunction); -+ -+ Vector diag_fa(fes.GetTrueVSize()), diag_pa(fes.GetTrueVSize()); -+ -+ BilinearForm blf_fa(&fes); -+ blf_fa.AddBoundaryIntegrator(new MassIntegrator(coeff)); -+ blf_fa.Assemble(); -+ blf_fa.Finalize(); -+ blf_fa.SpMat().GetDiag(diag_fa); -+ -+ BilinearForm blf_pa(&fes); -+ blf_pa.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ blf_pa.AddBoundaryIntegrator(new MassIntegrator(coeff)); -+ blf_pa.Assemble(); -+ blf_pa.AssembleDiagonal(diag_pa); -+ -+ diag_pa -= diag_fa; -+ -+ REQUIRE(diag_pa.Normlinf() == MFEM_Approx(0.0)); -+} -+ - TEST_CASE("Diffusion Diagonal PA", "[PartialAssembly][AssembleDiagonal]") - { -- for (dimension = 2; dimension < 4; ++dimension) -+ for (int dimension = 2; dimension < 4; ++dimension) - { - for (int ne = 1; ne < 3; ++ne) - { -@@ -322,7 +356,7 @@ TEST_CASE("Vector Diffusion Diagonal PA", - TEST_CASE("Hcurl/Hdiv diagonal PA", - "[CUDA][PartialAssembly][AssembleDiagonal]") - { -- for (dimension = 2; dimension < 4; ++dimension) -+ for (int dimension = 2; dimension < 4; ++dimension) - { - for (int coeffType = 0; coeffType < 5; ++coeffType) - { -diff --git a/tests/unit/fem/test_bilinearform.cpp b/tests/unit/fem/test_bilinearform.cpp -index 5fd00b3e1..647ae38f2 100644 ---- a/tests/unit/fem/test_bilinearform.cpp -+++ b/tests/unit/fem/test_bilinearform.cpp -@@ -127,7 +127,7 @@ TEST_CASE("FormLinearSystem/SolutionScope", - // Legacy full assembly - { - GridFunction sol(&fes); -- SolvePDE(AssemblyLevel::LEGACYFULL, sol); -+ SolvePDE(AssemblyLevel::LEGACY, sol); - // Make sure the solution is still accessible after 'X' is destroyed - sol.HostRead(); - REQUIRE(AsConst(sol)(bdr_dof) == 0.0); -diff --git a/tests/unit/fem/test_pa_grad.cpp b/tests/unit/fem/test_pa_grad.cpp -index a42d7c83c..af0038cee 100644 ---- a/tests/unit/fem/test_pa_grad.cpp -+++ b/tests/unit/fem/test_pa_grad.cpp -@@ -154,7 +154,7 @@ double par_compare_pa_assembly(int dim, int num_elements, int order, - pa_grad.AddDomainInterpolator(new GradientInterpolator); - pa_grad.Assemble(); - OperatorPtr pa_grad_oper; -- pa_grad.FormRectangularSystemMatrix(pa_grad_oper); -+ pa_grad.FormDiscreteOperatorMatrix(pa_grad_oper); - - int insize, outsize; - if (transpose) -diff --git a/tests/unit/fem/test_pa_kernels.cpp b/tests/unit/fem/test_pa_kernels.cpp -index 2277efdfa..b50a69886 100644 ---- a/tests/unit/fem/test_pa_kernels.cpp -+++ b/tests/unit/fem/test_pa_kernels.cpp -@@ -9,11 +9,6 @@ - // terms of the BSD-3 license. We welcome feedback and contributions, see file - // CONTRIBUTING.md for details. - --#ifdef _WIN32 --#define _USE_MATH_DEFINES --#include --#endif -- - #include "unit_tests.hpp" - #include "mfem.hpp" - -@@ -520,17 +515,17 @@ static void test_pa_integrator() - GridFunction x(&fes), y_fa(&fes), y_pa(&fes); - x.Randomize(1); - -- ConstantCoefficient pi(M_PI); -+ FunctionCoefficient coeff(f1); - - BilinearForm blf_fa(&fes); -- blf_fa.AddDomainIntegrator(new INTEGRATOR(pi,ir)); -+ blf_fa.AddDomainIntegrator(new INTEGRATOR(coeff,ir)); - blf_fa.Assemble(); - blf_fa.Finalize(); - blf_fa.Mult(x, y_fa); - - BilinearForm blf_pa(&fes); - blf_pa.SetAssemblyLevel(AssemblyLevel::PARTIAL); -- blf_pa.AddDomainIntegrator(new INTEGRATOR(pi,ir)); -+ blf_pa.AddDomainIntegrator(new INTEGRATOR(coeff,ir)); - blf_pa.Assemble(); - blf_pa.Mult(x, y_pa); - -@@ -549,4 +544,39 @@ TEST_CASE("PA Diffusion", "[PartialAssembly], [CUDA]") - test_pa_integrator(); - } // PA Diffusion test case - -+TEST_CASE("PA Boundary Mass", "[PartialAssembly], [CUDA]") -+{ -+ const bool all_tests = launch_all_non_regression_tests; -+ -+ auto fname = GENERATE("../../data/star.mesh", "../../data/star-q3.mesh", -+ "../../data/fichera.mesh", "../../data/fichera-q3.mesh"); -+ auto order = !all_tests ? 2 : GENERATE(1, 2, 3); -+ -+ Mesh mesh(fname); -+ int dim = mesh.Dimension(); -+ RT_FECollection fec(order, dim); -+ FiniteElementSpace fes(&mesh, &fec); -+ -+ GridFunction x(&fes), y_fa(&fes), y_pa(&fes); -+ x.Randomize(1); -+ -+ FunctionCoefficient coeff(f1); -+ -+ BilinearForm blf_fa(&fes); -+ blf_fa.AddBoundaryIntegrator(new MassIntegrator(coeff)); -+ blf_fa.Assemble(); -+ blf_fa.Finalize(); -+ blf_fa.Mult(x, y_fa); -+ -+ BilinearForm blf_pa(&fes); -+ blf_pa.SetAssemblyLevel(AssemblyLevel::PARTIAL); -+ blf_pa.AddBoundaryIntegrator(new MassIntegrator(coeff)); -+ blf_pa.Assemble(); -+ blf_pa.Mult(x, y_pa); -+ -+ y_fa -= y_pa; -+ -+ REQUIRE(y_fa.Normlinf() == MFEM_Approx(0.0)); -+} -+ - } // namespace pa_kernels diff --git a/palace/deps/patch/mfem/patch_submesh.diff b/palace/deps/patch/mfem/patch_submesh.diff index 273e307f9..4274d3707 100644 --- a/palace/deps/patch/mfem/patch_submesh.diff +++ b/palace/deps/patch/mfem/patch_submesh.diff @@ -1,3 +1,27 @@ +diff --git a/CHANGELOG b/CHANGELOG +index 1d0a1c166..aa3b60cdf 100644 +--- a/CHANGELOG ++++ b/CHANGELOG +@@ -26,6 +26,9 @@ New and updated examples and miniapps + integrators are added in support of DPG systems: TraceIntegrator, + NormalTraceIntegrator and TangentTraceIntegrator. + ++- Added new SubMesh examples demonstrating source terms and boundary conditions ++ transferred from SubMesh objects. ++ + - Added a new H(div) solvers miniapp in miniapps/hdiv-linear-solver, + demonstrating the use of a matrix-free saddle-point solver methodology, + suitable for high-order discretizations and for GPU acceleration. Examples +@@ -47,6 +50,9 @@ Discretization improvements + - Face restriction operators for Nedelec and Raviart-Thomas finite element + spaces are now supported through the ConformingFaceRestriction class. + ++- SubMesh and ParSubMesh have been extended to support the transfer of ++ Nedelec and Raviart-Thomas finite element spaces. ++ + - VectorFEBoundaryFluxLFIntegrator is now supported on device/GPU. + + - Added support for p-refined meshes in FindPointsGSLIB. diff --git a/data/fichera-quad-mixed.mesh b/data/fichera-quad-mixed.mesh new file mode 100644 index 000000000..a3458665a @@ -431,7 +455,7 @@ index 000000000..cd82b4bf1 +0.6608093135547 0.8704406864453 +0.8704406864453 0.8704406864453 diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt -index afa98324c..16c3d0552 100644 +index 7d9c835c9..a89df6220 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -77,6 +77,8 @@ if (MFEM_USE_MPI) @@ -1839,14 +1863,23 @@ index 000000000..4abc2ed3a + } +} diff --git a/examples/makefile b/examples/makefile -index 85a22f832..7982a9f3e 100644 +index 85a22f832..1e3088f1a 100644 --- a/examples/makefile +++ b/examples/makefile +@@ -26,7 +26,7 @@ SEQ_EXAMPLES = ex0 ex1 ex2 ex3 ex4 ex5 ex6 ex7 ex8 ex9 ex10 ex14 ex15 ex16 \ + ex31 ex33 + PAR_EXAMPLES = ex0p ex1p ex2p ex3p ex4p ex5p ex6p ex7p ex8p ex9p ex10p ex11p \ + ex12p ex13p ex14p ex15p ex16p ex17p ex18p ex19p ex20p ex21p ex22p ex24p \ +- ex25p ex26p ex27p ex28p ex29p ex30p ex31p ex32p ex33p ++ ex25p ex26p ex27p ex28p ex29p ex30p ex31p ex32p ex33p ex34p ex35p + SEQ_DEVICE_EXAMPLES = ex1 ex3 ex4 ex5 ex6 ex9 ex22 ex24 ex25 ex26 + PAR_DEVICE_EXAMPLES = ex1p ex2p ex3p ex4p ex5p ex6p ex7p ex9p ex13p ex22p \ + ex24p ex25p ex26p @@ -183,3 +183,4 @@ clean-exec: - @rm -f ex23.mesh ex23-*.gf - @rm -f ex25.mesh ex25-*.gf ex25p-*.* - @rm -rf ex28_* ex28p_* -+ @rm -rf cond_mesh.* cond_j.* dsol.* port_mesh.* port_mode.* + @rm -f ex23.mesh ex23-*.gf + @rm -f ex25.mesh ex25-*.gf ex25p-*.* + @rm -rf ex28_* ex28p_* ++ @rm -rf cond_mesh.* cond_j.* dsol.* port_mesh.* port_mode.* diff --git a/fem/doftrans.cpp b/fem/doftrans.cpp index 95da3859d..06355ce75 100644 --- a/fem/doftrans.cpp @@ -2406,10 +2439,10 @@ index 95da3859d..06355ce75 100644 } } diff --git a/fem/doftrans.hpp b/fem/doftrans.hpp -index 9375246b5..fbe57bb2b 100644 +index 9375246b5..a1ddb3399 100644 --- a/fem/doftrans.hpp +++ b/fem/doftrans.hpp -@@ -15,19 +15,19 @@ +@@ -15,19 +15,31 @@ #include "../config/config.hpp" #include "../linalg/linalg.hpp" #include "intrules.hpp" @@ -2421,11 +2454,25 @@ index 9375246b5..fbe57bb2b 100644 -/** The DofTransformation class is an abstract base class for a family of - transformations that map local degrees of freedom (DoFs), contained within - individual elements, to global degrees of freedom, stored within +- GridFunction objects. These transformations are necessary to ensure that +- basis functions in neighboring elements align correctly. Closely related but +/** The StatelessDofTransformation class is an abstract base class for a family + of transformations that map local degrees of freedom (DoFs), contained + within individual elements, to global degrees of freedom, stored within - GridFunction objects. These transformations are necessary to ensure that - basis functions in neighboring elements align correctly. Closely related but ++ GridFunction objects. ++ ++ In this context "stateless" means that the concrete classes derived from ++ StatelessDofTransformation do not store information about the relative ++ orientations of the faces with respect to their neighboring elements. In ++ other words there is no information specific to a particular element (aside ++ from the element type e.g. tetrahedron, wedge, or pyramid). The ++ StatelessDofTransformation provides access to the transformation operators ++ for specific relative face orientations. These are useful, for example, when ++ relating DoFs associated with distinct overlapping meshes such as parent and ++ sub-meshes. ++ ++ These transformations are necessary to ensure that basis functions in ++ neighboring (or overlapping) elements align correctly. Closely related but complementary transformations are required for the entries stored in - LinearForm and BilinearForm objects. The DofTransformation class is designed - to apply the action of both of these types of DoF transformations. @@ -2435,7 +2482,7 @@ index 9375246b5..fbe57bb2b 100644 Let the "primal transformation" be given by the operator T. This means that given a local element vector v the data that must be placed into a -@@ -53,24 +53,87 @@ namespace mfem +@@ -53,24 +65,87 @@ namespace mfem D_t = T * D * T^{-1}. This can be accomplished by using a primal transformation on the columns of D and a dual transformation on its rows. */ @@ -2528,7 +2575,7 @@ index 9375246b5..fbe57bb2b 100644 /** @brief Configure the transformation using face orientations for the current element. */ /// The face_orientation array can be obtained from Mesh::GetElementFaces. -@@ -79,42 +142,82 @@ public: +@@ -79,42 +154,82 @@ public: inline const Array & GetFaceOrientations() const { return Fo; } @@ -2625,7 +2672,7 @@ index 9375246b5..fbe57bb2b 100644 }; /** Transform a matrix of DoFs entries from different finite element spaces as -@@ -133,66 +236,145 @@ void TransformDual(const DofTransformation *ran_dof_trans, +@@ -133,66 +248,145 @@ void TransformDual(const DofTransformation *ran_dof_trans, const DofTransformation *dom_dof_trans, DenseMatrix &elmat); @@ -2797,7 +2844,7 @@ index 9375246b5..fbe57bb2b 100644 }; /** Abstract base class for high-order Nedelec spaces on elements with -@@ -207,17 +389,22 @@ public: +@@ -207,17 +401,22 @@ public: be accessed as DenseMatrices using the GetFaceTransform() and GetFaceInverseTransform() methods. */ @@ -2826,7 +2873,7 @@ index 9375246b5..fbe57bb2b 100644 public: // Return the 2x2 transformation operator for the given face orientation -@@ -226,67 +413,119 @@ public: +@@ -226,67 +425,119 @@ public: // Return the 2x2 inverse transformation operator static const DenseMatrix & GetFaceInverseTransform(int ori) { return TInv(ori); } @@ -3151,7 +3198,7 @@ index 6b05c5f31..86c1f4e66 100644 virtual int GetContType() const { return TANGENTIAL; } FiniteElementCollection *GetTraceCollection() const; diff --git a/fem/fespace.hpp b/fem/fespace.hpp -index f777bf871..47ddbf438 100644 +index c30299bfe..e2c495506 100644 --- a/fem/fespace.hpp +++ b/fem/fespace.hpp @@ -377,17 +377,6 @@ protected: @@ -3205,10 +3252,10 @@ index 6f0af11c6..51f0df2f6 100644 j_offd[i_offd[i] + 1] = j_offd[i_offd[i]] + 1; d_offd[i_offd[i]] = T[0]; d_offd[i_offd[i] + 1] = T[2]; diff --git a/linalg/hypre.cpp b/linalg/hypre.cpp -index d7069d4e6..04072c420 100644 +index 689ce675e..7c1a463be 100644 --- a/linalg/hypre.cpp +++ b/linalg/hypre.cpp -@@ -5349,20 +5349,8 @@ void HypreAMS::MakeGradientAndInterpolation( +@@ -5340,20 +5340,8 @@ void HypreAMS::MakeGradientAndInterpolation( rt_trace_space = dynamic_cast(edge_fec); trace_space = trace_space || rt_trace_space; @@ -3231,7 +3278,7 @@ index d7069d4e6..04072c420 100644 ParMesh *pmesh = edge_fespace->GetParMesh(); if (rt_trace_space) -@@ -5751,19 +5739,9 @@ void HypreADS::MakeDiscreteMatrices(ParFiniteElementSpace *face_fespace) +@@ -5742,19 +5730,9 @@ void HypreADS::MakeDiscreteMatrices(ParFiniteElementSpace *face_fespace) const FiniteElementCollection *face_fec = face_fespace->FEColl(); bool trace_space = (dynamic_cast(face_fec) != NULL); @@ -3255,10 +3302,10 @@ index d7069d4e6..04072c420 100644 // define the nodal and edge finite element spaces associated with face_fespace ParMesh *pmesh = (ParMesh *) face_fespace->GetMesh(); diff --git a/mesh/mesh.cpp b/mesh/mesh.cpp -index de2d80f15..851a61744 100644 +index 87e606510..296accf53 100644 --- a/mesh/mesh.cpp +++ b/mesh/mesh.cpp -@@ -3013,6 +3013,10 @@ void Mesh::FinalizeTopology(bool generate_bdr) +@@ -3020,6 +3020,10 @@ void Mesh::FinalizeTopology(bool generate_bdr) if (Dim == 1) { GenerateFaces(); @@ -3269,7 +3316,7 @@ index de2d80f15..851a61744 100644 } if (ncmesh) -@@ -5653,13 +5657,54 @@ int Mesh::GetTriOrientation(const int *base, const int *test) +@@ -5660,13 +5664,54 @@ int Mesh::GetTriOrientation(const int *base, const int *test) for (int j = 0; j < 3; j++) if (test[aor[j]] != base[j]) { @@ -3325,7 +3372,7 @@ index de2d80f15..851a61744 100644 int Mesh::GetQuadOrientation(const int *base, const int *test) { int i; -@@ -5708,6 +5753,37 @@ int Mesh::GetQuadOrientation(const int *base, const int *test) +@@ -5715,6 +5760,37 @@ int Mesh::GetQuadOrientation(const int *base, const int *test) return 2*i+1; } @@ -3363,7 +3410,7 @@ index de2d80f15..851a61744 100644 int Mesh::GetTetOrientation(const int *base, const int *test) { // Static method. -@@ -6523,9 +6599,9 @@ const Table & Mesh::ElementToEdgeTable() const +@@ -6530,9 +6606,9 @@ const Table & Mesh::ElementToEdgeTable() const void Mesh::AddPointFaceElement(int lf, int gf, int el) { @@ -3376,10 +3423,10 @@ index de2d80f15..851a61744 100644 faces_info[gf].Elem1Inf = 64 * lf; // face lf with orientation 0 faces_info[gf].Elem2No = -1; // in case there's no other side diff --git a/mesh/mesh.hpp b/mesh/mesh.hpp -index 3e0590067..64bb85efc 100644 +index 64cf55ae4..06f4356a7 100644 --- a/mesh/mesh.hpp +++ b/mesh/mesh.hpp -@@ -472,8 +472,30 @@ protected: +@@ -468,8 +468,30 @@ protected: /// Returns the orientation of "test" relative to "base" static int GetTriOrientation (const int * base, const int * test); @@ -3411,10 +3458,23 @@ index 3e0590067..64bb85efc 100644 static int GetTetOrientation (const int * base, const int * test); diff --git a/mesh/submesh/psubmesh.cpp b/mesh/submesh/psubmesh.cpp -index b316d8b41..cf3e23d7d 100644 +index b316d8b41..1de148a76 100644 --- a/mesh/submesh/psubmesh.cpp +++ b/mesh/submesh/psubmesh.cpp -@@ -84,7 +84,8 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, +@@ -38,10 +38,8 @@ ParSubMesh ParSubMesh::CreateFromBoundary(const ParMesh &parent, + ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, + Array &attributes) : parent_(parent), from_(from), attributes_(attributes) + { +- if (Nonconforming()) +- { +- MFEM_ABORT("SubMesh does not support non-conforming meshes"); +- } ++ MFEM_VERIFY(from == SubMesh::From::Boundary || !Nonconforming(), ++ "ParSubMesh does not support non-conforming meshes with From::Domain"); + + MyComm = parent.GetComm(); + NRanks = parent.GetNRanks(); +@@ -84,7 +82,8 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, GetEdgeVertices(i, lv); // Find vertices/edge in parent mesh @@ -3424,7 +3484,7 @@ index b316d8b41..cf3e23d7d 100644 parent_edge_ids_.Append(parent_edge_id); } -@@ -106,6 +107,72 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, +@@ -106,6 +105,72 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, { parent_to_submesh_face_ids_[parent_face_ids_[i]] = i; } @@ -3497,7 +3557,7 @@ index b316d8b41..cf3e23d7d 100644 } ListOfIntegerSets groups; -@@ -145,7 +212,7 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, +@@ -145,7 +210,7 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, { BuildFaceGroup(ngroups, rht, nstrias, rhq, nsquads); } @@ -3506,7 +3566,7 @@ index b316d8b41..cf3e23d7d 100644 { group_stria.MakeI(ngroups); group_stria.MakeJ(); -@@ -167,7 +234,9 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, +@@ -167,7 +232,9 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, // Add boundaries { @@ -3517,7 +3577,7 @@ index b316d8b41..cf3e23d7d 100644 Array &be2face = (Dim == 2) ? be_to_edge : be_to_face; if (Dim == 3) -@@ -190,9 +259,11 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, +@@ -190,9 +257,11 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, boundary.SetSize(NumOfBdrElements); be2face.SetSize(NumOfBdrElements); Array parent_face_to_be; @@ -3529,7 +3589,7 @@ index b316d8b41..cf3e23d7d 100644 } for (int i = 0, j = 0; i < num_of_faces_or_edges; i++) { -@@ -209,7 +280,7 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, +@@ -209,7 +278,7 @@ ParSubMesh::ParSubMesh(const ParMesh &parent, SubMesh::From from, } else { @@ -3538,7 +3598,7 @@ index b316d8b41..cf3e23d7d 100644 } } else -@@ -743,9 +814,14 @@ void ParSubMesh::BuildSharedEdgesMapping(const int sedges_ct, +@@ -743,9 +812,14 @@ void ParSubMesh::BuildSharedEdgesMapping(const int sedges_ct, else { Array vert; @@ -3556,7 +3616,7 @@ index b316d8b41..cf3e23d7d 100644 sedge_ledge.Append(submesh_edge_id); } } -@@ -760,36 +836,61 @@ void ParSubMesh::BuildSharedFacesMapping(const int nstrias, +@@ -760,36 +834,61 @@ void ParSubMesh::BuildSharedFacesMapping(const int nstrias, shared_quads.Reserve(nsquads); sface_lface.Reserve(nstrias + nsquads); @@ -3631,7 +3691,7 @@ index b316d8b41..cf3e23d7d 100644 { // parent shared face is not in SubMesh or is not shared } -@@ -798,7 +899,7 @@ void ParSubMesh::BuildSharedFacesMapping(const int nstrias, +@@ -798,7 +897,7 @@ void ParSubMesh::BuildSharedFacesMapping(const int nstrias, Array vert; GetFaceVertices(submesh_face_id, vert); @@ -3641,7 +3701,7 @@ index b316d8b41..cf3e23d7d 100644 } } diff --git a/mesh/submesh/psubmesh.hpp b/mesh/submesh/psubmesh.hpp -index 58acd1d4f..8d35bc451 100644 +index 8c71c7181..651be6243 100644 --- a/mesh/submesh/psubmesh.hpp +++ b/mesh/submesh/psubmesh.hpp @@ -128,6 +128,16 @@ public: @@ -3944,10 +4004,23 @@ index bd7a7bd6c..683cc5bed 100644 /// Temporary vector diff --git a/mesh/submesh/submesh.cpp b/mesh/submesh/submesh.cpp -index ac2058c57..c9f0b8508 100644 +index ac2058c57..76d4456c5 100644 --- a/mesh/submesh/submesh.cpp +++ b/mesh/submesh/submesh.cpp -@@ -61,6 +61,7 @@ SubMesh::SubMesh(const Mesh &parent, From from, +@@ -31,10 +31,8 @@ SubMesh SubMesh::CreateFromBoundary(const Mesh &parent, + SubMesh::SubMesh(const Mesh &parent, From from, + Array attributes) : parent_(parent), from_(from), attributes_(attributes) + { +- if (Nonconforming()) +- { +- MFEM_ABORT("SubMesh does not support non-conforming meshes"); +- } ++ MFEM_VERIFY(from == From::Boundary || !Nonconforming(), ++ "SubMesh does not support non-conforming meshes with From::Domain"); + + if (from == From::Domain) + { +@@ -61,6 +59,7 @@ SubMesh::SubMesh(const Mesh &parent, From from, parent_element_ids_); Array parent_face_to_be = parent.GetFaceToBdrElMap(); @@ -3955,7 +4028,7 @@ index ac2058c57..c9f0b8508 100644 for (int i = 0; i < NumOfBdrElements; i++) { -@@ -75,7 +76,73 @@ SubMesh::SubMesh(const Mesh &parent, From from, +@@ -75,7 +74,73 @@ SubMesh::SubMesh(const Mesh &parent, From from, // This case happens when a domain is extracted, but the root parent // mesh didn't have a boundary element on the surface that defined // it's boundary. It still creates a valid mesh, so we allow it. @@ -5120,7 +5193,7 @@ index 02a98f628..c5057338a 100644 multidomain_test_3d(fec_type); } diff --git a/tests/unit/mesh/test_submesh.cpp b/tests/unit/mesh/test_submesh.cpp -index 590f706d2..325f51977 100644 +index 590f706d2..05751376c 100644 --- a/tests/unit/mesh/test_submesh.cpp +++ b/tests/unit/mesh/test_submesh.cpp @@ -19,6 +19,7 @@ using namespace mfem; @@ -5141,17 +5214,28 @@ index 590f706d2..325f51977 100644 case L2: return new L2_FECollection(p, dim, BasisType::GaussLobatto); break; -@@ -56,7 +60,8 @@ void test_2d(Element::Type element_type, +@@ -56,12 +60,19 @@ void test_2d(Element::Type element_type, SubMesh::From from) { constexpr int dim = 2; - const int vdim = (field_type == FieldType::SCALAR) ? 1 : dim; + const int vdim = (field_type == FieldType::SCALAR || + fec_type == ND) ? 1 : dim; ++ const bool nonconforming = true; double Hy = 1.0; Mesh mesh = Mesh::MakeCartesian2D(5, 5, element_type, true, 1.0, Hy, false); -@@ -176,7 +181,7 @@ void test_2d(Element::Type element_type, + if (from == SubMesh::From::Boundary) + { ++ if (nonconforming) ++ { ++ mesh.EnsureNCMesh(); ++ mesh.RandomRefinement(0.5); ++ } + for (int i = 0; i < mesh.GetNBE(); i++) + { + Element *el = mesh.GetBdrElement(i); +@@ -176,7 +187,7 @@ void test_2d(Element::Type element_type, { GridFunction sub_ex_gf(&sub_fes); @@ -5160,7 +5244,7 @@ index 590f706d2..325f51977 100644 { parent_gf.ProjectCoefficient(coeff); sub_ex_gf.ProjectCoefficient(coeff); -@@ -188,6 +193,8 @@ void test_2d(Element::Type element_type, +@@ -188,6 +199,8 @@ void test_2d(Element::Type element_type, } SubMesh::Transfer(parent_gf, sub_gf); @@ -5169,7 +5253,7 @@ index 590f706d2..325f51977 100644 sub_gf -= sub_ex_gf; REQUIRE(sub_gf.Norml2() < 1e-10); } -@@ -195,7 +202,7 @@ void test_2d(Element::Type element_type, +@@ -195,7 +208,7 @@ void test_2d(Element::Type element_type, { GridFunction parent_ex_gf(&parent_fes); @@ -5178,7 +5262,7 @@ index 590f706d2..325f51977 100644 { parent_gf.ProjectCoefficient(coeff); sub_gf.ProjectCoefficient(coeff); -@@ -210,6 +217,8 @@ void test_2d(Element::Type element_type, +@@ -210,6 +223,8 @@ void test_2d(Element::Type element_type, SubMesh::Transfer(sub_gf, parent_gf); @@ -5187,17 +5271,28 @@ index 590f706d2..325f51977 100644 parent_gf -= parent_ex_gf; REQUIRE(parent_gf.Norml2() < 1e-10); } -@@ -227,7 +236,8 @@ void test_3d(Element::Type element_type, +@@ -227,12 +242,19 @@ void test_3d(Element::Type element_type, SubMesh::From from) { constexpr int dim = 3; - const int vdim = (field_type == FieldType::SCALAR) ? 1 : dim; + const int vdim = (field_type == FieldType::SCALAR || + fec_type == ND) ? 1 : dim; ++ const bool nonconforming = true; double Hy = 1.0; Mesh mesh = Mesh::MakeCartesian3D(5, 5, 5, element_type, 1.0, Hy, 1.0, false); -@@ -351,7 +361,7 @@ void test_3d(Element::Type element_type, + if (from == SubMesh::From::Boundary) + { ++ if (nonconforming) ++ { ++ mesh.EnsureNCMesh(); ++ mesh.RandomRefinement(0.5); ++ } + for (int i = 0; i < mesh.GetNBE(); i++) + { + Element *el = mesh.GetBdrElement(i); +@@ -351,7 +373,7 @@ void test_3d(Element::Type element_type, { GridFunction sub_ex_gf(&sub_fes); @@ -5206,7 +5301,7 @@ index 590f706d2..325f51977 100644 { parent_gf.ProjectCoefficient(coeff); sub_ex_gf.ProjectCoefficient(coeff); -@@ -372,7 +382,7 @@ void test_3d(Element::Type element_type, +@@ -372,7 +394,7 @@ void test_3d(Element::Type element_type, { GridFunction parent_ex_gf(&parent_fes); @@ -5215,7 +5310,7 @@ index 590f706d2..325f51977 100644 { parent_gf.ProjectCoefficient(coeff); sub_gf.ProjectCoefficient(coeff); -@@ -401,13 +411,17 @@ TEST_CASE("SubMesh", "[SubMesh]") +@@ -401,13 +423,17 @@ TEST_CASE("SubMesh", "[SubMesh]") { int polynomial_order = 4; int mesh_polynomial_order = 2; @@ -5234,7 +5329,7 @@ index 590f706d2..325f51977 100644 SECTION("2D") { auto element = GENERATE(Element::QUADRILATERAL, Element::TRIANGLE); -@@ -421,7 +435,8 @@ TEST_CASE("SubMesh", "[SubMesh]") +@@ -421,7 +447,8 @@ TEST_CASE("SubMesh", "[SubMesh]") SECTION("3D") {