From eb719e0bec1f168bd35b3b0ae90d5b03fd2269d7 Mon Sep 17 00:00:00 2001 From: Philipp Grete Date: Thu, 21 Nov 2024 12:12:02 +0100 Subject: [PATCH] Hack boundary exchanges for RKL2 --- src/hydro/diffusion/conduction.cpp | 34 ++++++--- src/hydro/hydro_driver.cpp | 107 +++++++++++++++++++---------- 2 files changed, 95 insertions(+), 46 deletions(-) diff --git a/src/hydro/diffusion/conduction.cpp b/src/hydro/diffusion/conduction.cpp index 40256024..3ab934e3 100644 --- a/src/hydro/diffusion/conduction.cpp +++ b/src/hydro/diffusion/conduction.cpp @@ -16,6 +16,7 @@ #include // AthenaPK headers +#include "../../eos/adiabatic_glmmhd.hpp" #include "../../main.hpp" #include "config.hpp" #include "diffusion.hpp" @@ -264,15 +265,25 @@ void ThermalFluxIsoFixed(MeshData *md) { void ThermalFluxGeneral(MeshData *md) { auto pmb = md->GetBlockData(0)->GetBlockPointer(); - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); std::vector flags_ind({Metadata::Independent}); auto cons_pack = md->PackVariablesAndFluxes(flags_ind); auto hydro_pkg = pmb->packages.Get("Hydro"); + const auto &eos = md->GetBlockData(0) + ->GetBlockPointer() + ->packages.Get("Hydro") + ->Param("eos"); + + const auto nhydro = hydro_pkg->Param("nhydro"); + const auto nscalars = hydro_pkg->Param("nscalars"); + auto const &prim_pack = md->PackVariables(std::vector{"prim"}); + // silly workaroud to not change constorim interface + auto const &cons_pack_no_flux = md->PackVariables(std::vector{"cons"}); const int ndim = pmb->pmy_mesh->ndim; @@ -281,11 +292,14 @@ void ThermalFluxGeneral(MeshData *md) { parthenon::par_for( DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (general)", - parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, - ib.e + 1, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s + 1, kb.e - 1, jb.s + 1, + jb.e - 1, ib.s + 1, ib.e + 1 - 1, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = prim_pack.GetCoords(b); auto &cons = cons_pack(b); const auto &prim = prim_pack(b); + // Need this here as we're skipping the FillDerivedCall in RKL + eos.ConsToPrim(cons_pack_no_flux(b), prim, nhydro, nscalars, k, j, i); // Variables only required in 3D case Real dTdz = 0.0; @@ -376,8 +390,9 @@ void ThermalFluxGeneral(MeshData *md) { /* Compute heat fluxes in 2-direction --------------------------------------*/ parthenon::par_for( DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes (general)", - parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1, - ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s + 1, kb.e - 1, jb.s + 1, + jb.e + 1 - 1, ib.s + 1, ib.e - 1, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = prim_pack.GetCoords(b); auto &cons = cons_pack(b); const auto &prim = prim_pack(b); @@ -469,8 +484,9 @@ void ThermalFluxGeneral(MeshData *md) { parthenon::par_for( DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes (general)", - parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e, - ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s + 1, kb.e + 1 - 1, + jb.s + 1, jb.e - 1, ib.s + 1, ib.e - 1, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { const auto &coords = prim_pack.GetCoords(b); auto &cons = cons_pack(b); const auto &prim = prim_pack(b); diff --git a/src/hydro/hydro_driver.cpp b/src/hydro/hydro_driver.cpp index dd9d4540..8f60574b 100644 --- a/src/hydro/hydro_driver.cpp +++ b/src/hydro/hydro_driver.cpp @@ -40,9 +40,9 @@ HydroDriver::HydroDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm // Sets all fluxes to 0 TaskStatus ResetFluxes(MeshData *md) { auto pmb = md->GetBlockData(0)->GetBlockPointer(); - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); // In principle, we'd only need to pack Metadata::WithFluxes here, but // choosing to mirror other use in the code so that the packs are already cached. @@ -54,11 +54,10 @@ TaskStatus ResetFluxes(MeshData *md) { // by enough work over the entire pack and it allows to not use any conditionals. parthenon::par_for( DEFAULT_LOOP_PATTERN, "ResetFluxes X1", parthenon::DevExecSpace(), 0, - cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, - ib.e + 1, - KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) { + cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e + 1, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { auto &cons = cons_pack(b); - cons.flux(X1DIR, v, k, j, i) = 0.0; + cons.flux(X1DIR, IEN, k, j, i) = 0.0; }); if (ndim < 2) { @@ -66,11 +65,10 @@ TaskStatus ResetFluxes(MeshData *md) { } parthenon::par_for( DEFAULT_LOOP_PATTERN, "ResetFluxes X2", parthenon::DevExecSpace(), 0, - cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e + 1, - ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) { + cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { auto &cons = cons_pack(b); - cons.flux(X2DIR, v, k, j, i) = 0.0; + cons.flux(X2DIR, IEN, k, j, i) = 0.0; }); if (ndim < 3) { @@ -78,22 +76,49 @@ TaskStatus ResetFluxes(MeshData *md) { } parthenon::par_for( DEFAULT_LOOP_PATTERN, "ResetFluxes X3", parthenon::DevExecSpace(), 0, - cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e + 1, jb.s, jb.e, - ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) { + cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { auto &cons = cons_pack(b); - cons.flux(X3DIR, v, k, j, i) = 0.0; + cons.flux(X3DIR, IEN, k, j, i) = 0.0; }); return TaskStatus::complete; } +TaskStatus SimpleFluxDiv(MeshData *md_base, MeshData *md_MY0) { + auto pmb = md_base->GetBlockData(0)->GetBlockPointer(); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); + + // In principle, we'd only need to pack Metadata::WithFluxes here, but + // choosing to mirror other use in the code so that the packs are already cached. + std::vector flags_ind({Metadata::Independent}); + auto base = md_base->PackVariablesAndFluxes(flags_ind); + auto MY0 = md_MY0->PackVariablesAndFluxes(flags_ind); + + const int ndim = pmb->pmy_mesh->ndim; + // Using separate loops for each dim as the launch overhead should be hidden + // by enough work over the entire pack and it allows to not use any conditionals. + parthenon::par_for( + DEFAULT_LOOP_PATTERN, "SimpleFluxDiv", parthenon::DevExecSpace(), 0, + base.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + // First calc this step + const auto &coords = base.GetCoords(b); + MY0(b, IEN, k, j, i) = + parthenon::Update::FluxDivHelper(IEN, k, j, i, ndim, coords, base(b)); + }); + + return TaskStatus::complete; +} + TaskStatus RKL2StepFirst(MeshData *md_Y0, MeshData *md_Yjm1, MeshData *md_Yjm2, MeshData *md_MY0, const int s_rkl, const Real tau) { auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer(); - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); // Compute coefficients. Meyer+2014 eq. (18) Real mu_tilde_1 = 4. / 3. / @@ -113,11 +138,11 @@ TaskStatus RKL2StepFirst(MeshData *md_Y0, MeshData *md_Yjm1, // by enough work over the entire pack and it allows to not use any conditionals. parthenon::par_for( DEFAULT_LOOP_PATTERN, "RKL first step", parthenon::DevExecSpace(), 0, - Y0.GetDim(5) - 1, 0, Y0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, - KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) { - Yjm1(b, v, k, j, i) = - Y0(b, v, k, j, i) + mu_tilde_1 * tau * MY0(b, v, k, j, i); // Y_1 - Yjm2(b, v, k, j, i) = Y0(b, v, k, j, i); // Y_0 + Y0.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e, + KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) { + Yjm1(b, IEN, k, j, i) = + Y0(b, IEN, k, j, i) + mu_tilde_1 * tau * MY0(b, IEN, k, j, i); // Y_1 + Yjm2(b, IEN, k, j, i) = Y0(b, IEN, k, j, i); // Y_0 }); return TaskStatus::complete; @@ -128,9 +153,9 @@ TaskStatus RKL2StepOther(MeshData *md_Y0, MeshData *md_Yjm1, const Real nu_j, const Real mu_tilde_j, const Real gamma_tilde_j, const Real tau) { auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer(); - IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior); - IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior); - IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior); + IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire); + IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire); + IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire); // In principle, we'd only need to pack Metadata::WithFluxes here, but // choosing to mirror other use in the code so that the packs are already cached. @@ -249,8 +274,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks, auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i); auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i); - auto init_MY0 = tl.AddTask(set_flx, parthenon::Update::FluxDivergence>, - base.get(), MY0.get()); + auto init_MY0 = tl.AddTask(set_flx, SimpleFluxDiv, base.get(), MY0.get()); // Initialize Y0 and Y1 and the recursion relation starting with j = 2 needs data from // the two preceeding stages. @@ -264,11 +288,11 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks, // performance for various tests (static, amr, block sizes) and then decide on the // best impl. Go with default call (split local/nonlocal) for now. // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars - auto bounds_exchange = parthenon::AddBoundaryExchangeTasks( - rkl2_step_first | start_bnd, tl, base, pmesh->multilevel); + // auto bounds_exchange = parthenon::AddBoundaryExchangeTasks( + // rkl2_step_first | start_bnd, tl, base, pmesh->multilevel); - tl.AddTask(bounds_exchange, parthenon::Update::FillDerived>, - base.get()); + // tl.AddTask(rkl2_step_first, parthenon::Update::FillDerived>, + // base.get()); } // Compute coefficients. Meyer+2012 eq. (16) @@ -297,7 +321,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks, // data/fluxes with neighbors. All other containers are passive (i.e., data is only // used but not exchanged). const auto any = parthenon::BoundaryType::any; - auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs, base); + // auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs, base); auto start_flxcor_recv = tl.AddTask(none, parthenon::StartReceiveFluxCorrections, base); @@ -329,16 +353,25 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks, // performance for various tests (static, amr, block sizes) and then decide on the // best impl. Go with default call (split local/nonlocal) for now. // TODO(pgrete) optimize (in parthenon) to only send subset of updated vars - auto bounds_exchange = parthenon::AddBoundaryExchangeTasks( - rkl2_step_other | start_bnd, tl, base, pmesh->multilevel); + // auto bounds_exchange = parthenon::AddBoundaryExchangeTasks( + // rkl2_step_other | start_bnd, tl, base, pmesh->multilevel); - tl.AddTask(bounds_exchange, parthenon::Update::FillDerived>, - base.get()); + // tl.AddTask(rkl2_step_other, parthenon::Update::FillDerived>, + // base.get()); } b_jm2 = b_jm1; b_jm1 = b_j; } + TaskRegion &final_comm = ptask_coll->AddRegion(num_partitions); + for (int i = 0; i < num_partitions; i++) { + auto &tl = final_comm[i]; + auto &base = pmesh->mesh_data.GetOrAdd("base", i); + auto bounds_exchange = + parthenon::AddBoundaryExchangeTasks(none, tl, base, pmesh->multilevel); + tl.AddTask(bounds_exchange, parthenon::Update::FillDerived>, + base.get()); + } } // See the advection.hpp declaration for a description of how this function gets called.