Skip to content

Commit

Permalink
Hack boundary exchanges for RKL2
Browse files Browse the repository at this point in the history
  • Loading branch information
pgrete committed Nov 21, 2024
1 parent 487c6b8 commit eb719e0
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 46 deletions.
34 changes: 25 additions & 9 deletions src/hydro/diffusion/conduction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <parthenon/package.hpp>

// AthenaPK headers
#include "../../eos/adiabatic_glmmhd.hpp"
#include "../../main.hpp"
#include "config.hpp"
#include "diffusion.hpp"
Expand Down Expand Up @@ -264,15 +265,25 @@ void ThermalFluxIsoFixed(MeshData<Real> *md) {

void ThermalFluxGeneral(MeshData<Real> *md) {
auto pmb = md->GetBlockData(0)->GetBlockPointer();
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire);

std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
auto cons_pack = md->PackVariablesAndFluxes(flags_ind);
auto hydro_pkg = pmb->packages.Get("Hydro");

const auto &eos = md->GetBlockData(0)
->GetBlockPointer()
->packages.Get("Hydro")
->Param<AdiabaticGLMMHDEOS>("eos");

const auto nhydro = hydro_pkg->Param<int>("nhydro");
const auto nscalars = hydro_pkg->Param<int>("nscalars");

auto const &prim_pack = md->PackVariables(std::vector<std::string>{"prim"});
// silly workaroud to not change constorim interface
auto const &cons_pack_no_flux = md->PackVariables(std::vector<std::string>{"cons"});

const int ndim = pmb->pmy_mesh->ndim;

Expand All @@ -281,11 +292,14 @@ void ThermalFluxGeneral(MeshData<Real> *md) {

parthenon::par_for(
DEFAULT_LOOP_PATTERN, "Thermal conduction X1 fluxes (general)",
parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e + 1, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s + 1, kb.e - 1, jb.s + 1,
jb.e - 1, ib.s + 1, ib.e + 1 - 1,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
const auto &coords = prim_pack.GetCoords(b);
auto &cons = cons_pack(b);
const auto &prim = prim_pack(b);
// Need this here as we're skipping the FillDerivedCall in RKL
eos.ConsToPrim(cons_pack_no_flux(b), prim, nhydro, nscalars, k, j, i);

// Variables only required in 3D case
Real dTdz = 0.0;
Expand Down Expand Up @@ -376,8 +390,9 @@ void ThermalFluxGeneral(MeshData<Real> *md) {
/* Compute heat fluxes in 2-direction --------------------------------------*/
parthenon::par_for(
DEFAULT_LOOP_PATTERN, "Thermal conduction X2 fluxes (general)",
parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1,
ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s + 1, kb.e - 1, jb.s + 1,
jb.e + 1 - 1, ib.s + 1, ib.e - 1,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
const auto &coords = prim_pack.GetCoords(b);
auto &cons = cons_pack(b);
const auto &prim = prim_pack(b);
Expand Down Expand Up @@ -469,8 +484,9 @@ void ThermalFluxGeneral(MeshData<Real> *md) {

parthenon::par_for(
DEFAULT_LOOP_PATTERN, "Thermal conduction X3 fluxes (general)",
parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e,
ib.s, ib.e, KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
parthenon::DevExecSpace(), 0, cons_pack.GetDim(5) - 1, kb.s + 1, kb.e + 1 - 1,
jb.s + 1, jb.e - 1, ib.s + 1, ib.e - 1,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
const auto &coords = prim_pack.GetCoords(b);
auto &cons = cons_pack(b);
const auto &prim = prim_pack(b);
Expand Down
107 changes: 70 additions & 37 deletions src/hydro/hydro_driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,9 @@ HydroDriver::HydroDriver(ParameterInput *pin, ApplicationInput *app_in, Mesh *pm
// Sets all fluxes to 0
TaskStatus ResetFluxes(MeshData<Real> *md) {
auto pmb = md->GetBlockData(0)->GetBlockPointer();
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire);

// In principle, we'd only need to pack Metadata::WithFluxes here, but
// choosing to mirror other use in the code so that the packs are already cached.
Expand All @@ -54,46 +54,71 @@ TaskStatus ResetFluxes(MeshData<Real> *md) {
// by enough work over the entire pack and it allows to not use any conditionals.
parthenon::par_for(
DEFAULT_LOOP_PATTERN, "ResetFluxes X1", parthenon::DevExecSpace(), 0,
cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s,
ib.e + 1,
KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e + 1,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
auto &cons = cons_pack(b);
cons.flux(X1DIR, v, k, j, i) = 0.0;
cons.flux(X1DIR, IEN, k, j, i) = 0.0;
});

if (ndim < 2) {
return TaskStatus::complete;
}
parthenon::par_for(
DEFAULT_LOOP_PATTERN, "ResetFluxes X2", parthenon::DevExecSpace(), 0,
cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e + 1,
ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
cons_pack.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e + 1, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
auto &cons = cons_pack(b);
cons.flux(X2DIR, v, k, j, i) = 0.0;
cons.flux(X2DIR, IEN, k, j, i) = 0.0;
});

if (ndim < 3) {
return TaskStatus::complete;
}
parthenon::par_for(
DEFAULT_LOOP_PATTERN, "ResetFluxes X3", parthenon::DevExecSpace(), 0,
cons_pack.GetDim(5) - 1, 0, cons_pack.GetDim(4) - 1, kb.s, kb.e + 1, jb.s, jb.e,
ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
cons_pack.GetDim(5) - 1, kb.s, kb.e + 1, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
auto &cons = cons_pack(b);
cons.flux(X3DIR, v, k, j, i) = 0.0;
cons.flux(X3DIR, IEN, k, j, i) = 0.0;
});
return TaskStatus::complete;
}

TaskStatus SimpleFluxDiv(MeshData<Real> *md_base, MeshData<Real> *md_MY0) {
auto pmb = md_base->GetBlockData(0)->GetBlockPointer();
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire);

// In principle, we'd only need to pack Metadata::WithFluxes here, but
// choosing to mirror other use in the code so that the packs are already cached.
std::vector<parthenon::MetadataFlag> flags_ind({Metadata::Independent});
auto base = md_base->PackVariablesAndFluxes(flags_ind);
auto MY0 = md_MY0->PackVariablesAndFluxes(flags_ind);

const int ndim = pmb->pmy_mesh->ndim;
// Using separate loops for each dim as the launch overhead should be hidden
// by enough work over the entire pack and it allows to not use any conditionals.
parthenon::par_for(
DEFAULT_LOOP_PATTERN, "SimpleFluxDiv", parthenon::DevExecSpace(), 0,
base.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
// First calc this step
const auto &coords = base.GetCoords(b);
MY0(b, IEN, k, j, i) =
parthenon::Update::FluxDivHelper(IEN, k, j, i, ndim, coords, base(b));
});

return TaskStatus::complete;
}

TaskStatus RKL2StepFirst(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
MeshData<Real> *md_Yjm2, MeshData<Real> *md_MY0, const int s_rkl,
const Real tau) {
auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer();
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire);

// Compute coefficients. Meyer+2014 eq. (18)
Real mu_tilde_1 = 4. / 3. /
Expand All @@ -113,11 +138,11 @@ TaskStatus RKL2StepFirst(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
// by enough work over the entire pack and it allows to not use any conditionals.
parthenon::par_for(
DEFAULT_LOOP_PATTERN, "RKL first step", parthenon::DevExecSpace(), 0,
Y0.GetDim(5) - 1, 0, Y0.GetDim(4) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int v, const int k, const int j, const int i) {
Yjm1(b, v, k, j, i) =
Y0(b, v, k, j, i) + mu_tilde_1 * tau * MY0(b, v, k, j, i); // Y_1
Yjm2(b, v, k, j, i) = Y0(b, v, k, j, i); // Y_0
Y0.GetDim(5) - 1, kb.s, kb.e, jb.s, jb.e, ib.s, ib.e,
KOKKOS_LAMBDA(const int b, const int k, const int j, const int i) {
Yjm1(b, IEN, k, j, i) =
Y0(b, IEN, k, j, i) + mu_tilde_1 * tau * MY0(b, IEN, k, j, i); // Y_1
Yjm2(b, IEN, k, j, i) = Y0(b, IEN, k, j, i); // Y_0
});

return TaskStatus::complete;
Expand All @@ -128,9 +153,9 @@ TaskStatus RKL2StepOther(MeshData<Real> *md_Y0, MeshData<Real> *md_Yjm1,
const Real nu_j, const Real mu_tilde_j, const Real gamma_tilde_j,
const Real tau) {
auto pmb = md_Y0->GetBlockData(0)->GetBlockPointer();
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::interior);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::interior);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::interior);
IndexRange ib = pmb->cellbounds.GetBoundsI(IndexDomain::entire);
IndexRange jb = pmb->cellbounds.GetBoundsJ(IndexDomain::entire);
IndexRange kb = pmb->cellbounds.GetBoundsK(IndexDomain::entire);

// In principle, we'd only need to pack Metadata::WithFluxes here, but
// choosing to mirror other use in the code so that the packs are already cached.
Expand Down Expand Up @@ -249,8 +274,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
auto &MY0 = pmesh->mesh_data.GetOrAdd("MY0", i);
auto &Yjm2 = pmesh->mesh_data.GetOrAdd("Yjm2", i);

auto init_MY0 = tl.AddTask(set_flx, parthenon::Update::FluxDivergence<MeshData<Real>>,
base.get(), MY0.get());
auto init_MY0 = tl.AddTask(set_flx, SimpleFluxDiv, base.get(), MY0.get());

// Initialize Y0 and Y1 and the recursion relation starting with j = 2 needs data from
// the two preceeding stages.
Expand All @@ -264,11 +288,11 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
// performance for various tests (static, amr, block sizes) and then decide on the
// best impl. Go with default call (split local/nonlocal) for now.
// TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
auto bounds_exchange = parthenon::AddBoundaryExchangeTasks(
rkl2_step_first | start_bnd, tl, base, pmesh->multilevel);
// auto bounds_exchange = parthenon::AddBoundaryExchangeTasks(
// rkl2_step_first | start_bnd, tl, base, pmesh->multilevel);

tl.AddTask(bounds_exchange, parthenon::Update::FillDerived<MeshData<Real>>,
base.get());
// tl.AddTask(rkl2_step_first, parthenon::Update::FillDerived<MeshData<Real>>,
// base.get());
}

// Compute coefficients. Meyer+2012 eq. (16)
Expand Down Expand Up @@ -297,7 +321,7 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
// data/fluxes with neighbors. All other containers are passive (i.e., data is only
// used but not exchanged).
const auto any = parthenon::BoundaryType::any;
auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<any>, base);
// auto start_bnd = tl.AddTask(none, parthenon::StartReceiveBoundBufs<any>, base);
auto start_flxcor_recv =
tl.AddTask(none, parthenon::StartReceiveFluxCorrections, base);

Expand Down Expand Up @@ -329,16 +353,25 @@ void AddSTSTasks(TaskCollection *ptask_coll, Mesh *pmesh, BlockList_t &blocks,
// performance for various tests (static, amr, block sizes) and then decide on the
// best impl. Go with default call (split local/nonlocal) for now.
// TODO(pgrete) optimize (in parthenon) to only send subset of updated vars
auto bounds_exchange = parthenon::AddBoundaryExchangeTasks(
rkl2_step_other | start_bnd, tl, base, pmesh->multilevel);
// auto bounds_exchange = parthenon::AddBoundaryExchangeTasks(
// rkl2_step_other | start_bnd, tl, base, pmesh->multilevel);

tl.AddTask(bounds_exchange, parthenon::Update::FillDerived<MeshData<Real>>,
base.get());
// tl.AddTask(rkl2_step_other, parthenon::Update::FillDerived<MeshData<Real>>,
// base.get());
}

b_jm2 = b_jm1;
b_jm1 = b_j;
}
TaskRegion &final_comm = ptask_coll->AddRegion(num_partitions);
for (int i = 0; i < num_partitions; i++) {
auto &tl = final_comm[i];
auto &base = pmesh->mesh_data.GetOrAdd("base", i);
auto bounds_exchange =
parthenon::AddBoundaryExchangeTasks(none, tl, base, pmesh->multilevel);
tl.AddTask(bounds_exchange, parthenon::Update::FillDerived<MeshData<Real>>,
base.get());
}
}

// See the advection.hpp declaration for a description of how this function gets called.
Expand Down

0 comments on commit eb719e0

Please sign in to comment.