Skip to content

Commit

Permalink
Convert hydro calls to AMReX lambda launches
Browse files Browse the repository at this point in the history
  • Loading branch information
maxpkatz committed Sep 7, 2019
1 parent 6324fc2 commit 5c40bdf
Show file tree
Hide file tree
Showing 4 changed files with 106 additions and 60 deletions.
2 changes: 2 additions & 0 deletions Source/Castro.H
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ enum StateType { State_Type };

// Named slots of the Conserved enumeration (presumably component indices
// into the conserved state -- confirm against callers).  Enumerators count
// up from Density = 0; the species block begins at FirstSpec, and
// NUM_STATE is FirstSpec + NumSpec.
enum Conserved {
    Density = 0,
    Xmom,
    Ymom,
    Zmom,
    Eden,
    Eint,
    Temp,
    FirstSpec,
    NUM_STATE = FirstSpec + NumSpec
};

// Expand an Array4-style object `a` (anything exposing `p`, `begin` and
// `end` members) into the three comma-separated arguments expected by the
// Fortran-style interfaces that take (data, lo, hi) per array:
//   1. the data pointer, (a).p
//   2. a pointer to the low bounds, &(a).begin.x
//      (assumes begin.x/.y/.z are laid out contiguously -- TODO confirm)
//   3. a pointer to the high bounds, built as end - 1 in each dimension
//
// Caveats:
//   * The high-bounds pointer refers to a temporary amrex::GpuArray, which
//     is destroyed at the end of the full expression containing the
//     expansion.  Only use this macro directly inside a function-call
//     argument list; never store the resulting pointer.
//   * `a` is evaluated several times, so pass a side-effect-free expression.
//   * Every use of the parameter is parenthesized so that an expression
//     argument (e.g. `*ptr`) expands with the intended precedence.
#define AMREX_ARR4_TO_FORTRAN_ANYD(a) (a).p,&((a).begin.x),amrex::GpuArray<int,3>{(a).end.x-1,(a).end.y-1,(a).end.z-1}.data()

class Castro
:
public amrex::AmrLevel
Expand Down
2 changes: 0 additions & 2 deletions Source/Castro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,6 @@
#include <omp.h>
#endif

#define BL_ARR4_TO_FORTRAN_ANYD(a) a.p,&((a).begin.x),amrex::GpuArray<int,3>{(a).end.x-1,(a).end.y-1,(a).end.z-1}.data()

using namespace amrex;

long Castro::num_zones_advanced = 0;
Expand Down
6 changes: 6 additions & 0 deletions Source/Castro_F.H
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ extern "C"

void ca_get_qvar(int* qvar);

AMREX_GPU_DEVICE
void ca_ctoprim(const int* lo, const int* hi,
const amrex::Real* u, const int* u_lo, const int* u_hi,
const amrex::Real* q, const int* q_lo, const int* q_hi,
Expand All @@ -46,24 +47,28 @@ extern "C"
const BL_FORT_FAB_ARG_3D(state),
const amrex::Real* dx, amrex::Real* dt);

// Device-callable routine with C linkage (presumably implemented in
// Fortran -- confirm).  The hydro driver calls it to compute the divergence
// of the velocity field.
//   lo, hi : index bounds of the region to process
//   dx     : cell sizes (the caller passes the geometry's cell-size array)
//   q      : primitive-variable fab, passed via BL_FORT_FAB_ARG_3D
//   div    : fab receiving the result (presumably output-only -- confirm)
AMREX_GPU_DEVICE
void ca_divu
(const int* lo, const int* hi,
const amrex::Real* dx,
BL_FORT_FAB_ARG_3D(q),
BL_FORT_FAB_ARG_3D(div));

// Device-callable routine with C linkage (presumably implemented in
// Fortran -- confirm).  The hydro driver calls it to compute the flattening
// coefficient used for slope calculations.
//   lo, hi : index bounds of the region to process
//   q      : primitive-variable fab, passed via BL_FORT_FAB_ARG_3D
//   flatn  : fab receiving the flattening coefficient
//            (presumably output-only -- confirm)
AMREX_GPU_DEVICE
void ca_uflaten
(const int* lo, const int* hi,
BL_FORT_FAB_ARG_3D(q),
BL_FORT_FAB_ARG_3D(flatn));

// Device-callable routine with C linkage (presumably implemented in
// Fortran -- confirm).  The hydro driver calls it to do the PPM
// reconstruction to the zone edges.
//   lo, hi : index bounds of the region to process
//   q      : primitive-variable fab, passed via BL_FORT_FAB_ARG_3D
//   flatn  : flattening-coefficient fab
//   qm, qp : fabs receiving the reconstructed edge states; the caller sizes
//            each with 3*QVAR components (presumably the minus/plus sides
//            for the three directions -- confirm)
AMREX_GPU_DEVICE
void ca_ppm_reconstruct
(const int* lo, const int* hi,
BL_FORT_FAB_ARG_3D(q),
BL_FORT_FAB_ARG_3D(flatn),
BL_FORT_FAB_ARG_3D(qm),
BL_FORT_FAB_ARG_3D(qp));

AMREX_GPU_DEVICE
void ca_construct_flux
(const int* lo, const int* hi,
const int* domlo, const int* domhi,
Expand All @@ -78,6 +83,7 @@ extern "C"
BL_FORT_FAB_ARG_3D(flux),
BL_FORT_FAB_ARG_3D(area));

AMREX_GPU_DEVICE
void ca_construct_hydro_update
(const int* lo, const int* hi,
const amrex::Real* dx, const amrex::Real dt,
Expand Down
156 changes: 98 additions & 58 deletions Source/Castro_hydro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)

int finest_level = parent->finestLevel();

const Real *dx = geom.CellSize();
auto dx = geom.CellSizeArray();

MultiFab& S_new = get_new_data(State_Type);

Expand All @@ -25,6 +25,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)

std::vector<amrex::Real> b_mol{0.5, 0.5};

const Real update_scale_factor = b_mol[istage];

// Choose tile size based on whether we're using a GPU.

#ifdef AMREX_USE_GPU
Expand Down Expand Up @@ -52,110 +54,139 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)

const Box& qbx = amrex::grow(box, 4);

FArrayBox& state_old = Sborder[mfi];
auto state_old_arr = Sborder[mfi].array();

// Convert the conservative state to the primitive variable state.
// This fills both q and qaux.

q.resize(qbx, QVAR);
Elixir elix_q = q.elixir();
auto q_arr = q.array();

qaux.resize(qbx, NQAUX);
Elixir elix_qaux = qaux.elixir();
auto qaux_arr = qaux.array();

#pragma gpu box(qbx) nohost
ca_ctoprim(AMREX_INT_ANYD(qbx.loVect()), AMREX_INT_ANYD(qbx.hiVect()),
BL_TO_FORTRAN_ANYD(Sborder[mfi]),
BL_TO_FORTRAN_ANYD(q),
BL_TO_FORTRAN_ANYD(qaux));
AMREX_LAUNCH_DEVICE_LAMBDA(qbx, lbx,
{
ca_ctoprim(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
AMREX_ARR4_TO_FORTRAN_ANYD(state_old_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qaux_arr));
});

const Box& obx = amrex::grow(box, 1);
const Box& tbx = amrex::grow(box, 2);

div.resize(obx, 1);
Elixir elix_div = div.elixir();
auto div_arr = div.array();

// Compute divergence of velocity field.

#pragma gpu box(obx) nohost
ca_divu(AMREX_INT_ANYD(obx.loVect()), AMREX_INT_ANYD(obx.hiVect()),
AMREX_REAL_ANYD(dx),
BL_TO_FORTRAN_ANYD(q),
BL_TO_FORTRAN_ANYD(div));
AMREX_LAUNCH_DEVICE_LAMBDA(obx, lbx,
{
ca_divu(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
AMREX_ZFILL(dx.data()),
AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(div_arr));
});

flatn.resize(obx, 1);
Elixir elix_flatn = flatn.elixir();
auto flatn_arr = flatn.array();

// Compute flattening coefficient for slope calculations.
#pragma gpu box(obx) nohost
ca_uflaten
(AMREX_INT_ANYD(obx.loVect()), AMREX_INT_ANYD(obx.hiVect()),
BL_TO_FORTRAN_ANYD(q),
BL_TO_FORTRAN_ANYD(flatn));

AMREX_LAUNCH_DEVICE_LAMBDA(obx, lbx,
{
ca_uflaten
(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(flatn_arr));
});

qm.resize(tbx, 3*QVAR);
Elixir elix_qm = qm.elixir();
auto qm_arr = qm.array();

qp.resize(tbx, 3*QVAR);
Elixir elix_qp = qp.elixir();
auto qp_arr = qp.array();

// Do PPM reconstruction to the zone edges.
#pragma gpu box(obx) nohost
ca_ppm_reconstruct
(AMREX_INT_ANYD(obx.loVect()), AMREX_INT_ANYD(obx.hiVect()),
BL_TO_FORTRAN_ANYD(q),
BL_TO_FORTRAN_ANYD(flatn),
BL_TO_FORTRAN_ANYD(qm),
BL_TO_FORTRAN_ANYD(qp));

AMREX_LAUNCH_DEVICE_LAMBDA(obx, lbx,
{
ca_ppm_reconstruct
(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(flatn_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qm_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qp_arr));
});

q.clear();
flatn.clear();

flux[0].resize(amrex::surroundingNodes(box, 0), NUM_STATE);
Elixir elix_flux_x = flux[0].elixir();
auto flux_x_arr = flux[0].array();

flux[1].resize(amrex::surroundingNodes(box, 1), NUM_STATE);
Elixir elix_flux_y = flux[1].elixir();
auto flux_y_arr = flux[1].array();

flux[2].resize(amrex::surroundingNodes(box, 2), NUM_STATE);
Elixir elix_flux_z = flux[2].elixir();
auto flux_z_arr = flux[2].array();

qe[0].resize(amrex::surroundingNodes(box, 0), NGDNV);
Elixir elix_qe_x = qe[0].elixir();
auto qe_x_arr = qe[0].array();

qe[1].resize(amrex::surroundingNodes(box, 1), NGDNV);
Elixir elix_qe_y = qe[1].elixir();
auto qe_y_arr = qe[1].array();

qe[2].resize(amrex::surroundingNodes(box, 2), NGDNV);
Elixir elix_qe_z = qe[2].elixir();
auto qe_z_arr = qe[2].array();

for (int idir = 0; idir < 3; ++idir) {

const Box& ebx = amrex::surroundingNodes(box, idir);

int idir_f = idir + 1;

#pragma gpu box(ebx) nohost
ca_construct_flux
(AMREX_INT_ANYD(ebx.loVect()), AMREX_INT_ANYD(ebx.hiVect()),
AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi),
AMREX_REAL_ANYD(dx), dt,
idir_f,
BL_TO_FORTRAN_ANYD(Sborder[mfi]),
BL_TO_FORTRAN_ANYD(div),
BL_TO_FORTRAN_ANYD(qaux),
BL_TO_FORTRAN_ANYD(qm),
BL_TO_FORTRAN_ANYD(qp),
BL_TO_FORTRAN_ANYD(qe[idir]),
BL_TO_FORTRAN_ANYD(flux[idir]),
BL_TO_FORTRAN_ANYD(area[idir][mfi]));

Array4<Real> const flux_fab = (flux[idir]).array();
Array4<Real> fluxes_fab = (*fluxes[idir]).array(mfi);
auto flux_arr = flux[idir].array();
auto qe_arr = qe[idir].array();
auto area_arr = area[idir][mfi].array();

AMREX_LAUNCH_DEVICE_LAMBDA(ebx, lbx,
{
ca_construct_flux
(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi),
AMREX_ZFILL(dx.data()), dt,
idir_f,
AMREX_ARR4_TO_FORTRAN_ANYD(state_old_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(div_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qaux_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qm_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qp_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qe_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(flux_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(area_arr));
});

Array4<Real> fluxes_arr = (*fluxes[idir]).array(mfi);
const int numcomp = NUM_STATE;
const Real scale = b_mol[istage];

AMREX_HOST_DEVICE_FOR_4D(ebx, numcomp, i, j, k, n,
{
fluxes_fab(i,j,k,n) += scale * flux_fab(i,j,k,n);
fluxes_arr(i,j,k,n) += update_scale_factor * flux_arr(i,j,k,n);
});

}
Expand All @@ -165,22 +196,31 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)
qm.clear();
qp.clear();

#pragma gpu box(box) nohost
ca_construct_hydro_update
(AMREX_INT_ANYD(box.loVect()), AMREX_INT_ANYD(box.hiVect()),
AMREX_REAL_ANYD(dx), dt,
b_mol[istage],
BL_TO_FORTRAN_ANYD(qe[0]),
BL_TO_FORTRAN_ANYD(qe[1]),
BL_TO_FORTRAN_ANYD(qe[2]),
BL_TO_FORTRAN_ANYD(flux[0]),
BL_TO_FORTRAN_ANYD(flux[1]),
BL_TO_FORTRAN_ANYD(flux[2]),
BL_TO_FORTRAN_ANYD(area[0][mfi]),
BL_TO_FORTRAN_ANYD(area[1][mfi]),
BL_TO_FORTRAN_ANYD(area[2][mfi]),
BL_TO_FORTRAN_ANYD(volume[mfi]),
BL_TO_FORTRAN_ANYD(hydro_source[mfi]));
auto area_x_arr = area[0][mfi].array();
auto area_y_arr = area[1][mfi].array();
auto area_z_arr = area[2][mfi].array();

auto volume_arr = volume[mfi].array();
auto hydro_source_arr = hydro_source[mfi].array();

AMREX_LAUNCH_DEVICE_LAMBDA(box, lbx,
{
ca_construct_hydro_update
(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
AMREX_ZFILL(dx.data()), dt,
update_scale_factor,
AMREX_ARR4_TO_FORTRAN_ANYD(qe_x_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qe_y_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(qe_z_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(flux_x_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(flux_y_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(flux_z_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(area_x_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(area_y_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(area_z_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(volume_arr),
AMREX_ARR4_TO_FORTRAN_ANYD(hydro_source_arr));
});

} // MFIter loop

Expand Down

0 comments on commit 5c40bdf

Please sign in to comment.