Convert hydro calls to AMReX lambda launches

AMReX-Astro · Sep 7, 2019 · 5c40bdf
1 parent 6324fc2
commit 5c40bdf
Show file tree

Hide file tree

Showing 4 changed files with 106 additions and 60 deletions.
diff --git a/Source/Castro.H b/Source/Castro.H
@@ -17,6 +17,8 @@ enum StateType { State_Type };
 
 enum Conserved { Density = 0, Xmom, Ymom, Zmom, Eden, Eint, Temp, FirstSpec, NUM_STATE = FirstSpec + NumSpec };
 
+// Expands an Array4 `a` into three call arguments: its data pointer, a pointer to its begin indices, and a pointer to a temporary {end.x-1, end.y-1, end.z-1} that lives only for the enclosing call expression.
+#define AMREX_ARR4_TO_FORTRAN_ANYD(a) (a).p,&((a).begin.x),amrex::GpuArray<int,3>{(a).end.x-1,(a).end.y-1,(a).end.z-1}.data()
 class Castro
     :
     public amrex::AmrLevel

diff --git a/Source/Castro.cpp b/Source/Castro.cpp
@@ -13,8 +13,6 @@
 #include <omp.h>
 #endif
 
-#define BL_ARR4_TO_FORTRAN_ANYD(a) a.p,&((a).begin.x),amrex::GpuArray<int,3>{(a).end.x-1,(a).end.y-1,(a).end.z-1}.data()
-
 using namespace amrex;
 
 long Castro::num_zones_advanced = 0;

diff --git a/Source/Castro_F.H b/Source/Castro_F.H
@@ -24,6 +24,7 @@ extern "C"
 
   void ca_get_qvar(int* qvar);
 
+  AMREX_GPU_DEVICE
   void ca_ctoprim(const int* lo, const int* hi,
                   const amrex::Real* u, const int* u_lo, const int* u_hi,
                   const amrex::Real* q, const int* q_lo, const int* q_hi,
@@ -46,24 +47,28 @@ extern "C"
      const BL_FORT_FAB_ARG_3D(state),
      const amrex::Real* dx, amrex::Real* dt);
 
+  AMREX_GPU_DEVICE
   void ca_divu
     (const int* lo, const int* hi,
      const amrex::Real* dx,
      BL_FORT_FAB_ARG_3D(q),
      BL_FORT_FAB_ARG_3D(div));
 
+  AMREX_GPU_DEVICE
   void ca_uflaten
     (const int* lo, const int* hi,
      BL_FORT_FAB_ARG_3D(q),
      BL_FORT_FAB_ARG_3D(flatn));
 
+  AMREX_GPU_DEVICE
   void ca_ppm_reconstruct
     (const int* lo, const int* hi,
      BL_FORT_FAB_ARG_3D(q),
      BL_FORT_FAB_ARG_3D(flatn),
      BL_FORT_FAB_ARG_3D(qm),
      BL_FORT_FAB_ARG_3D(qp));
 
+  AMREX_GPU_DEVICE
   void ca_construct_flux
     (const int* lo, const int* hi,
      const int* domlo, const int* domhi,
@@ -78,6 +83,7 @@ extern "C"
      BL_FORT_FAB_ARG_3D(flux),
      BL_FORT_FAB_ARG_3D(area));
 
+  AMREX_GPU_DEVICE
   void ca_construct_hydro_update
     (const int* lo, const int* hi,
      const amrex::Real* dx, const amrex::Real dt,

diff --git a/Source/Castro_hydro.cpp b/Source/Castro_hydro.cpp
@@ -14,7 +14,7 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)
 
     int finest_level = parent->finestLevel();
 
-    const Real *dx = geom.CellSize();
+    auto dx = geom.CellSizeArray();
 
     MultiFab& S_new = get_new_data(State_Type);
 
@@ -25,6 +25,8 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)
 
     std::vector<amrex::Real> b_mol{0.5, 0.5};
 
+    const Real update_scale_factor = b_mol[istage];
+
     // Choose tile size based on whether we're using a GPU.
 
 #ifdef AMREX_USE_GPU
@@ -52,110 +54,139 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)
 
             const Box& qbx = amrex::grow(box, 4);
 
+            FArrayBox& state_old = Sborder[mfi];
+            auto state_old_arr = Sborder[mfi].array();
+
             // Convert the conservative state to the primitive variable state.
             // This fills both q and qaux.
 
             q.resize(qbx, QVAR);
             Elixir elix_q = q.elixir();
+            auto q_arr = q.array();
 
             qaux.resize(qbx, NQAUX);
             Elixir elix_qaux = qaux.elixir();
+            auto qaux_arr = qaux.array();
 
-#pragma gpu box(qbx) nohost
-            ca_ctoprim(AMREX_INT_ANYD(qbx.loVect()), AMREX_INT_ANYD(qbx.hiVect()),
-                       BL_TO_FORTRAN_ANYD(Sborder[mfi]),
-                       BL_TO_FORTRAN_ANYD(q),
-                       BL_TO_FORTRAN_ANYD(qaux));
+            AMREX_LAUNCH_DEVICE_LAMBDA(qbx, lbx,
+            {
+                ca_ctoprim(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
+                           AMREX_ARR4_TO_FORTRAN_ANYD(state_old_arr),
+                           AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
+                           AMREX_ARR4_TO_FORTRAN_ANYD(qaux_arr));
+            });
 
             const Box& obx = amrex::grow(box, 1);
             const Box& tbx = amrex::grow(box, 2);
 
             div.resize(obx, 1);
             Elixir elix_div = div.elixir();
+            auto div_arr = div.array();
 
             // Compute divergence of velocity field.
 
-#pragma gpu box(obx) nohost
-            ca_divu(AMREX_INT_ANYD(obx.loVect()), AMREX_INT_ANYD(obx.hiVect()),
-                    AMREX_REAL_ANYD(dx),
-                    BL_TO_FORTRAN_ANYD(q),
-                    BL_TO_FORTRAN_ANYD(div));
+            AMREX_LAUNCH_DEVICE_LAMBDA(obx, lbx,
+            {
+                ca_divu(AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
+                        AMREX_ZFILL(dx.data()),
+                        AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
+                        AMREX_ARR4_TO_FORTRAN_ANYD(div_arr));
+            });
 
             flatn.resize(obx, 1);
             Elixir elix_flatn = flatn.elixir();
+            auto flatn_arr = flatn.array();
 
             // Compute flattening coefficient for slope calculations.
-#pragma gpu box(obx) nohost
-            ca_uflaten
-                (AMREX_INT_ANYD(obx.loVect()), AMREX_INT_ANYD(obx.hiVect()),
-                 BL_TO_FORTRAN_ANYD(q),
-                 BL_TO_FORTRAN_ANYD(flatn));
+
+            AMREX_LAUNCH_DEVICE_LAMBDA(obx, lbx,
+            {
+                ca_uflaten
+                    (AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(flatn_arr));
+            });
 
             qm.resize(tbx, 3*QVAR);
             Elixir elix_qm = qm.elixir();
+            auto qm_arr = qm.array();
 
             qp.resize(tbx, 3*QVAR);
             Elixir elix_qp = qp.elixir();
+            auto qp_arr = qp.array();
 
             // Do PPM reconstruction to the zone edges.
-#pragma gpu box(obx) nohost
-            ca_ppm_reconstruct
-                (AMREX_INT_ANYD(obx.loVect()), AMREX_INT_ANYD(obx.hiVect()),
-                 BL_TO_FORTRAN_ANYD(q),
-                 BL_TO_FORTRAN_ANYD(flatn),
-                 BL_TO_FORTRAN_ANYD(qm),
-                 BL_TO_FORTRAN_ANYD(qp));
+
+            AMREX_LAUNCH_DEVICE_LAMBDA(obx, lbx,
+            {
+                ca_ppm_reconstruct
+                    (AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(q_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(flatn_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(qm_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(qp_arr));
+            });
 
             q.clear();
             flatn.clear();
 
             flux[0].resize(amrex::surroundingNodes(box, 0), NUM_STATE);
             Elixir elix_flux_x = flux[0].elixir();
+            auto flux_x_arr = flux[0].array();
 
             flux[1].resize(amrex::surroundingNodes(box, 1), NUM_STATE);
             Elixir elix_flux_y = flux[1].elixir();
+            auto flux_y_arr = flux[1].array();
 
             flux[2].resize(amrex::surroundingNodes(box, 2), NUM_STATE);
             Elixir elix_flux_z = flux[2].elixir();
+            auto flux_z_arr = flux[2].array();
 
             qe[0].resize(amrex::surroundingNodes(box, 0), NGDNV);
             Elixir elix_qe_x = qe[0].elixir();
+            auto qe_x_arr = qe[0].array();
 
             qe[1].resize(amrex::surroundingNodes(box, 1), NGDNV);
             Elixir elix_qe_y = qe[1].elixir();
+            auto qe_y_arr = qe[1].array();
 
             qe[2].resize(amrex::surroundingNodes(box, 2), NGDNV);
             Elixir elix_qe_z = qe[2].elixir();
+            auto qe_z_arr = qe[2].array();
 
             for (int idir = 0; idir < 3; ++idir) {
 
                 const Box& ebx = amrex::surroundingNodes(box, idir);
 
                 int idir_f = idir + 1;
 
-#pragma gpu box(ebx) nohost
-                ca_construct_flux
-                    (AMREX_INT_ANYD(ebx.loVect()), AMREX_INT_ANYD(ebx.hiVect()),
-                     AMREX_INT_ANYD(domain_lo), AMREX_INT_ANYD(domain_hi),
-                     AMREX_REAL_ANYD(dx), dt,
-                     idir_f,
-                     BL_TO_FORTRAN_ANYD(Sborder[mfi]),
-                     BL_TO_FORTRAN_ANYD(div),
-                     BL_TO_FORTRAN_ANYD(qaux),
-                     BL_TO_FORTRAN_ANYD(qm),
-                     BL_TO_FORTRAN_ANYD(qp),
-                     BL_TO_FORTRAN_ANYD(qe[idir]),
-                     BL_TO_FORTRAN_ANYD(flux[idir]),
-                     BL_TO_FORTRAN_ANYD(area[idir][mfi]));
-
-                Array4<Real> const flux_fab = (flux[idir]).array();
-                Array4<Real> fluxes_fab = (*fluxes[idir]).array(mfi);
+                auto flux_arr = flux[idir].array();
+                auto qe_arr   = qe[idir].array();
+                auto area_arr = area[idir][mfi].array();
+
+                AMREX_LAUNCH_DEVICE_LAMBDA(ebx, lbx,
+                {
+                    ca_construct_flux
+                        (AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
+                         AMREX_ARLIM_ANYD(domain_lo), AMREX_ARLIM_ANYD(domain_hi),
+                         AMREX_ZFILL(dx.data()), dt,
+                         idir_f,
+                         AMREX_ARR4_TO_FORTRAN_ANYD(state_old_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(div_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(qaux_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(qm_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(qp_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(qe_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(flux_arr),
+                         AMREX_ARR4_TO_FORTRAN_ANYD(area_arr));
+                });
+
+                Array4<Real> fluxes_arr = (*fluxes[idir]).array(mfi);
                 const int numcomp = NUM_STATE;
-                const Real scale = b_mol[istage];
 
                 AMREX_HOST_DEVICE_FOR_4D(ebx, numcomp, i, j, k, n,
                 {
-                    fluxes_fab(i,j,k,n) += scale * flux_fab(i,j,k,n);
+                    fluxes_arr(i,j,k,n) += update_scale_factor * flux_arr(i,j,k,n);
                 });
 
             }
@@ -165,22 +196,31 @@ Castro::construct_mol_hydro_source(Real time, Real dt, int istage, int nstages)
             qm.clear();
             qp.clear();
 
-#pragma gpu box(box) nohost
-            ca_construct_hydro_update
-                (AMREX_INT_ANYD(box.loVect()), AMREX_INT_ANYD(box.hiVect()),
-                 AMREX_REAL_ANYD(dx), dt,
-                 b_mol[istage],
-                 BL_TO_FORTRAN_ANYD(qe[0]),
-                 BL_TO_FORTRAN_ANYD(qe[1]),
-                 BL_TO_FORTRAN_ANYD(qe[2]),
-                 BL_TO_FORTRAN_ANYD(flux[0]),
-                 BL_TO_FORTRAN_ANYD(flux[1]),
-                 BL_TO_FORTRAN_ANYD(flux[2]),
-                 BL_TO_FORTRAN_ANYD(area[0][mfi]),
-                 BL_TO_FORTRAN_ANYD(area[1][mfi]),
-                 BL_TO_FORTRAN_ANYD(area[2][mfi]),
-                 BL_TO_FORTRAN_ANYD(volume[mfi]),
-                 BL_TO_FORTRAN_ANYD(hydro_source[mfi]));
+            auto area_x_arr = area[0][mfi].array();
+            auto area_y_arr = area[1][mfi].array();
+            auto area_z_arr = area[2][mfi].array();
+
+            auto volume_arr = volume[mfi].array();
+            auto hydro_source_arr = hydro_source[mfi].array();
+
+            AMREX_LAUNCH_DEVICE_LAMBDA(box, lbx,
+            {
+                ca_construct_hydro_update
+                    (AMREX_ARLIM_ANYD(lbx.loVect()), AMREX_ARLIM_ANYD(lbx.hiVect()),
+                     AMREX_ZFILL(dx.data()), dt,
+                     update_scale_factor,
+                     AMREX_ARR4_TO_FORTRAN_ANYD(qe_x_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(qe_y_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(qe_z_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(flux_x_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(flux_y_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(flux_z_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(area_x_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(area_y_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(area_z_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(volume_arr),
+                     AMREX_ARR4_TO_FORTRAN_ANYD(hydro_source_arr));
+            });
 
         } // MFIter loop