Skip to content

Commit

Permalink
Z4co: add rhs.xx
Browse files Browse the repository at this point in the history
  • Loading branch information
lwJi committed Jul 2, 2024
1 parent cd182c8 commit 97ed9c5
Show file tree
Hide file tree
Showing 6 changed files with 284 additions and 15 deletions.
20 changes: 9 additions & 11 deletions Z4co/src/derivs.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -152,15 +152,15 @@ template <typename T>
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd<T>
deriv2_2d(const int vavail, const simdl<T> &mask, const T *restrict const var,
const ptrdiff_t di, const ptrdiff_t dj, const T dx, const T dy) {
constexpr size_t vsize = tuple_size_v<simd<T> >;
constexpr size_t vsize = tuple_size_v<simd<T>>;
if (di == 1) {
assert(vavail > 0);
constexpr int maxnpoints = deriv_order + 1 + vsize - 1;
const int npoints = deriv_order + 1 + min(int(vsize), vavail) - 1;
array<simd<T>, div_ceil(maxnpoints, int(vsize))> arrx;
for (int i = 0; i < maxnpoints; i += vsize) {
if (i < npoints) {
const simdl<T> mask1 = mask_for_loop_tail<simdl<T> >(i, npoints);
const simdl<T> mask1 = mask_for_loop_tail<simdl<T>>(i, npoints);
arrx[div_floor(i, int(vsize))] =
deriv1d(mask1, &var[i - deriv_order / 2], dj, dy);
}
Expand All @@ -177,7 +177,7 @@ deriv2_2d(const int vavail, const simdl<T> &mask, const T *restrict const var,
for (int j = -deriv_order / 2; j <= deriv_order / 2; ++j)
if (j == 0) {
#ifdef CCTK_DEBUG
arrx[deriv_order / 2 + j] = Arith::nan<simd<T> >()(); // unused
arrx[deriv_order / 2 + j] = Arith::nan<simd<T>>()(); // unused
#endif
} else {
arrx[deriv_order / 2 + j] = deriv1d(mask, &var[j * dj], di, dx);
Expand Down Expand Up @@ -233,10 +233,9 @@ deriv_upwind(const simdl<T> &mask, const GF3D2<const T> &gf_,
}

template <int dir1, int dir2, typename T, int D>
inline ARITH_INLINE
ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd<T> >
deriv2(const int vavail, const simdl<T> &mask, const GF3D2<const T> &gf_,
const vect<int, dim> &I, const vec<T, D> &dx) {
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd<T>>
deriv2(const int vavail, const simdl<T> &mask, const GF3D2<const T> &gf_,
const vect<int, dim> &I, const vec<T, D> &dx) {
static_assert(dir1 >= 0 && dir1 < D, "");
static_assert(dir2 >= 0 && dir2 < D, "");
const auto &DI = vect<int, dim>::unit;
Expand All @@ -245,10 +244,9 @@ inline ARITH_INLINE
}

template <int dir1, int dir2, typename T, int D>
inline ARITH_INLINE
ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd<T> >
deriv2(const int vavail, const simdl<T> &mask, const GF3D2<const T> &gf_,
const vect<int, dim> &I, const vec<T, D> &dx) {
inline ARITH_INLINE ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd<T>>
deriv2(const int vavail, const simdl<T> &mask, const GF3D2<const T> &gf_,
const vect<int, dim> &I, const vec<T, D> &dx) {
static_assert(dir1 >= 0 && dir1 < D, "");
static_assert(dir2 >= 0 && dir2 < D, "");
const auto &DI = vect<int, dim>::unit;
Expand Down
2 changes: 1 addition & 1 deletion Z4co/src/enforce.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ extern "C" void Z4co_Enforce(CCTK_ARGUMENTS) {
typedef simdl<CCTK_REAL> vbool;
constexpr size_t vsize = tuple_size_v<vreal>;

const auto delta3 = one<smat<vreal, 3> >()();
const auto delta3 = one<smat<vreal, 3>>()();

#ifdef __CUDACC__
const nvtxRangeId_t range = nvtxRangeStartA("Z4co_Enforce::enforce");
Expand Down
4 changes: 2 additions & 2 deletions Z4co/src/initial1.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ extern "C" void Z4co_Initial1(CCTK_ARGUMENTS) {
typedef simdl<CCTK_REAL> vbool;
constexpr size_t vsize = tuple_size_v<vreal>;

const auto delta3 = one<smat<vreal, 3> >()();
const auto delta3 = one<smat<vreal, 3>>()();

const Loop::GridDescBaseDevice grid(cctkGH);
#ifdef __CUDACC__
Expand All @@ -86,7 +86,7 @@ extern "C" void Z4co_Initial1(CCTK_ARGUMENTS) {
const GF3D2index index1(layout1, p.I);

// Load
const smat<vreal, 3> g = gf_g1(mask, index1, one<smat<int, 3> >()());
const smat<vreal, 3> g = gf_g1(mask, index1, one<smat<int, 3>>()());
const smat<vreal, 3> K = gf_K1(mask, index1);
const vreal alp = gf_alp1(mask, index1, 1);
const vec<vreal, 3> beta = gf_beta1(mask, index1);
Expand Down
2 changes: 1 addition & 1 deletion Z4co/src/initial2.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ extern "C" void Z4co_Initial2(CCTK_ARGUMENTS) {
typedef simdl<CCTK_REAL> vbool;
constexpr size_t vsize = tuple_size_v<vreal>;

const auto delta3 = one<smat<vreal, 3> >()();
const auto delta3 = one<smat<vreal, 3>>()();

const Loop::GridDescBaseDevice grid(cctkGH);
#ifdef __CUDACC__
Expand Down
11 changes: 11 additions & 0 deletions Z4co/src/make.code.defn
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Main make.code.defn file for thorn Z4c

# Source files in this directory
SRCS = \
enforce.cxx \
initial1.cxx \
initial2.cxx \
rhs.cxx

# Subdirectories containing source files
SUBDIRS =
260 changes: 260 additions & 0 deletions Z4co/src/rhs.cxx
Original file line number Diff line number Diff line change
@@ -0,0 +1,260 @@
#include <cctk.h>

#ifdef __CUDACC__
// Disable CCTK_DEBUG since the debug information takes too much
// parameter space to launch the kernels
#ifdef CCTK_DEBUG
#undef CCTK_DEBUG
#endif
#endif

#include "derivs.hxx"
#include "physics.hxx"

#include <loop_device.hxx>
#include <mat.hxx>
#include <simd.hxx>
#include <vec.hxx>

#include <cctk.h>
#include <cctk_Arguments.h>
#include <cctk_Parameters.h>

#ifdef __CUDACC__
#include <nvToolsExt.h>
#endif

#include <cmath>

namespace Z4co {
using namespace Arith;
using namespace Loop;
using namespace std;

extern "C" void Z4co_RHS(CCTK_ARGUMENTS) {
DECLARE_CCTK_ARGUMENTS_Z4co_RHS;
DECLARE_CCTK_PARAMETERS;

for (int d = 0; d < 3; ++d)
if (cctk_nghostzones[d] < deriv_order / 2 + 1)
CCTK_VERROR("Need at least %d ghost zones", deriv_order / 2 + 1);

//

const array<int, dim> indextype = {0, 0, 0};
const array<int, dim> nghostzones = {cctk_nghostzones[0], cctk_nghostzones[1],
cctk_nghostzones[2]};
vect<int, dim> imin, imax;
GridDescBase(cctkGH).box_int<0, 0, 0>(nghostzones, imin, imax);
// Suffix 1: with ghost zones, suffix 0: without ghost zones
const GF3D2layout layout2(cctkGH, indextype);
const GF3D5layout layout5(imin, imax);

const GF3D2<const CCTK_REAL> gf_chi1(layout2, chi);

const smat<GF3D2<const CCTK_REAL>, 3> gf_gammat1{
GF3D2<const CCTK_REAL>(layout2, gammatxx),
GF3D2<const CCTK_REAL>(layout2, gammatxy),
GF3D2<const CCTK_REAL>(layout2, gammatxz),
GF3D2<const CCTK_REAL>(layout2, gammatyy),
GF3D2<const CCTK_REAL>(layout2, gammatyz),
GF3D2<const CCTK_REAL>(layout2, gammatzz)};

const GF3D2<const CCTK_REAL> gf_Kh1(layout2, Kh);

const smat<GF3D2<const CCTK_REAL>, 3> gf_At1{
GF3D2<const CCTK_REAL>(layout2, Atxx),
GF3D2<const CCTK_REAL>(layout2, Atxy),
GF3D2<const CCTK_REAL>(layout2, Atxz),
GF3D2<const CCTK_REAL>(layout2, Atyy),
GF3D2<const CCTK_REAL>(layout2, Atyz),
GF3D2<const CCTK_REAL>(layout2, Atzz)};

const vec<GF3D2<const CCTK_REAL>, 3> gf_Gamt1{
GF3D2<const CCTK_REAL>(layout2, Gamtx),
GF3D2<const CCTK_REAL>(layout2, Gamty),
GF3D2<const CCTK_REAL>(layout2, Gamtz)};

const GF3D2<const CCTK_REAL> gf_Theta1(layout2, Theta);

const GF3D2<const CCTK_REAL> gf_alphaG1(layout2, alphaG);

const vec<GF3D2<const CCTK_REAL>, 3> gf_betaG1{
GF3D2<const CCTK_REAL>(layout2, betaGx),
GF3D2<const CCTK_REAL>(layout2, betaGy),
GF3D2<const CCTK_REAL>(layout2, betaGz)};

//

// Ideas:
//
// - Outline certain functions, e.g. `det` or `raise_index`. Ensure
// they are called with floating-point arguments, not tensor
// indices.

const int ntmps = 154;
GF3D5vector<CCTK_REAL> tmps(layout5, ntmps);
int itmp = 0;

const auto make_gf = [&]() { return GF3D5<CCTK_REAL>(tmps(itmp++)); };
const auto make_vec = [&](const auto &f) {
return vec<result_of_t<decltype(f)()>, 3>([&](int) { return f(); });
};
const auto make_mat = [&](const auto &f) {
return smat<result_of_t<decltype(f)()>, 3>([&](int, int) { return f(); });
};
const auto make_vec_gf = [&]() { return make_vec(make_gf); };
const auto make_mat_gf = [&]() { return make_mat(make_gf); };
const auto make_vec_vec_gf = [&]() { return make_vec(make_vec_gf); };
const auto make_vec_mat_gf = [&]() { return make_vec(make_mat_gf); };
const auto make_mat_vec_gf = [&]() { return make_mat(make_vec_gf); };
const auto make_mat_mat_gf = [&]() { return make_mat(make_mat_gf); };

const GF3D5<CCTK_REAL> gf_chi0(make_gf());
const vec<GF3D5<CCTK_REAL>, 3> gf_dchi0(make_vec_gf());
const smat<GF3D5<CCTK_REAL>, 3> gf_ddchi0(make_mat_gf());
calc_derivs2(cctkGH, gf_chi1, gf_chi0, gf_dchi0, gf_ddchi0, layout5);

const smat<GF3D5<CCTK_REAL>, 3> gf_gammat0(make_mat_gf());
const smat<vec<GF3D5<CCTK_REAL>, 3>, 3> gf_dgammat0(make_mat_vec_gf());
const smat<smat<GF3D5<CCTK_REAL>, 3>, 3> gf_ddgammat0(make_mat_mat_gf());
calc_derivs2(cctkGH, gf_gammat1, gf_gammat0, gf_dgammat0, gf_ddgammat0,
layout5);

const GF3D5<CCTK_REAL> gf_Kh0(make_gf());
const vec<GF3D5<CCTK_REAL>, 3> gf_dKh0(make_vec_gf());
calc_derivs(cctkGH, gf_Kh1, gf_Kh0, gf_dKh0, layout5);

const smat<GF3D5<CCTK_REAL>, 3> gf_At0(make_mat_gf());
const smat<vec<GF3D5<CCTK_REAL>, 3>, 3> gf_dAt0(make_mat_vec_gf());
calc_derivs(cctkGH, gf_At1, gf_At0, gf_dAt0, layout5);

const vec<GF3D5<CCTK_REAL>, 3> gf_Gamt0(make_vec_gf());
const vec<vec<GF3D5<CCTK_REAL>, 3>, 3> gf_dGamt0(make_vec_vec_gf());
calc_derivs(cctkGH, gf_Gamt1, gf_Gamt0, gf_dGamt0, layout5);

const GF3D5<CCTK_REAL> gf_Theta0(make_gf());
const vec<GF3D5<CCTK_REAL>, 3> gf_dTheta0(make_vec_gf());
calc_derivs(cctkGH, gf_Theta1, gf_Theta0, gf_dTheta0, layout5);

const GF3D5<CCTK_REAL> gf_alphaG0(make_gf());
const vec<GF3D5<CCTK_REAL>, 3> gf_dalphaG0(make_vec_gf());
const smat<GF3D5<CCTK_REAL>, 3> gf_ddalphaG0(make_mat_gf());
calc_derivs2(cctkGH, gf_alphaG1, gf_alphaG0, gf_dalphaG0, gf_ddalphaG0,
layout5);

const vec<GF3D5<CCTK_REAL>, 3> gf_betaG0(make_vec_gf());
const vec<vec<GF3D5<CCTK_REAL>, 3>, 3> gf_dbetaG0(make_vec_vec_gf());
const vec<smat<GF3D5<CCTK_REAL>, 3>, 3> gf_ddbetaG0(make_vec_mat_gf());
calc_derivs2(cctkGH, gf_betaG1, gf_betaG0, gf_dbetaG0, gf_ddbetaG0, layout5);

if (itmp != ntmps)
CCTK_VERROR("Wrong number of temporary variables: ntmps=%d itmp=%d", ntmps,
itmp);
itmp = -1;

//

const GF3D2<const CCTK_REAL> gf_eTtt1(layout2, eTtt);

const vec<GF3D2<const CCTK_REAL>, 3> gf_eTti1{
GF3D2<const CCTK_REAL>(layout2, eTtx),
GF3D2<const CCTK_REAL>(layout2, eTty),
GF3D2<const CCTK_REAL>(layout2, eTtz)};

const smat<GF3D2<const CCTK_REAL>, 3> gf_eTij1{
GF3D2<const CCTK_REAL>(layout2, eTxx),
GF3D2<const CCTK_REAL>(layout2, eTxy),
GF3D2<const CCTK_REAL>(layout2, eTxz),
GF3D2<const CCTK_REAL>(layout2, eTyy),
GF3D2<const CCTK_REAL>(layout2, eTyz),
GF3D2<const CCTK_REAL>(layout2, eTzz)};

//

const GF3D2<CCTK_REAL> gf_chi_rhs1(layout2, chi_rhs);

const smat<GF3D2<CCTK_REAL>, 3> gf_gammat_rhs1{
GF3D2<CCTK_REAL>(layout2, gammatxx_rhs),
GF3D2<CCTK_REAL>(layout2, gammatxy_rhs),
GF3D2<CCTK_REAL>(layout2, gammatxz_rhs),
GF3D2<CCTK_REAL>(layout2, gammatyy_rhs),
GF3D2<CCTK_REAL>(layout2, gammatyz_rhs),
GF3D2<CCTK_REAL>(layout2, gammatzz_rhs)};

const GF3D2<CCTK_REAL> gf_Kh_rhs1(layout2, Kh_rhs);

const smat<GF3D2<CCTK_REAL>, 3> gf_At_rhs1{
GF3D2<CCTK_REAL>(layout2, Atxx_rhs), GF3D2<CCTK_REAL>(layout2, Atxy_rhs),
GF3D2<CCTK_REAL>(layout2, Atxz_rhs), GF3D2<CCTK_REAL>(layout2, Atyy_rhs),
GF3D2<CCTK_REAL>(layout2, Atyz_rhs), GF3D2<CCTK_REAL>(layout2, Atzz_rhs)};

const vec<GF3D2<CCTK_REAL>, 3> gf_Gamt_rhs1{
GF3D2<CCTK_REAL>(layout2, Gamtx_rhs),
GF3D2<CCTK_REAL>(layout2, Gamty_rhs),
GF3D2<CCTK_REAL>(layout2, Gamtz_rhs)};

const GF3D2<CCTK_REAL> gf_Theta_rhs1(layout2, Theta_rhs);

const GF3D2<CCTK_REAL> gf_alphaG_rhs1(layout2, alphaG_rhs);

const vec<GF3D2<CCTK_REAL>, 3> gf_betaG_rhs1{
GF3D2<CCTK_REAL>(layout2, betaGx_rhs),
GF3D2<CCTK_REAL>(layout2, betaGy_rhs),
GF3D2<CCTK_REAL>(layout2, betaGz_rhs)};

//

typedef simd<CCTK_REAL> vreal;
typedef simdl<CCTK_REAL> vbool;
constexpr size_t vsize = tuple_size_v<vreal>;

const Loop::GridDescBaseDevice grid(cctkGH);

#ifdef __CUDACC__
const nvtxRangeId_t range = nvtxRangeStartA("Z4co_RHS::rhs");
#endif
noinline([&]() __attribute__((__flatten__, __hot__)) {
grid.loop_int_device<0, 0, 0, vsize>(
grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
const GF3D2index index2(layout2, p.I);
const GF3D5index index5(layout5, p.I);

#include "../wolfram/Z4co_set_rhs.hxx"
});
});
#ifdef __CUDACC__
nvtxRangeEnd(range);
#endif

// Upwind and dissipation terms

// TODO: Consider fusing the loops to reduce memory bandwidth

apply_upwind_diss(cctkGH, gf_chi1, gf_betaG1, gf_chi_rhs1);

for (int a = 0; a < 3; ++a)
for (int b = a; b < 3; ++b)
apply_upwind_diss(cctkGH, gf_gammat1(a, b), gf_betaG1,
gf_gammat_rhs1(a, b));

apply_upwind_diss(cctkGH, gf_Kh1, gf_betaG1, gf_Kh_rhs1);

for (int a = 0; a < 3; ++a)
for (int b = a; b < 3; ++b)
apply_upwind_diss(cctkGH, gf_At1(a, b), gf_betaG1, gf_At_rhs1(a, b));

for (int a = 0; a < 3; ++a)
apply_upwind_diss(cctkGH, gf_Gamt1(a), gf_betaG1, gf_Gamt_rhs1(a));

if (!set_Theta_zero)
apply_upwind_diss(cctkGH, gf_Theta1, gf_betaG1, gf_Theta_rhs1);

apply_upwind_diss(cctkGH, gf_alphaG1, gf_betaG1, gf_alphaG_rhs1);

for (int a = 0; a < 3; ++a)
apply_upwind_diss(cctkGH, gf_betaG1(a), gf_betaG1, gf_betaG_rhs1(a));
}

} // namespace Z4co

0 comments on commit 97ed9c5

Please sign in to comment.