From df588a1b972820375621fb2524178371f05f6ec8 Mon Sep 17 00:00:00 2001 From: Liwei Ji Date: Mon, 26 Aug 2024 15:39:59 -0400 Subject: [PATCH] Z4co: use calc_derivs for Derivs --- Z4co/configuration.ccl | 2 +- Z4co/interface.ccl | 1 + Z4co/param.ccl | 7 + Z4co/src/constraint.cxx | 43 +++- Z4co/src/derivs.hxx | 492 ---------------------------------------- Z4co/src/diss.hxx | 79 +++++++ Z4co/src/initial2.cxx | 39 +++- Z4co/src/rhs.cxx | 45 +++- 8 files changed, 189 insertions(+), 519 deletions(-) delete mode 100644 Z4co/src/derivs.hxx create mode 100644 Z4co/src/diss.hxx diff --git a/Z4co/configuration.ccl b/Z4co/configuration.ccl index 81712b09..d1f5396c 100644 --- a/Z4co/configuration.ccl +++ b/Z4co/configuration.ccl @@ -1,3 +1,3 @@ # Configuration definitions for thorn Z4co -REQUIRES Arith Loop NewRadX +REQUIRES Arith Loop Derivs NewRadX diff --git a/Z4co/interface.ccl b/Z4co/interface.ccl index 644edce7..aacae7a1 100644 --- a/Z4co/interface.ccl +++ b/Z4co/interface.ccl @@ -14,6 +14,7 @@ USES INCLUDE HEADER: sum.hxx USES INCLUDE HEADER: vec.hxx USES INCLUDE HEADER: vect.hxx +USES INCLUDE HEADER: derivs.hxx USES INCLUDE HEADER: newradx.hxx diff --git a/Z4co/param.ccl b/Z4co/param.ccl index c00658c5..140d0775 100644 --- a/Z4co/param.ccl +++ b/Z4co/param.ccl @@ -1,5 +1,12 @@ # Parameter definitions for thorn Z4co +CCTK_INT deriv_order "Order of spatial finite differencing" STEERABLE=never +{ + 2 :: "Second order finite difference" + 4 :: "Fourth order finite difference" + 6 :: "Sixth order finite difference" +} 4 + BOOLEAN calc_ADM_vars "Calculate ADM variables" STEERABLE=recover { } yes diff --git a/Z4co/src/constraint.cxx b/Z4co/src/constraint.cxx index d5c2f65d..9f45a794 100644 --- a/Z4co/src/constraint.cxx +++ b/Z4co/src/constraint.cxx @@ -8,8 +8,7 @@ #endif #endif -#include "derivs.hxx" - +#include #include #include @@ -104,37 +103,60 @@ extern "C" void Z4co_Constraints(CCTK_ARGUMENTS) { const auto make_mat_vec_gf = [&]() { return make_mat(make_vec_gf); }; const auto make_mat_mat_gf = [&]() { return make_mat(make_mat_gf); }; + const Loop::GridDescBaseDevice grid(cctkGH); + + const vect dx(std::array{ + CCTK_DELTA_SPACE(0), + CCTK_DELTA_SPACE(1), + CCTK_DELTA_SPACE(2), + }); + + const auto calccopy = [&](const auto &gf, const auto &gf0) { + Derivs::calc_copy<0, 0, 0>(gf, layout5, grid, gf0); + }; + + const auto calcderivs = [&](const auto &gf, const auto &dgf, + const auto &gf0) { + Derivs::calc_derivs<0, 0, 0>(gf, dgf, layout5, grid, gf0, dx, deriv_order); + }; + + const auto calcderivs2 = [&](const auto &gf, const auto &dgf, + const auto &ddgf, const auto &gf0) { + Derivs::calc_derivs2<0, 0, 0>(gf, dgf, ddgf, layout5, grid, gf0, dx, + deriv_order); + }; + const GF3D5 tl_chi(make_gf()); const vec, 3> tl_dchi(make_vec_gf()); const smat, 3> tl_ddchi(make_mat_gf()); - calc_derivs2(cctkGH, gf_chi, tl_chi, tl_dchi, tl_ddchi, layout5); + calcderivs2(tl_chi, tl_dchi, tl_ddchi, gf_chi); const smat, 3> tl_gamt(make_mat_gf()); const smat, 3>, 3> tl_dgamt(make_mat_vec_gf()); const smat, 3>, 3> tl_ddgamt(make_mat_mat_gf()); - calc_derivs2(cctkGH, gf_gamt, tl_gamt, tl_dgamt, tl_ddgamt, layout5); + calcderivs2(tl_gamt, tl_dgamt, tl_ddgamt, gf_gamt); const GF3D5 tl_exKh(make_gf()); const vec, 3> tl_dexKh(make_vec_gf()); - calc_derivs(cctkGH, gf_exKh, tl_exKh, tl_dexKh, layout5); + calcderivs(tl_exKh, tl_dexKh, gf_exKh); const smat, 3> tl_exAt(make_mat_gf()); const smat, 3>, 3> tl_dexAt(make_mat_vec_gf()); - calc_derivs(cctkGH, gf_exAt, tl_exAt, tl_dexAt, layout5); + calcderivs(tl_exAt, tl_dexAt, gf_exAt); const vec, 3> tl_trGt(make_vec_gf()); const vec, 3>, 3> tl_dtrGt(make_vec_vec_gf()); - calc_derivs(cctkGH, gf_trGt, tl_trGt, tl_dtrGt, layout5); + calcderivs(tl_trGt, tl_dtrGt, gf_trGt); const GF3D5 tl_Theta(make_gf()); const vec, 3> tl_dTheta(make_vec_gf()); - calc_derivs(cctkGH, gf_Theta, tl_Theta, tl_dTheta, layout5); + calcderivs(tl_Theta, tl_dTheta, gf_Theta); const GF3D5 tl_alpha(make_gf()); - calc_copy(cctkGH, gf_alpha, tl_alpha, layout5); + calccopy(tl_alpha, gf_alpha); const vec, 3> tl_beta(make_vec_gf()); - calc_copy(cctkGH, gf_beta, tl_beta, layout5); + calccopy(tl_beta, gf_beta); if (itmp != ntmps) CCTK_VERROR("Wrong number of temporary variables: ntmps=%d itmp=%d", ntmps, @@ -178,7 +200,6 @@ extern "C" void Z4co_Constraints(CCTK_ARGUMENTS) { const CCTK_REAL cpi = acos(-1.0); - const Loop::GridDescBaseDevice grid(cctkGH); #ifdef __CUDACC__ const nvtxRangeId_t range = nvtxRangeStartA("Z4co_Constraints::constraints"); #endif diff --git a/Z4co/src/derivs.hxx b/Z4co/src/derivs.hxx deleted file mode 100644 index f90be6ca..00000000 --- a/Z4co/src/derivs.hxx +++ /dev/null @@ -1,492 +0,0 @@ -#ifndef Z4CO_DERIVS_HXX -#define Z4CO_DERIVS_HXX - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -namespace Z4co { -using namespace Arith; -using namespace Loop; -using namespace std; - -//////////////////////////////////////////////////////////////////////////////// - -constexpr int deriv_order = 4; - -//////////////////////////////////////////////////////////////////////////////// - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, - const T dx) { - if constexpr (deriv_order == 2) - return -1 / T(2) * - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[+di])) / dx; - if constexpr (deriv_order == 4) - return (1 / T(12) * - (maskz_loadu(mask, &var[-2 * di]) - - maskz_loadu(mask, &var[+2 * di])) // - - - 2 / T(3) * - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[+di]))) / - dx; -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv1d_upwind(const simdl &mask, const T *restrict const var, - const ptrdiff_t di, const simd &vel, const T dx) { - // arXiv:1111.2177 [gr-qc], (71) - if constexpr (deriv_order == 2) { - // if (sign) - // // + [ 0 -1 +1 0 0] - // // + 1/2 [+1 -2 +1 0 0] - // // [+1/2 -2 +3/2 0 0] - // return (1 / T(2) * var[-2 * di] // - // - 2 * var[-di] // - // + 3 / T(2) * var[0]) / - // dx; - // else - // // + [ 0 0 -1 +1 0 ] - // // - 1/2 [ 0 0 +1 -2 +1 ] - // // [ 0 0 -3/2 +2 -1/2] - // return (-3 / T(2) * var[0] // - // + 2 * var[+di] // - // - 1 / T(2) * var[+2 * di]) / - // dx; - - // + 1/2 [+1/2 -2 +3/2 0 0 ] - // + 1/2 [ 0 0 -3/2 +2 -1/2] - // [+1/4 -1 0 +1 -1/4] - const simd symm = - 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) - - maskz_loadu(mask, &var[2 * di])) // - - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); - // + 1/2 [+1/2 -2 +3/2 0 0 ] - // - 1/2 [ 0 0 -3/2 +2 -1/2] - // [+1/4 -1 +3/2 -1 +1/4] - const simd anti = - 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[2 * di])) // - - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // - + 3 / T(2) * maskz_loadu(mask, &var[0]); - return (vel * symm - fabs(vel) * anti) / dx; - } - if constexpr (deriv_order == 4) { - // A fourth order stencil for a first derivative, shifted by one grid point - - // if (sign) - // return (-1 / T(12) * var[-3 * di] // - // + 1 / T(2) * var[-2 * di] // - // - 3 / T(2) * var[-di] // - // + 5 / T(6) * var[0] // - // + 1 / T(4) * var[+di]) / - // dx; - // else - // return (-1 / T(4) * var[-di] // - // - 5 / T(6) * var[0] // - // + 3 / T(2) * var[+di] // - // - 1 / T(2) * var[+2 * di] // - // + 1 / T(12) * var[+3 * di]) / - // dx; - - const simd symm = - -1 / T(24) * - (maskz_loadu(mask, &var[-3 * di]) - - maskz_loadu(mask, &var[3 * di])) // - + 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) - - maskz_loadu(mask, &var[2 * di])) // - - - 7 / T(8) * (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); - const simd anti = - -1 / T(24) * - (maskz_loadu(mask, &var[-3 * di]) + - maskz_loadu(mask, &var[3 * di])) // - + 1 / T(4) * - (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[2 * di])) // - - 5 / T(8) * - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // - + 5 / T(6) * maskz_loadu(mask, &var[0]); - return (vel * symm - fabs(vel) * anti) / dx; - } -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv2_1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, - const T dx) { - if constexpr (deriv_order == 2) - return ((maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // - - 2 * maskz_loadu(mask, &var[0])) / - pow2(dx); - if constexpr (deriv_order == 4) - return (-1 / T(12) * - (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[+2 * di])) // - + - 4 / T(3) * - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // - - 5 / T(2) * maskz_loadu(mask, &var[0])) / - pow2(dx); -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv2_2d(const int vavail, const simdl &mask, const T *restrict const var, - const ptrdiff_t di, const ptrdiff_t dj, const T dx, const T dy) { - constexpr size_t vsize = tuple_size_v>; - if (di == 1) { - assert(vavail > 0); - constexpr int maxnpoints = deriv_order + 1 + vsize - 1; - const int npoints = deriv_order + 1 + min(int(vsize), vavail) - 1; - array, div_ceil(maxnpoints, int(vsize))> arrx; - for (int i = 0; i < maxnpoints; i += vsize) { - if (i < npoints) { - const simdl mask1 = mask_for_loop_tail>(i, npoints); - arrx[div_floor(i, int(vsize))] = - deriv1d(mask1, &var[i - deriv_order / 2], dj, dy); - } - } -#ifdef CCTK_DEBUG - for (int i = npoints; i < align_ceil(maxnpoints, int(vsize)); ++i) - ((T *)&arrx[0])[i] = Arith::nan()(); // unused -#endif - const T *const varx = (T *)&arrx[0] + deriv_order / 2; - return deriv1d(mask, varx, 1, dx); - } else { - assert(dj != 1); - array, deriv_order + 1> arrx; - for (int j = -deriv_order / 2; j <= deriv_order / 2; ++j) - if (j == 0) { -#ifdef CCTK_DEBUG - arrx[deriv_order / 2 + j] = Arith::nan>()(); // unused -#endif - } else { - arrx[deriv_order / 2 + j] = deriv1d(mask, &var[j * dj], di, dx); - } - const T *const varx = (T *)(&arrx[deriv_order / 2]); - return deriv1d(mask, varx, vsize, dy); - } -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv1d_diss(const simdl &mask, const T *restrict const var, - const ptrdiff_t di, const T dx) { - if constexpr (deriv_order == 2) - return ((maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[+2 * di])) // - - - 4 * (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // - + 6 * maskz_loadu(mask, &var[0])) / - dx; - if constexpr (deriv_order == 4) - return ((maskz_loadu(mask, &var[-3 * di]) + - maskz_loadu(mask, &var[+3 * di])) // - - 6 * (maskz_loadu(mask, &var[-2 * di]) + - maskz_loadu(mask, &var[+2 * di])) // - + 15 * (maskz_loadu(mask, &var[-di]) + - maskz_loadu(mask, &var[+di])) // - - 20 * maskz_loadu(mask, &var[0])) / - dx; -} - -//////////////////////////////////////////////////////////////////////////////// - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, - const vec &dx) { - static_assert(dir >= 0 && dir < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir)); - return deriv1d(mask, &gf_(I), di, dx(dir)); -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv_upwind(const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec, D> &vel, - const vec &dx) { - static_assert(dir >= 0 && dir < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir)); - return deriv1d_upwind(mask, &gf_(I), di, vel(dir), dx(dir)); -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd> -deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - static_assert(dir1 >= 0 && dir1 < D, ""); - static_assert(dir2 >= 0 && dir2 < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir1)); - return deriv2_1d(mask, &gf_(I), di, dx(dir1)); -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd> -deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - static_assert(dir1 >= 0 && dir1 < D, ""); - static_assert(dir2 >= 0 && dir2 < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir1)); - const ptrdiff_t dj = gf_.delta(DI(dir2)); - return deriv2_2d(vavail, mask, &gf_(I), di, dj, dx(dir1), dx(dir2)); -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv_diss(const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - static_assert(dir >= 0 && dir < D, ""); - const auto &DI = vect::unit; - const ptrdiff_t di = gf_.delta(DI(dir)); - return deriv1d_diss(mask, &gf_(I), di, dx(dir)); -} - -//////////////////////////////////////////////////////////////////////////////// - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST vec, dim> -deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, - const vec &dx) { - return {deriv<0>(mask, gf_, I, dx), deriv<1>(mask, gf_, I, dx), - deriv<2>(mask, gf_, I, dx)}; -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -deriv_upwind(const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec, dim> &vel, - const vec &dx) { - return deriv_upwind<0>(mask, gf_, I, vel, dx) + - deriv_upwind<1>(mask, gf_, I, vel, dx) + - deriv_upwind<2>(mask, gf_, I, vel, dx); -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST smat, dim> -deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, - const vect &I, const vec &dx) { - return {deriv2<0, 0>(vavail, mask, gf_, I, dx), - deriv2<0, 1>(vavail, mask, gf_, I, dx), - deriv2<0, 2>(vavail, mask, gf_, I, dx), - deriv2<1, 1>(vavail, mask, gf_, I, dx), - deriv2<1, 2>(vavail, mask, gf_, I, dx), - deriv2<2, 2>(vavail, mask, gf_, I, dx)}; -} - -template -inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd -diss(const simdl &mask, const GF3D2 &gf_, const vect &I, - const vec &dx) { - // arXiv:gr-qc/0610128, (63), with r=2 - constexpr int diss_order = deriv_order + 2; - constexpr int sign = diss_order % 4 == 0 ? -1 : +1; - return sign / T(pown(2, deriv_order + 2)) * - (deriv_diss<0>(mask, gf_, I, dx) // - + deriv_diss<1>(mask, gf_, I, dx) // - + deriv_diss<2>(mask, gf_, I, dx)); -} - -//////////////////////////////////////////////////////////////////////////////// - -template -CCTK_ATTRIBUTE_NOINLINE void -calc_copy(const cGH *restrict const cctkGH, const GF3D2 &gf1, - const GF3D5 &gf0, const GF3D5layout &layout0) { - DECLARE_CCTK_ARGUMENTS; - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D5index index0(layout0, p.I); - const auto val = gf1(mask, p.I); - gf0.store(mask, index0, val); - }); -} - -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs(const cGH *restrict const cctkGH, const GF3D2 &gf1, - const GF3D5 &gf0, const vec, dim> &dgf0, - const GF3D5layout &layout0) { - DECLARE_CCTK_ARGUMENTS; - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D5index index0(layout0, p.I); - const auto val = gf1(mask, p.I); - gf0.store(mask, index0, val); - const auto dval = deriv(mask, gf1, p.I, dx); - dgf0.store(mask, index0, dval); - }); -} - -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs2(const cGH *restrict const cctkGH, const GF3D2 &gf1, - const GF3D5 &gf0, const vec, dim> &dgf0, - const smat, dim> &ddgf0, const GF3D5layout &layout0) { - DECLARE_CCTK_ARGUMENTS; - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const int vavail = p.imax - p.i; - const GF3D5index index0(layout0, p.I); - const auto val = gf1(mask, p.I); - gf0.store(mask, index0, val); - const auto dval = deriv(mask, gf1, p.I, dx); - dgf0.store(mask, index0, dval); - const auto ddval = deriv2(vavail, mask, gf1, p.I, dx); - ddgf0.store(mask, index0, ddval); - }); -} - -template -CCTK_ATTRIBUTE_NOINLINE void calc_copy(const cGH *restrict const cctkGH, - const vec, dim> &gf0_, - const vec, dim> &gf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - calc_copy(cctkGH, gf0_(a), gf_(a), layout); -} - -template -CCTK_ATTRIBUTE_NOINLINE void -calc_derivs(const cGH *restrict const cctkGH, - const vec, dim> &gf0_, const vec, dim> &gf_, - const vec, dim>, dim> &dgf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - calc_derivs(cctkGH, gf0_(a), gf_(a), dgf_(a), layout); -} - -template -CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( - const cGH *restrict const cctkGH, const vec, dim> &gf0_, - const vec, dim> &gf_, const vec, dim>, dim> &dgf_, - const vec, dim>, dim> &ddgf_, const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - calc_derivs2(cctkGH, gf0_(a), gf_(a), dgf_(a), ddgf_(a), layout); -} - -template -CCTK_ATTRIBUTE_NOINLINE void calc_copy(const cGH *restrict const cctkGH, - const smat, dim> &gf0_, - const smat, dim> &gf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - for (int b = a; b < 3; ++b) - calc_copy(cctkGH, gf0_(a, b), gf_(a, b), layout); -} - -template -CCTK_ATTRIBUTE_NOINLINE void calc_derivs( - const cGH *restrict const cctkGH, const smat, dim> &gf0_, - const smat, dim> &gf_, const smat, dim>, dim> &dgf_, - const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - for (int b = a; b < 3; ++b) - calc_derivs(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), layout); -} - -template -CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( - const cGH *restrict const cctkGH, const smat, dim> &gf0_, - const smat, dim> &gf_, const smat, dim>, dim> &dgf_, - const smat, dim>, dim> &ddgf_, const GF3D5layout &layout) { - for (int a = 0; a < 3; ++a) - for (int b = a; b < 3; ++b) - calc_derivs2(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), ddgf_(a, b), - layout); -} - -template -CCTK_ATTRIBUTE_NOINLINE void -apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, - const vec, dim> &gf_betaG_, - const GF3D2 &gf_rhs_) { - DECLARE_CCTK_ARGUMENTS; - DECLARE_CCTK_PARAMETERS; - - const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); - - typedef simd vreal; - typedef simdl vbool; - constexpr size_t vsize = tuple_size_v; - - if (epsdiss == 0) { - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const vec betaG = gf_betaG_(mask, p.I); - const vreal rhs_old = gf_rhs_(mask, p.I); - const vreal rhs_new = - rhs_old + deriv_upwind(mask, gf_, p.I, betaG, dx); - gf_rhs_.store(mask, p.I, rhs_new); - }); - - } else { - - const Loop::GridDescBaseDevice grid(cctkGH); - grid.loop_int_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const vec betaG = gf_betaG_(mask, p.I); - const vreal rhs_old = gf_rhs_(mask, p.I); - const vreal rhs_new = rhs_old + - deriv_upwind(mask, gf_, p.I, betaG, dx) + - epsdiss * diss(mask, gf_, p.I, dx); - gf_rhs_.store(mask, p.I, rhs_new); - }); - } -} - -} // namespace Z4co - -#endif // #ifndef Z4CO_DERIVS_HXX diff --git a/Z4co/src/diss.hxx b/Z4co/src/diss.hxx new file mode 100644 index 00000000..c984a477 --- /dev/null +++ b/Z4co/src/diss.hxx @@ -0,0 +1,79 @@ +#ifndef Z4CO_DERIVS_HXX +#define Z4CO_DERIVS_HXX + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Z4co { +using namespace Arith; +using namespace Loop; +using namespace std; + +constexpr int deriv_o = 4; + +template +CCTK_ATTRIBUTE_NOINLINE void +apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, + const vec, dim> &gf_betaG_, + const GF3D2 &gf_rhs_) { + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + const vect dx{ + CCTK_DELTA_SPACE(0), + CCTK_DELTA_SPACE(1), + CCTK_DELTA_SPACE(2), + }; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + if (epsdiss == 0) { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = rhs_old + Derivs::calc_deriv_upwind( + gf_, mask, p.I, dx, betaG); + gf_rhs_.store(mask, p.I, rhs_new); + }); + + } else { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = + rhs_old + + Derivs::calc_deriv_upwind(gf_, mask, p.I, dx, betaG) + + epsdiss * Derivs::calc_diss(gf_, mask, p.I, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + } +} + +} // namespace Z4co + +#endif // #ifndef Z4CO_DERIVS_HXX diff --git a/Z4co/src/initial2.cxx b/Z4co/src/initial2.cxx index 74a2fa12..464b11cb 100644 --- a/Z4co/src/initial2.cxx +++ b/Z4co/src/initial2.cxx @@ -1,5 +1,4 @@ -#include "derivs.hxx" - +#include #include #include @@ -40,13 +39,45 @@ calc_gamma(const smat &gu, const vec, D> &Gammal) { extern "C" void Z4co_Initial2(CCTK_ARGUMENTS) { DECLARE_CCTK_ARGUMENTS_Z4co_Initial2; + DECLARE_CCTK_PARAMETERS; - const vec dx{ + const vect dx{ CCTK_DELTA_SPACE(0), CCTK_DELTA_SPACE(1), CCTK_DELTA_SPACE(2), }; + function, dim>( + const GF3D2 &, const simdl &, + const vect &, const vect &)> + calc_deriv; + + switch (deriv_order) { + case 2: + calc_deriv = [](const GF3D2 &gf, + const simdl &mask, const vect &I, + const vect &dx) { + return Derivs::calc_deriv<2>(gf, mask, I, dx); + }; + break; + case 4: + calc_deriv = [](const GF3D2 &gf, + const simdl &mask, const vect &I, + const vect &dx) { + return Derivs::calc_deriv<4>(gf, mask, I, dx); + }; + break; + case 6: + calc_deriv = [](const GF3D2 &gf, + const simdl &mask, const vect &I, + const vect &dx) { + return Derivs::calc_deriv<6>(gf, mask, I, dx); + }; + break; + default: + assert(0); + } + const array indextype = {0, 0, 0}; const GF3D2layout layout1(cctkGH, indextype); @@ -82,7 +113,7 @@ extern "C" void Z4co_Initial2(CCTK_ARGUMENTS) { const smat gammatu = calc_inv(gammat, vreal(1)); const smat, 3> dgammat([&](int a, int b) { - return deriv(mask, gf_gammat1(a, b), p.I, dx); + return calc_deriv(gf_gammat1(a, b), mask, p.I, dx); }); const vec, 3> Gammatl = calc_gammal(dgammat); diff --git a/Z4co/src/rhs.cxx b/Z4co/src/rhs.cxx index 2386abe8..d015d85f 100644 --- a/Z4co/src/rhs.cxx +++ b/Z4co/src/rhs.cxx @@ -10,8 +10,9 @@ // #define Power(x, y) (Arith::pown((x), (y))) -#include "derivs.hxx" +#include "diss.hxx" +#include #include #include #include @@ -112,37 +113,61 @@ extern "C" void Z4co_RHS(CCTK_ARGUMENTS) { const GF3D5 tl_chi(make_gf()); const vec, 3> tl_dchi(make_vec_gf()); const smat, 3> tl_ddchi(make_mat_gf()); - calc_derivs2(cctkGH, gf_chi, tl_chi, tl_dchi, tl_ddchi, layout5); + + const Loop::GridDescBaseDevice grid(cctkGH); + + const vect dx(std::array{ + CCTK_DELTA_SPACE(0), + CCTK_DELTA_SPACE(1), + CCTK_DELTA_SPACE(2), + }); + + const auto calccopy = [&](const auto &gf, const auto &gf0) { + Derivs::calc_copy<0, 0, 0>(gf, layout5, grid, gf0); + }; + + const auto calcderivs = [&](const auto &gf, const auto &dgf, + const auto &gf0) { + Derivs::calc_derivs<0, 0, 0>(gf, dgf, layout5, grid, gf0, dx, deriv_order); + }; + + const auto calcderivs2 = [&](const auto &gf, const auto &dgf, + const auto &ddgf, const auto &gf0) { + Derivs::calc_derivs2<0, 0, 0>(gf, dgf, ddgf, layout5, grid, gf0, dx, + deriv_order); + }; + + calcderivs2(tl_chi, tl_dchi, tl_ddchi, gf_chi); const smat, 3> tl_gamt(make_mat_gf()); const smat, 3>, 3> tl_dgamt(make_mat_vec_gf()); const smat, 3>, 3> tl_ddgamt(make_mat_mat_gf()); - calc_derivs2(cctkGH, gf_gamt, tl_gamt, tl_dgamt, tl_ddgamt, layout5); + calcderivs2(tl_gamt, tl_dgamt, tl_ddgamt, gf_gamt); const GF3D5 tl_exKh(make_gf()); const vec, 3> tl_dexKh(make_vec_gf()); - calc_derivs(cctkGH, gf_exKh, tl_exKh, tl_dexKh, layout5); + calcderivs(tl_exKh, tl_dexKh, gf_exKh); const smat, 3> tl_exAt(make_mat_gf()); - calc_copy(cctkGH, gf_exAt, tl_exAt, layout5); + calccopy(tl_exAt, gf_exAt); const vec, 3> tl_trGt(make_vec_gf()); const vec, 3>, 3> tl_dtrGt(make_vec_vec_gf()); - calc_derivs(cctkGH, gf_trGt, tl_trGt, tl_dtrGt, layout5); + calcderivs(tl_trGt, tl_dtrGt, gf_trGt); const GF3D5 tl_Theta(make_gf()); const vec, 3> tl_dTheta(make_vec_gf()); - calc_derivs(cctkGH, gf_Theta, tl_Theta, tl_dTheta, layout5); + calcderivs(tl_Theta, tl_dTheta, gf_Theta); const GF3D5 tl_alpha(make_gf()); const vec, 3> tl_dalpha(make_vec_gf()); const smat, 3> tl_ddalpha(make_mat_gf()); - calc_derivs2(cctkGH, gf_alpha, tl_alpha, tl_dalpha, tl_ddalpha, layout5); + calcderivs2(tl_alpha, tl_dalpha, tl_ddalpha, gf_alpha); const vec, 3> tl_beta(make_vec_gf()); const vec, 3>, 3> tl_dbeta(make_vec_vec_gf()); const vec, 3>, 3> tl_ddbeta(make_vec_mat_gf()); - calc_derivs2(cctkGH, gf_beta, tl_beta, tl_dbeta, tl_ddbeta, layout5); + calcderivs2(tl_beta, tl_dbeta, tl_ddbeta, gf_beta); if (itmp != ntmps) CCTK_VERROR("Wrong number of temporary variables: ntmps=%d itmp=%d", ntmps, @@ -211,8 +236,6 @@ extern "C" void Z4co_RHS(CCTK_ARGUMENTS) { const CCTK_REAL ceta = eta; // Loop - const Loop::GridDescBaseDevice grid(cctkGH); - #ifdef __CUDACC__ const nvtxRangeId_t range = nvtxRangeStartA("Z4co_RHS::rhs"); #endif