From cd182c8697381dedf463406e683b6a2a352f29b8 Mon Sep 17 00:00:00 2001 From: Liwei Ji Date: Tue, 2 Jul 2024 12:13:33 -0400 Subject: [PATCH] Z4co: copy some tools from Z4c --- Z4co/schedule.ccl | 138 +------------ Z4co/src/derivs.hxx | 455 ++++++++++++++++++++++++++++++++++++++++++ Z4co/src/enforce.cxx | 130 ++++++++++++ Z4co/src/initial1.cxx | 133 ++++++++++++ Z4co/src/initial2.cxx | 86 ++++++++ Z4co/src/physics.hxx | 99 +++++++++ 6 files changed, 912 insertions(+), 129 deletions(-) create mode 100644 Z4co/src/derivs.hxx create mode 100644 Z4co/src/enforce.cxx create mode 100644 Z4co/src/initial1.cxx create mode 100644 Z4co/src/initial2.cxx create mode 100644 Z4co/src/physics.hxx diff --git a/Z4co/schedule.ccl b/Z4co/schedule.ccl index 9788366c..208f0692 100644 --- a/Z4co/schedule.ccl +++ b/Z4co/schedule.ccl @@ -24,10 +24,12 @@ STORAGE: alphaG_rhs STORAGE: betaG_rhs - ################################################################################ - - +# We have 4 schedule groups: +# 1. initial: set up core Z4c variables from ADM variables +# 2. poststep: post-process core Z4c variables and calculate other variables +# 3. analysis: calculate constraints etc. +# 4. rhs: calculate RHS of Z4c variables SCHEDULE GROUP Z4co_InitialGroup AT initial AFTER ADMBaseX_PostInitial { @@ -41,8 +43,6 @@ SCHEDULE GROUP Z4co_PostStepGroup2 AT initial AFTER (TmunuBaseX_SetTmunuVars, Z4 { } "Post-process Z4c variables, part 2" - - SCHEDULE GROUP Z4co_PostStepGroup AT postregrid BEFORE ADMBaseX_SetADMVars { } "Post-process Z4c variables" @@ -51,14 +51,6 @@ SCHEDULE GROUP Z4co_PostStepGroup2 AT postregrid AFTER (TmunuBaseX_SetTmunuVars, { } "Post-process Z4c variables, part 2" - - -SCHEDULE GROUP Z4co_AnalysisGroup AT analysis -{ -} "Analyse Z4c variables" - - - SCHEDULE GROUP Z4co_PostStepGroup IN ODESolvers_PostStep BEFORE ADMBaseX_SetADMVars { } "Post-process Z4c variables" @@ -67,32 +59,17 @@ SCHEDULE GROUP Z4co_PostStepGroup2 IN ODESolvers_PostStep AFTER (TmunuBaseX_SetT { } "Post-process Z4c variables, part 2" +SCHEDULE GROUP Z4co_AnalysisGroup AT analysis +{ +} "Analyse Z4c variables" + SCHEDULE GROUP Z4co_RHSGroup IN ODESolvers_RHS { } "Calculate Z4c RHS" - - ################################################################################ - -SCHEDULE Z4co_Test AT wragh -{ - LANG: C - OPTIONS: meta -} "Self-test" - - - -# We have 4 schedule groups: -# 1. initial: set up core Z4c variables from ADM variables -# 2. poststep: post-process core Z4c variables and calculate other variables -# 3. analysis: calculate constraints etc. -# 4. rhs: calculate RHS of Z4c variables - - - SCHEDULE Z4co_Initial1 IN Z4co_InitialGroup { LANG: C @@ -107,13 +84,7 @@ SCHEDULE Z4co_Initial1 IN Z4co_InitialGroup WRITES: Theta(interior) WRITES: alphaG(interior) WRITES: betaG(interior) - # SYNC: chi SYNC: gamma_tilde - # SYNC: K_hat - # SYNC: A_tilde - # SYNC: Theta - # SYNC: alphaG - # SYNC: betaG } "Convert ADM to Z4c variables, part 1" SCHEDULE Z4co_Initial2 IN Z4co_InitialGroup AFTER Z4co_Initial1 @@ -121,11 +92,8 @@ SCHEDULE Z4co_Initial2 IN Z4co_InitialGroup AFTER Z4co_Initial1 LANG: C READS: gamma_tilde(everywhere) WRITES: Gam_tilde(interior) - # SYNC: Gam_tilde } "Convert ADM to Z4c variables, part 2" - - SCHEDULE Z4co_Enforce IN Z4co_PostStepGroup { LANG: C @@ -159,9 +127,6 @@ if (calc_ADM_vars) { READS: Theta(everywhere) READS: alphaG(everywhere) READS: betaG(everywhere) - # READS: TmunuBaseX::eTtt(interior) - # READS: TmunuBaseX::eTti(interior) - # READS: TmunuBaseX::eTij(interior) WRITES: ADMBaseX::metric(everywhere) WRITES: ADMBaseX::curv(everywhere) WRITES: ADMBaseX::lapse(everywhere) @@ -171,60 +136,6 @@ if (calc_ADM_vars) { } "Convert Z4c to ADM variables" } -if (calc_ADMRHS_vars) { - SCHEDULE Z4co_ADM2 IN Z4co_PostStepGroup2 - { - LANG: C - READS: chi(everywhere) - READS: gamma_tilde(everywhere) - READS: K_hat(everywhere) - READS: A_tilde(everywhere) - READS: Gam_tilde(everywhere) - READS: Theta(everywhere) - READS: alphaG(everywhere) - READS: betaG(everywhere) - READS: TmunuBaseX::eTtt(interior) - READS: TmunuBaseX::eTti(interior) - READS: TmunuBaseX::eTij(interior) - WRITES: ADMBaseX::dtcurv(interior) - WRITES: ADMBaseX::dt2lapse(interior) - WRITES: ADMBaseX::dt2shift(interior) - SYNC: ADMBaseX::dtcurv - SYNC: ADMBaseX::dt2lapse - SYNC: ADMBaseX::dt2shift - } "Calculate second time derivatives of ADM variables" -} - - - -if (calc_constraints) { - SCHEDULE Z4co_Constraints IN Z4co_AnalysisGroup - { - LANG: C - READS: chi(everywhere) - READS: gamma_tilde(everywhere) - READS: K_hat(everywhere) - READS: A_tilde(everywhere) - READS: Gam_tilde(everywhere) - READS: Theta(everywhere) - READS: alphaG(everywhere) - READS: betaG(everywhere) - READS: TmunuBaseX::eTtt(interior) - READS: TmunuBaseX::eTti(interior) - READS: TmunuBaseX::eTij(interior) - WRITES: ZtC(interior) - WRITES: HC(interior) - WRITES: MtC(interior) - WRITES: allC(interior) - # SYNC: ZtC - # SYNC: HC - # SYNC: MtC - # SYNC: allC - } "Calculate Z4c constraints" -} - - - SCHEDULE Z4co_RHS IN Z4co_RHSGroup { LANG: C @@ -256,34 +167,3 @@ SCHEDULE Z4co_RHS IN Z4co_RHSGroup SYNC: alphaG_rhs SYNC: betaG_rhs } "Calculate Z4c RHS" - -if (CCTK_Equals(boundary_conditions, "NewRadX")) { - SCHEDULE Z4co_apply_newradx_boundary_conditions IN Z4co_RHSGroup AFTER Z4co_RHS - { - LANG: C - READS: chi(everywhere) - READS: gamma_tilde(everywhere) - READS: K_hat(everywhere) - READS: A_tilde(everywhere) - READS: Gam_tilde(everywhere) - READS: Theta(everywhere) - READS: alphaG(everywhere) - READS: betaG(everywhere) - READS: chi_rhs(everywhere) - READS: gamma_tilde_rhs(everywhere) - READS: K_hat_rhs(everywhere) - READS: A_tilde_rhs(everywhere) - READS: Gam_tilde_rhs(everywhere) - READS: Theta_rhs(everywhere) - READS: alphaG_rhs(everywhere) - READS: betaG_rhs(everywhere) - WRITES: chi_rhs(interior) - WRITES: gamma_tilde_rhs(interior) - WRITES: K_hat_rhs(interior) - WRITES: A_tilde_rhs(interior) - WRITES: Gam_tilde_rhs(interior) - WRITES: Theta_rhs(interior) - WRITES: alphaG_rhs(interior) - WRITES: betaG_rhs(interior) - } "Apply radiative boundary conditions to Z4c RHS variables using NewRadX" -} diff --git a/Z4co/src/derivs.hxx b/Z4co/src/derivs.hxx new file mode 100644 index 00000000..89518948 --- /dev/null +++ b/Z4co/src/derivs.hxx @@ -0,0 +1,455 @@ +#ifndef DERIVS_HXX +#define DERIVS_HXX + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace Z4co { +using namespace Arith; +using namespace Loop; +using namespace std; + +//////////////////////////////////////////////////////////////////////////////// + +constexpr int deriv_order = 4; + +//////////////////////////////////////////////////////////////////////////////// + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, + const T dx) { + if constexpr (deriv_order == 2) + return -1 / T(2) * + (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[+di])) / dx; + if constexpr (deriv_order == 4) + return (1 / T(12) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[+2 * di])) // + - + 2 / T(3) * + (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[+di]))) / + dx; +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d_upwind(const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const simd &vel, const T dx) { + // arXiv:1111.2177 [gr-qc], (71) + if constexpr (deriv_order == 2) { + // if (sign) + // // + [ 0 -1 +1 0 0] + // // + 1/2 [+1 -2 +1 0 0] + // // [+1/2 -2 +3/2 0 0] + // return (1 / T(2) * var[-2 * di] // + // - 2 * var[-di] // + // + 3 / T(2) * var[0]) / + // dx; + // else + // // + [ 0 0 -1 +1 0 ] + // // - 1/2 [ 0 0 +1 -2 +1 ] + // // [ 0 0 -3/2 +2 -1/2] + // return (-3 / T(2) * var[0] // + // + 2 * var[+di] // + // - 1 / T(2) * var[+2 * di]) / + // dx; + + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // + 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 0 +1 -1/4] + const simd symm = + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[2 * di])) // + - (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); + // + 1/2 [+1/2 -2 +3/2 0 0 ] + // - 1/2 [ 0 0 -3/2 +2 -1/2] + // [+1/4 -1 +3/2 -1 +1/4] + const simd anti = + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[2 * di])) // + - (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // + + 3 / T(2) * maskz_loadu(mask, &var[0]); + return (vel * symm - fabs(vel) * anti) / dx; + } + if constexpr (deriv_order == 4) { + // A fourth order stencil for a first derivative, shifted by one grid point + + // if (sign) + // return (-1 / T(12) * var[-3 * di] // + // + 1 / T(2) * var[-2 * di] // + // - 3 / T(2) * var[-di] // + // + 5 / T(6) * var[0] // + // + 1 / T(4) * var[+di]) / + // dx; + // else + // return (-1 / T(4) * var[-di] // + // - 5 / T(6) * var[0] // + // + 3 / T(2) * var[+di] // + // - 1 / T(2) * var[+2 * di] // + // + 1 / T(12) * var[+3 * di]) / + // dx; + + const simd symm = + -1 / T(24) * + (maskz_loadu(mask, &var[-3 * di]) - + maskz_loadu(mask, &var[3 * di])) // + + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) - + maskz_loadu(mask, &var[2 * di])) // + - + 7 / T(8) * (maskz_loadu(mask, &var[-di]) - maskz_loadu(mask, &var[di])); + const simd anti = + -1 / T(24) * + (maskz_loadu(mask, &var[-3 * di]) + + maskz_loadu(mask, &var[3 * di])) // + + 1 / T(4) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[2 * di])) // + - 5 / T(8) * + (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[di])) // + + 5 / T(6) * maskz_loadu(mask, &var[0]); + return (vel * symm - fabs(vel) * anti) / dx; + } +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv2_1d(const simdl &mask, const T *restrict const var, const ptrdiff_t di, + const T dx) { + if constexpr (deriv_order == 2) + return ((maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // + - 2 * maskz_loadu(mask, &var[0])) / + pow2(dx); + if constexpr (deriv_order == 4) + return (-1 / T(12) * + (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + + + 4 / T(3) * + (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // + - 5 / T(2) * maskz_loadu(mask, &var[0])) / + pow2(dx); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv2_2d(const int vavail, const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const ptrdiff_t dj, const T dx, const T dy) { + constexpr size_t vsize = tuple_size_v >; + if (di == 1) { + assert(vavail > 0); + constexpr int maxnpoints = deriv_order + 1 + vsize - 1; + const int npoints = deriv_order + 1 + min(int(vsize), vavail) - 1; + array, div_ceil(maxnpoints, int(vsize))> arrx; + for (int i = 0; i < maxnpoints; i += vsize) { + if (i < npoints) { + const simdl mask1 = mask_for_loop_tail >(i, npoints); + arrx[div_floor(i, int(vsize))] = + deriv1d(mask1, &var[i - deriv_order / 2], dj, dy); + } + } +#ifdef CCTK_DEBUG + for (int i = npoints; i < align_ceil(maxnpoints, int(vsize)); ++i) + ((T *)&arrx[0])[i] = Arith::nan()(); // unused +#endif + const T *const varx = (T *)&arrx[0] + deriv_order / 2; + return deriv1d(mask, varx, 1, dx); + } else { + assert(dj != 1); + array, deriv_order + 1> arrx; + for (int j = -deriv_order / 2; j <= deriv_order / 2; ++j) + if (j == 0) { +#ifdef CCTK_DEBUG + arrx[deriv_order / 2 + j] = Arith::nan >()(); // unused +#endif + } else { + arrx[deriv_order / 2 + j] = deriv1d(mask, &var[j * dj], di, dx); + } + const T *const varx = (T *)(&arrx[deriv_order / 2]); + return deriv1d(mask, varx, vsize, dy); + } +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv1d_diss(const simdl &mask, const T *restrict const var, + const ptrdiff_t di, const T dx) { + if constexpr (deriv_order == 2) + return ((maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + - + 4 * (maskz_loadu(mask, &var[-di]) + maskz_loadu(mask, &var[+di])) // + + 6 * maskz_loadu(mask, &var[0])) / + dx; + if constexpr (deriv_order == 4) + return ((maskz_loadu(mask, &var[-3 * di]) + + maskz_loadu(mask, &var[+3 * di])) // + - 6 * (maskz_loadu(mask, &var[-2 * di]) + + maskz_loadu(mask, &var[+2 * di])) // + + 15 * (maskz_loadu(mask, &var[-di]) + + maskz_loadu(mask, &var[+di])) // + - 20 * maskz_loadu(mask, &var[0])) / + dx; +} + +//////////////////////////////////////////////////////////////////////////////// + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d(mask, &gf_(I), di, dx(dir)); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_upwind(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec, D> &vel, + const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d_upwind(mask, &gf_(I), di, vel(dir), dx(dir)); +} + +template +inline ARITH_INLINE + ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 == dir2), simd > + deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir1 >= 0 && dir1 < D, ""); + static_assert(dir2 >= 0 && dir2 < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir1)); + return deriv2_1d(mask, &gf_(I), di, dx(dir1)); +} + +template +inline ARITH_INLINE + ARITH_DEVICE ARITH_HOST enable_if_t<(dir1 != dir2), simd > + deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir1 >= 0 && dir1 < D, ""); + static_assert(dir2 >= 0 && dir2 < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir1)); + const ptrdiff_t dj = gf_.delta(DI(dir2)); + return deriv2_2d(vavail, mask, &gf_(I), di, dj, dx(dir1), dx(dir2)); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_diss(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + static_assert(dir >= 0 && dir < D, ""); + const auto &DI = vect::unit; + const ptrdiff_t di = gf_.delta(DI(dir)); + return deriv1d_diss(mask, &gf_(I), di, dx(dir)); +} + +//////////////////////////////////////////////////////////////////////////////// + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST vec, dim> +deriv(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + return {deriv<0>(mask, gf_, I, dx), deriv<1>(mask, gf_, I, dx), + deriv<2>(mask, gf_, I, dx)}; +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +deriv_upwind(const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec, dim> &vel, + const vec &dx) { + return deriv_upwind<0>(mask, gf_, I, vel, dx) + + deriv_upwind<1>(mask, gf_, I, vel, dx) + + deriv_upwind<2>(mask, gf_, I, vel, dx); +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST smat, dim> +deriv2(const int vavail, const simdl &mask, const GF3D2 &gf_, + const vect &I, const vec &dx) { + return {deriv2<0, 0>(vavail, mask, gf_, I, dx), + deriv2<0, 1>(vavail, mask, gf_, I, dx), + deriv2<0, 2>(vavail, mask, gf_, I, dx), + deriv2<1, 1>(vavail, mask, gf_, I, dx), + deriv2<1, 2>(vavail, mask, gf_, I, dx), + deriv2<2, 2>(vavail, mask, gf_, I, dx)}; +} + +template +inline ARITH_INLINE ARITH_DEVICE ARITH_HOST simd +diss(const simdl &mask, const GF3D2 &gf_, const vect &I, + const vec &dx) { + // arXiv:gr-qc/0610128, (63), with r=2 + constexpr int diss_order = deriv_order + 2; + constexpr int sign = diss_order % 4 == 0 ? -1 : +1; + return sign / T(pown(2, deriv_order + 2)) * + (deriv_diss<0>(mask, gf_, I, dx) // + + deriv_diss<1>(mask, gf_, I, dx) // + + deriv_diss<2>(mask, gf_, I, dx)); +} + +//////////////////////////////////////////////////////////////////////////////// + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const vec, dim> &dgf0, + const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + const auto dval = deriv(mask, gf1, p.I, dx); + dgf0.store(mask, index0, dval); + }); +} + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs2(const cGH *restrict const cctkGH, const GF3D2 &gf1, + const GF3D5 &gf0, const vec, dim> &dgf0, + const smat, dim> &ddgf0, const GF3D5layout &layout0) { + DECLARE_CCTK_ARGUMENTS; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const int vavail = p.imax - p.i; + const GF3D5index index0(layout0, p.I); + const auto val = gf1(mask, p.I); + gf0.store(mask, index0, val); + const auto dval = deriv(mask, gf1, p.I, dx); + dgf0.store(mask, index0, dval); + const auto ddval = deriv2(vavail, mask, gf1, p.I, dx); + ddgf0.store(mask, index0, ddval); + }); +} + +template +CCTK_ATTRIBUTE_NOINLINE void +calc_derivs(const cGH *restrict const cctkGH, + const vec, dim> &gf0_, const vec, dim> &gf_, + const vec, dim>, dim> &dgf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_derivs(cctkGH, gf0_(a), gf_(a), dgf_(a), layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( + const cGH *restrict const cctkGH, const vec, dim> &gf0_, + const vec, dim> &gf_, const vec, dim>, dim> &dgf_, + const vec, dim>, dim> &ddgf_, const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + calc_derivs2(cctkGH, gf0_(a), gf_(a), dgf_(a), ddgf_(a), layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs( + const cGH *restrict const cctkGH, const smat, dim> &gf0_, + const smat, dim> &gf_, const smat, dim>, dim> &dgf_, + const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_derivs(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void calc_derivs2( + const cGH *restrict const cctkGH, const smat, dim> &gf0_, + const smat, dim> &gf_, const smat, dim>, dim> &dgf_, + const smat, dim>, dim> &ddgf_, const GF3D5layout &layout) { + for (int a = 0; a < 3; ++a) + for (int b = a; b < 3; ++b) + calc_derivs2(cctkGH, gf0_(a, b), gf_(a, b), dgf_(a, b), ddgf_(a, b), + layout); +} + +template +CCTK_ATTRIBUTE_NOINLINE void +apply_upwind_diss(const cGH *restrict const cctkGH, const GF3D2 &gf_, + const vec, dim> &gf_betaG_, + const GF3D2 &gf_rhs_) { + DECLARE_CCTK_ARGUMENTS; + DECLARE_CCTK_PARAMETERS; + + const vec dx([&](int a) { return CCTK_DELTA_SPACE(a); }); + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + if (epsdiss == 0) { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = + rhs_old + deriv_upwind(mask, gf_, p.I, betaG, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + + } else { + + const Loop::GridDescBaseDevice grid(cctkGH); + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const vec betaG = gf_betaG_(mask, p.I); + const vreal rhs_old = gf_rhs_(mask, p.I); + const vreal rhs_new = rhs_old + + deriv_upwind(mask, gf_, p.I, betaG, dx) + + epsdiss * diss(mask, gf_, p.I, dx); + gf_rhs_.store(mask, p.I, rhs_new); + }); + } +} + +} // namespace Z4co + +#endif // #ifndef DERIVS_HXX diff --git a/Z4co/src/enforce.cxx b/Z4co/src/enforce.cxx new file mode 100644 index 00000000..e84f87dd --- /dev/null +++ b/Z4co/src/enforce.cxx @@ -0,0 +1,130 @@ +#include "physics.hxx" + +#include +#include +#include +#include + +#include +#include +#include + +#ifdef __CUDACC__ +#include +#endif + +#include +#include + +namespace Z4co { +using namespace Arith; +using namespace Loop; +using namespace std; + +extern "C" void Z4co_Enforce(CCTK_ARGUMENTS) { + DECLARE_CCTK_ARGUMENTSX_Z4co_Enforce; + DECLARE_CCTK_PARAMETERS; + + const array indextype = {0, 0, 0}; + const GF3D2layout layout1(cctkGH, indextype); + + const GF3D2 &gf_chi = chi; + + const GF3D2 &gf_alphaG = alphaG; + + const smat, 3> gf_gammat{ + gammatxx, gammatxy, gammatxz, gammatyy, gammatyz, gammatzz, + }; + + const smat, 3> gf_At{ + Atxx, Atxy, Atxz, Atyy, Atyz, Atzz, + }; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const auto delta3 = one >()(); + +#ifdef __CUDACC__ + const nvtxRangeId_t range = nvtxRangeStartA("Z4co_Enforce::enforce"); +#endif + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D2index index1(layout1, p.I); + + // Load + const vreal chi_old = gf_chi(mask, index1); + const vreal alphaG_old = gf_alphaG(mask, index1); + + const smat gammat_old = gf_gammat(mask, index1); + const smat At_old = gf_At(mask, index1); + + // Enforce floors + + const vreal chi = fmax(vreal(chi_floor - 1), chi_old); + const vreal alphaG = fmax(vreal(alphaG_floor - 1), alphaG_old); + + // Enforce algebraic constraints + // See arXiv:1212.2901 [gr-qc]. + + const vreal detgammat_old = calc_det(delta3 + gammat_old); + const vreal chi1_old = 1 / cbrt(detgammat_old) - 1; + const smat gammat([&](int a, int b) ARITH_INLINE { + return (1 + chi1_old) * (delta3(a, b) + gammat_old(a, b)) - + delta3(a, b); + }); +#ifdef CCTK_DEBUG + const vreal detgammat = calc_det(delta3 + gammat); + const vreal gammat_norm = maxabs(delta3 + gammat); + const vreal gammat_scale = gammat_norm; +#if !defined __CUDACC__ && !defined __HIPCC__ + if (!(all(fabs(detgammat - 1) <= 1.0e-12 * gammat_scale))) { + ostringstream buf; + buf << "det gammat is not one: gammat=" << gammat + << " det(gammat)=" << detgammat; + CCTK_VERROR("%s", buf.str().c_str()); + } +#endif + assert(all(fabs(detgammat - 1) <= 1.0e-12 * gammat_scale)); +#endif + + const smat gammatu = + calc_inv(delta3 + gammat, vreal(1)) - delta3; + + const vreal traceAt_old = sum_symm<3>([&](int x, int y) ARITH_INLINE { + return (delta3(x, y) + gammatu(x, y)) * At_old(x, y); + }); + const smat At([&](int a, int b) ARITH_INLINE { + return At_old(a, b) - traceAt_old / 3 * (delta3(a, b) + gammat(a, b)); + }); +#ifdef CCTK_DEBUG + const vreal traceAt = sum_symm<3>([&](int x, int y) ARITH_INLINE { + return (delta3(x, y) + gammatu(x, y)) * At(x, y); + }); + const vreal gammatu_norm = maxabs(delta3 + gammatu); + const vreal At_norm = maxabs(At); + const vreal At_scale = fmax(fmax(gammat_norm, gammatu_norm), At_norm); +#if !defined __CUDACC__ && !defined __HIPCC__ + if (!(all(fabs(traceAt) <= 1.0e-12 * At_scale))) { + ostringstream buf; + buf << "tr At: At=" << At << " tr(At)=" << traceAt; + CCTK_VERROR("%s", buf.str().c_str()); + } +#endif + assert(all(fabs(traceAt) <= 1.0e-12 * At_scale)); +#endif + + // Store + gf_chi.store(mask, index1, chi); + gf_gammat.store(mask, index1, gammat); + gf_At.store(mask, index1, At); + gf_alphaG.store(mask, index1, alphaG); + }); +#ifdef __CUDACC__ + nvtxRangeEnd(range); +#endif +} + +} // namespace Z4co diff --git a/Z4co/src/initial1.cxx b/Z4co/src/initial1.cxx new file mode 100644 index 00000000..388db65d --- /dev/null +++ b/Z4co/src/initial1.cxx @@ -0,0 +1,133 @@ +#include "physics.hxx" + +#include +#include +#include +#include + +#include +#include + +#ifdef __CUDACC__ +#include +#endif + +#include + +namespace Z4co { +using namespace Arith; +using namespace Loop; +using namespace std; + +extern "C" void Z4co_Initial1(CCTK_ARGUMENTS) { + DECLARE_CCTK_ARGUMENTS_Z4co_Initial1; + + const array indextype = {0, 0, 0}; + const GF3D2layout layout1(cctkGH, indextype); + + const smat, 3> gf_g1{ + GF3D2(layout1, gxx), + GF3D2(layout1, gxy), + GF3D2(layout1, gxz), + GF3D2(layout1, gyy), + GF3D2(layout1, gyz), + GF3D2(layout1, gzz)}; + + const smat, 3> gf_K1{ + GF3D2(layout1, kxx), + GF3D2(layout1, kxy), + GF3D2(layout1, kxz), + GF3D2(layout1, kyy), + GF3D2(layout1, kyz), + GF3D2(layout1, kzz)}; + + const GF3D2 gf_alp1(layout1, alp); + + const vec, 3> gf_beta1{ + GF3D2(layout1, betax), + GF3D2(layout1, betay), + GF3D2(layout1, betaz)}; + + const GF3D2 gf_chi1(layout1, chi); + + const smat, 3> gf_gammat1{ + GF3D2(layout1, gammatxx), GF3D2(layout1, gammatxy), + GF3D2(layout1, gammatxz), GF3D2(layout1, gammatyy), + GF3D2(layout1, gammatyz), GF3D2(layout1, gammatzz)}; + + const GF3D2 gf_Kh1(layout1, Kh); + + const smat, 3> gf_At1{ + GF3D2(layout1, Atxx), GF3D2(layout1, Atxy), + GF3D2(layout1, Atxz), GF3D2(layout1, Atyy), + GF3D2(layout1, Atyz), GF3D2(layout1, Atzz)}; + + const GF3D2 gf_Theta1(layout1, Theta); + + const GF3D2 gf_alphaG1(layout1, alphaG); + + const vec, 3> gf_betaG1{GF3D2(layout1, betaGx), + GF3D2(layout1, betaGy), + GF3D2(layout1, betaGz)}; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const auto delta3 = one >()(); + + const Loop::GridDescBaseDevice grid(cctkGH); +#ifdef __CUDACC__ + const nvtxRangeId_t range = nvtxRangeStartA("Z4co_Initial1::initial1"); +#endif + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D2index index1(layout1, p.I); + + // Load + const smat g = gf_g1(mask, index1, one >()()); + const smat K = gf_K1(mask, index1); + const vreal alp = gf_alp1(mask, index1, 1); + const vec beta = gf_beta1(mask, index1); + + // Calculate Z4c variables (all except Gammat) + const vreal detg = calc_det(g); + const smat gu = calc_inv(g, detg); + + const vreal chi = 1 / cbrt(detg) - 1; + + const smat gammat([&](int a, int b) ARITH_INLINE { + return (1 + chi) * g(a, b) - delta3(a, b); + }); + + const vreal trK = sum_symm<3>( + [&](int x, int y) ARITH_INLINE { return gu(x, y) * K(x, y); }); + + const vreal Theta = 0; + + const vreal Kh = trK - 2 * Theta; + + const smat At([&](int a, int b) ARITH_INLINE { + return (1 + chi) * (K(a, b) - trK / 3 * g(a, b)); + }); + + const vreal alphaG = alp - 1; + + const vec betaG([&](int a) ARITH_INLINE { return beta(a); }); + + // Store + gf_chi1.store(mask, index1, chi); + gf_gammat1.store(mask, index1, gammat); + gf_Kh1.store(mask, index1, Kh); + gf_At1.store(mask, index1, At); + gf_Theta1.store(mask, index1, Theta); + gf_alphaG1.store(mask, index1, alphaG); + gf_betaG1.store(mask, index1, betaG); + }); +#ifdef __CUDACC__ + nvtxRangeEnd(range); +#endif +} + +} // namespace Z4co diff --git a/Z4co/src/initial2.cxx b/Z4co/src/initial2.cxx new file mode 100644 index 00000000..ae22b2f9 --- /dev/null +++ b/Z4co/src/initial2.cxx @@ -0,0 +1,86 @@ +#include "derivs.hxx" +#include "physics.hxx" + +#include +#include + +#include +#include + +#ifdef __CUDACC__ +#include +#endif + +namespace Z4co { +using namespace Arith; +using namespace Loop; +using namespace std; + +extern "C" void Z4co_Initial2(CCTK_ARGUMENTS) { + DECLARE_CCTK_ARGUMENTS_Z4co_Initial2; + + const vec dx{ + CCTK_DELTA_SPACE(0), + CCTK_DELTA_SPACE(1), + CCTK_DELTA_SPACE(2), + }; + + const array indextype = {0, 0, 0}; + const GF3D2layout layout1(cctkGH, indextype); + + const smat, 3> gf_gammat1{ + GF3D2(layout1, gammatxx), + GF3D2(layout1, gammatxy), + GF3D2(layout1, gammatxz), + GF3D2(layout1, gammatyy), + GF3D2(layout1, gammatyz), + GF3D2(layout1, gammatzz)}; + + const vec, 3> gf_Gamt1{GF3D2(layout1, Gamtx), + GF3D2(layout1, Gamty), + GF3D2(layout1, Gamtz)}; + + typedef simd vreal; + typedef simdl vbool; + constexpr size_t vsize = tuple_size_v; + + const auto delta3 = one >()(); + + const Loop::GridDescBaseDevice grid(cctkGH); +#ifdef __CUDACC__ + const nvtxRangeId_t range = nvtxRangeStartA("Z4co_Initial2::initial2"); +#endif + grid.loop_int_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D2index index1(layout1, p.I); + + // Load + const smat gammat = gf_gammat1(mask, index1); + + // Calculate Z4c variables (only Gamt) + const smat gammatu = + calc_inv(delta3 + gammat, vreal(1)) - delta3; + + const smat, 3> dgammat([&](int a, int b) { + return deriv(mask, gf_gammat1(a, b), p.I, dx); + }); + + const vec, 3> Gammatl = calc_gammal(dgammat); + const vec, 3> Gammat = + calc_gamma(delta3 + gammatu, Gammatl); + const vec Gamt([&](int a) ARITH_INLINE { + return sum_symm<3>([&](int x, int y) ARITH_INLINE { + return (delta3(x, y) + gammatu(x, y)) * Gammat(a)(x, y); + }); + }); + + // Store + gf_Gamt1.store(mask, index1, Gamt); + }); +#ifdef __CUDACC__ + nvtxRangeEnd(range); +#endif +} + +} // namespace Z4co diff --git a/Z4co/src/physics.hxx b/Z4co/src/physics.hxx new file mode 100644 index 00000000..9772b5c2 --- /dev/null +++ b/Z4co/src/physics.hxx @@ -0,0 +1,99 @@ +#ifndef PHYSICS_HXX +#define PHYSICS_HXX + +#include +#include +#include +#include + +namespace Z4co { +using namespace Arith; + +template +ARITH_INLINE ARITH_DEVICE ARITH_HOST constexpr smat, D> +calc_dgu(const smat &gu, const smat, D> &dg) { + // g_xy = g_xa g_yb g^ab + // g_xy,c = (g_xa g_yb g^ab),c + // = g_xa,c g_yb g^ab + g_xa g_yb,c g^ab + g_xa g_yb g^ab,c + // g_xa g_yb g^ab,c = g_xy,c - g_xa,c g_yb g^ab - g_xa g_yb,c g^ab + // = g_xy,c - g_xy,c - g_xy,c + // = - g_xy,c + // g^ab,c = - g^ax g^by g_xy,c + return smat, D>([&](int a, int b) ARITH_INLINE { + return vec([&](int c) ARITH_INLINE { + // return sum2sym([&](int x, int y) ARITH_INLINE { + // return -gu(a, x) * gu(b, y) * dg(x, y)(c); + // }); + return sum([&](int x) ARITH_INLINE { + return -gu(a, x) * sum([&](int y) ARITH_INLINE { + return gu(b, y) * dg(x, y)(c); + }); + }); + }); + }); +} + +template +ARITH_INLINE ARITH_DEVICE ARITH_HOST constexpr smat, D> +calc_dAu(const smat &gu, const smat, D> &dgu, + const smat &A, const smat, D> &dA) { + // A^ab,c = (g^ax g^by A_xy),c + // = g^ax,c g^by A_xy + g^ax g^by,c A_xy + g^ax g^by A_xy,c + return smat, D>([&](int a, int b) ARITH_INLINE { + return vec([&](int c) ARITH_INLINE { + // return sum2sym([&](int x, int y) ARITH_INLINE { + // return dgu(a, x)(c) * gu(b, y) * A(x, y) // + // + gu(a, x) * dgu(b, y)(c) * A(x, y) // + // + gu(a, x) * gu(b, y) * dA(x, y)(c); + // }); + return sum([&](int x) ARITH_INLINE { + return gu(b, x) * sum([&](int y) ARITH_INLINE { + return dgu(a, y)(c) * A(x, y) // + + dgu(b, y)(c) * A(x, y) // + + gu(b, y) * dA(x, y)(c); + }); + }); + }); + }); +} + +template +ARITH_INLINE ARITH_DEVICE ARITH_HOST constexpr vec, D> +calc_gammal(const smat, D> &dg) { + // Gammal_abc + return vec, D>([&](int a) ARITH_INLINE { + return smat([&](int b, int c) ARITH_INLINE { + return (dg(a, b)(c) + dg(a, c)(b) - dg(b, c)(a)) / 2; + }); + }); +} + +template +ARITH_INLINE ARITH_DEVICE ARITH_HOST constexpr vec, D> +calc_gamma(const smat &gu, const vec, D> &Gammal) { + // Gamma^a_bc + return vec, D>([&](int a) ARITH_INLINE { + return smat([&](int b, int c) ARITH_INLINE { + return sum([&](int x) + ARITH_INLINE { return gu(a, x) * Gammal(x)(b, c); }); + }); + }); +} + +template +ARITH_INLINE ARITH_DEVICE ARITH_HOST constexpr vec, D>, D> +calc_gammalu(const smat &gu, const vec, D> &Gammal) { + // Gamma_ab^c + return vec, D>, D>([&](int a) ARITH_INLINE { + return vec, D>([&](int b) ARITH_INLINE { + return vec([&](int c) ARITH_INLINE { + return sum([&](int x) + ARITH_INLINE { return Gammal(a)(b, x) * gu(x, c); }); + }); + }); + }); +} + +} // namespace Z4co + +#endif // #ifndef PHYSICS_HXX