From 10126ee3f8ef04305832a750835ef586943c69a3 Mon Sep 17 00:00:00 2001 From: Liwei Ji Date: Sun, 7 Jul 2024 16:05:58 -0400 Subject: [PATCH] Z4co: reduce times of calling (mask,index) --- Z4co/src/adm.cxx | 23 +----------- Z4co/wolfram/Z4co_set_ADM.hxx | 70 +++++++++++++++++++++++++---------- Z4co/wolfram/Z4co_set_ADM.wl | 15 +++++++- 3 files changed, 66 insertions(+), 42 deletions(-) diff --git a/Z4co/src/adm.cxx b/Z4co/src/adm.cxx index 71f46648..7df081f1 100644 --- a/Z4co/src/adm.cxx +++ b/Z4co/src/adm.cxx @@ -74,35 +74,14 @@ extern "C" void Z4co_ADM(CCTK_ARGUMENTS) { typedef simdl vbool; constexpr size_t vsize = tuple_size_v; - const GF3D2 &ADMgam11 = gf_ADMgam(0, 0); - const GF3D2 &ADMgam12 = gf_ADMgam(0, 1); - const GF3D2 &ADMgam13 = gf_ADMgam(0, 2); - const GF3D2 &ADMgam22 = gf_ADMgam(1, 1); - const GF3D2 &ADMgam23 = gf_ADMgam(1, 2); - const GF3D2 &ADMgam33 = gf_ADMgam(2, 2); - const GF3D2 &ADMK11 = gf_ADMK(0, 0); - const GF3D2 &ADMK12 = gf_ADMK(0, 1); - const GF3D2 &ADMK13 = gf_ADMK(0, 2); - const GF3D2 &ADMK22 = gf_ADMK(1, 1); - const GF3D2 &ADMK23 = gf_ADMK(1, 2); - const GF3D2 &ADMK33 = gf_ADMK(2, 2); - const GF3D2 &ADMalpha = gf_ADMalpha; - const GF3D2 &ADMbeta1 = gf_ADMbeta(0); - const GF3D2 &ADMbeta2 = gf_ADMbeta(1); - const GF3D2 &ADMbeta3 = gf_ADMbeta(2); - const Loop::GridDescBaseDevice grid(cctkGH); #ifdef __CUDACC__ const nvtxRangeId_t range = nvtxRangeStartA("Z4co_ADM::adm"); #endif - grid.loop_all_device<0, 0, 0, vsize>( - grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { - const vbool mask = mask_for_loop_tail(p.i, p.imax); - const GF3D2index index2(layout2, p.I); #include "../wolfram/Z4co_set_ADM.hxx" - }); + #ifdef __CUDACC__ nvtxRangeEnd(range); #endif diff --git a/Z4co/wolfram/Z4co_set_ADM.hxx b/Z4co/wolfram/Z4co_set_ADM.hxx index 32bae244..a3ee2114 100644 --- a/Z4co/wolfram/Z4co_set_ADM.hxx +++ b/Z4co/wolfram/Z4co_set_ADM.hxx @@ -1,25 +1,55 @@ /* Z4co_set_ADM.hxx */ /* Produced with Mathematica */ -const vreal &chi = gf_chi(mask, index2, 1); -const vreal &gamt11 = gf_gamt(mask, index2, 1)(0,0); -const vreal &gamt12 = gf_gamt(mask, index2, 0)(0,1); -const vreal &gamt13 = gf_gamt(mask, index2, 0)(0,2); -const vreal &gamt22 = gf_gamt(mask, index2, 1)(1,1); -const vreal &gamt23 = gf_gamt(mask, index2, 0)(1,2); -const vreal &gamt33 = gf_gamt(mask, index2, 1)(2,2); -const vreal &exKh = gf_exKh(mask, index2); -const vreal &exAt11 = gf_exAt(mask, index2)(0,0); -const vreal &exAt12 = gf_exAt(mask, index2)(0,1); -const vreal &exAt13 = gf_exAt(mask, index2)(0,2); -const vreal &exAt22 = gf_exAt(mask, index2)(1,1); -const vreal &exAt23 = gf_exAt(mask, index2)(1,2); -const vreal &exAt33 = gf_exAt(mask, index2)(2,2); -const vreal &Theta = gf_Theta(mask, index2); -const vreal &alpha = gf_alpha(mask, index2, 1); -const vreal &beta1 = gf_beta(mask, index2)(0); -const vreal &beta2 = gf_beta(mask, index2)(1); -const vreal &beta3 = gf_beta(mask, index2)(2); +const GF3D2 &ADMgam11 = gf_ADMgam(0,0); +const GF3D2 &ADMgam12 = gf_ADMgam(0,1); +const GF3D2 &ADMgam13 = gf_ADMgam(0,2); +const GF3D2 &ADMgam22 = gf_ADMgam(1,1); +const GF3D2 &ADMgam23 = gf_ADMgam(1,2); +const GF3D2 &ADMgam33 = gf_ADMgam(2,2); +const GF3D2 &ADMK11 = gf_ADMK(0,0); +const GF3D2 &ADMK12 = gf_ADMK(0,1); +const GF3D2 &ADMK13 = gf_ADMK(0,2); +const GF3D2 &ADMK22 = gf_ADMK(1,1); +const GF3D2 &ADMK23 = gf_ADMK(1,2); +const GF3D2 &ADMK33 = gf_ADMK(2,2); +const GF3D2 &ADMalpha = gf_ADMalpha; +const GF3D2 &ADMbeta1 = gf_ADMbeta(0); +const GF3D2 &ADMbeta2 = gf_ADMbeta(1); +const GF3D2 &ADMbeta3 = gf_ADMbeta(2); + +grid.loop_all_device<0, 0, 0, vsize>( + grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE { + const vbool mask = mask_for_loop_tail(p.i, p.imax); + const GF3D2index index2(layout2, p.I); + +const auto &tmp_chi = gf_chi(mask, index2); +const auto &tmp_gamt = gf_gamt(mask, index2); +const auto &tmp_exKh = gf_exKh(mask, index2); +const auto &tmp_exAt = gf_exAt(mask, index2); +const auto &tmp_Theta = gf_Theta(mask, index2); +const auto &tmp_alpha = gf_alpha(mask, index2); +const auto &tmp_beta = gf_beta(mask, index2); + +const vreal chi = tmp_chi; +const vreal gamt11 = tmp_gamt(0,0); +const vreal gamt12 = tmp_gamt(0,1); +const vreal gamt13 = tmp_gamt(0,2); +const vreal gamt22 = tmp_gamt(1,1); +const vreal gamt23 = tmp_gamt(1,2); +const vreal gamt33 = tmp_gamt(2,2); +const vreal exKh = tmp_exKh; +const vreal exAt11 = tmp_exAt(0,0); +const vreal exAt12 = tmp_exAt(0,1); +const vreal exAt13 = tmp_exAt(0,2); +const vreal exAt22 = tmp_exAt(1,1); +const vreal exAt23 = tmp_exAt(1,2); +const vreal exAt33 = tmp_exAt(2,2); +const vreal Theta = tmp_Theta; +const vreal alpha = tmp_alpha; +const vreal beta1 = tmp_beta(0); +const vreal beta2 = tmp_beta(1); +const vreal beta3 = tmp_beta(2); ADMgam11.store(mask, index2, gamt11/chi @@ -86,4 +116,6 @@ beta3 ); +}); + /* Z4co_set_ADM.hxx */ diff --git a/Z4co/wolfram/Z4co_set_ADM.wl b/Z4co/wolfram/Z4co_set_ADM.wl index 2d05895e..e7be3f6c 100644 --- a/Z4co/wolfram/Z4co_set_ADM.wl +++ b/Z4co/wolfram/Z4co_set_ADM.wl @@ -31,10 +31,23 @@ SetOutputFile[FileNameJoin[{Directory[], "Z4co_set_ADM.hxx"}]]; $MainPrint[] := Module[{}, - (*PrintInitializations[{Mode -> "MainOut"}, ADMVarlist];*) + PrintInitializations[{Mode -> "MainOut"}, ADMVarlist]; + pr[]; + + pr["grid.loop_all_device<0, 0, 0, vsize>("]; + pr[" grid.nghostzones, [=] ARITH_DEVICE(const PointDesc &p) ARITH_INLINE {"]; + pr[" const vbool mask = mask_for_loop_tail(p.i, p.imax);"]; + pr[" const GF3D2index index2(layout2, p.I);"]; + pr[]; + + PrintListInitializations[Drop[EvolVarlist, {5}], "gf_", "index2"]; + pr[]; + PrintInitializations[{Mode -> "MainIn"}, Drop[EvolVarlist, {5}]]; pr[]; PrintEquations[{Mode -> "Main"}, ADMVarlist]; + pr[]; + pr["});"]; ]; Import[FileNameJoin[{Environment["GENERATO"], "codes/CarpetX.wl"}]];