diff --git a/Carpet/src/Comm.cc b/Carpet/src/Comm.cc index 375b1be33..8ec6d6f09 100644 --- a/Carpet/src/Comm.cc +++ b/Carpet/src/Comm.cc @@ -242,12 +242,10 @@ static void ProlongateGroupBoundaries(const cGH *cctkGH, for (astate state = static_cast(0); state != state_done; state = static_cast(static_cast(state) + 1)) { ostringstream name1; - name1 << "comm_state[" << timers.size() << "]" - << "." << tostring(state) << ".user"; + name1 << "comm_state[" << timers.size() << "].user." << tostring(state); timers.push_back(new Timers::Timer(name1.str())); ostringstream name2; - name2 << "comm_state[" << timers.size() << "]" - << "." << tostring(state) << ".step"; + name2 << "comm_state[" << timers.size() << "].step." << tostring(state); timers.push_back(new Timers::Timer(name2.str())); } } diff --git a/CarpetLib/src/copy_3d.cc b/CarpetLib/src/copy_3d.cc index 386b86781..cb1b4348a 100644 --- a/CarpetLib/src/copy_3d.cc +++ b/CarpetLib/src/copy_3d.cc @@ -116,12 +116,11 @@ void copy_3d(T const *restrict const src, ivect3 const &restrict srcpadext, ptrdiff_t const dstkoff = dstoff[2]; // Loop over region - if (use_openmp) { + if (false and use_openmp) { #pragma omp parallel for collapse(3) for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { for (int i = 0; i < regiext; ++i) { - dst[DSTIND3(i, j, k)] = src[SRCIND3(i, j, k)]; } } @@ -129,8 +128,8 @@ void copy_3d(T const *restrict const src, ivect3 const &restrict srcpadext, } else { for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { +#pragma omp simd for (int i = 0; i < regiext; ++i) { - dst[DSTIND3(i, j, k)] = src[SRCIND3(i, j, k)]; } } diff --git a/CarpetLib/src/copy_4d.cc b/CarpetLib/src/copy_4d.cc index 382541e4f..84696dc0c 100644 --- a/CarpetLib/src/copy_4d.cc +++ b/CarpetLib/src/copy_4d.cc @@ -101,13 +101,12 @@ void copy_4d(T const *restrict const src, ivect4 const &restrict srcpadext, ptrdiff_t const dstloff = dstoff[3]; // Loop over region - if (use_openmp) { + if (false and use_openmp) { #pragma omp parallel for collapse(4) for (int l = 0; l < reglext; ++l) { for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { for (int i = 0; i < regiext; ++i) { - dst[DSTIND4(i, j, k, l)] = src[SRCIND4(i, j, k, l)]; } } @@ -117,8 +116,8 @@ void copy_4d(T const *restrict const src, ivect4 const &restrict srcpadext, for (int l = 0; l < reglext; ++l) { for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { +#pragma omp simd for (int i = 0; i < regiext; ++i) { - dst[DSTIND4(i, j, k, l)] = src[SRCIND4(i, j, k, l)]; } } diff --git a/CarpetLib/src/interpolate_3d_2tl.cc b/CarpetLib/src/interpolate_3d_2tl.cc index 751e0062e..d51ee421a 100644 --- a/CarpetLib/src/interpolate_3d_2tl.cc +++ b/CarpetLib/src/interpolate_3d_2tl.cc @@ -104,11 +104,10 @@ void interpolate_3d_2tl(T const *restrict const src1, CCTK_REAL const t1, RT const s1fac = (t - t2) / (t1 - t2); RT const s2fac = (t - t1) / (t2 - t1); -// Loop over region -#pragma omp parallel + // Loop over region + // #pragma omp parallel CCTK_LOOP3(interpolate_3d_2tl, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { - dst[DSTIND3(i, j, k)] = +s1fac * src1[SRCIND3(i, j, k)] + s2fac * src2[SRCIND3(i, j, k)]; } diff --git a/CarpetLib/src/interpolate_3d_3tl.cc b/CarpetLib/src/interpolate_3d_3tl.cc index 5bda88448..74fb33cab 100644 --- a/CarpetLib/src/interpolate_3d_3tl.cc +++ b/CarpetLib/src/interpolate_3d_3tl.cc @@ -107,11 +107,10 @@ void interpolate_3d_3tl(T const *restrict const src1, CCTK_REAL const t1, RT const s2fac = (t - t1) * (t - t3) / ((t2 - t1) * (t2 - t3)); RT const s3fac = (t - t1) * (t - t2) / ((t3 - t1) * (t3 - t2)); -// Loop over region -#pragma omp parallel + // Loop over region + // #pragma omp parallel CCTK_LOOP3(interpolate_3d_3tl, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { - dst[DSTIND3(i, j, k)] = +s1fac * src1[SRCIND3(i, j, k)] + s2fac * src2[SRCIND3(i, j, k)] + s3fac * src3[SRCIND3(i, j, k)]; diff --git a/CarpetLib/src/interpolate_3d_4tl.cc b/CarpetLib/src/interpolate_3d_4tl.cc index 69f02e464..fa4517067 100644 --- a/CarpetLib/src/interpolate_3d_4tl.cc +++ b/CarpetLib/src/interpolate_3d_4tl.cc @@ -115,11 +115,10 @@ void interpolate_3d_4tl(T const *restrict const src1, CCTK_REAL const t1, RT const s4fac = (t - t1) * (t - t2) * (t - t3) / ((t4 - t1) * (t4 - t2) * (t4 - t3)); -// Loop over region -#pragma omp parallel + // Loop over region + // #pragma omp parallel CCTK_LOOP3(interpolate_3d_4tl, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { - dst[DSTIND3(i, j, k)] = +s1fac * src1[SRCIND3(i, j, k)] + s2fac * src2[SRCIND3(i, j, k)] + s3fac * src3[SRCIND3(i, j, k)] + s4fac * src4[SRCIND3(i, j, k)]; diff --git a/CarpetLib/src/interpolate_3d_5tl.cc b/CarpetLib/src/interpolate_3d_5tl.cc index b999e72e5..f2fc4ae3b 100644 --- a/CarpetLib/src/interpolate_3d_5tl.cc +++ b/CarpetLib/src/interpolate_3d_5tl.cc @@ -120,11 +120,10 @@ void interpolate_3d_5tl(T const *restrict const src1, CCTK_REAL const t1, RT const s5fac = (t - t1) * (t - t2) * (t - t3) * (t - t4) / ((t5 - t1) * (t5 - t2) * (t5 - t3) * (t5 - t4)); -// Loop over region -#pragma omp parallel + // Loop over region + // #pragma omp parallel CCTK_LOOP3(interpolate_3d_5tl, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { - dst[DSTIND3(i, j, k)] = +s1fac * src1[SRCIND3(i, j, k)] + s2fac * src2[SRCIND3(i, j, k)] + s3fac * src3[SRCIND3(i, j, k)] + s4fac * src4[SRCIND3(i, j, k)] + diff --git a/CarpetLib/src/interpolate_eno_3d_3tl.cc b/CarpetLib/src/interpolate_eno_3d_3tl.cc index cdb8f6204..d9a676b66 100644 --- a/CarpetLib/src/interpolate_eno_3d_3tl.cc +++ b/CarpetLib/src/interpolate_eno_3d_3tl.cc @@ -129,15 +129,14 @@ void interpolate_eno_3d_3tl( bool const use_12 = t >= min(t1, t2) - eps and t <= max(t1, t2) + eps; bool const use_23 = t >= min(t2, t3) - eps and t <= max(t2, t3) + eps; assert(use_12 or use_23); -// TODO: Instead of use_12, calculate 3 coefficents that perform -// the desired 2-point interpolation, which would avoid the if -// statement in the loop, simplifying the code. + // TODO: Instead of use_12, calculate 3 coefficents that perform + // the desired 2-point interpolation, which would avoid the if + // statement in the loop, simplifying the code. -// Loop over region -#pragma omp parallel + // Loop over region + // #pragma omp parallel CCTK_LOOP3(interpolate_end_3d_3tl, i, j, k, 0, 0, 0, regiext, regjext, regkext, srcipadext, srcjpadext, srckpadext) { - T const s1 = src1[SRCIND3(i, j, k)]; T const s2 = src2[SRCIND3(i, j, k)]; T const s3 = src3[SRCIND3(i, j, k)]; diff --git a/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc b/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc index e7f8c2cd1..765425f39 100644 --- a/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc +++ b/CarpetLib/src/prolongate_3d_cc_eno_rf2.cc @@ -1289,8 +1289,8 @@ void prolongate_3d_cc_eno_rf2( } else { // use_loopcontrol_in_operators -// Loop over fine region -#pragma omp parallel + // Loop over fine region + // #pragma omp parallel CCTK_LOOP3(prolongate_3d_cc_eno_rf2, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { const ptrdiff_t is = (srcioff + i) / 2; diff --git a/CarpetLib/src/prolongate_3d_dgfe_rf2.cc b/CarpetLib/src/prolongate_3d_dgfe_rf2.cc index e3eea4558..dbb522bdd 100644 --- a/CarpetLib/src/prolongate_3d_dgfe_rf2.cc +++ b/CarpetLib/src/prolongate_3d_dgfe_rf2.cc @@ -135,7 +135,7 @@ void prolongate_3d_dgfe_rf2( // Loop over fine region ptrdiff_t const i = 0; -#pragma omp parallel for collapse(2) + // #pragma omp parallel for collapse(2) // Zwicky's Intel compiler 11.1 ices on ptrdiff_t for (/*ptrdiff_t*/ int k = 0; k < regkext; k += 2 * (ORDER + 1)) { for (/*ptrdiff_t*/ int j = 0; j < regjext; j += 2 * (ORDER + 1)) { @@ -171,7 +171,7 @@ void prolongate_3d_dgfe_rf2( // Loop over fine region ptrdiff_t const j = 0; -#pragma omp parallel for collapse(2) + // #pragma omp parallel for collapse(2) // Zwicky's Intel compiler 11.1 ices on ptrdiff_t for (/*ptrdiff_t*/ int k = 0; k < regkext; k += 2 * (ORDER + 1)) { for (/*ptrdiff_t*/ int i = 0; i < regiext; i += 2 * (ORDER + 1)) { @@ -207,7 +207,7 @@ void prolongate_3d_dgfe_rf2( // Loop over fine region ptrdiff_t const k = 0; -#pragma omp parallel for collapse(2) + // #pragma omp parallel for collapse(2) // Zwicky's Intel compiler 11.1 ices on ptrdiff_t for (/*ptrdiff_t*/ int j = 0; j < regjext; j += 2 * (ORDER + 1)) { for (/*ptrdiff_t*/ int i = 0; i < regiext; i += 2 * (ORDER + 1)) { @@ -239,8 +239,8 @@ void prolongate_3d_dgfe_rf2( int const srcstr[3] = {srcdi, srcdj, srcdk}; int const dststr[3] = {dstdi, dstdj, dstdk}; -// Loop over fine region -#pragma omp parallel for collapse(3) + // Loop over fine region + // #pragma omp parallel for collapse(3) // Zwicky's Intel compiler 11.1 ices on ptrdiff_t for (/*ptrdiff_t*/ int k = 0; k < regkext; k += 2 * (ORDER + 1)) { for (/*ptrdiff_t*/ int j = 0; j < regjext; j += 2 * (ORDER + 1)) { diff --git a/CarpetLib/src/restrict_3d_cc_o3_rf2.cc b/CarpetLib/src/restrict_3d_cc_o3_rf2.cc index 06a59e0d6..7b5434ceb 100644 --- a/CarpetLib/src/restrict_3d_cc_o3_rf2.cc +++ b/CarpetLib/src/restrict_3d_cc_o3_rf2.cc @@ -118,8 +118,8 @@ void restrict_3d_cc_o3_rf2( if (not use_loopcontrol_in_operators) { -// Loop over coarse region -#pragma omp parallel for collapse(3) + // Loop over coarse region + // #pragma omp parallel for collapse(3) for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { for (int i = 0; i < regiext; ++i) { @@ -215,8 +215,8 @@ void restrict_3d_cc_o3_rf2( } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_cc_o3_rf2, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { diff --git a/CarpetLib/src/restrict_3d_cc_o5_rf2.cc b/CarpetLib/src/restrict_3d_cc_o5_rf2.cc index ebe339403..5c1a49122 100644 --- a/CarpetLib/src/restrict_3d_cc_o5_rf2.cc +++ b/CarpetLib/src/restrict_3d_cc_o5_rf2.cc @@ -167,8 +167,8 @@ void restrict_3d_cc_o5_rf2( if (not use_loopcontrol_in_operators) { -// Loop over coarse region -#pragma omp parallel for collapse(3) + // Loop over coarse region + // #pragma omp parallel for collapse(3) for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { for (int i = 0; i < regiext; ++i) { @@ -201,8 +201,8 @@ void restrict_3d_cc_o5_rf2( } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_cc_o5_rf2, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { diff --git a/CarpetLib/src/restrict_3d_cc_rf2.cc b/CarpetLib/src/restrict_3d_cc_rf2.cc index 4897260b0..39fd7f142 100644 --- a/CarpetLib/src/restrict_3d_cc_rf2.cc +++ b/CarpetLib/src/restrict_3d_cc_rf2.cc @@ -108,8 +108,8 @@ void restrict_3d_cc_rf2(T const *restrict const src, if (not use_loopcontrol_in_operators) { -// Loop over coarse region -#pragma omp parallel for collapse(3) + // Loop over coarse region + // #pragma omp parallel for collapse(3) for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { for (int i = 0; i < regiext; ++i) { @@ -131,8 +131,8 @@ void restrict_3d_cc_rf2(T const *restrict const src, } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_cc_rf2, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { diff --git a/CarpetLib/src/restrict_3d_dgfe_rf2.cc b/CarpetLib/src/restrict_3d_dgfe_rf2.cc index 01d59b383..d79ce86f6 100644 --- a/CarpetLib/src/restrict_3d_dgfe_rf2.cc +++ b/CarpetLib/src/restrict_3d_dgfe_rf2.cc @@ -118,8 +118,8 @@ void restrict_3d_dgfe_rf2( // Ensure we traverse an integer number of elements assert(all(regext % (ORDER + 1) == 0)); -// Loop over coarse region -#pragma omp parallel for collapse(3) + // Loop over coarse region + // #pragma omp parallel for collapse(3) // Zwicky's Intel compiler 11.1 ices on ptrdiff_t for (/*ptrdiff_t*/ int k = 0; k < regkext; k += ORDER + 1) { for (/*ptrdiff_t*/ int j = 0; j < regjext; j += ORDER + 1) { diff --git a/CarpetLib/src/restrict_3d_rf2.cc b/CarpetLib/src/restrict_3d_rf2.cc index d508b1cf2..c6bcd22f8 100644 --- a/CarpetLib/src/restrict_3d_rf2.cc +++ b/CarpetLib/src/restrict_3d_rf2.cc @@ -110,8 +110,8 @@ void restrict_3d_rf2(T const *restrict const src, } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_rf2, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { diff --git a/CarpetLib/src/restrict_3d_stagger011.cc b/CarpetLib/src/restrict_3d_stagger011.cc index 510d3d0ab..5de8a3188 100644 --- a/CarpetLib/src/restrict_3d_stagger011.cc +++ b/CarpetLib/src/restrict_3d_stagger011.cc @@ -146,8 +146,8 @@ void restrict_3d_stagger011( } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_stagger011, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { diff --git a/CarpetLib/src/restrict_3d_stagger101.cc b/CarpetLib/src/restrict_3d_stagger101.cc index 97aaa3b68..b02f95b9f 100644 --- a/CarpetLib/src/restrict_3d_stagger101.cc +++ b/CarpetLib/src/restrict_3d_stagger101.cc @@ -146,8 +146,8 @@ void restrict_3d_stagger101( } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_stagger101, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { diff --git a/CarpetLib/src/restrict_3d_stagger110.cc b/CarpetLib/src/restrict_3d_stagger110.cc index 1912e4295..3ae4f9385 100644 --- a/CarpetLib/src/restrict_3d_stagger110.cc +++ b/CarpetLib/src/restrict_3d_stagger110.cc @@ -145,8 +145,8 @@ void restrict_3d_stagger110( } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_stagger110, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { dst[DSTIND3(i, j, k)] = typeprops::fromreal(0); diff --git a/CarpetLib/src/restrict_3d_stagger111.cc b/CarpetLib/src/restrict_3d_stagger111.cc index 081e6c74f..af2075082 100644 --- a/CarpetLib/src/restrict_3d_stagger111.cc +++ b/CarpetLib/src/restrict_3d_stagger111.cc @@ -137,8 +137,8 @@ void restrict_3d_stagger111( } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_stagger111, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { dst[DSTIND3(i, j, k)] = typeprops::fromreal(0); diff --git a/CarpetLib/src/restrict_3d_vc_rf2.cc b/CarpetLib/src/restrict_3d_vc_rf2.cc index a8b80dd4c..fa371ca02 100644 --- a/CarpetLib/src/restrict_3d_vc_rf2.cc +++ b/CarpetLib/src/restrict_3d_vc_rf2.cc @@ -211,8 +211,8 @@ void restrict_3d_vc_rf2(T const *restrict const src, if (not use_loopcontrol_in_operators) { -// Loop over coarse region -#pragma omp parallel for collapse(3) + // Loop over coarse region + // #pragma omp parallel for collapse(3) for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { for (int i = 0; i < regiext; ++i) { @@ -240,8 +240,8 @@ void restrict_3d_vc_rf2(T const *restrict const src, } else { -// Loop over coarse region -#pragma omp parallel + // Loop over coarse region + // #pragma omp parallel CCTK_LOOP3(restrict_3d_vc_rf2, i, j, k, 0, 0, 0, regiext, regjext, regkext, dstipadext, dstjpadext, dstkpadext) { #ifdef CARPET_DEBUG diff --git a/CarpetLib/src/restrict_4d_rf2.cc b/CarpetLib/src/restrict_4d_rf2.cc index 74b256fae..0f8ab6878 100644 --- a/CarpetLib/src/restrict_4d_rf2.cc +++ b/CarpetLib/src/restrict_4d_rf2.cc @@ -101,13 +101,13 @@ void restrict_4d_rf2(T const *restrict const src, ptrdiff_t const dstkoff = dstoff[2]; ptrdiff_t const dstloff = dstoff[3]; -// Loop over coarse region -#pragma omp parallel for collapse(4) + // Loop over coarse region + // #pragma omp parallel for collapse(4) for (int l = 0; l < reglext; ++l) { for (int k = 0; k < regkext; ++k) { for (int j = 0; j < regjext; ++j) { +#pragma omp simd for (int i = 0; i < regiext; ++i) { - dst[DSTIND4(i, j, k, l)] = src[SRCIND4(2 * i, 2 * j, 2 * k, 2 * l)]; } }