Skip to content

Commit

Permalink
Loop: Pass number of iterations as template argument
Browse files Browse the repository at this point in the history
  • Loading branch information
eschnett committed Jul 13, 2023
1 parent 5e84cd5 commit b5bf9ff
Show file tree
Hide file tree
Showing 2 changed files with 60 additions and 82 deletions.
32 changes: 16 additions & 16 deletions Loop/src/loop.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -145,21 +145,21 @@ public:
}

// Loop over a given box
template <int CI, int CJ, int CK, int VS = 1, typename F>
void loop_box(const F &f, const vect<int, dim> &restrict bnd_min,
template <int CI, int CJ, int CK, int VS = 1, int N = 1, typename F>
void loop_box(const vect<int, dim> &restrict bnd_min,
const vect<int, dim> &restrict bnd_max,
const vect<int, dim> &restrict loop_min,
const vect<int, dim> &restrict loop_max,
const int niters = 1) const {
const vect<int, dim> &restrict loop_max, const F &f) const {
static_assert(CI == 0 || CI == 1);
static_assert(CJ == 0 || CJ == 1);
static_assert(CK == 0 || CK == 1);
static_assert(N >= 0);
static_assert(VS > 0);

if (niters <= 0 || any(loop_max <= loop_min))
if (N == 0 || any(loop_max <= loop_min))
return;

for (int iter = 0; iter < niters; ++iter) {
for (int iter = 0; iter < N; ++iter) {
for (int k = loop_min[2]; k < loop_max[2]; ++k) {
for (int j = loop_min[1]; j < loop_max[1]; ++j) {
#pragma omp simd
Expand Down Expand Up @@ -238,30 +238,30 @@ public:
}

// Loop over all points
template <int CI, int CJ, int CK, int VS = 1, typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_all(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
boundary_box<CI, CJ, CK>(group_nghostzones, bnd_min, bnd_max);
vect<int, dim> imin, imax;
box_all<CI, CJ, CK>(group_nghostzones, imin, imax);
loop_box<CI, CJ, CK, VS>(f, bnd_min, bnd_max, imin, imax);
loop_box<CI, CJ, CK, VS, N>(bnd_min, bnd_max, imin, imax, f);
}

// Loop over all interior points
template <int CI, int CJ, int CK, int VS = 1, typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_int(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
boundary_box<CI, CJ, CK>(group_nghostzones, bnd_min, bnd_max);
vect<int, dim> imin, imax;
box_int<CI, CJ, CK>(group_nghostzones, imin, imax);
loop_box<CI, CJ, CK, VS>(f, bnd_min, bnd_max, imin, imax);
loop_box<CI, CJ, CK, VS, N>(bnd_min, bnd_max, imin, imax, f);
}

// Loop over a part of the domain. Loop over the interior first,
// then faces, then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_there(const vect<int, dim> &group_nghostzones,
const vect<vect<vect<bool, dim>, dim>, dim> &there,
Expand Down Expand Up @@ -309,7 +309,7 @@ public:
imax[d] = min(tmax[d], imax[d]);
}

loop_box<CI, CJ, CK, VS>(f, bnd_min, bnd_max, imin, imax);
loop_box<CI, CJ, CK, VS, N>(bnd_min, bnd_max, imin, imax, f);
}
} // if rank
}
Expand All @@ -322,7 +322,7 @@ public:
// Loop over all outer boundary points. This excludes ghost faces, but
// includes ghost edges/corners on non-ghost faces. Loop over faces first,
// then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_bnd(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
Expand Down Expand Up @@ -368,7 +368,7 @@ public:
imax[d] = min(tmax[d], imax[d]);
}

loop_box<CI, CJ, CK, VS>(f, bnd_min, bnd_max, imin, imax);
loop_box<CI, CJ, CK, VS, N>(bnd_min, bnd_max, imin, imax, f);
}
} // if rank
}
Expand Down Expand Up @@ -462,7 +462,7 @@ public:

// Loop over all outer ghost points. This excludes ghost edges/corners on
// non-ghost faces. Loop over faces first, then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_ghosts(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
Expand Down Expand Up @@ -508,7 +508,7 @@ public:
imax[d] = min(tmax[d], imax[d]);
}

loop_box<CI, CJ, CK, VS>(f, bnd_min, bnd_max, imin, imax);
loop_box<CI, CJ, CK, VS, N>(bnd_min, bnd_max, imin, imax, f);
}
} // if rank
}
Expand Down
110 changes: 44 additions & 66 deletions Loop/src/loop_device.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -39,32 +39,33 @@ public:
GridDescBaseDevice(const cGH *cctkGH) : GridDescBase(cctkGH) {}

// Loop over a given box
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
void loop_box_device(const F &f, const vect<int, dim> &restrict bnd_min,
template <int CI, int CJ, int CK, int VS = 1, int N = 1,
int NT = AMREX_GPU_MAX_THREADS, typename F>
void loop_box_device(const vect<int, dim> &restrict bnd_min,
const vect<int, dim> &restrict bnd_max,
const vect<int, dim> &restrict loop_min,
const vect<int, dim> &restrict loop_max,
const int niters = 1) const {
const F &f) const {
#ifndef AMREX_USE_GPU

return this->template loop_box<CI, CJ, CK, VS>(f, bnd_min, bnd_max,
loop_min, loop_max, niters);
return this->template loop_box<CI, CJ, CK, VS, N>(bnd_min, bnd_max,
loop_min, loop_max, f);

#else
// Run on GPU

static_assert(CI == 0 || CI == 1);
static_assert(CJ == 0 || CJ == 1);
static_assert(CK == 0 || CK == 1);
static_assert(N >= 0);
static_assert(VS > 0);
static_assert(NT > 0);

if (niters <= 0 || any(loop_max <= loop_min))
return;

// Run on GPU
static_assert(VS == 1, "Only vector size 1 is supported on GPUs");

if (N == 0 || any(loop_max <= loop_min))
return;

// For some reason, the arguments loop_min and loop_max cannot be captured
// correctly in CUDA, but copies of them can
const auto bnd_min1 = bnd_min;
Expand All @@ -80,47 +81,24 @@ public:
CJ ? amrex::IndexType::CELL : amrex::IndexType::NODE,
CK ? amrex::IndexType::CELL : amrex::IndexType::NODE));

if (niters == 1)
amrex::ParallelFor<NT>(
box,
[=, *this] CCTK_DEVICE(const int i, const int j,
const int k) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vect<int, dim> I = {i, j, k};
const vect<int, dim> NI =
vect<int, dim>(I > bnd_max1 - 1) - vect<int, dim>(I < bnd_min1);
const vect<int, dim> I0 =
if_else(NI == 0, 0, if_else(NI < 0, bnd_min1, bnd_max1 - 1));
const vect<int, dim> BI = vect<int, dim>(I == bnd_max1 - 1) -
vect<int, dim>(I == bnd_min1);
constexpr int iter = 0;
amrex::ParallelFor<NT>(
box, [=, *this] CCTK_DEVICE(const int i, const int j,
const int k) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vect<int, dim> I = {i, j, k};
const vect<int, dim> NI =
vect<int, dim>(I > bnd_max1 - 1) - vect<int, dim>(I < bnd_min1);
const vect<int, dim> I0 =
if_else(NI == 0, 0, if_else(NI < 0, bnd_min1, bnd_max1 - 1));
const vect<int, dim> BI =
vect<int, dim>(I == bnd_max1 - 1) - vect<int, dim>(I == bnd_min1);
for (int iter = 0; iter < N; ++iter) {
const PointDesc p =
point_desc({CI, CJ, CK}, I, iter, NI, I0, BI, bnd_min1,
bnd_max1, loop_min1, loop_max1);
f(p);
});
else
amrex::ParallelFor<NT>(
box,
[=, *this] CCTK_DEVICE(const int i, const int j,
const int k) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vect<int, dim> I = {i, j, k};
const vect<int, dim> NI =
vect<int, dim>(I > bnd_max1 - 1) - vect<int, dim>(I < bnd_min1);
const vect<int, dim> I0 =
if_else(NI == 0, 0, if_else(NI < 0, bnd_min1, bnd_max1 - 1));
const vect<int, dim> BI = vect<int, dim>(I == bnd_max1 - 1) -
vect<int, dim>(I == bnd_min1);
for (int iter = 0; iter < niters; ++iter) {
const PointDesc p =
point_desc({CI, CJ, CK}, I, iter, NI, I0, BI, bnd_min1,
bnd_max1, loop_min1, loop_max1);
f(p);
}
});

#endif
}
});

#ifdef AMREX_USE_GPU
static const bool gpu_sync_after_every_kernel = []() {
int type;
const void *const ptr =
Expand All @@ -137,33 +115,33 @@ public:
}

// Loop over all points
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1,
int NT = AMREX_GPU_MAX_THREADS, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_all_device(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
boundary_box<CI, CJ, CK>(group_nghostzones, bnd_min, bnd_max);
vect<int, dim> imin, imax;
box_all<CI, CJ, CK>(group_nghostzones, imin, imax);
loop_box_device<CI, CJ, CK, VS, NT>(f, bnd_min, bnd_max, imin, imax);
loop_box_device<CI, CJ, CK, VS, N, NT>(bnd_min, bnd_max, imin, imax, f);
}

// Loop over all interior points
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1,
int NT = AMREX_GPU_MAX_THREADS, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_int_device(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
boundary_box<CI, CJ, CK>(group_nghostzones, bnd_min, bnd_max);
vect<int, dim> imin, imax;
box_int<CI, CJ, CK>(group_nghostzones, imin, imax);
loop_box_device<CI, CJ, CK, VS, NT>(f, bnd_min, bnd_max, imin, imax);
loop_box_device<CI, CJ, CK, VS, N, NT>(bnd_min, bnd_max, imin, imax, f);
}

// Loop over a part of the domain. Loop over the interior first,
// then faces, then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1,
int NT = AMREX_GPU_MAX_THREADS, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_there_device(const vect<int, dim> &group_nghostzones,
const vect<vect<vect<bool, dim>, dim>, dim> &there,
Expand Down Expand Up @@ -211,8 +189,8 @@ public:
imax[d] = min(tmax[d], imax[d]);
}

loop_box_device<CI, CJ, CK, VS, NT>(f, bnd_min, bnd_max, imin,
imax);
loop_box_device<CI, CJ, CK, VS, N, NT>(bnd_min, bnd_max, imin,
imax, f);
}
} // if rank
}
Expand All @@ -225,8 +203,8 @@ public:
// Loop over all outer boundary points. This excludes ghost faces, but
// includes ghost edges/corners on non-ghost faces. Loop over faces first,
// then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1,
int NT = AMREX_GPU_MAX_THREADS, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_bnd_device(const vect<int, dim> &group_nghostzones, const F &f) const {
vect<int, dim> bnd_min, bnd_max;
Expand Down Expand Up @@ -272,8 +250,8 @@ public:
imax[d] = min(tmax[d], imax[d]);
}

loop_box_device<CI, CJ, CK, VS, NT>(f, bnd_min, bnd_max, imin,
imax);
loop_box_device<CI, CJ, CK, VS, N, NT>(bnd_min, bnd_max, imin,
imax, f);
}
} // if rank
}
Expand All @@ -286,7 +264,7 @@ public:
#if 0
// Loop over all outer ghost points. This includes ghost edges/corners on
// non-ghost faces. Loop over faces first, then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
template <int CI, int CJ, int CK, int N=1,int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_ghosts_inclusive_device(const vect<int, dim> &group_nghostzones,
Expand Down Expand Up @@ -335,7 +313,7 @@ public:
imax[d] = std::min(tmax[d], imax[d]);
}

loop_box_boundary_device<CI, CJ, CK, VS, NT>(f, imin, imax,
loop_box_boundary_device<CI, CJ, CK,VS, N, NT>( imin, imax,
inormal);
}
} // if rank
Expand All @@ -348,8 +326,8 @@ public:

// Loop over all outer ghost points. This excludes ghost edges/corners on
// non-ghost faces. Loop over faces first, then edges, then corners.
template <int CI, int CJ, int CK, int VS = 1, int NT = AMREX_GPU_MAX_THREADS,
typename F>
template <int CI, int CJ, int CK, int VS = 1, int N = 1,
int NT = AMREX_GPU_MAX_THREADS, typename F>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE void
loop_ghosts_device(const vect<int, dim> &group_nghostzones,
const F &f) const {
Expand Down Expand Up @@ -396,8 +374,8 @@ public:
imax[d] = min(tmax[d], imax[d]);
}

loop_box_device<CI, CJ, CK, VS, NT>(f, bnd_min, bnd_max, imin,
imax);
loop_box_device<CI, CJ, CK, VS, N, NT>(bnd_min, bnd_max, imin,
imax, f);
}
} // if rank
}
Expand Down

0 comments on commit b5bf9ff

Please sign in to comment.