Skip to content

Commit

Permalink
Merge pull request #180 from lwJi/calc_derivs
Browse files Browse the repository at this point in the history
Derivs: Use GF3D2 as input in calc_derivs
  • Loading branch information
eschnett authored Jul 25, 2023
2 parents bcd4070 + 7281f34 commit 5abb23e
Show file tree
Hide file tree
Showing 2 changed files with 101 additions and 68 deletions.
101 changes: 65 additions & 36 deletions Derivs/src/derivs.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,29 @@ using namespace Loop;

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const vec<GF3D5<T>, dim> &dgf, const GridDescBaseDevice &grid,
const GF3D5<const T> &gf, const GF3D5layout layout,
const vect<T, dim> dx, const int deriv_order) {
calc_copy(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx) {
using vreal = simd<T>;
using vbool = simdl<T>;
constexpr std::size_t vsize = std::tuple_size_v<vreal>;

grid.loop_int_device<CI, CJ, CK, vsize>(
grid.nghostzones,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
const GF3D5index index(layout, p.I);
const auto val = gf0(mask, p.I);
gf.store(mask, index, val);
});
}

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const GF3D5layout layout, const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0, const vect<T, dim> dx,
const int deriv_order) {
using vreal = simd<T>;
using vbool = simdl<T>;
constexpr std::size_t vsize = std::tuple_size_v<vreal>;
Expand All @@ -25,7 +45,9 @@ calc_derivs(const vec<GF3D5<T>, dim> &dgf, const GridDescBaseDevice &grid,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
const GF3D5index index(layout, p.I);
const auto dval = calc_deriv<2>(gf, mask, layout, p.I, dx);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
});
break;
Expand All @@ -36,7 +58,9 @@ calc_derivs(const vec<GF3D5<T>, dim> &dgf, const GridDescBaseDevice &grid,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
const GF3D5index index(layout, p.I);
const auto dval = calc_deriv<4>(gf, mask, layout, p.I, dx);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
});
break;
Expand All @@ -48,10 +72,10 @@ calc_derivs(const vec<GF3D5<T>, dim> &dgf, const GridDescBaseDevice &grid,

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs2(const vec<GF3D5<T>, dim> &dgf, const smat<GF3D5<T>, dim> &ddgf,
const GridDescBaseDevice &grid, const GF3D5<const T> &gf,
const GF3D5layout layout, const vect<T, dim> dx,
const int deriv_order) {
calc_derivs2(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const smat<GF3D5<T>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx, const int deriv_order) {
using vreal = simd<T>;
using vbool = simdl<T>;
constexpr std::size_t vsize = std::tuple_size_v<vreal>;
Expand All @@ -64,8 +88,10 @@ calc_derivs2(const vec<GF3D5<T>, dim> &dgf, const smat<GF3D5<T>, dim> &ddgf,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
const GF3D5index index(layout, p.I);
const auto dval = calc_deriv<2>(gf, mask, layout, p.I, dx);
const auto ddval = calc_deriv2<2>(gf, mask, layout, p.I, dx);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<2>(gf0, mask, p.I, dx);
const auto ddval = calc_deriv2<2>(gf0, mask, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
ddgf.store(mask, index, ddval);
});
Expand All @@ -77,8 +103,10 @@ calc_derivs2(const vec<GF3D5<T>, dim> &dgf, const smat<GF3D5<T>, dim> &ddgf,
[=] CCTK_DEVICE(const PointDesc &p) CCTK_ATTRIBUTE_ALWAYS_INLINE {
const vbool mask = mask_for_loop_tail<vbool>(p.i, p.imax);
const GF3D5index index(layout, p.I);
const auto dval = calc_deriv<4>(gf, mask, layout, p.I, dx);
const auto ddval = calc_deriv2<4>(gf, mask, layout, p.I, dx);
const auto val = gf0(mask, p.I);
const auto dval = calc_deriv<4>(gf0, mask, p.I, dx);
const auto ddval = calc_deriv2<4>(gf0, mask, p.I, dx);
gf.store(mask, index, val);
dgf.store(mask, index, dval);
ddgf.store(mask, index, ddval);
});
Expand All @@ -96,55 +124,56 @@ calc_derivs2(const vec<GF3D5<T>, dim> &dgf, const smat<GF3D5<T>, dim> &ddgf,
using T = CCTK_REAL;

template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<2>(const Loop::GF3D5<const T> &gf, const Arith::simdl<T> &mask,
const Loop::GF3D5layout &layout,
calc_deriv<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv<4>(const Loop::GF3D5<const T> &gf, const Arith::simdl<T> &mask,
const Loop::GF3D5layout &layout,
calc_deriv<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<2>(const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout &layout,
calc_deriv<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv<4>(const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout &layout,
calc_deriv<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<2>(const Loop::GF3D5<const T> &gf, const Arith::simdl<T> &mask,
const Loop::GF3D5layout &layout,
calc_deriv2<2>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2<4>(const Loop::GF3D5<const T> &gf, const Arith::simdl<T> &mask,
const Loop::GF3D5layout &layout,
calc_deriv2<4>(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<2>(const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout &layout,
calc_deriv2<2>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);
template CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2<4>(const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout &layout,
calc_deriv2<4>(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx);

template void calc_derivs<0, 0, 0>(const vec<GF3D5<T>, dim> &dgf,
const GridDescBaseDevice &grid,
const GF3D5<const T> &gf,
const GF3D5layout layout,
const vect<T, dim> dx,
const int deriv_order);

template void calc_derivs2<0, 0, 0>(
const vec<GF3D5<T>, dim> &dgf, const smat<GF3D5<T>, dim> &ddgf,
const GridDescBaseDevice &grid, const GF3D5<const T> &gf,
const GF3D5layout layout, const vect<T, dim> dx, const int deriv_order);
template void calc_copy<0, 0, 0>(const GF3D5<T> &gf, const GF3D5layout layout,
const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0,
const vect<T, dim> dx);

template void
calc_derivs<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const GF3D5layout layout, const GridDescBaseDevice &grid,
const GF3D2<const T> &gf0, const vect<T, dim> dx,
const int deriv_order);

template void
calc_derivs2<0, 0, 0>(const GF3D5<T> &gf, const vec<GF3D5<T>, dim> &dgf,
const smat<GF3D5<T>, dim> &ddgf, const GF3D5layout layout,
const GridDescBaseDevice &grid, const GF3D2<const T> &gf0,
const vect<T, dim> dx, const int deriv_order);

} // namespace Derivs
68 changes: 36 additions & 32 deletions Derivs/src/derivs.hxx
Original file line number Diff line number Diff line change
Expand Up @@ -332,19 +332,18 @@ inline CCTK_ATTRIBUTE_ALWAYS_INLINE
template <int deriv_order, typename T>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE
CCTK_DEVICE CCTK_HOST Arith::vec<Arith::simd<T>, Loop::dim>
calc_deriv(const Loop::GF3D5<const T> &gf, const Arith::simdl<T> &mask,
const Loop::GF3D5layout &layout,
calc_deriv(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx) {
using namespace Arith;
using namespace Loop;
// We use explicit index calculations to avoid unnecessary integer
// multiplications
const T *restrict const ptr = &gf(layout, I);
const T *restrict const ptr = &gf(I);
const std::array<std::ptrdiff_t, Loop::dim> offsets{
layout.delta(1, 0, 0),
layout.delta(0, 1, 0),
layout.delta(0, 0, 1),
gf.delta(1, 0, 0),
gf.delta(0, 1, 0),
gf.delta(0, 0, 1),
};
return {
detail::deriv1d<deriv_order>(
Expand All @@ -368,18 +367,18 @@ inline CCTK_ATTRIBUTE_ALWAYS_INLINE
template <int deriv_order, typename T>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE
CCTK_DEVICE CCTK_HOST Arith::vec<T, Loop::dim>
calc_deriv(const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout &layout,
calc_deriv(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx) {
using namespace Arith;
using namespace Loop;
// We use explicit index calculations to avoid unnecessary integer
// multiplications
const T *restrict const ptr = &gf(layout, I);
const T *restrict const ptr = &gf(I);
const std::array<std::ptrdiff_t, Loop::dim> offsets{
layout.delta(1, 0, 0),
layout.delta(0, 1, 0),
layout.delta(0, 0, 1),
gf.delta(1, 0, 0),
gf.delta(0, 1, 0),
gf.delta(0, 0, 1),
};
return {
detail::deriv1d<deriv_order>(
Expand All @@ -400,19 +399,18 @@ inline CCTK_ATTRIBUTE_ALWAYS_INLINE
template <int deriv_order, typename T>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE
CCTK_DEVICE CCTK_HOST Arith::smat<Arith::simd<T>, Loop::dim>
calc_deriv2(const Loop::GF3D5<const T> &gf, const Arith::simdl<T> &mask,
const Loop::GF3D5layout &layout,
calc_deriv2(const Loop::GF3D2<const T> &gf, const Arith::simdl<T> &mask,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx) {
using namespace Arith;
using namespace Loop;
// We use explicit index calculations to avoid unnecessary integer
// multiplications
const T *restrict const ptr = &gf(layout, I);
const T *restrict const ptr = &gf(I);
const std::array<std::ptrdiff_t, Loop::dim> offsets{
layout.delta(1, 0, 0),
layout.delta(0, 1, 0),
layout.delta(0, 0, 1),
gf.delta(1, 0, 0),
gf.delta(0, 1, 0),
gf.delta(0, 0, 1),
};
return {
detail::deriv2_1d<deriv_order>(
Expand Down Expand Up @@ -451,18 +449,18 @@ inline CCTK_ATTRIBUTE_ALWAYS_INLINE
template <int deriv_order, typename T>
inline CCTK_ATTRIBUTE_ALWAYS_INLINE
CCTK_DEVICE CCTK_HOST Arith::smat<T, Loop::dim>
calc_deriv2(const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout &layout,
calc_deriv2(const Loop::GF3D2<const T> &gf,
const Arith::vect<int, Loop::dim> &I,
const Arith::vect<T, Loop::dim> &dx) {
using namespace Arith;
using namespace Loop;
// We use explicit index calculations to avoid unnecessary integer
// multiplications
const T *restrict const ptr = &gf(layout, I);
const T *restrict const ptr = &gf(I);
const std::array<std::ptrdiff_t, Loop::dim> offsets{
layout.delta(1, 0, 0),
layout.delta(0, 1, 0),
layout.delta(0, 0, 1),
gf.delta(1, 0, 0),
gf.delta(0, 1, 0),
gf.delta(0, 0, 1),
};
return {
detail::deriv2_1d<deriv_order>(
Expand Down Expand Up @@ -501,18 +499,24 @@ inline CCTK_ATTRIBUTE_ALWAYS_INLINE

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs(const Arith::vec<Loop::GF3D5<T>, Loop::dim> &dgf,
const Loop::GridDescBaseDevice &grid,
const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout layout,
const Arith::vect<T, Loop::dim> dx, const int deriv_order);
calc_copy(const Loop::GF3D5<T> &gf, const Loop::GF3D5layout layout,
const Loop::GridDescBaseDevice &grid, const Loop::GF3D2<const T> &gf0,
const Arith::vect<T, Loop::dim> dx);

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void
calc_derivs2(const Arith::vec<Loop::GF3D5<T>, Loop::dim> &dgf,
const Arith::smat<Loop::GF3D5<T>, Loop::dim> &ddgf,
const Loop::GridDescBaseDevice &grid,
const Loop::GF3D5<const T> &gf, const Loop::GF3D5layout layout,
const Arith::vect<T, Loop::dim> dx, const int deriv_order);
CCTK_ATTRIBUTE_NOINLINE void calc_derivs(
const Loop::GF3D5<T> &gf, const Arith::vec<Loop::GF3D5<T>, Loop::dim> &dgf,
const Loop::GF3D5layout layout, const Loop::GridDescBaseDevice &grid,
const Loop::GF3D2<const T> &gf0, const Arith::vect<T, Loop::dim> dx,
const int deriv_order);

template <int CI, int CJ, int CK, typename T>
CCTK_ATTRIBUTE_NOINLINE void calc_derivs2(
const Loop::GF3D5<T> &gf, const Arith::vec<Loop::GF3D5<T>, Loop::dim> &dgf,
const Arith::smat<Loop::GF3D5<T>, Loop::dim> &ddgf,
const Loop::GF3D5layout layout, const Loop::GridDescBaseDevice &grid,
const Loop::GF3D2<const T> &gf0, const Arith::vect<T, Loop::dim> dx,
const int deriv_order);

} // namespace Derivs

Expand Down

0 comments on commit 5abb23e

Please sign in to comment.