Skip to content

Commit

Permalink
Fix launch nested Tile tests
Browse files Browse the repository at this point in the history
  • Loading branch information
MrBurmark committed Dec 30, 2024
1 parent fac8dec commit e7a7273
Show file tree
Hide file tree
Showing 3 changed files with 69 additions and 68 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,17 +20,18 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)
constexpr int tile_size_y = 3;
constexpr int tile_size_z = 4;

constexpr int threads_x = 2*tile_size_x;
constexpr int threads_y = 3*tile_size_y;
constexpr int threads_z = 4*tile_size_z;
constexpr int threads_x = tile_size_x;
constexpr int threads_y = tile_size_y;
constexpr int threads_z = tile_size_z;

constexpr int blocks_x = 4;
constexpr int blocks_y = 5;
constexpr int blocks_z = 6;

RAJA::TypedRangeSegment<INDEX_TYPE> r1(0, tile_size_x*M);
RAJA::TypedRangeSegment<INDEX_TYPE> r2(0, tile_size_y*M);
RAJA::TypedRangeSegment<INDEX_TYPE> r3(0, tile_size_z*M);
// Use fewer than the number of teams and threads
RAJA::TypedRangeSegment<INDEX_TYPE> r1(0, ((blocks_x-1)*threads_x+1)*M);
RAJA::TypedRangeSegment<INDEX_TYPE> r2(0, ((blocks_y-1)*threads_y+1)*M);
RAJA::TypedRangeSegment<INDEX_TYPE> r3(0, ((blocks_z-1)*threads_z+1)*M);

INDEX_TYPE N1 = static_cast<INDEX_TYPE>(r1.end() - r1.begin());
INDEX_TYPE N2 = static_cast<INDEX_TYPE>(r2.end() - r2.begin());
Expand All @@ -56,9 +57,10 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)
&check_array,
&test_array);

if ( RAJA::stripIndexType(N) > 0 ) {
std::iota(test_array, test_array + data_len, 0);
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);

std::iota(test_array, test_array + RAJA::stripIndexType(N), 0);
if ( RAJA::stripIndexType(N) > 0 ) {

constexpr int DIM = 3;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
Expand All @@ -78,7 +80,7 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)

auto idx = tx + N1 * (ty + N2 * tz);

Aview(tz, ty, tx) = static_cast<INDEX_TYPE>(idx);
Aview(tz, ty, tx) += static_cast<INDEX_TYPE>(idx);

});
});
Expand All @@ -90,17 +92,13 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)
});
} else { // zero-length segment

memset(static_cast<void*>(test_array), 0, sizeof(INDEX_TYPE) * data_len);

working_res.memcpy(working_array, test_array, sizeof(INDEX_TYPE) * data_len);

RAJA::launch<LAUNCH_POLICY>
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(blocks_x, blocks_y ,blocks_z)),
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(threads_x, threads_y,threads_z)),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

RAJA::tile<TEAM_Z_POLICY>(ctx, threads_z, r3, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &z_tile) {
RAJA::tile<TEAM_Y_POLICY>(ctx, threads_y, r2, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &y_tile) {
RAJA::tile<TEAM_X_POLICY>(ctx, threads_x, r1, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &x_tile) {
RAJA::tile<TEAM_Z_POLICY>(ctx, tile_size_z, r3, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &z_tile) {
RAJA::tile<TEAM_Y_POLICY>(ctx, tile_size_y, r2, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &y_tile) {
RAJA::tile<TEAM_X_POLICY>(ctx, tile_size_x, r1, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &x_tile) {

RAJA::loop<THREAD_Z_POLICY>(ctx, z_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tz)) {
RAJA::loop<THREAD_Y_POLICY>(ctx, y_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(ty)) {
Expand All @@ -119,6 +117,7 @@ void LaunchNestedTileDirectTestImpl(INDEX_TYPE M)
}

working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);
working_res.wait();

if (RAJA::stripIndexType(N) > 0) {

Expand Down Expand Up @@ -153,13 +152,13 @@ TYPED_TEST_P(LaunchNestedTileDirectTest, RangeSegmentTeams)
using WORKING_RES = typename camp::at<TypeParam, camp::num<1>>::type;
using LAUNCH_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<0>>::type;

using THREAD_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<1>>::type;
using THREAD_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<2>>::type;
using THREAD_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<3>>::type;
using TEAM_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<1>>::type;
using TEAM_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<2>>::type;
using TEAM_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<3>>::type;

using TEAM_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<4>>::type;
using TEAM_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<5>>::type;
using TEAM_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<6>>::type;
using THREAD_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<4>>::type;
using THREAD_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<5>>::type;
using THREAD_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<6>>::type;


// test zero-length range segment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,18 +16,22 @@ template <typename INDEX_TYPE, typename WORKING_RES, typename LAUNCH_POLICY,
void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
{

constexpr int threads_x = 2;
constexpr int threads_y = 3;
constexpr int threads_z = 4;
constexpr int tile_size_x = 3;
constexpr int tile_size_y = 4;
constexpr int tile_size_z = 5;

constexpr int threads_x = tile_size_x-1;
constexpr int threads_y = tile_size_y-1;
constexpr int threads_z = tile_size_z-1;

constexpr int blocks_x = 4;
constexpr int blocks_y = 5;
constexpr int blocks_z = 6;

// Add one so that we check the bounds-checking capability
RAJA::TypedRangeSegment<INDEX_TYPE> r1(0, threads_x*M + 1);
RAJA::TypedRangeSegment<INDEX_TYPE> r2(0, threads_y*M + 1);
RAJA::TypedRangeSegment<INDEX_TYPE> r3(0, threads_z*M + 1);
// Use more than the number of teams and threads
RAJA::TypedRangeSegment<INDEX_TYPE> r1(0, (2*blocks_x*threads_x+1)*M);
RAJA::TypedRangeSegment<INDEX_TYPE> r2(0, (2*blocks_y*threads_y+1)*M);
RAJA::TypedRangeSegment<INDEX_TYPE> r3(0, (2*blocks_z*threads_z+1)*M);

INDEX_TYPE N1 = static_cast<INDEX_TYPE>(r1.end() - r1.begin());
INDEX_TYPE N2 = static_cast<INDEX_TYPE>(r2.end() - r2.begin());
Expand All @@ -53,9 +57,10 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
&check_array,
&test_array);

if ( RAJA::stripIndexType(N) > 0 ) {
std::iota(test_array, test_array + data_len, 0);
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);

std::iota(test_array, test_array + RAJA::stripIndexType(N), 0);
if ( RAJA::stripIndexType(N) > 0 ) {

constexpr int DIM = 3;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
Expand All @@ -65,17 +70,17 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(threads_x, threads_y,threads_z)),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

RAJA::tile<TEAM_Z_POLICY>(ctx, threads_z, r3, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &z_tile) {
RAJA::tile<TEAM_Y_POLICY>(ctx, threads_y, r2, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &y_tile) {
RAJA::tile<TEAM_X_POLICY>(ctx, threads_x, r1, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &x_tile) {
RAJA::tile<TEAM_Z_POLICY>(ctx, tile_size_z, r3, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &z_tile) {
RAJA::tile<TEAM_Y_POLICY>(ctx, tile_size_y, r2, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &y_tile) {
RAJA::tile<TEAM_X_POLICY>(ctx, tile_size_x, r1, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &x_tile) {

RAJA::loop<THREAD_Z_POLICY>(ctx, z_tile, [&](INDEX_TYPE tz) {
RAJA::loop<THREAD_Y_POLICY>(ctx, y_tile, [&](INDEX_TYPE ty) {
RAJA::loop<THREAD_X_POLICY>(ctx, x_tile, [&](INDEX_TYPE tx) {

auto idx = tx + N1 * (ty + N2 * tz);

Aview(tz, ty, tx) = static_cast<INDEX_TYPE>(idx);
Aview(tz, ty, tx) += static_cast<INDEX_TYPE>(idx);
});
});
});
Expand All @@ -86,27 +91,20 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
});
} else { // zero-length segment

memset(static_cast<void*>(test_array), 0, sizeof(INDEX_TYPE) * data_len);

working_res.memcpy(working_array, test_array, sizeof(INDEX_TYPE) * data_len);

RAJA::launch<LAUNCH_POLICY>
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(blocks_x, blocks_y ,blocks_z)),
(RAJA::LaunchParams(RAJA::Teams(blocks_x, blocks_y, blocks_z), RAJA::Threads(threads_x, threads_y,threads_z)),
[=] RAJA_HOST_DEVICE(RAJA::LaunchContext ctx) {

RAJA::tile<TEAM_Z_POLICY>(ctx, threads_z, r3, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &z_tile) {
RAJA::tile<TEAM_Y_POLICY>(ctx, threads_y, r2, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &y_tile) {
RAJA::tile<TEAM_X_POLICY>(ctx, threads_x, r1, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &x_tile) {
RAJA::tile<TEAM_Z_POLICY>(ctx, tile_size_z, r3, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &z_tile) {
RAJA::tile<TEAM_Y_POLICY>(ctx, tile_size_y, r2, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &y_tile) {
RAJA::tile<TEAM_X_POLICY>(ctx, tile_size_x, r1, [&](RAJA::TypedRangeSegment<INDEX_TYPE> const &x_tile) {

RAJA::loop<THREAD_Z_POLICY>(ctx, z_tile, [&](INDEX_TYPE tz) {
RAJA::loop<THREAD_Y_POLICY>(ctx, y_tile, [&](INDEX_TYPE ty) {
RAJA::loop<THREAD_X_POLICY>(ctx, x_tile, [&](INDEX_TYPE tx) {

(void) tx;
(void) ty;
(void) tz;
RAJA::loop<THREAD_Z_POLICY>(ctx, z_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tz)) {
RAJA::loop<THREAD_Y_POLICY>(ctx, y_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(ty)) {
RAJA::loop<THREAD_X_POLICY>(ctx, x_tile, [&](INDEX_TYPE RAJA_UNUSED_ARG(tx)) {

working_array[0]++;

});
});
});
Expand All @@ -118,6 +116,7 @@ void LaunchNestedTileLoopTestImpl(INDEX_TYPE M)
}

working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);
working_res.wait();

if (RAJA::stripIndexType(N) > 0) {

Expand Down Expand Up @@ -152,13 +151,13 @@ TYPED_TEST_P(LaunchNestedTileLoopTest, RangeSegmentTeams)
using WORKING_RES = typename camp::at<TypeParam, camp::num<1>>::type;
using LAUNCH_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<0>>::type;

using THREAD_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<1>>::type;
using THREAD_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<2>>::type;
using THREAD_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<3>>::type;
using TEAM_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<1>>::type;
using TEAM_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<2>>::type;
using TEAM_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<3>>::type;

using TEAM_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<4>>::type;
using TEAM_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<5>>::type;
using TEAM_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<6>>::type;
using THREAD_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<4>>::type;
using THREAD_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<5>>::type;
using THREAD_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<6>>::type;


// test zero-length range segment
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,10 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
const int blocks_y = 5*M;
const int blocks_z = 6*M;

RAJA::TypedRangeSegment<INDEX_TYPE> r1(0, tile_size_x*blocks_x);
RAJA::TypedRangeSegment<INDEX_TYPE> r2(0, tile_size_y*blocks_y);
RAJA::TypedRangeSegment<INDEX_TYPE> r3(0, tile_size_z*blocks_z);
// Use exactly the number of teams and threads
RAJA::TypedRangeSegment<INDEX_TYPE> r1(0, threads_x*blocks_x);
RAJA::TypedRangeSegment<INDEX_TYPE> r2(0, threads_y*blocks_y);
RAJA::TypedRangeSegment<INDEX_TYPE> r3(0, threads_z*blocks_z);

INDEX_TYPE N1 = static_cast<INDEX_TYPE>(r1.end() - r1.begin());
INDEX_TYPE N2 = static_cast<INDEX_TYPE>(r2.end() - r2.begin());
Expand All @@ -53,7 +54,8 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
&check_array,
&test_array);

std::iota(test_array, test_array + RAJA::stripIndexType(N), 0);
std::iota(test_array, test_array + data_len, 0);
working_res.memset(working_array, 0, sizeof(INDEX_TYPE) * data_len);

constexpr int DIM = 3;
using layout_t = RAJA::Layout<DIM, INDEX_TYPE,DIM-1>;
Expand All @@ -73,7 +75,7 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)

auto idx = tx + N1 * (ty + N2 * tz);

Aview(tz, ty, tx) = static_cast<INDEX_TYPE>(idx);
Aview(tz, ty, tx) += static_cast<INDEX_TYPE>(idx);

});
});
Expand All @@ -89,6 +91,7 @@ void LaunchNestedTileUncheckedTestImpl(INDEX_TYPE M)
working_res.memcpy(check_array, working_array, sizeof(INDEX_TYPE) * data_len);

}
working_res.wait();

for (INDEX_TYPE i = INDEX_TYPE(0); i < N; i++) {
ASSERT_EQ(test_array[RAJA::stripIndexType(i)], check_array[RAJA::stripIndexType(i)]);
Expand All @@ -115,13 +118,13 @@ TYPED_TEST_P(LaunchNestedTileUncheckedTest, RangeSegmentTeams)
using WORKING_RES = typename camp::at<TypeParam, camp::num<1>>::type;
using LAUNCH_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<0>>::type;

using THREAD_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<1>>::type;
using THREAD_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<2>>::type;
using THREAD_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<3>>::type;
using TEAM_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<1>>::type;
using TEAM_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<2>>::type;
using TEAM_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<3>>::type;

using TEAM_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<4>>::type;
using TEAM_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<5>>::type;
using TEAM_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<6>>::type;
using THREAD_Z_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<4>>::type;
using THREAD_Y_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<5>>::type;
using THREAD_X_POLICY = typename camp::at<typename camp::at<TypeParam,camp::num<2>>::type, camp::num<6>>::type;


// test zero-length range segment
Expand Down

0 comments on commit e7a7273

Please sign in to comment.