From 17c2eb7ef82e1bce9d03dfcf6633f4b90fe45aa0 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Sun, 14 Apr 2024 00:06:20 -0700 Subject: [PATCH 1/9] link group_utils headers --- help_function/src/onedpl_test_group_exchange.cpp | 1 + help_function/src/onedpl_test_group_sort.cpp | 1 + 2 files changed, 2 insertions(+) diff --git a/help_function/src/onedpl_test_group_exchange.cpp b/help_function/src/onedpl_test_group_exchange.cpp index 0cb7bbc0b..61f42f29d 100644 --- a/help_function/src/onedpl_test_group_exchange.cpp +++ b/help_function/src/onedpl_test_group_exchange.cpp @@ -10,6 +10,7 @@ #include #include #include +#include #include template #include #include +#include #include template Date: Mon, 15 Apr 2024 16:13:16 +0530 Subject: [PATCH 2/9] add header in dpl_utils --- help_function/src/onedpl_test_group_exchange.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/help_function/src/onedpl_test_group_exchange.cpp b/help_function/src/onedpl_test_group_exchange.cpp index 61f42f29d..0cb7bbc0b 100644 --- a/help_function/src/onedpl_test_group_exchange.cpp +++ b/help_function/src/onedpl_test_group_exchange.cpp @@ -10,7 +10,6 @@ #include #include #include -#include #include template Date: Mon, 15 Apr 2024 16:13:41 +0530 Subject: [PATCH 3/9] add header in dpl_utils.h --- help_function/src/onedpl_test_group_sort.cpp | 1 - 1 file changed, 1 deletion(-) diff --git a/help_function/src/onedpl_test_group_sort.cpp b/help_function/src/onedpl_test_group_sort.cpp index fe5dfd8c9..bbf1ca7c8 100644 --- a/help_function/src/onedpl_test_group_sort.cpp +++ b/help_function/src/onedpl_test_group_sort.cpp @@ -9,7 +9,6 @@ #include #include #include -#include #include template Date: Fri, 31 May 2024 00:31:16 -0700 Subject: [PATCH 4/9] add load store test --- help_function/src/util_group_store_test.cpp | 203 ++++++++++++++++++++ 1 file changed, 203 insertions(+) create mode 100644 help_function/src/util_group_store_test.cpp diff --git a/help_function/src/util_group_store_test.cpp b/help_function/src/util_group_store_test.cpp new file mode 100644 index 000000000..7e6380f71 --- /dev/null +++ b/help_function/src/util_group_store_test.cpp @@ -0,0 +1,203 @@ +// ====------ onedpl_test_group_load.cpp------------ *- C++ -* ----===// + +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +// +// ===----------------------------------------------------------------------===// +#include +#include +#include +#include +#include + +template +bool helper_validation_function(const int *ptr, const char *func_name) { + if constexpr (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { + for (int i = 0; i < 512; ++i) { + if (ptr[i] != i) { + std::cout << func_name << "_blocked" + << " failed\n"; + std::ostream_iterator Iter(std::cout, ", "); + std::copy(ptr, ptr + 512, Iter); + std::cout << std::endl; + return false; + } + } + std::cout << func_name << "_blocked" + << " pass\n"; + } else { + for (int i = 0; i < 512; ++i) { + if (ptr[i] != i) { + std::cout << func_name << "_striped" + << " failed\n"; + std::ostream_iterator Iter(std::cout, ", "); + std::copy(ptr, ptr + 512, Iter); + std::cout << std::endl; + return false; + } + } + std::cout << func_name << "_striped" + << " pass\n"; + } + return true; +} + +bool subgroup_helper_validation_function(const int *ptr, const uint32_t *sg_sz, + const char *func_name) { + for (int i = 0; i < 512; ++i) { + if (ptr[i] != i) { + std::cout << " failed\n"; + std::ostream_iterator Iter(std::cout, ", "); + std::copy(ptr, ptr + 512, Iter); + std::cout << std::endl; + return false; + } + } + + std::cout << func_name << " pass\n"; + return true; +} + +template bool test_group_load_store() { + // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT + // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED in its entirety as API + // functions + // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT + // dpct::group::store_algorithm::BLOCK_STORE_STRIPED in its entirety as API + // functions + sycl::queue q(dpct::get_default_queue()); + oneapi::dpl::counting_iterator count_it(0); + sycl::buffer buffer(count_it, count_it + 512); + + q.submit([&](sycl::handler &h) { + using group_load = + dpct::group::workgroup_load<4, T, int, const int *, sycl::nd_item<3>>; + using group_store = + dpct::group::workgroup_store<4, S, int, const int *, sycl::nd_item<3>>; + size_t temp_storage_size = group_load::get_local_memory_size(128); + sycl::local_accessor tacc(sycl::range<1>(temp_storage_size), h); + sycl::accessor data_accessor_read_write(buffer, h, sycl::read_write); + h.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), + [=](sycl::nd_item<3> item) { + int thread_data[4]; + auto *d_r_w = + data_accessor_read_write.get_multi_ptr() + .get(); + auto *tmp = tacc.get_multi_ptr().get(); + // Load thread_data of each work item to blocked arrangement + group_load(tmp).load(item, d_r_w, thread_data); + // Store thread_data of each work item from blocked arrangement + group_store(tmp).store(item, d_r_w, thread_data); + } + }); + }); + q.wait_and_throw(); + + sycl::host_accessor data_accessor(buffer, sycl::read_write); + const int *ptr = data_accessor.get_multi_ptr(); + return helper_validation_function(ptr, "test_group_load_store"); +} + +bool test_load_store_subgroup_striped_standalone() { + // Tests dpct::group::load_subgroup_striped as standalone method + sycl::queue q(dpct::get_default_queue()); + int data[512]; + for (int i = 0; i < 512; i++) + data[i] = i; + sycl::buffer buffer(data, 512); + sycl::buffer sg_sz_buf{sycl::range<1>(1)}; + + q.submit([&](sycl::handler &h) { + sycl::accessor dacc_read_write(buffer, h, sycl::read_write); + sycl::accessor sg_sz_dacc(sg_sz_buf, h, sycl::read_write); + h.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), + [=](sycl::nd_item<3> item) { + int thread_data[4]; + auto *d_r_w = + dacc_read_write.get_multi_ptr().get(); + auto *sg_sz_acc = + sg_sz_dacc.get_multi_ptr().get(); + size_t gid = item.get_global_linear_id(); + if (gid == 0) { + sg_sz_acc[0] = item.get_sub_group().get_local_linear_range(); + } + dpct::group::uninitialized_load_subgroup_striped<4, int>(item, d_r_w, + thread_data); + dpct::group::uninitialized_store_subgroup_striped<4, int>(item, d_r_w, + thread_data); + } + }); + }); + q.wait_and_throw(); + + sycl::host_accessor data_accessor(buffer, sycl::read_only); + const int *ptr = data_accessor.get_multi_ptr(); + sycl::host_accessor data_accessor_sg(sg_sz_buf, sycl::read_only); + const uint32_t *ptr_sg = + data_accessor_sg.get_multi_ptr(); + return subgroup_helper_validation_function( + ptr, ptr_sg, "test_subgroup_striped_standalone"); +} + +template bool test_group_load_store_standalone() { + // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & + // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED as standalone methods + // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT & + // dpct::group::store_algorithm::BLOCK_STORE_STRIPED as standalone methods + sycl::queue q(dpct::get_default_queue()); + int data[512]; + for (int i = 0; i < 512; i++) + data[i] = i; + sycl::buffer buffer(data, 512); + + q.submit([&](sycl::handler &h) { + sycl::accessor dacc_read_write(buffer, h, sycl::read_write); + h.parallel_for( + sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), + [=](sycl::nd_item<3> item) { + int thread_data[4]; + auto *d_r_w = + dacc_read_write.get_multi_ptr().get(); + // Load thread_data of each work item to blocked arrangement + if (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { + dpct::group::load_blocked<4, int>(item, d_r, thread_data); + } else { + dpct::group::load_striped<4, int>(item, d_r, thread_data); + } + // Store thread_data of each work item from blocked arrangement + if (S == dpct::group::store_algorithm::BLOCK_STORE_DIRECT) { + dpct::group::store_blocked<4, int>(item, d_r_w, thread_data); + } else { + dpct::group::store_striped<4, int>(item, d_r_w, thread_data); + } + } + }); + }); + q.wait_and_throw(); + + sycl::host_accessor data_accessor(buffer, sycl::read_only); + const int *ptr = data_accessor.get_multi_ptr(); + return helper_validation_function(ptr, "test_group_load_store"); +} + +int main() { + + return !( + // Calls test_group_load with blocked and striped strategies , should pass + // both results. + test_group_load_store() && + test_group_load_store() && + // Calls test_load_subgroup_striped_standalone and should pass + test_load_store_subgroup_striped_standalone() && + // Calls test_group_load_standalone with blocked and striped strategies as + // free functions, should pass both results. + test_group_load_store_standalone< + dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>() && + test_group_load_store_standalone< + dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>()); +} From 18861296b6f1370b4c242d9f3c3d76c949a1b823 Mon Sep 17 00:00:00 2001 From: abhilash1910 Date: Wed, 5 Jun 2024 01:33:44 -0700 Subject: [PATCH 5/9] refine test and change name --- ...e_test.cpp => util_group_load_store_test.cpp} | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) rename help_function/src/{util_group_store_test.cpp => util_group_load_store_test.cpp} (91%) diff --git a/help_function/src/util_group_store_test.cpp b/help_function/src/util_group_load_store_test.cpp similarity index 91% rename from help_function/src/util_group_store_test.cpp rename to help_function/src/util_group_load_store_test.cpp index 7e6380f71..4470dd587 100644 --- a/help_function/src/util_group_store_test.cpp +++ b/help_function/src/util_group_load_store_test.cpp @@ -76,7 +76,7 @@ template bool te using group_load = dpct::group::workgroup_load<4, T, int, const int *, sycl::nd_item<3>>; using group_store = - dpct::group::workgroup_store<4, S, int, const int *, sycl::nd_item<3>>; + dpct::group::workgroup_store<4, S, int, int *, sycl::nd_item<3>>; size_t temp_storage_size = group_load::get_local_memory_size(128); sycl::local_accessor tacc(sycl::range<1>(temp_storage_size), h); sycl::accessor data_accessor_read_write(buffer, h, sycl::read_write); @@ -92,8 +92,7 @@ template bool te group_load(tmp).load(item, d_r_w, thread_data); // Store thread_data of each work item from blocked arrangement group_store(tmp).store(item, d_r_w, thread_data); - } - }); + }); }); q.wait_and_throw(); @@ -128,9 +127,7 @@ bool test_load_store_subgroup_striped_standalone() { } dpct::group::uninitialized_load_subgroup_striped<4, int>(item, d_r_w, thread_data); - dpct::group::uninitialized_store_subgroup_striped<4, int>(item, d_r_w, - thread_data); - } + dpct::group::store_subgroup_striped<4, int>(item, d_r_w, thread_data); }); }); q.wait_and_throw(); @@ -165,9 +162,9 @@ template bool te dacc_read_write.get_multi_ptr().get(); // Load thread_data of each work item to blocked arrangement if (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { - dpct::group::load_blocked<4, int>(item, d_r, thread_data); + dpct::group::load_blocked<4, int>(item, d_r_w, thread_data); } else { - dpct::group::load_striped<4, int>(item, d_r, thread_data); + dpct::group::load_striped<4, int>(item, d_r_w, thread_data); } // Store thread_data of each work item from blocked arrangement if (S == dpct::group::store_algorithm::BLOCK_STORE_DIRECT) { @@ -175,7 +172,6 @@ template bool te } else { dpct::group::store_striped<4, int>(item, d_r_w, thread_data); } - } }); }); q.wait_and_throw(); @@ -191,7 +187,7 @@ int main() { // Calls test_group_load with blocked and striped strategies , should pass // both results. test_group_load_store() && - test_group_load_store() && + test_group_load_store() && // Calls test_load_subgroup_striped_standalone and should pass test_load_store_subgroup_striped_standalone() && // Calls test_group_load_standalone with blocked and striped strategies as From a462f54f1565d0bbbff40b1ed9b6c4e8de9d25be Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Fri, 14 Jun 2024 17:08:05 +0530 Subject: [PATCH 6/9] review commits 1 --- .../src/util_group_load_store_test.cpp | 48 ++++--------------- 1 file changed, 8 insertions(+), 40 deletions(-) diff --git a/help_function/src/util_group_load_store_test.cpp b/help_function/src/util_group_load_store_test.cpp index 4470dd587..3e7c3d7b2 100644 --- a/help_function/src/util_group_load_store_test.cpp +++ b/help_function/src/util_group_load_store_test.cpp @@ -1,4 +1,4 @@ -// ====------ onedpl_test_group_load.cpp------------ *- C++ -* ----===// +// ====------ util_group_load_store_test.cpp------------ *- C++ -* ----===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. @@ -13,51 +13,19 @@ #include #include -template bool helper_validation_function(const int *ptr, const char *func_name) { - if constexpr (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { - for (int i = 0; i < 512; ++i) { - if (ptr[i] != i) { - std::cout << func_name << "_blocked" - << " failed\n"; - std::ostream_iterator Iter(std::cout, ", "); - std::copy(ptr, ptr + 512, Iter); - std::cout << std::endl; - return false; - } - } - std::cout << func_name << "_blocked" - << " pass\n"; - } else { - for (int i = 0; i < 512; ++i) { - if (ptr[i] != i) { - std::cout << func_name << "_striped" - << " failed\n"; - std::ostream_iterator Iter(std::cout, ", "); - std::copy(ptr, ptr + 512, Iter); - std::cout << std::endl; - return false; - } - } - std::cout << func_name << "_striped" - << " pass\n"; - } - return true; -} - -bool subgroup_helper_validation_function(const int *ptr, const uint32_t *sg_sz, - const char *func_name) { for (int i = 0; i < 512; ++i) { if (ptr[i] != i) { - std::cout << " failed\n"; + std::cout << func_name << "_blocked" + << " failed\n"; std::ostream_iterator Iter(std::cout, ", "); std::copy(ptr, ptr + 512, Iter); std::cout << std::endl; return false; } } - std::cout << func_name << " pass\n"; + return true; } @@ -98,7 +66,7 @@ template bool te sycl::host_accessor data_accessor(buffer, sycl::read_write); const int *ptr = data_accessor.get_multi_ptr(); - return helper_validation_function(ptr, "test_group_load_store"); + return helper_validation_function(ptr, "test_group_load_store"); } bool test_load_store_subgroup_striped_standalone() { @@ -137,8 +105,8 @@ bool test_load_store_subgroup_striped_standalone() { sycl::host_accessor data_accessor_sg(sg_sz_buf, sycl::read_only); const uint32_t *ptr_sg = data_accessor_sg.get_multi_ptr(); - return subgroup_helper_validation_function( - ptr, ptr_sg, "test_subgroup_striped_standalone"); + return helper_validation_function( + ptr, "test_subgroup_striped_standalone"); } template bool test_group_load_store_standalone() { @@ -178,7 +146,7 @@ template bool te sycl::host_accessor data_accessor(buffer, sycl::read_only); const int *ptr = data_accessor.get_multi_ptr(); - return helper_validation_function(ptr, "test_group_load_store"); + return helper_validation_function(ptr, "test_group_load_store"); } int main() { From 703f9ed6a9033cbf5cc2df51c046472fac08cf24 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:29:54 +0530 Subject: [PATCH 7/9] add destructor --- help_function/src/util_group_load_store_test.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/help_function/src/util_group_load_store_test.cpp b/help_function/src/util_group_load_store_test.cpp index 3e7c3d7b2..acad49bbe 100644 --- a/help_function/src/util_group_load_store_test.cpp +++ b/help_function/src/util_group_load_store_test.cpp @@ -96,6 +96,10 @@ bool test_load_store_subgroup_striped_standalone() { dpct::group::uninitialized_load_subgroup_striped<4, int>(item, d_r_w, thread_data); dpct::group::store_subgroup_striped<4, int>(item, d_r_w, thread_data); + //call destructor of thread type + for (size_t i = 0; i < 4; ++i) { + thread_data[i].~int(); + } }); }); q.wait_and_throw(); From 099fe54c3d288e1a9cb24c33b8d55c1239d943f9 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Wed, 19 Jun 2024 09:47:21 +0530 Subject: [PATCH 8/9] add multiple workgroups --- .../src/util_group_load_store_test.cpp | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) diff --git a/help_function/src/util_group_load_store_test.cpp b/help_function/src/util_group_load_store_test.cpp index acad49bbe..73bd27418 100644 --- a/help_function/src/util_group_load_store_test.cpp +++ b/help_function/src/util_group_load_store_test.cpp @@ -69,6 +69,46 @@ template bool te return helper_validation_function(ptr, "test_group_load_store"); } +template bool test_group_load_store_multiple_wgs() { + // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT + // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED in its entirety as API + // functions + // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT + // dpct::group::store_algorithm::BLOCK_STORE_STRIPED in its entirety as API + // functions + sycl::queue q(dpct::get_default_queue()); + oneapi::dpl::counting_iterator count_it(0); + sycl::buffer buffer(count_it, count_it + 512); + + q.submit([&](sycl::handler &h) { + using group_load = + dpct::group::workgroup_load<4, T, int, const int *, sycl::nd_item<3>>; + using group_store = + dpct::group::workgroup_store<4, S, int, int *, sycl::nd_item<3>>; + size_t temp_storage_size = group_load::get_local_memory_size(128); + sycl::local_accessor tacc(sycl::range<1>(temp_storage_size), h); + sycl::accessor data_accessor_read_write(buffer, h, sycl::read_write); + h.parallel_for( + sycl::nd_range<3>(sycl::range<3>(2, 2, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item) { + int thread_data[4]; + auto *d_r_w = + data_accessor_read_write.get_multi_ptr() + .get(); + auto *tmp = tacc.get_multi_ptr().get(); + // Load thread_data of each work item to blocked arrangement + group_load(tmp).load(item, d_r_w, thread_data); + // Store thread_data of each work item from blocked arrangement + group_store(tmp).store(item, d_r_w, thread_data); + }); + }); + q.wait_and_throw(); + + sycl::host_accessor data_accessor(buffer, sycl::read_write); + const int *ptr = data_accessor.get_multi_ptr(); + return helper_validation_function(ptr, "test_group_load_store"); +} + bool test_load_store_subgroup_striped_standalone() { // Tests dpct::group::load_subgroup_striped as standalone method sycl::queue q(dpct::get_default_queue()); @@ -113,6 +153,50 @@ bool test_load_store_subgroup_striped_standalone() { ptr, "test_subgroup_striped_standalone"); } +bool test_load_store_subgroup_striped_standalone_multiple_wgs() { + // Tests dpct::group::load_subgroup_striped as standalone method + sycl::queue q(dpct::get_default_queue()); + int data[512]; + for (int i = 0; i < 512; i++) + data[i] = i; + sycl::buffer buffer(data, 512); + sycl::buffer sg_sz_buf{sycl::range<1>(1)}; + + q.submit([&](sycl::handler &h) { + sycl::accessor dacc_read_write(buffer, h, sycl::read_write); + sycl::accessor sg_sz_dacc(sg_sz_buf, h, sycl::read_write); + h.parallel_for( + sycl::nd_range<3>(sycl::range<3>(2, 2, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item) { + int thread_data[4]; + auto *d_r_w = + dacc_read_write.get_multi_ptr().get(); + auto *sg_sz_acc = + sg_sz_dacc.get_multi_ptr().get(); + size_t gid = item.get_global_linear_id(); + if (gid == 0) { + sg_sz_acc[0] = item.get_sub_group().get_local_linear_range(); + } + dpct::group::uninitialized_load_subgroup_striped<4, int>(item, d_r_w, + thread_data); + dpct::group::store_subgroup_striped<4, int>(item, d_r_w, thread_data); + //call destructor of thread type + for (size_t i = 0; i < 4; ++i) { + thread_data[i].~int(); + } + }); + }); + q.wait_and_throw(); + + sycl::host_accessor data_accessor(buffer, sycl::read_only); + const int *ptr = data_accessor.get_multi_ptr(); + sycl::host_accessor data_accessor_sg(sg_sz_buf, sycl::read_only); + const uint32_t *ptr_sg = + data_accessor_sg.get_multi_ptr(); + return helper_validation_function( + ptr, "test_subgroup_striped_standalone"); +} + template bool test_group_load_store_standalone() { // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED as standalone methods @@ -153,6 +237,47 @@ template bool te return helper_validation_function(ptr, "test_group_load_store"); } +template bool test_group_load_store_standalone_multi_wgs() { + // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & + // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED as standalone methods + // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT & + // dpct::group::store_algorithm::BLOCK_STORE_STRIPED as standalone methods + sycl::queue q(dpct::get_default_queue()); + int data[512]; + for (int i = 0; i < 512; i++) + data[i] = i; + sycl::buffer buffer(data, 512); + + q.submit([&](sycl::handler &h) { + sycl::accessor dacc_read_write(buffer, h, sycl::read_write); + h.parallel_for( + sycl::nd_range<3>(sycl::range<3>(2, 2, 64), sycl::range<3>(1, 1, 64)), + [=](sycl::nd_item<3> item) { + int thread_data[4]; + auto *d_r_w = + dacc_read_write.get_multi_ptr().get(); + // Load thread_data of each work item to blocked arrangement + if (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { + dpct::group::load_blocked<4, int>(item, d_r_w, thread_data); + } else { + dpct::group::load_striped<4, int>(item, d_r_w, thread_data); + } + // Store thread_data of each work item from blocked arrangement + if (S == dpct::group::store_algorithm::BLOCK_STORE_DIRECT) { + dpct::group::store_blocked<4, int>(item, d_r_w, thread_data); + } else { + dpct::group::store_striped<4, int>(item, d_r_w, thread_data); + } + }); + }); + q.wait_and_throw(); + + sycl::host_accessor data_accessor(buffer, sycl::read_only); + const int *ptr = data_accessor.get_multi_ptr(); + return helper_validation_function(ptr, "test_group_load_store"); +} + + int main() { return !( @@ -167,5 +292,16 @@ int main() { test_group_load_store_standalone< dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>() && test_group_load_store_standalone< + dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>()) && + + test_group_load_store_multiple_wgs() && + test_group_load_store_multiple_wgs() && + // Calls test_load_subgroup_striped_standalone and should pass + test_load_store_subgroup_striped_standalone_multiple_wgs() && + // Calls test_group_load_standalone with blocked and striped strategies as + // free functions, should pass both results. + test_group_load_store_standalone_multiple_wgs< + dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>() && + test_group_load_store_standalone_multiple_wgs< dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>()); } From 5c318fe2a0fd17bd9f8c983f100cf48e7a2572e2 Mon Sep 17 00:00:00 2001 From: Abhilash Majumder <30946547+abhilash1910@users.noreply.github.com> Date: Mon, 1 Jul 2024 09:27:03 +0530 Subject: [PATCH 9/9] use nd range arg, fix func name --- .../src/util_group_load_store_test.cpp | 165 +++--------------- 1 file changed, 24 insertions(+), 141 deletions(-) diff --git a/help_function/src/util_group_load_store_test.cpp b/help_function/src/util_group_load_store_test.cpp index 73bd27418..da995d771 100644 --- a/help_function/src/util_group_load_store_test.cpp +++ b/help_function/src/util_group_load_store_test.cpp @@ -29,7 +29,7 @@ bool helper_validation_function(const int *ptr, const char *func_name) { return true; } -template bool test_group_load_store() { +template bool test_group_load_store(sycl::nd_range<3> &range, char *func_name) { // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED in its entirety as API // functions @@ -49,7 +49,8 @@ template bool te sycl::local_accessor tacc(sycl::range<1>(temp_storage_size), h); sycl::accessor data_accessor_read_write(buffer, h, sycl::read_write); h.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), + range, + //sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), [=](sycl::nd_item<3> item) { int thread_data[4]; auto *d_r_w = @@ -66,47 +67,7 @@ template bool te sycl::host_accessor data_accessor(buffer, sycl::read_write); const int *ptr = data_accessor.get_multi_ptr(); - return helper_validation_function(ptr, "test_group_load_store"); -} - -template bool test_group_load_store_multiple_wgs() { - // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT - // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED in its entirety as API - // functions - // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT - // dpct::group::store_algorithm::BLOCK_STORE_STRIPED in its entirety as API - // functions - sycl::queue q(dpct::get_default_queue()); - oneapi::dpl::counting_iterator count_it(0); - sycl::buffer buffer(count_it, count_it + 512); - - q.submit([&](sycl::handler &h) { - using group_load = - dpct::group::workgroup_load<4, T, int, const int *, sycl::nd_item<3>>; - using group_store = - dpct::group::workgroup_store<4, S, int, int *, sycl::nd_item<3>>; - size_t temp_storage_size = group_load::get_local_memory_size(128); - sycl::local_accessor tacc(sycl::range<1>(temp_storage_size), h); - sycl::accessor data_accessor_read_write(buffer, h, sycl::read_write); - h.parallel_for( - sycl::nd_range<3>(sycl::range<3>(2, 2, 64), sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item) { - int thread_data[4]; - auto *d_r_w = - data_accessor_read_write.get_multi_ptr() - .get(); - auto *tmp = tacc.get_multi_ptr().get(); - // Load thread_data of each work item to blocked arrangement - group_load(tmp).load(item, d_r_w, thread_data); - // Store thread_data of each work item from blocked arrangement - group_store(tmp).store(item, d_r_w, thread_data); - }); - }); - q.wait_and_throw(); - - sycl::host_accessor data_accessor(buffer, sycl::read_write); - const int *ptr = data_accessor.get_multi_ptr(); - return helper_validation_function(ptr, "test_group_load_store"); + return helper_validation_function(ptr, func_name); } bool test_load_store_subgroup_striped_standalone() { @@ -153,91 +114,7 @@ bool test_load_store_subgroup_striped_standalone() { ptr, "test_subgroup_striped_standalone"); } -bool test_load_store_subgroup_striped_standalone_multiple_wgs() { - // Tests dpct::group::load_subgroup_striped as standalone method - sycl::queue q(dpct::get_default_queue()); - int data[512]; - for (int i = 0; i < 512; i++) - data[i] = i; - sycl::buffer buffer(data, 512); - sycl::buffer sg_sz_buf{sycl::range<1>(1)}; - - q.submit([&](sycl::handler &h) { - sycl::accessor dacc_read_write(buffer, h, sycl::read_write); - sycl::accessor sg_sz_dacc(sg_sz_buf, h, sycl::read_write); - h.parallel_for( - sycl::nd_range<3>(sycl::range<3>(2, 2, 64), sycl::range<3>(1, 1, 64)), - [=](sycl::nd_item<3> item) { - int thread_data[4]; - auto *d_r_w = - dacc_read_write.get_multi_ptr().get(); - auto *sg_sz_acc = - sg_sz_dacc.get_multi_ptr().get(); - size_t gid = item.get_global_linear_id(); - if (gid == 0) { - sg_sz_acc[0] = item.get_sub_group().get_local_linear_range(); - } - dpct::group::uninitialized_load_subgroup_striped<4, int>(item, d_r_w, - thread_data); - dpct::group::store_subgroup_striped<4, int>(item, d_r_w, thread_data); - //call destructor of thread type - for (size_t i = 0; i < 4; ++i) { - thread_data[i].~int(); - } - }); - }); - q.wait_and_throw(); - - sycl::host_accessor data_accessor(buffer, sycl::read_only); - const int *ptr = data_accessor.get_multi_ptr(); - sycl::host_accessor data_accessor_sg(sg_sz_buf, sycl::read_only); - const uint32_t *ptr_sg = - data_accessor_sg.get_multi_ptr(); - return helper_validation_function( - ptr, "test_subgroup_striped_standalone"); -} - -template bool test_group_load_store_standalone() { - // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & - // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED as standalone methods - // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT & - // dpct::group::store_algorithm::BLOCK_STORE_STRIPED as standalone methods - sycl::queue q(dpct::get_default_queue()); - int data[512]; - for (int i = 0; i < 512; i++) - data[i] = i; - sycl::buffer buffer(data, 512); - - q.submit([&](sycl::handler &h) { - sycl::accessor dacc_read_write(buffer, h, sycl::read_write); - h.parallel_for( - sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), - [=](sycl::nd_item<3> item) { - int thread_data[4]; - auto *d_r_w = - dacc_read_write.get_multi_ptr().get(); - // Load thread_data of each work item to blocked arrangement - if (T == dpct::group::load_algorithm::BLOCK_LOAD_DIRECT) { - dpct::group::load_blocked<4, int>(item, d_r_w, thread_data); - } else { - dpct::group::load_striped<4, int>(item, d_r_w, thread_data); - } - // Store thread_data of each work item from blocked arrangement - if (S == dpct::group::store_algorithm::BLOCK_STORE_DIRECT) { - dpct::group::store_blocked<4, int>(item, d_r_w, thread_data); - } else { - dpct::group::store_striped<4, int>(item, d_r_w, thread_data); - } - }); - }); - q.wait_and_throw(); - - sycl::host_accessor data_accessor(buffer, sycl::read_only); - const int *ptr = data_accessor.get_multi_ptr(); - return helper_validation_function(ptr, "test_group_load_store"); -} - -template bool test_group_load_store_standalone_multi_wgs() { +template bool test_group_load_store_standalone(sycl::nd_range<3> & range, char *func_name) { // Tests dpct::group::load_algorithm::BLOCK_LOAD_DIRECT & // dpct::group::load_algorithm::BLOCK_LOAD_STRIPED as standalone methods // Tests dpct::group::store_algorithm::BLOCK_STORE_DIRECT & @@ -251,7 +128,8 @@ template bool te q.submit([&](sycl::handler &h) { sycl::accessor dacc_read_write(buffer, h, sycl::read_write); h.parallel_for( - sycl::nd_range<3>(sycl::range<3>(2, 2, 64), sycl::range<3>(1, 1, 64)), + range, + //sycl::nd_range<3>(sycl::range<3>(1, 1, 128), sycl::range<3>(1, 1, 128)), [=](sycl::nd_item<3> item) { int thread_data[4]; auto *d_r_w = @@ -274,34 +152,39 @@ template bool te sycl::host_accessor data_accessor(buffer, sycl::read_only); const int *ptr = data_accessor.get_multi_ptr(); - return helper_validation_function(ptr, "test_group_load_store"); + return helper_validation_function(ptr, func_name); } - int main() { + sycl::range<3> global_range{1, 1, 128}; + sycl::range<3> local_range{1, 1, 128}; + sycl::nd_range<3> range{global_range, local_range}; + sycl::range<3> global_range_multi{2, 2, 64}; + sycl::range<3> local_range_multi{1, 1, 64}; + sycl::nd_range<3> range_multi{global_range_multi, local_range_multi}; return !( // Calls test_group_load with blocked and striped strategies , should pass // both results. - test_group_load_store() && - test_group_load_store() && + test_group_load_store(range, "test_group_load_store") && + test_group_load_store(range, "test_group_load_store") && // Calls test_load_subgroup_striped_standalone and should pass test_load_store_subgroup_striped_standalone() && // Calls test_group_load_standalone with blocked and striped strategies as // free functions, should pass both results. test_group_load_store_standalone< - dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>() && + dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>(range, "test_group_load_store_standalone") && test_group_load_store_standalone< - dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>()) && + dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>(range, "test_group_load_store_standalone") && - test_group_load_store_multiple_wgs() && - test_group_load_store_multiple_wgs() && + test_group_load_store(range_multi, "test_group_load_store_multiple_wgs") && + test_group_load_store_multiple_wgs(range_multi, "test_group_load_store_multiple_wgs") && // Calls test_load_subgroup_striped_standalone and should pass test_load_store_subgroup_striped_standalone_multiple_wgs() && // Calls test_group_load_standalone with blocked and striped strategies as // free functions, should pass both results. - test_group_load_store_standalone_multiple_wgs< - dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>() && - test_group_load_store_standalone_multiple_wgs< - dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>()); + test_group_load_store_standalone< + dpct::group::load_algorithm::BLOCK_LOAD_STRIPED, dpct::group::store_algorithm::BLOCK_STORE_STRIPED>(range_multi, "test_group_load_store_standalone_multiple_wgs") && + test_group_load_store_standalone< + dpct::group::load_algorithm::BLOCK_LOAD_DIRECT, dpct::group::store_algorithm::BLOCK_STORE_DIRECT>(range_multi, "test_group_load_store_standalone_multiple_wgs")); }