From 3cb82e6a202179a1ccb3cd6479b775f23d590356 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Zo=C3=AB=20Long?= Date: Wed, 18 Dec 2024 15:55:59 -0800 Subject: [PATCH] All scan types working! --- include/RAJA/policy/sycl/scan.hpp | 127 ++++++++++++++++++------------ 1 file changed, 76 insertions(+), 51 deletions(-) diff --git a/include/RAJA/policy/sycl/scan.hpp b/include/RAJA/policy/sycl/scan.hpp index a4d6cb75e5..d194365350 100644 --- a/include/RAJA/policy/sycl/scan.hpp +++ b/include/RAJA/policy/sycl/scan.hpp @@ -61,8 +61,8 @@ inclusive_inplace( // Calculate the size of the input range size_t size = std::distance(begin, end); - ::sycl::buffer inBuff(begin, end); - ::sycl::buffer outBuff(begin, ::sycl::range<1>(size)); + ::sycl::buffer buffer(begin, end); + ::sycl::buffer tempAccBuff(begin, ::sycl::range<1>(size)); int iterations = 0; for (size_t ii = size >> 1; ii > 0; ii >>= 1) { @@ -72,34 +72,34 @@ inclusive_inplace( iterations++; } - auto inPtr = &inBuff; - auto outPtr = &outBuff; + auto buffPtr = &buffer; + auto tempPtr = &tempAccBuff; if (iterations % 2 == 0) { - outPtr = &inBuff; - inPtr = &outBuff; + tempPtr = &buffer; + buffPtr = &tempAccBuff; } int ii = 1; do { // Submit the kernel to the SYCL queue sycl_queue->submit([&](::sycl::handler& cgh) { - auto inAccessor = inPtr->get_access(cgh); - auto outAccessor = outPtr->get_access(cgh); + auto buffAccessor = buffPtr->get_access(cgh); + auto tempAccessor = tempPtr->get_access(cgh); // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { size_t td = 1 << (ii - 1); size_t thisID = idx.get_id(0); if (thisID < size and thisID >= td) { - outAccessor[thisID] = binary_op(inAccessor[thisID - td], inAccessor[thisID]); + tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]); } else { - outAccessor[thisID] = inAccessor[thisID]; + tempAccessor[thisID] = buffAccessor[thisID]; } }); }); - std::swap(inPtr, outPtr); + std::swap(buffPtr, tempPtr); ii++; } while ( ii <= iterations); @@ -122,52 +122,77 @@ exclusive_inplace( Function binary_op, TT initVal) { -// ::sycl::queue* sycl_queue = sycl_res.get_queue(); - -// using valueT = typename std::remove_reference::type; + ::sycl::queue* sycl_queue = sycl_res.get_queue(); + + using valueT = typename std::remove_reference::type; -// // Calculate the size of the input range -// size_t size = std::distance(begin, end); + // Calculate the size of the input range + size_t size = std::distance(begin, end); -// ::sycl::buffer outBuff(begin, ::sycl::range<1>(size)); + ::sycl::buffer buffer(begin, end); + ::sycl::buffer tempAccBuff(begin, ::sycl::range<1>(size)); -// int iterations = 0; -// for (size_t ii = size >> 1; ii > 0; ii >>= 1) { -// iterations++; -// } -// if ((size & (size - 1)) != 0) { -// iterations++; -// } + int iterations = 0; + for (size_t ii = size >> 1; ii > 0; ii >>= 1) { + iterations++; + } + if ((size & (size - 1)) != 0) { + iterations++; + } -// auto inPtr = begin; -// auto outPtr = &outBuff; + auto buffPtr = &buffer; + auto tempPtr = &tempAccBuff; -// if (iterations % 2 != 0) { -// outPtr = begin; -// inPtr = &outBuff; -// } - -// // Submit the kernel to the SYCL queue -// sycl_queue->submit([&](::sycl::handler& cgh) { -// // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); - -// // ::sycl::accessor(size), [=](::sycl::nd_item<1> idx) { -// size_t thisID = idx.get_global_id(0); -// if (thisID > 0 ) { -// outPtr[thisID] = inPtr[thisID - 1]; -// } -// else { -// outPtr[thisID] = initVal; -// } -// // if (idx[0] != 0) { -// // *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0])); -// // } -// }); -// }); - -// sycl_res.wait(); + if (iterations % 2 != 0) { + tempPtr = &buffer; + buffPtr = &tempAccBuff; + } + + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto inAccessor = buffPtr->get_access(cgh); + auto outAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t thisID = idx.get_id(0); + // size_t td = 1 << (ii - 1); + if (thisID > 0) { + outAccessor[thisID] = inAccessor[thisID - 1]; + } else { + outAccessor[thisID] = initVal; + } + }); + }); + + std::swap(buffPtr, tempPtr); + + int ii = 1; + do { + // Submit the kernel to the SYCL queue + sycl_queue->submit([&](::sycl::handler& cgh) { + auto buffAccessor = buffPtr->get_access(cgh); + auto tempAccessor = tempPtr->get_access(cgh); + // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh); + + cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) { + size_t td = 1 << (ii - 1); + size_t thisID = idx.get_id(0); + if (thisID < size and thisID >= td) { + tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]); + } else { + tempAccessor[thisID] = buffAccessor[thisID]; + } + }); + }); + + std::swap(buffPtr, tempPtr); + ii++; + } while ( ii <= iterations); + + sycl_res.wait(); return camp::resources::EventProxy(sycl_res); + return inclusive_inplace(sycl_res, exec, begin, end, binary_op); } template