Skip to content

Commit

Permalink
All scan types working!
Browse files Browse the repository at this point in the history
  • Loading branch information
long58 committed Dec 18, 2024
1 parent 5fe4a54 commit 3cb82e6
Showing 1 changed file with 76 additions and 51 deletions.
127 changes: 76 additions & 51 deletions include/RAJA/policy/sycl/scan.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -61,8 +61,8 @@ inclusive_inplace(
// Calculate the size of the input range
size_t size = std::distance(begin, end);

::sycl::buffer<valueT, 1> inBuff(begin, end);
::sycl::buffer<valueT, 1> outBuff(begin, ::sycl::range<1>(size));
::sycl::buffer<valueT, 1> buffer(begin, end);
::sycl::buffer<valueT, 1> tempAccBuff(begin, ::sycl::range<1>(size));

int iterations = 0;
for (size_t ii = size >> 1; ii > 0; ii >>= 1) {
Expand All @@ -72,34 +72,34 @@ inclusive_inplace(
iterations++;
}

auto inPtr = &inBuff;
auto outPtr = &outBuff;
auto buffPtr = &buffer;
auto tempPtr = &tempAccBuff;

if (iterations % 2 == 0) {
outPtr = &inBuff;
inPtr = &outBuff;
tempPtr = &buffer;
buffPtr = &tempAccBuff;
}

int ii = 1;
do {
// Submit the kernel to the SYCL queue
sycl_queue->submit([&](::sycl::handler& cgh) {
auto inAccessor = inPtr->get_access(cgh);
auto outAccessor = outPtr->get_access(cgh);
auto buffAccessor = buffPtr->get_access(cgh);
auto tempAccessor = tempPtr->get_access(cgh);
// outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh);

cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) {
size_t td = 1 << (ii - 1);
size_t thisID = idx.get_id(0);
if (thisID < size and thisID >= td) {
outAccessor[thisID] = binary_op(inAccessor[thisID - td], inAccessor[thisID]);
tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]);
} else {
outAccessor[thisID] = inAccessor[thisID];
tempAccessor[thisID] = buffAccessor[thisID];
}
});
});

std::swap(inPtr, outPtr);
std::swap(buffPtr, tempPtr);
ii++;
} while ( ii <= iterations);

Expand All @@ -122,52 +122,77 @@ exclusive_inplace(
Function binary_op,
TT initVal)
{
// ::sycl::queue* sycl_queue = sycl_res.get_queue();

// using valueT = typename std::remove_reference<decltype(*begin)>::type;
::sycl::queue* sycl_queue = sycl_res.get_queue();
using valueT = typename std::remove_reference<decltype(*begin)>::type;

// // Calculate the size of the input range
// size_t size = std::distance(begin, end);
// Calculate the size of the input range
size_t size = std::distance(begin, end);

// ::sycl::buffer<valueT, 1> outBuff(begin, ::sycl::range<1>(size));
::sycl::buffer<valueT, 1> buffer(begin, end);
::sycl::buffer<valueT, 1> tempAccBuff(begin, ::sycl::range<1>(size));

// int iterations = 0;
// for (size_t ii = size >> 1; ii > 0; ii >>= 1) {
// iterations++;
// }
// if ((size & (size - 1)) != 0) {
// iterations++;
// }
int iterations = 0;
for (size_t ii = size >> 1; ii > 0; ii >>= 1) {
iterations++;
}
if ((size & (size - 1)) != 0) {
iterations++;
}

// auto inPtr = begin;
// auto outPtr = &outBuff;
auto buffPtr = &buffer;
auto tempPtr = &tempAccBuff;

// if (iterations % 2 != 0) {
// outPtr = begin;
// inPtr = &outBuff;
// }

// // Submit the kernel to the SYCL queue
// sycl_queue->submit([&](::sycl::handler& cgh) {
// // outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh);

// // ::sycl::accessor<valueT, 1, ::sycl::access::mode::read_write
// cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::nd_item<1> idx) {
// size_t thisID = idx.get_global_id(0);
// if (thisID > 0 ) {
// outPtr[thisID] = inPtr[thisID - 1];
// }
// else {
// outPtr[thisID] = initVal;
// }
// // if (idx[0] != 0) {
// // *(begin + idx[0]) = binary_op(*(begin + idx[0] - 1), *(begin + idx[0]));
// // }
// });
// });

// sycl_res.wait();
if (iterations % 2 != 0) {
tempPtr = &buffer;
buffPtr = &tempAccBuff;
}

// Submit the kernel to the SYCL queue
sycl_queue->submit([&](::sycl::handler& cgh) {
auto inAccessor = buffPtr->get_access(cgh);
auto outAccessor = tempPtr->get_access(cgh);
// outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh);

cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) {
size_t thisID = idx.get_id(0);
// size_t td = 1 << (ii - 1);
if (thisID > 0) {
outAccessor[thisID] = inAccessor[thisID - 1];
} else {
outAccessor[thisID] = initVal;
}
});
});

std::swap(buffPtr, tempPtr);

int ii = 1;
do {
// Submit the kernel to the SYCL queue
sycl_queue->submit([&](::sycl::handler& cgh) {
auto buffAccessor = buffPtr->get_access(cgh);
auto tempAccessor = tempPtr->get_access(cgh);
// outBuffAccessor = outBuff->get_access<::sycl::access::mode::read(cgh);

cgh.parallel_for(::sycl::range<1>(size), [=](::sycl::item<1> idx) {
size_t td = 1 << (ii - 1);
size_t thisID = idx.get_id(0);
if (thisID < size and thisID >= td) {
tempAccessor[thisID] = binary_op(buffAccessor[thisID - td], buffAccessor[thisID]);
} else {
tempAccessor[thisID] = buffAccessor[thisID];
}
});
});

std::swap(buffPtr, tempPtr);
ii++;
} while ( ii <= iterations);

sycl_res.wait();
return camp::resources::EventProxy<camp::resources::Sycl>(sycl_res);
return inclusive_inplace(sycl_res, exec, begin, end, binary_op);
}

template <size_t BLOCK_SIZE,
Expand Down

0 comments on commit 3cb82e6

Please sign in to comment.