diff --git a/clang/lib/DPCT/APINames_CUB.inc b/clang/lib/DPCT/APINames_CUB.inc index 8ac63135c4d1..4da1ad7e59f5 100644 --- a/clang/lib/DPCT/APINames_CUB.inc +++ b/clang/lib/DPCT/APINames_CUB.inc @@ -230,10 +230,10 @@ ENTRY(cub::ThreadLoad, cub::ThreadLoad, true, NO_FLAG, P4, "Successful") ENTRY(cub::ThreadStore, cub::ThreadStore, true, NO_FLAG, P4, "Successful") ENTRY(cub::LoadDirectBlocked, cub::LoadDirectBlocked, true, NO_FLAG, P4, "Successful") ENTRY(cub::LoadDirectBlockedVectorized, cub::LoadDirectBlockedVectorized, false, NO_FLAG, P4, "Comment") -ENTRY(cub::StoreDirectBlocked, cub::StoreDirectBlocked, false, NO_FLAG, P4, "Comment") +ENTRY(cub::StoreDirectBlocked, cub::StoreDirectBlocked, true, NO_FLAG, P4, "Successful") ENTRY(cub::StoreDirectBlockedVectorized, cub::StoreDirectBlockedVectorized, false, NO_FLAG, P4, "Comment") ENTRY(cub::LoadDirectStriped, cub::LoadDirectStriped, true, NO_FLAG, P4, "Successful") -ENTRY(cub::StoreDirectStriped, cub::StoreDirectStriped, false, NO_FLAG, P4, "Comment") +ENTRY(cub::StoreDirectStriped, cub::StoreDirectStriped, true, NO_FLAG, P4, "Successful") ENTRY(cub::LoadDirectWarpStriped, cub::LoadDirectWarpStriped, false, NO_FLAG, P4, "Comment") ENTRY(cub::StoreDirectWarpStriped, cub::StoreDirectWarpStriped, false, NO_FLAG, P4, "Comment") diff --git a/clang/lib/DPCT/CUBAPIMigration.cpp b/clang/lib/DPCT/CUBAPIMigration.cpp index 0ec462bb0d04..e1fea4774c79 100644 --- a/clang/lib/DPCT/CUBAPIMigration.cpp +++ b/clang/lib/DPCT/CUBAPIMigration.cpp @@ -298,7 +298,9 @@ void CubIntrinsicRule::registerMatcher(ast_matchers::MatchFinder &MF) { "DeviceCountUncached", "DeviceCountCachedValue", "PtxVersion", "PtxVersionUncached", "SmVersion", "SmVersionUncached", "RowMajorTid", + "StoreDirectBlocked", "StoreDirectStriped"), "LoadDirectBlocked", "LoadDirectStriped"), + hasAncestor(namespaceDecl(hasName("cub"))))))) .bind("IntrinsicCall"), this); diff --git a/clang/lib/DPCT/Rewriters/CUB/RewriterUtilityFunctions.cpp b/clang/lib/DPCT/Rewriters/CUB/RewriterUtilityFunctions.cpp index a2a8d68841e4..1a5a783806e2 100644 --- a/clang/lib/DPCT/Rewriters/CUB/RewriterUtilityFunctions.cpp +++ b/clang/lib/DPCT/Rewriters/CUB/RewriterUtilityFunctions.cpp @@ -9,6 +9,7 @@ #include "AnalysisInfo.h" #include "CallExprRewriterCUB.h" #include "CallExprRewriterCommon.h" +#include "InclusionHeaders.h" using namespace clang::dpct; @@ -154,6 +155,20 @@ RewriterMap dpct::createUtilityFunctionsRewriterMap() { CALL(MapNames::getDpctNamespace() + "get_minor_version", makeDeviceStr()), LITERAL("10")))) + // cub::StoreDirectBlocked + HEADER_INSERT_FACTORY( + HeaderType::HT_DPCT_GROUP_Utils, + CALL_FACTORY_ENTRY( + "cub::StoreDirectBlocked", + CALL(MapNames::getDpctNamespace() + "group::store_blocked", NDITEM, + ARG(1), ARG(2)))) + // cub::StoreDirectStriped + HEADER_INSERT_FACTORY( + HeaderType::HT_DPCT_GROUP_Utils, + CALL_FACTORY_ENTRY( + "cub::StoreDirectStriped", + CALL(MapNames::getDpctNamespace() + "group::store_striped", NDITEM, + ARG(1), ARG(2)))) // cub::RowMajorTid MEMBER_CALL_FACTORY_ENTRY("cub::RowMajorTid", NDITEM, /*IsArrow=*/false, "get_local_linear_id") diff --git a/clang/test/dpct/cub/intrinsic/store.cu b/clang/test/dpct/cub/intrinsic/store.cu new file mode 100644 index 000000000000..0f89fdcc22fc --- /dev/null +++ b/clang/test/dpct/cub/intrinsic/store.cu @@ -0,0 +1,23 @@ +// UNSUPPORTED: cuda-8.0, cuda-9.0, cuda-9.1, cuda-9.2, cuda-10.0, cuda-10.1, cuda-10.2 +// UNSUPPORTED: v8.0, v9.0, v9.1, v9.2, v10.0, v10.1, v10.2 +// RUN: dpct -in-root %S -out-root %T/intrinsic/store %S/store.cu --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: FileCheck --input-file %T/intrinsic/store/store.dp.cpp --match-full-lines %s +// RUN: %if build_lit %{icpx -c -fsycl %T/intrinsic/store/store.dp.cpp -o %T/intrinsic/store/store.dp.o %} + +// CHECK:#include +// CHECK:#include +// CHECK:#include +#include + +__global__ void TestStoreStriped(int *d_data) { + int thread_data[4]; + // CHECK: dpct::group::load_striped(item_ct1, d_data, thread_data); + cub::StoreDirectStriped<128>(threadIdx.x, d_data, thread_data); +} + + +__global__ void BlockedToStripedKernel(int *d_data) { + int thread_data[4]; + // CHECK: dpct::group::load_blocked(item_ct1, d_data, thread_data); + cub::StoreDirectBlocked(threadIdx.x, d_data, thread_data); +}