diff --git a/clang/include/clang/DPCT/DPCTOptions.inc b/clang/include/clang/DPCT/DPCTOptions.inc index 4c4ec568be94..1d22ec2cf641 100644 --- a/clang/include/clang/DPCT/DPCTOptions.inc +++ b/clang/include/clang/DPCT/DPCTOptions.inc @@ -814,6 +814,10 @@ DPCT_ENUM_OPTION( "Experimental extension that allows to use non standard SYCL " "builtin functions.\n", false), + DPCT_OPTION_ENUM_VALUE( + "prefetch", int(ExperimentalFeatures::Exp_Prefetch), + "Experimental extension that allows use of SYCL prefetch APIs.\n", + false), DPCT_OPTION_ENUM_VALUE( "all", int(ExperimentalFeatures::Exp_All), "Enable all experimental extensions listed in this option.\n", diff --git a/clang/lib/DPCT/AnalysisInfo.h b/clang/lib/DPCT/AnalysisInfo.h index cfeafbd55174..b37d591bae46 100644 --- a/clang/lib/DPCT/AnalysisInfo.h +++ b/clang/lib/DPCT/AnalysisInfo.h @@ -1347,6 +1347,9 @@ class DpctGlobalInfo { return getUsingExperimental< ExperimentalFeatures::Exp_NonStandardSYCLBuiltins>(); } + static bool useExtPrefetch() { + return getUsingExperimental(); + } static bool useNoQueueDevice() { return getHelperFuncPreference(HelperFuncPreference::NoQueueDevice); } diff --git a/clang/lib/DPCT/CommandOption/ValidateArguments.h b/clang/lib/DPCT/CommandOption/ValidateArguments.h index b1bbc7cec06c..0bb2a612a085 100644 --- a/clang/lib/DPCT/CommandOption/ValidateArguments.h +++ b/clang/lib/DPCT/CommandOption/ValidateArguments.h @@ -100,6 +100,7 @@ enum class ExperimentalFeatures : unsigned int { Exp_InOrderQueueEvents, Exp_ExperimentalFeaturesEnumSize, Exp_NonStandardSYCLBuiltins, + Exp_Prefetch, Exp_All }; enum class HelperFuncPreference : unsigned int { NoQueueDevice = 0 }; diff --git a/clang/lib/DPCT/DPCT.cpp b/clang/lib/DPCT/DPCT.cpp index 807a3966c333..738a5003fd3d 100644 --- a/clang/lib/DPCT/DPCT.cpp +++ b/clang/lib/DPCT/DPCT.cpp @@ -977,6 +977,8 @@ int runDPCT(int argc, const char **argv) { Experimentals.addValue(ExperimentalFeatures::Exp_BindlessImages); else if (Option.ends_with("graph")) Experimentals.addValue(ExperimentalFeatures::Exp_Graph); + else if (Option.ends_with("prefetch")) + Experimentals.addValue(ExperimentalFeatures::Exp_Prefetch); } else if (Option == "--no-dry-pattern") { NoDRYPattern.setValue(true); } diff --git a/clang/lib/DPCT/RulesAsm/AsmMigration.cpp b/clang/lib/DPCT/RulesAsm/AsmMigration.cpp index 91abe3cbfa89..ac0104aa1de9 100644 --- a/clang/lib/DPCT/RulesAsm/AsmMigration.cpp +++ b/clang/lib/DPCT/RulesAsm/AsmMigration.cpp @@ -588,8 +588,9 @@ bool SYCLGenBase::emitVariableDeclaration(const InlineAsmVarDecl *D) { } bool SYCLGenBase::emitAddressExpr(const InlineAsmAddressExpr *Dst) { - // Address expression only support ld/st instructions. - if (!CurrInst || !CurrInst->is(asmtok::op_st, asmtok::op_ld, asmtok::op_atom)) + // Address expression only support ld/st & atom instructions. + if (!CurrInst || !CurrInst->is(asmtok::op_st, asmtok::op_ld, asmtok::op_atom, + asmtok::op_prefetch)) return SYCLGenError(); std::string Type; if (tryEmitType(Type, CurrInst->getType(0))) @@ -617,7 +618,7 @@ bool SYCLGenBase::emitAddressExpr(const InlineAsmAddressExpr *Dst) { std::string Reg; if (tryEmitStmt(Reg, Dst->getSymbol())) return SYCLGenSuccess(); - if (CanSuppressCast(Dst->getSymbol())) + if (CurrInst->is(asmtok::op_prefetch) || CanSuppressCast(Dst->getSymbol())) OS() << llvm::formatv("{0}", Reg); else OS() << llvm::formatv("(({0} *)(uintptr_t){1})", Type, Reg); @@ -1281,6 +1282,42 @@ class SYCLGen : public SYCLGenBase { return SYCLGenSuccess(); } + bool handle_prefetch(const InlineAsmInstruction *Inst) override { + if (!DpctGlobalInfo::useExtPrefetch() || Inst->getNumInputOperands() != 1) + return SYCLGenError(); + + AsmStateSpace SS = Inst->getStateSpace(); + if (SS != AsmStateSpace::S_global && SS != AsmStateSpace::none) { + return SYCLGenError(); + } + + if (!(Inst->hasAttr(InstAttr::L1) || Inst->hasAttr(InstAttr::L2))) + return SYCLGenError(); + + std::string PrefetchHint; + if (Inst->hasAttr(InstAttr::L1)) + PrefetchHint = "L1"; + else if (Inst->hasAttr(InstAttr::L2)) + PrefetchHint = "L2"; + + llvm::SaveAndRestore Store(CurrInst); + CurrInst = Inst; + const auto *Src = + dyn_cast_or_null(Inst->getInputOperand(0)); + if (!Src) + return SYCLGenError(); + + OS() << MapNames::getExpNamespace() << "prefetch("; + if (emitStmt(Src)) + return SYCLGenError(); + OS() << ", "; + OS() << MapNames::getExpNamespace() << "properties{"; + OS() << MapNames::getExpNamespace() << "prefetch_hint_" << PrefetchHint; + OS() << "})"; + endstmt(); + return SYCLGenSuccess(); + } + StringRef GetWiderTypeAsString(const InlineAsmBuiltinType *Type) const { switch (Type->getKind()) { case InlineAsmBuiltinType::s16: diff --git a/clang/lib/DPCT/RulesAsm/Parser/AsmNodes.h b/clang/lib/DPCT/RulesAsm/Parser/AsmNodes.h index 2574e5f4cbde..bbd5510030d6 100644 --- a/clang/lib/DPCT/RulesAsm/Parser/AsmNodes.h +++ b/clang/lib/DPCT/RulesAsm/Parser/AsmNodes.h @@ -389,6 +389,9 @@ class InlineAsmInstruction : public InlineAsmStmt { static bool classof(const InlineAsmStmt *S) { return InstructionClass <= S->getStmtClass(); } + AsmStateSpace getStateSpace() const { + return StateSpace.value_or(AsmStateSpace::none); + } }; /// This represents a device conditional instruction, e.g. instruction @%p diff --git a/clang/lib/DPCT/RulesAsm/Parser/AsmParser.cpp b/clang/lib/DPCT/RulesAsm/Parser/AsmParser.cpp index 9f5d3ab24d26..80bbad38f8ff 100644 --- a/clang/lib/DPCT/RulesAsm/Parser/AsmParser.cpp +++ b/clang/lib/DPCT/RulesAsm/Parser/AsmParser.cpp @@ -377,6 +377,12 @@ InlineAsmStmtResult InlineAsmParser::ParseInstruction() { Ops.push_back(Out.get()); Out = nullptr; } + // prefetch{.state}.{level} [%0] has only one input operand and no type. + if (Opcode->getTokenID() == asmtok::op_prefetch) { + Ops.push_back(Out.get()); + Out = nullptr; + Types.push_back(Context.getBuiltinType(InlineAsmBuiltinType::byte)); + } return ::new (Context) InlineAsmInstruction(Opcode, StateSpace, Attrs, Types, Out.get(), Pred.get(), Ops); diff --git a/clang/lib/DPCT/RulesAsm/Parser/AsmToken.h b/clang/lib/DPCT/RulesAsm/Parser/AsmToken.h index fd022e5ede14..38b6dcb0acd3 100644 --- a/clang/lib/DPCT/RulesAsm/Parser/AsmToken.h +++ b/clang/lib/DPCT/RulesAsm/Parser/AsmToken.h @@ -29,7 +29,7 @@ class InlineAsmToken { enum TokenFlags { NeedsCleaning = 0x01, // This identifier contains special characters. Placeholder = 0x02, // This identifier is an inline asm placeholder. - StartOfDot = 0x04, // This identifier is an dot identifier. + StartOfDot = 0x04, // This identifier is a dot identifier. }; asmtok::TokenKind getKind() const { return Kind; } @@ -112,7 +112,7 @@ class InlineAsmToken { /// Return true if this token is an inline asm placeholder. bool isPlaceholder() const { return getFlag(Placeholder); } - /// Return true if this token is an dot identifier. + /// Return true if this token is a dot identifier. bool startOfDot() const { return getFlag(StartOfDot); } }; diff --git a/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def b/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def index a53d3f4cd7b4..3a36108d81aa 100644 --- a/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def +++ b/clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def @@ -404,6 +404,9 @@ MODIFIER(idx, ".idx") MODIFIER(bfly, ".bfly") MODIFIER(sc, ".sc") MODIFIER(gl, ".gl") +MODIFIER(L1, ".L1") +MODIFIER(L2, ".L2") + #undef LINKAGE #undef TARGET diff --git a/clang/lib/DPCT/SrcAPI/APINames_ASM.inc b/clang/lib/DPCT/SrcAPI/APINames_ASM.inc index 3157d1bc3577..64b28b41015b 100644 --- a/clang/lib/DPCT/SrcAPI/APINames_ASM.inc +++ b/clang/lib/DPCT/SrcAPI/APINames_ASM.inc @@ -100,8 +100,7 @@ ENTRY("not", "not", true, NO_FLAG, P1, "Successful") ENTRY("or", "or", true, NO_FLAG, P1, "Successful") ENTRY("pmevent", "pmevent", false, NO_FLAG, P1, "Comment") ENTRY("popc", "popc", true, NO_FLAG, P1, "Successful") -ENTRY("prefetch", "prefetch", false, NO_FLAG, P1, "Comment") -ENTRY("prefetchu", "prefetchu", false, NO_FLAG, P1, "Comment") +ENTRY("prefetch", "prefetch", true, NO_FLAG, P1, "Partial") ENTRY("prmt", "prmt", false, NO_FLAG, P1, "Comment") ENTRY("rcp", "rcp", true, NO_FLAG, P1, "Successful") ENTRY("red", "red", false, NO_FLAG, P1, "Comment") diff --git a/clang/test/dpct/asm/prefetch.cu b/clang/test/dpct/asm/prefetch.cu new file mode 100644 index 000000000000..833dc295e0fe --- /dev/null +++ b/clang/test/dpct/asm/prefetch.cu @@ -0,0 +1,45 @@ +// UNSUPPORTED: cuda-8.0 +// UNSUPPORTED: v8.0 +// RUN: dpct --format-range=none -out-root %T/prefetch %s --use-experimental-features=prefetch --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: FileCheck %s --match-full-lines --input-file %T/prefetch/prefetch.dp.cpp +// RUN: %if BUILD_LIT %{icpx -c -DBUILD_TEST -fsycl %T/prefetch/prefetch.dp.cpp -o %T/prefetch/prefetch.dp.o %} + +// clang-format off +#include + +/* +Supported syntax: +----------------- +prefetch.level [a]; // prefetch to generic addr space cache +prefetch.global.level [a]; // prefetch to global cache + +Unsupported syntax: +------------------- +prefetch.local.level +prefetch.global.level::eviction_priority [a]; // prefetch to data cache +prefetch{.tensormap_space}.tensormap [a]; // prefetch the tensormap + +.level = { .L1, .L2 }; +.level::eviction_priority = { .L2::evict_last, .L2::evict_normal }; +.tensormap_space = { .const, .param }; +*/ + +__global__ void prefetch(int *arr) { + /* prefetch of no address space */ + // CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1}); + asm volatile ("prefetch.L1 [%0];" : : "l"(arr)); + // CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L2}); + asm volatile ("prefetch.L2 [%0];" : : "l"(arr)); + + /* prefetch of global address space */ + // CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1}); + asm volatile ("prefetch.global.L1 [%0];" : : "l"(arr)); + // CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L2}); + asm volatile ("prefetch.global.L2 [%0];" : : "l"(arr)); + + /* using Register-Immediate (Displacement) address mode */ + // CHECK: sycl::ext::oneapi::experimental::prefetch(((uint8_t *)((uintptr_t)arr + 2)), sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1}); + asm volatile("prefetch.global.L1 [%0 + 2];" :: "l"(arr)); +} + +// clang-format on diff --git a/clang/test/dpct/asm/prefetch_default.cu b/clang/test/dpct/asm/prefetch_default.cu new file mode 100644 index 000000000000..cd9ec0b70016 --- /dev/null +++ b/clang/test/dpct/asm/prefetch_default.cu @@ -0,0 +1,40 @@ +// UNSUPPORTED: cuda-8.0 +// UNSUPPORTED: v8.0 +// RUN: dpct --format-range=none -out-root %T/prefetch_default %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only +// RUN: FileCheck %s --match-full-lines --input-file %T/prefetch_default/prefetch_default.dp.cpp +// RUN: %if BUILD_LIT %{icpx -c -DBUILD_TEST -fsycl %T/prefetch_default/prefetch_default.dp.cpp -o %T/prefetch_default/prefetch_default.dp.o %} + +// clang-format off +#include + +__global__ void prefetch(int *arr) { +#ifndef BUILD_TEST + /* prefetch of no address space */ + // CHECK: /* + // CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported. + // CHECK-NEXT: */ + asm volatile ("prefetch.L1 [%0];" : : "l"(arr)); + // CHECK: /* + // CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported. + // CHECK-NEXT: */ + asm volatile ("prefetch.L2 [%0];" : : "l"(arr)); + + /* prefetch of global address space */ + // CHECK: /* + // CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported. + // CHECK-NEXT: */ + asm volatile ("prefetch.global.L1 [%0];" : : "l"(arr)); + // CHECK: /* + // CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported. + // CHECK-NEXT: */ + asm volatile ("prefetch.global.L2 [%0];" : : "l"(arr)); + + /* using Register-Immediate (Displacement) address mode */ + // CHECK: /* + // CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported. + // CHECK-NEXT: */ + asm volatile("prefetch.global.L1 [%0 + 2];" :: "l"(arr)); +#endif // BUILD_TEST +} + +// clang-format on