Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCLomatic][PTX] Added support for prefetch ASM migration #2579

Merged
merged 3 commits into from
Jan 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions clang/include/clang/DPCT/DPCTOptions.inc
Original file line number Diff line number Diff line change
Expand Up @@ -814,6 +814,10 @@ DPCT_ENUM_OPTION(
"Experimental extension that allows to use non standard SYCL "
"builtin functions.\n",
false),
DPCT_OPTION_ENUM_VALUE(
"prefetch", int(ExperimentalFeatures::Exp_Prefetch),
"Experimental extension that allows use of SYCL prefetch APIs.\n",
false),
DPCT_OPTION_ENUM_VALUE(
"all", int(ExperimentalFeatures::Exp_All),
"Enable all experimental extensions listed in this option.\n",
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/DPCT/AnalysisInfo.h
Original file line number Diff line number Diff line change
Expand Up @@ -1347,6 +1347,9 @@ class DpctGlobalInfo {
return getUsingExperimental<
ExperimentalFeatures::Exp_NonStandardSYCLBuiltins>();
}
static bool useExtPrefetch() {
return getUsingExperimental<ExperimentalFeatures::Exp_Prefetch>();
}
static bool useNoQueueDevice() {
return getHelperFuncPreference(HelperFuncPreference::NoQueueDevice);
}
Expand Down
1 change: 1 addition & 0 deletions clang/lib/DPCT/CommandOption/ValidateArguments.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,6 +100,7 @@ enum class ExperimentalFeatures : unsigned int {
Exp_InOrderQueueEvents,
Exp_ExperimentalFeaturesEnumSize,
Exp_NonStandardSYCLBuiltins,
Exp_Prefetch,
Exp_All
};
enum class HelperFuncPreference : unsigned int { NoQueueDevice = 0 };
Expand Down
2 changes: 2 additions & 0 deletions clang/lib/DPCT/DPCT.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -977,6 +977,8 @@ int runDPCT(int argc, const char **argv) {
Experimentals.addValue(ExperimentalFeatures::Exp_BindlessImages);
else if (Option.ends_with("graph"))
Experimentals.addValue(ExperimentalFeatures::Exp_Graph);
else if (Option.ends_with("prefetch"))
Experimentals.addValue(ExperimentalFeatures::Exp_Prefetch);
} else if (Option == "--no-dry-pattern") {
NoDRYPattern.setValue(true);
}
Expand Down
43 changes: 40 additions & 3 deletions clang/lib/DPCT/RulesAsm/AsmMigration.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,8 +588,9 @@ bool SYCLGenBase::emitVariableDeclaration(const InlineAsmVarDecl *D) {
}

bool SYCLGenBase::emitAddressExpr(const InlineAsmAddressExpr *Dst) {
// Address expression only support ld/st instructions.
if (!CurrInst || !CurrInst->is(asmtok::op_st, asmtok::op_ld, asmtok::op_atom))
// Address expression only support ld/st & atom instructions.
if (!CurrInst || !CurrInst->is(asmtok::op_st, asmtok::op_ld, asmtok::op_atom,
asmtok::op_prefetch))
return SYCLGenError();
std::string Type;
if (tryEmitType(Type, CurrInst->getType(0)))
Expand Down Expand Up @@ -617,7 +618,7 @@ bool SYCLGenBase::emitAddressExpr(const InlineAsmAddressExpr *Dst) {
std::string Reg;
if (tryEmitStmt(Reg, Dst->getSymbol()))
return SYCLGenSuccess();
if (CanSuppressCast(Dst->getSymbol()))
if (CurrInst->is(asmtok::op_prefetch) || CanSuppressCast(Dst->getSymbol()))
OS() << llvm::formatv("{0}", Reg);
else
OS() << llvm::formatv("(({0} *)(uintptr_t){1})", Type, Reg);
Expand Down Expand Up @@ -1281,6 +1282,42 @@ class SYCLGen : public SYCLGenBase {
return SYCLGenSuccess();
}

bool handle_prefetch(const InlineAsmInstruction *Inst) override {
if (!DpctGlobalInfo::useExtPrefetch() || Inst->getNumInputOperands() != 1)
return SYCLGenError();

AsmStateSpace SS = Inst->getStateSpace();
if (SS != AsmStateSpace::S_global && SS != AsmStateSpace::none) {
return SYCLGenError();
}

if (!(Inst->hasAttr(InstAttr::L1) || Inst->hasAttr(InstAttr::L2)))
return SYCLGenError();

std::string PrefetchHint;
if (Inst->hasAttr(InstAttr::L1))
PrefetchHint = "L1";
else if (Inst->hasAttr(InstAttr::L2))
PrefetchHint = "L2";

llvm::SaveAndRestore<const InlineAsmInstruction *> Store(CurrInst);
CurrInst = Inst;
const auto *Src =
dyn_cast_or_null<InlineAsmAddressExpr>(Inst->getInputOperand(0));
if (!Src)
return SYCLGenError();

OS() << MapNames::getExpNamespace() << "prefetch(";
if (emitStmt(Src))
return SYCLGenError();
OS() << ", ";
OS() << MapNames::getExpNamespace() << "properties{";
OS() << MapNames::getExpNamespace() << "prefetch_hint_" << PrefetchHint;
OS() << "})";
endstmt();
return SYCLGenSuccess();
}

StringRef GetWiderTypeAsString(const InlineAsmBuiltinType *Type) const {
switch (Type->getKind()) {
case InlineAsmBuiltinType::s16:
Expand Down
3 changes: 3 additions & 0 deletions clang/lib/DPCT/RulesAsm/Parser/AsmNodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -389,6 +389,9 @@ class InlineAsmInstruction : public InlineAsmStmt {
static bool classof(const InlineAsmStmt *S) {
return InstructionClass <= S->getStmtClass();
}
AsmStateSpace getStateSpace() const {
return StateSpace.value_or(AsmStateSpace::none);
}
};

/// This represents a device conditional instruction, e.g. instruction @%p
Expand Down
6 changes: 6 additions & 0 deletions clang/lib/DPCT/RulesAsm/Parser/AsmParser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -377,6 +377,12 @@ InlineAsmStmtResult InlineAsmParser::ParseInstruction() {
Ops.push_back(Out.get());
Out = nullptr;
}
// prefetch{.state}.{level} [%0] has only one input operand and no type.
if (Opcode->getTokenID() == asmtok::op_prefetch) {
Ops.push_back(Out.get());
Out = nullptr;
Types.push_back(Context.getBuiltinType(InlineAsmBuiltinType::byte));
}

return ::new (Context) InlineAsmInstruction(Opcode, StateSpace, Attrs, Types,
Out.get(), Pred.get(), Ops);
Expand Down
4 changes: 2 additions & 2 deletions clang/lib/DPCT/RulesAsm/Parser/AsmToken.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ class InlineAsmToken {
enum TokenFlags {
NeedsCleaning = 0x01, // This identifier contains special characters.
Placeholder = 0x02, // This identifier is an inline asm placeholder.
StartOfDot = 0x04, // This identifier is an dot identifier.
StartOfDot = 0x04, // This identifier is a dot identifier.
};

asmtok::TokenKind getKind() const { return Kind; }
Expand Down Expand Up @@ -112,7 +112,7 @@ class InlineAsmToken {
/// Return true if this token is an inline asm placeholder.
bool isPlaceholder() const { return getFlag(Placeholder); }

/// Return true if this token is an dot identifier.
/// Return true if this token is a dot identifier.
bool startOfDot() const { return getFlag(StartOfDot); }
};

Expand Down
3 changes: 3 additions & 0 deletions clang/lib/DPCT/RulesAsm/Parser/AsmTokenKinds.def
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,9 @@ MODIFIER(idx, ".idx")
MODIFIER(bfly, ".bfly")
MODIFIER(sc, ".sc")
MODIFIER(gl, ".gl")
MODIFIER(L1, ".L1")
MODIFIER(L2, ".L2")


#undef LINKAGE
#undef TARGET
Expand Down
3 changes: 1 addition & 2 deletions clang/lib/DPCT/SrcAPI/APINames_ASM.inc
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,7 @@ ENTRY("not", "not", true, NO_FLAG, P1, "Successful")
ENTRY("or", "or", true, NO_FLAG, P1, "Successful")
ENTRY("pmevent", "pmevent", false, NO_FLAG, P1, "Comment")
ENTRY("popc", "popc", true, NO_FLAG, P1, "Successful")
ENTRY("prefetch", "prefetch", false, NO_FLAG, P1, "Comment")
ENTRY("prefetchu", "prefetchu", false, NO_FLAG, P1, "Comment")
ENTRY("prefetch", "prefetch", true, NO_FLAG, P1, "Partial")
ENTRY("prmt", "prmt", false, NO_FLAG, P1, "Comment")
ENTRY("rcp", "rcp", true, NO_FLAG, P1, "Successful")
ENTRY("red", "red", false, NO_FLAG, P1, "Comment")
Expand Down
45 changes: 45 additions & 0 deletions clang/test/dpct/asm/prefetch.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// UNSUPPORTED: cuda-8.0
// UNSUPPORTED: v8.0
// RUN: dpct --format-range=none -out-root %T/prefetch %s --use-experimental-features=prefetch --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
// RUN: FileCheck %s --match-full-lines --input-file %T/prefetch/prefetch.dp.cpp
// RUN: %if BUILD_LIT %{icpx -c -DBUILD_TEST -fsycl %T/prefetch/prefetch.dp.cpp -o %T/prefetch/prefetch.dp.o %}

// clang-format off
#include <cuda_runtime.h>

/*
Supported syntax:
-----------------
prefetch.level [a]; // prefetch to generic addr space cache
prefetch.global.level [a]; // prefetch to global cache

Unsupported syntax:
-------------------
prefetch.local.level
prefetch.global.level::eviction_priority [a]; // prefetch to data cache
prefetch{.tensormap_space}.tensormap [a]; // prefetch the tensormap

.level = { .L1, .L2 };
.level::eviction_priority = { .L2::evict_last, .L2::evict_normal };
.tensormap_space = { .const, .param };
*/

__global__ void prefetch(int *arr) {
/* prefetch of no address space */
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1});
TejaX-Alaghari marked this conversation as resolved.
Show resolved Hide resolved
asm volatile ("prefetch.L1 [%0];" : : "l"(arr));
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L2});
asm volatile ("prefetch.L2 [%0];" : : "l"(arr));

/* prefetch of global address space */
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1});
asm volatile ("prefetch.global.L1 [%0];" : : "l"(arr));
// CHECK: sycl::ext::oneapi::experimental::prefetch(arr, sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L2});
asm volatile ("prefetch.global.L2 [%0];" : : "l"(arr));

/* using Register-Immediate (Displacement) address mode */
// CHECK: sycl::ext::oneapi::experimental::prefetch(((uint8_t *)((uintptr_t)arr + 2)), sycl::ext::oneapi::experimental::properties{sycl::ext::oneapi::experimental::prefetch_hint_L1});
asm volatile("prefetch.global.L1 [%0 + 2];" :: "l"(arr));
}

// clang-format on
40 changes: 40 additions & 0 deletions clang/test/dpct/asm/prefetch_default.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// UNSUPPORTED: cuda-8.0
TejaX-Alaghari marked this conversation as resolved.
Show resolved Hide resolved
// UNSUPPORTED: v8.0
// RUN: dpct --format-range=none -out-root %T/prefetch_default %s --cuda-include-path="%cuda-path/include" -- -std=c++14 -x cuda --cuda-host-only
// RUN: FileCheck %s --match-full-lines --input-file %T/prefetch_default/prefetch_default.dp.cpp
// RUN: %if BUILD_LIT %{icpx -c -DBUILD_TEST -fsycl %T/prefetch_default/prefetch_default.dp.cpp -o %T/prefetch_default/prefetch_default.dp.o %}

// clang-format off
#include <cuda_runtime.h>

__global__ void prefetch(int *arr) {
#ifndef BUILD_TEST
/* prefetch of no address space */
// CHECK: /*
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
// CHECK-NEXT: */
asm volatile ("prefetch.L1 [%0];" : : "l"(arr));
// CHECK: /*
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
// CHECK-NEXT: */
asm volatile ("prefetch.L2 [%0];" : : "l"(arr));

/* prefetch of global address space */
// CHECK: /*
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
// CHECK-NEXT: */
asm volatile ("prefetch.global.L1 [%0];" : : "l"(arr));
// CHECK: /*
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
// CHECK-NEXT: */
asm volatile ("prefetch.global.L2 [%0];" : : "l"(arr));

/* using Register-Immediate (Displacement) address mode */
// CHECK: /*
// CHECK-NEXT: DPCT1053:{{.*}} Migration of device assembly code is not supported.
// CHECK-NEXT: */
asm volatile("prefetch.global.L1 [%0 + 2];" :: "l"(arr));
#endif // BUILD_TEST
}

// clang-format on