Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[SYCL][NATIVECPU] Add Native CPU specific property mechanism and nd_range property #16152

Open
wants to merge 6 commits into
base: sycl
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions clang/lib/CodeGen/BackendUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1165,6 +1165,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline(
if (LangOpts.EnableDAEInSpirKernels)
MPM.addPass(DeadArgumentEliminationSYCLPass());

if (LangOpts.SYCLIsNativeCPU)
llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(MPM);
// Rerun aspect propagation without warning diagnostics.
MPM.addPass(
SYCLPropagateAspectsUsagePass(/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu,
Expand Down
80 changes: 62 additions & 18 deletions clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h"
#include "llvm/TargetParser/Triple.h"
#include <optional>
#ifndef NDEBUG
#include "llvm/IR/Verifier.h"
#endif // NDEBUG
Expand Down Expand Up @@ -366,6 +368,8 @@ class BinaryWrapper {
/// Records all created memory buffers for safe auto-gc
llvm::SmallVector<std::unique_ptr<MemoryBuffer>, 4> AutoGcBufs;

std::optional<util::PropertySet> SYCLNativeCPUPropSet = std::nullopt;

public:
void addImage(const OffloadKind Kind, llvm::StringRef File,
llvm::StringRef Manif, llvm::StringRef Tgt,
Expand Down Expand Up @@ -649,16 +653,9 @@ class BinaryWrapper {
}

Function *addDeclarationForNativeCPU(StringRef Name) {
static FunctionType *NativeCPUFuncTy = FunctionType::get(
static FunctionType *FTy = FunctionType::get(
Type::getVoidTy(C),
{PointerType::getUnqual(C), PointerType::getUnqual(C)}, false);
static FunctionType *NativeCPUBuiltinTy = FunctionType::get(
PointerType::getUnqual(C), {PointerType::getUnqual(C)}, false);
FunctionType *FTy;
if (Name.starts_with("__dpcpp_nativecpu"))
FTy = NativeCPUBuiltinTy;
else
FTy = NativeCPUFuncTy;
auto FCalle = M.getOrInsertFunction(
sycl::utils::addSYCLNativeCPUSuffix(Name).str(), FTy);
Function *F = dyn_cast<Function>(FCalle.getCallee());
Expand All @@ -668,16 +665,28 @@ class BinaryWrapper {
}

Expected<std::pair<Constant *, Constant *>>
addDeclarationsForNativeCPU(StringRef EntriesFile) {
addDeclarationsForNativeCPU(StringRef EntriesFile,
std::optional<util::PropertySet> NativeCPUProps) {
Expected<MemoryBuffer *> MBOrErr = loadFile(EntriesFile);
if (!MBOrErr)
return MBOrErr.takeError();
MemoryBuffer *MB = *MBOrErr;
// the Native CPU PI Plug-in expects the BinaryStart field to point to an
// array of struct nativecpu_entry {
// the Native CPU UR adapter expects the BinaryStart field to point to
//
// struct nativecpu_program {
// nativecpu_entry *entries;
// ur_program_properties_t *properties;
// };
//
// where "entries" is an array of:
//
// struct nativecpu_entry {
// char *kernelname;
// unsigned char *kernel_ptr;
// };
StructType *NCPUProgramT = StructType::create(
{PointerType::getUnqual(C), PointerType::getUnqual(C)},
"nativecpu_program");
StructType *NCPUEntryT = StructType::create(
{PointerType::getUnqual(C), PointerType::getUnqual(C)},
"__nativecpu_entry");
Expand All @@ -703,12 +712,40 @@ class BinaryWrapper {
auto *GVar = new GlobalVariable(M, CA->getType(), true,
GlobalVariable::InternalLinkage, CA,
"__sycl_native_cpu_decls");
auto *Begin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar,
getSizetConstPair(0u, 0u));
auto *End = ConstantExpr::getGetElementPtr(
GVar->getValueType(), GVar,
getSizetConstPair(0u, NativeCPUEntries.size()));
return std::make_pair(Begin, End);
auto *EntriesBegin = ConstantExpr::getGetElementPtr(
GVar->getValueType(), GVar, getSizetConstPair(0u, 0u));

// Add Native CPU specific properties to the nativecpu_program struct
Constant *PropValue = NullPtr;
if (NativeCPUProps.has_value()) {
auto PropsOrErr = addSYCLPropertySetToModule(*NativeCPUProps);
if (!PropsOrErr)
return PropsOrErr.takeError();
auto *Category = addStringToModule(
sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS, "SYCL_PropSetName");
auto S =
ConstantStruct::get(getSyclPropSetTy(), Category,
PropsOrErr.get().first, PropsOrErr.get().second);
auto T = addStructArrayToModule({S}, getSyclPropSetTy());
PropValue = T.first;
}

// Create the nativecpu_program struct.
// We add it to a ConstantArray of length 1 because the SYCL runtime expects
// a non-zero sized binary image, and this allows it to point the end of the
// binary image to the end of the array.
auto *Program =
ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue});
ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1);
Constant *CPA = ConstantArray::get(ProgramATy, {Program});
auto *ProgramGVar =
new GlobalVariable(M, ProgramATy, true, GlobalVariable::InternalLinkage,
CPA, "__sycl_native_cpu_program");
auto *ProgramBegin = ConstantExpr::getGetElementPtr(
ProgramGVar->getValueType(), ProgramGVar, getSizetConstPair(0u, 0u));
auto *ProgramEnd = ConstantExpr::getGetElementPtr(
ProgramGVar->getValueType(), ProgramGVar, getSizetConstPair(0u, 1u));
return std::make_pair(ProgramBegin, ProgramEnd);
}

// Adds a global readonly variable that is initialized by given data to the
Expand Down Expand Up @@ -941,6 +978,12 @@ class BinaryWrapper {
// the PropSetsInits
for (const auto &PropSet : *PropRegistry) {
// create content in the rightmost column and get begin/end pointers
if (PropSet.first == sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS) {
// We don't emit Native CPU specific properties in this section, but
// instead we emit them in the native_cpu_entry struct directly.
SYCLNativeCPUPropSet = PropSet.second;
continue;
}
Expected<std::pair<Constant *, Constant *>> Props =
addSYCLPropertySetToModule(PropSet.second);
if (!Props)
Expand Down Expand Up @@ -1103,7 +1146,8 @@ class BinaryWrapper {
}
std::pair<Constant *, Constant *> Fbin;
if (Img.Tgt == "native_cpu") {
auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile);
auto FBinOrErr =
addDeclarationsForNativeCPU(Img.EntriesFile, SYCLNativeCPUPropSet);
if (!FBinOrErr)
return FBinOrErr.takeError();
Fbin = *FBinOrErr;
Expand Down
32 changes: 32 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
//===-- CheckNDRangeSYCLNativeCPU.h -Check if a kernel uses nd_range
// features--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Checks if the kernel uses features from nd_item such as:
// * local id
// * local range
// * local memory
// * work group barrier
//===----------------------------------------------------------------------===//

#pragma once

#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"

namespace llvm {

class ModulePass;

class CheckNDRangeSYCLNativeCPUPass
: public PassInfoMixin<CheckNDRangeSYCLNativeCPUPass> {
public:
PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM);
};

} // namespace llvm
7 changes: 7 additions & 0 deletions llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,18 @@
//===----------------------------------------------------------------------===//
#pragma once
#include "llvm/ADT/Twine.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Passes/OptimizationLevel.h"

namespace llvm {
namespace sycl {
namespace utils {

// Used to schedule passes in the device compiler cc1 invocation for
// Native CPU.
void addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM);

void addSYCLNativeCPUBackendPasses(ModulePassManager &MPM,
ModuleAnalysisManager &MAM,
OptimizationLevel OptLevel);
Expand All @@ -35,6 +40,8 @@ inline bool isSYCLNativeCPU(const Module &M) {
return M.getModuleFlag("is-native-cpu") != nullptr;
}

constexpr unsigned SyclNativeCpuLocalAS = 3;

} // namespace utils
} // namespace sycl
} // namespace llvm
1 change: 1 addition & 0 deletions llvm/include/llvm/Support/PropertySetIO.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,7 @@ class PropertySetRegistry {
static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements";
static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes";
static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions";
static constexpr char SYCL_NATIVE_CPU_PROPS[] = "SYCL/native cpu properties";

/// Function for bulk addition of an entire property set in the given
/// \p Category .
Expand Down
7 changes: 7 additions & 0 deletions llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,13 @@ PropSetRegTy computeModuleProperties(const Module &M,
PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(),
*MaxLinearWGSize);
}

if (auto IsNDRange =
getKernelSingleEltMetadata<bool>(Func, "is_nd_range")) {
MetadataNames.push_back(Func.getName().str() + "@is_nd_range");
PropSet.add(PropSetRegTy::SYCL_NATIVE_CPU_PROPS, MetadataNames.back(),
*IsNDRange);
}
}

// Add global_id_mapping information with mapping between device-global
Expand Down
1 change: 1 addition & 0 deletions llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils
ConvertToMuxBuiltinsSYCLNativeCPU.cpp
FixABIMuxBuiltinsSYCLNativeCPU.cpp
FAtomicsNativeCPU.cpp
CheckNDRangeSYCLNativeCPU.cpp

ADDITIONAL_HEADER_DIRS
${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR
Expand Down
121 changes: 121 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
//- CheckNDRangeSYCLNativeCPU.cpp - Check if a kernel uses nd_range features -//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Checks if the kernel uses features from nd_item such as:
// * local id
// * local range
// * local memory
// * work group barrier
//===----------------------------------------------------------------------===//

#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
#include "llvm/ADT/PriorityWorklist.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Metadata.h"
#include "llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h"
#include "llvm/Support/Casting.h"

using namespace llvm;

static std::array<const char *, 13> NdBuiltins{
"_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_WorkgroupSize_yv",
"_Z23__spirv_WorkgroupSize_zv", "_Z23__spirv_NumWorkgroups_xv",
"_Z23__spirv_NumWorkgroups_yv", "_Z23__spirv_NumWorkgroups_zv",
"_Z21__spirv_WorkgroupId_xv", "_Z21__spirv_WorkgroupId_yv",
"_Z21__spirv_WorkgroupId_zv", "_Z27__spirv_LocalInvocationId_xv",
"_Z27__spirv_LocalInvocationId_yv", "_Z27__spirv_LocalInvocationId_zv",
"_Z22__spirv_ControlBarrierjjj"};

static void addNDRangeMetadata(Function &F, bool Value) {
auto &Ctx = F.getContext();
F.setMetadata("is_nd_range",
MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get(
Type::getInt1Ty(Ctx), Value))));
}

PreservedAnalyses
CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) {
bool ModuleChanged = false;
SmallPtrSet<Function *, 5> NdFuncs; // Functions that use NDRange features
SmallPtrSet<Function *, 5> Visited;
SmallPriorityWorklist<Function *, 5> WorkList;

// Add builtins to the set of functions that may use NDRange features
for (auto &FName : NdBuiltins) {
auto F = M.getFunction(FName);
if (F == nullptr)
continue;
WorkList.insert(F);
NdFuncs.insert(F);
}

// Add users of local AS global var to the set of functions that may use
// NDRange features
for (auto &GV : M.globals()) {
if (GV.getAddressSpace() != sycl::utils::SyclNativeCpuLocalAS)
continue;

for (auto U : GV.users()) {
if (auto I = dyn_cast<Instruction>(U)) {
auto F = I->getFunction();
if (F != nullptr && NdFuncs.insert(F).second) {
WorkList.insert(F);
NdFuncs.insert(F);
}
}
}
}

// Traverse the use chain to find Functions that may use NDRange features
// (or, recursively, Functions that call Functions that may use NDRange
// features)
while (!WorkList.empty()) {
auto F = WorkList.pop_back_val();

for (User *U : F->users()) {
if (auto CI = dyn_cast<CallInst>(U)) {
auto Caller = CI->getFunction();
if (!Caller)
continue;
if (!Visited.contains(Caller)) {
WorkList.insert(Caller);
NdFuncs.insert(Caller);
}
}
}
Visited.insert(F);
}

for (auto &F : M) {
if (F.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) {
bool IsNDRange = false;

// Check for local memory args
for (auto &A : F.args()) {
if (auto Ptr = dyn_cast<PointerType>(A.getType());
Ptr && Ptr->getAddressSpace() == 3) {
IsNDRange = true;
}
}

// Check if the kernel calls one of the ND Range builtins
IsNDRange |= NdFuncs.contains(&F);

addNDRangeMetadata(F, IsNDRange);
ModuleChanged = true;
}
}
return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all();
}
5 changes: 5 additions & 0 deletions llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
// When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit.
//
//===----------------------------------------------------------------------===//
#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h"
#include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h"
#include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h"
Expand Down Expand Up @@ -130,3 +131,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses(
MPM.addPass(DumpIR());
}
}

void llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM) {
MPM.addPass(CheckNDRangeSYCLNativeCPUPass());
}
2 changes: 1 addition & 1 deletion sycl/cmake/modules/FetchUnifiedRuntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT)
CACHE PATH "Path to external '${name}' adapter source dir" FORCE)
endfunction()

set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime")
set(UNIFIED_RUNTIME_REPO "https://github.com/PietroGhg/unified-runtime.git")
include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake)

set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES")
Expand Down
8 changes: 1 addition & 7 deletions sycl/cmake/modules/UnifiedRuntimeTag.cmake
Original file line number Diff line number Diff line change
@@ -1,7 +1 @@
# commit db83117e830406b0d9950e24892dba868acba354
# Merge: 0a90db9b c79df596
# Author: Callum Fare <[email protected]>
# Date: Wed Nov 27 16:04:19 2024 +0000
# Merge pull request #2261 from againull/againull/2d_block_exp
# Add new device descriptor to query 2D block array capabilities of the Intel GPU
set(UNIFIED_RUNTIME_TAG db83117e830406b0d9950e24892dba868acba354)
set(UNIFIED_RUNTIME_TAG pietro/prop_ncpu_r_or_nd)
Loading