diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f617923670204..a63e1675e3b3b 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -1165,6 +1165,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (LangOpts.EnableDAEInSpirKernels) MPM.addPass(DeadArgumentEliminationSYCLPass()); + if (LangOpts.SYCLIsNativeCPU) + llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(MPM); // Rerun aspect propagation without warning diagnostics. MPM.addPass( SYCLPropagateAspectsUsagePass(/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu, diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 8079f9fa22e1b..7738a747eb78a 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -31,7 +31,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h" #include "llvm/TargetParser/Triple.h" +#include #ifndef NDEBUG #include "llvm/IR/Verifier.h" #endif // NDEBUG @@ -366,6 +368,8 @@ class BinaryWrapper { /// Records all created memory buffers for safe auto-gc llvm::SmallVector, 4> AutoGcBufs; + std::optional SYCLNativeCPUPropSet = std::nullopt; + public: void addImage(const OffloadKind Kind, llvm::StringRef File, llvm::StringRef Manif, llvm::StringRef Tgt, @@ -649,16 +653,9 @@ class BinaryWrapper { } Function *addDeclarationForNativeCPU(StringRef Name) { - static FunctionType *NativeCPUFuncTy = FunctionType::get( + static FunctionType *FTy = FunctionType::get( Type::getVoidTy(C), {PointerType::getUnqual(C), PointerType::getUnqual(C)}, false); - static FunctionType *NativeCPUBuiltinTy = FunctionType::get( - PointerType::getUnqual(C), {PointerType::getUnqual(C)}, false); - FunctionType *FTy; - if (Name.starts_with("__dpcpp_nativecpu")) - FTy = NativeCPUBuiltinTy; - else - FTy = NativeCPUFuncTy; auto FCalle = M.getOrInsertFunction( sycl::utils::addSYCLNativeCPUSuffix(Name).str(), FTy); Function *F = dyn_cast(FCalle.getCallee()); @@ -668,16 +665,28 @@ class BinaryWrapper { } Expected> - addDeclarationsForNativeCPU(StringRef EntriesFile) { + addDeclarationsForNativeCPU(StringRef EntriesFile, + std::optional NativeCPUProps) { Expected MBOrErr = loadFile(EntriesFile); if (!MBOrErr) return MBOrErr.takeError(); MemoryBuffer *MB = *MBOrErr; - // the Native CPU PI Plug-in expects the BinaryStart field to point to an - // array of struct nativecpu_entry { + // the Native CPU UR adapter expects the BinaryStart field to point to + // + // struct nativecpu_program { + // nativecpu_entry *entries; + // ur_program_properties_t *properties; + // }; + // + // where "entries" is an array of: + // + // struct nativecpu_entry { // char *kernelname; // unsigned char *kernel_ptr; // }; + StructType *NCPUProgramT = StructType::create( + {PointerType::getUnqual(C), PointerType::getUnqual(C)}, + "nativecpu_program"); StructType *NCPUEntryT = StructType::create( {PointerType::getUnqual(C), PointerType::getUnqual(C)}, "__nativecpu_entry"); @@ -703,12 +712,40 @@ class BinaryWrapper { auto *GVar = new GlobalVariable(M, CA->getType(), true, GlobalVariable::InternalLinkage, CA, "__sycl_native_cpu_decls"); - auto *Begin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar, - getSizetConstPair(0u, 0u)); - auto *End = ConstantExpr::getGetElementPtr( - GVar->getValueType(), GVar, - getSizetConstPair(0u, NativeCPUEntries.size())); - return std::make_pair(Begin, End); + auto *EntriesBegin = ConstantExpr::getGetElementPtr( + GVar->getValueType(), GVar, getSizetConstPair(0u, 0u)); + + // Add Native CPU specific properties to the nativecpu_program struct + Constant *PropValue = NullPtr; + if (NativeCPUProps.has_value()) { + auto PropsOrErr = addSYCLPropertySetToModule(*NativeCPUProps); + if (!PropsOrErr) + return PropsOrErr.takeError(); + auto *Category = addStringToModule( + sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS, "SYCL_PropSetName"); + auto S = + ConstantStruct::get(getSyclPropSetTy(), Category, + PropsOrErr.get().first, PropsOrErr.get().second); + auto T = addStructArrayToModule({S}, getSyclPropSetTy()); + PropValue = T.first; + } + + // Create the nativecpu_program struct. + // We add it to a ConstantArray of length 1 because the SYCL runtime expects + // a non-zero sized binary image, and this allows it to point the end of the + // binary image to the end of the array. + auto *Program = + ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue}); + ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1); + Constant *CPA = ConstantArray::get(ProgramATy, {Program}); + auto *ProgramGVar = + new GlobalVariable(M, ProgramATy, true, GlobalVariable::InternalLinkage, + CPA, "__sycl_native_cpu_program"); + auto *ProgramBegin = ConstantExpr::getGetElementPtr( + ProgramGVar->getValueType(), ProgramGVar, getSizetConstPair(0u, 0u)); + auto *ProgramEnd = ConstantExpr::getGetElementPtr( + ProgramGVar->getValueType(), ProgramGVar, getSizetConstPair(0u, 1u)); + return std::make_pair(ProgramBegin, ProgramEnd); } // Adds a global readonly variable that is initialized by given data to the @@ -941,6 +978,12 @@ class BinaryWrapper { // the PropSetsInits for (const auto &PropSet : *PropRegistry) { // create content in the rightmost column and get begin/end pointers + if (PropSet.first == sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS) { + // We don't emit Native CPU specific properties in this section, but + // instead we emit them in the native_cpu_entry struct directly. + SYCLNativeCPUPropSet = PropSet.second; + continue; + } Expected> Props = addSYCLPropertySetToModule(PropSet.second); if (!Props) @@ -1103,7 +1146,8 @@ class BinaryWrapper { } std::pair Fbin; if (Img.Tgt == "native_cpu") { - auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile); + auto FBinOrErr = + addDeclarationsForNativeCPU(Img.EntriesFile, SYCLNativeCPUPropSet); if (!FBinOrErr) return FBinOrErr.takeError(); Fbin = *FBinOrErr; diff --git a/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h new file mode 100644 index 0000000000000..c6b70da4d2054 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h @@ -0,0 +1,32 @@ +//===-- CheckNDRangeSYCLNativeCPU.h -Check if a kernel uses nd_range +// features--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Checks if the kernel uses features from nd_item such as: +// * local id +// * local range +// * local memory +// * work group barrier +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ModulePass; + +class CheckNDRangeSYCLNativeCPUPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm diff --git a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h index d1694a7b99696..308c0cba74351 100644 --- a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #pragma once #include "llvm/ADT/Twine.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/OptimizationLevel.h" @@ -18,6 +19,10 @@ namespace llvm { namespace sycl { namespace utils { +// Used to schedule passes in the device compiler cc1 invocation for +// Native CPU. +void addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM); + void addSYCLNativeCPUBackendPasses(ModulePassManager &MPM, ModuleAnalysisManager &MAM, OptimizationLevel OptLevel); @@ -35,6 +40,8 @@ inline bool isSYCLNativeCPU(const Module &M) { return M.getModuleFlag("is-native-cpu") != nullptr; } +constexpr unsigned SyclNativeCpuLocalAS = 3; + } // namespace utils } // namespace sycl } // namespace llvm diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 13cb687f3b08b..95f883ab3bea2 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -210,6 +210,7 @@ class PropertySetRegistry { static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements"; static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes"; static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions"; + static constexpr char SYCL_NATIVE_CPU_PROPS[] = "SYCL/native cpu properties"; /// Function for bulk addition of an entire property set in the given /// \p Category . diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index cfea28538017c..a5380eaacf678 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -298,6 +298,13 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), *MaxLinearWGSize); } + + if (auto IsNDRange = + getKernelSingleEltMetadata(Func, "is_nd_range")) { + MetadataNames.push_back(Func.getName().str() + "@is_nd_range"); + PropSet.add(PropSetRegTy::SYCL_NATIVE_CPU_PROPS, MetadataNames.back(), + *IsNDRange); + } } // Add global_id_mapping information with mapping between device-global diff --git a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt index bbfb74f7a3529..98c6439b983af 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt +++ b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils ConvertToMuxBuiltinsSYCLNativeCPU.cpp FixABIMuxBuiltinsSYCLNativeCPU.cpp FAtomicsNativeCPU.cpp + CheckNDRangeSYCLNativeCPU.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR diff --git a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp new file mode 100644 index 0000000000000..5cbb201b86ed1 --- /dev/null +++ b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp @@ -0,0 +1,121 @@ +//- CheckNDRangeSYCLNativeCPU.cpp - Check if a kernel uses nd_range features -// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Checks if the kernel uses features from nd_item such as: +// * local id +// * local range +// * local memory +// * work group barrier +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" +#include "llvm/ADT/PriorityWorklist.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" +#include "llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h" +#include "llvm/Support/Casting.h" + +using namespace llvm; + +static std::array NdBuiltins{ + "_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_WorkgroupSize_yv", + "_Z23__spirv_WorkgroupSize_zv", "_Z23__spirv_NumWorkgroups_xv", + "_Z23__spirv_NumWorkgroups_yv", "_Z23__spirv_NumWorkgroups_zv", + "_Z21__spirv_WorkgroupId_xv", "_Z21__spirv_WorkgroupId_yv", + "_Z21__spirv_WorkgroupId_zv", "_Z27__spirv_LocalInvocationId_xv", + "_Z27__spirv_LocalInvocationId_yv", "_Z27__spirv_LocalInvocationId_zv", + "_Z22__spirv_ControlBarrierjjj"}; + +static void addNDRangeMetadata(Function &F, bool Value) { + auto &Ctx = F.getContext(); + F.setMetadata("is_nd_range", + MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(Ctx), Value)))); +} + +PreservedAnalyses +CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) { + bool ModuleChanged = false; + SmallPtrSet NdFuncs; // Functions that use NDRange features + SmallPtrSet Visited; + SmallPriorityWorklist WorkList; + + // Add builtins to the set of functions that may use NDRange features + for (auto &FName : NdBuiltins) { + auto F = M.getFunction(FName); + if (F == nullptr) + continue; + WorkList.insert(F); + NdFuncs.insert(F); + } + + // Add users of local AS global var to the set of functions that may use + // NDRange features + for (auto &GV : M.globals()) { + if (GV.getAddressSpace() != sycl::utils::SyclNativeCpuLocalAS) + continue; + + for (auto U : GV.users()) { + if (auto I = dyn_cast(U)) { + auto F = I->getFunction(); + if (F != nullptr && NdFuncs.insert(F).second) { + WorkList.insert(F); + NdFuncs.insert(F); + } + } + } + } + + // Traverse the use chain to find Functions that may use NDRange features + // (or, recursively, Functions that call Functions that may use NDRange + // features) + while (!WorkList.empty()) { + auto F = WorkList.pop_back_val(); + + for (User *U : F->users()) { + if (auto CI = dyn_cast(U)) { + auto Caller = CI->getFunction(); + if (!Caller) + continue; + if (!Visited.contains(Caller)) { + WorkList.insert(Caller); + NdFuncs.insert(Caller); + } + } + } + Visited.insert(F); + } + + for (auto &F : M) { + if (F.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) { + bool IsNDRange = false; + + // Check for local memory args + for (auto &A : F.args()) { + if (auto Ptr = dyn_cast(A.getType()); + Ptr && Ptr->getAddressSpace() == 3) { + IsNDRange = true; + } + } + + // Check if the kernel calls one of the ND Range builtins + IsNDRange |= NdFuncs.contains(&F); + + addNDRangeMetadata(F, IsNDRange); + ModuleChanged = true; + } + } + return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index b30b6c41c2b99..78fece4bf5e32 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -11,6 +11,7 @@ // When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit. // //===----------------------------------------------------------------------===// +#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" #include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" @@ -130,3 +131,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( MPM.addPass(DumpIR()); } } + +void llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM) { + MPM.addPass(CheckNDRangeSYCLNativeCPUPass()); +} diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index e9d52c5b23dac..43a669d7fadde 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime") + set(UNIFIED_RUNTIME_REPO "https://github.com/PietroGhg/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index b5c82c3ead941..93e680c65e25c 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1 @@ -# commit db83117e830406b0d9950e24892dba868acba354 -# Merge: 0a90db9b c79df596 -# Author: Callum Fare -# Date: Wed Nov 27 16:04:19 2024 +0000 -# Merge pull request #2261 from againull/againull/2d_block_exp -# Add new device descriptor to query 2D block array capabilities of the Intel GPU -set(UNIFIED_RUNTIME_TAG db83117e830406b0d9950e24892dba868acba354) +set(UNIFIED_RUNTIME_TAG pietro/prop_ncpu_r_or_nd) diff --git a/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp b/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp new file mode 100644 index 0000000000000..a1ea1b5bbbd6d --- /dev/null +++ b/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp @@ -0,0 +1,56 @@ +// REQUIRES: native_cpu +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -S -o - | FileCheck %s +// RUN: %clangxx -fsycl-device-only -O0 -fsycl-targets=native_cpu %s -S -o - | FileCheck %s + +#include +using namespace sycl; + +class Test; +class Test2; +class Test3; +class Test4; + +template +void use_local_acc(nd_item<1> it, AccT &acc, + const local_accessor &local_acc) { + local_acc[it.get_local_id()[0]] = 1; + acc[it.get_local_id()[0]] = local_acc[0]; +} + +int main() { + sycl::queue deviceQueue; + sycl::nd_range<1> r(1, 1); + deviceQueue.submit([&](handler &h) { + h.parallel_for(r, [=](nd_item<1> it) { it.barrier(); }); + }); + // CHECK-DAG: @_ZTS4Test({{.*}} !is_nd_range [[MDID:![0-9]*]] + + int res = 0; + { + buffer buf(&res, 1); + deviceQueue.submit([&](handler &h) { + auto acc = buf.template get_access(h); + local_accessor local_acc(1, h); + h.parallel_for( + r, [=](nd_item<1> it) { use_local_acc(it, acc, local_acc); }); + }); + // CHECK-DAG: @_ZTS5Test2({{.*}} !is_nd_range [[MDID:![0-9]*]] + } + + deviceQueue.submit([&](handler &h) { + h.parallel_for(1, [=](item<1> it) { it.get_id(); }); + }); + // CHECK-DAG: @_ZTS5Test3({{.*}} !is_nd_range [[MDNOT:![0-9]*]] + + buffer buf(&res, 1); + deviceQueue.submit([&](sycl::handler &cgh) { + auto acc = sycl::accessor(buf, cgh, sycl::write_only); + cgh.parallel_for_work_group( + range<1>(1), range<1>(1), + [=](auto group) { acc[group.get_group_id()] = 42; }); + }); + // CHECK-DAG: @_ZTS5Test4({{.*}} !is_nd_range [[MDID:![0-9]*]] +} + +//CHECK:[[MDID]] = !{i1 true} +//CHECK:[[MDNOT]] = !{i1 false}