From e45aad466d01a6ba9dde2f1c5ce7985f9fffc2ad Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Mon, 14 Oct 2024 08:59:34 +0100 Subject: [PATCH 1/5] Emit Native CPU properties indipendently --- clang/lib/CodeGen/BackendUtil.cpp | 6 ++ .../ClangOffloadWrapper.cpp | 66 +++++++++++----- .../SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h | 30 ++++++++ llvm/include/llvm/Support/PropertySetIO.h | 1 + .../SYCLLowerIR/ComputeModuleRuntimeInfo.cpp | 5 ++ llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt | 1 + .../CheckNDRangeSYCLNativeCPU.cpp | 75 +++++++++++++++++++ .../PipelineSYCLNativeCPU.cpp | 1 + sycl/cmake/modules/FetchUnifiedRuntime.cmake | 2 +- sycl/cmake/modules/UnifiedRuntimeTag.cmake | 8 +- .../native_cpu/nd_range_attr.cpp | 40 ++++++++++ 11 files changed, 210 insertions(+), 25 deletions(-) create mode 100644 llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h create mode 100644 llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp create mode 100644 sycl/test/check_device_code/native_cpu/nd_range_attr.cpp diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index f617923670204..72a636370aed7 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -47,6 +47,7 @@ #include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/ProfileData/InstrProfCorrelator.h" +#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/CleanupSYCLMetadata.h" #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" #include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h" @@ -1165,6 +1166,11 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (LangOpts.EnableDAEInSpirKernels) MPM.addPass(DeadArgumentEliminationSYCLPass()); + // We have to schedule the pass here because the native cpu pipeline + // is ran as part of a separate clang invocation, but we want the information + // in sycl-post-link. + if (LangOpts.SYCLIsNativeCPU) + MPM.addPass(CheckNDRangeSYCLNativeCPUPass()); // Rerun aspect propagation without warning diagnostics. MPM.addPass( SYCLPropagateAspectsUsagePass(/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu, diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index 8079f9fa22e1b..cbd88552d5d72 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -31,7 +31,9 @@ #include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" +#include "llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h" #include "llvm/TargetParser/Triple.h" +#include #ifndef NDEBUG #include "llvm/IR/Verifier.h" #endif // NDEBUG @@ -366,6 +368,8 @@ class BinaryWrapper { /// Records all created memory buffers for safe auto-gc llvm::SmallVector, 4> AutoGcBufs; + std::optional SYCLNativeCPUPropSet = std::nullopt; + public: void addImage(const OffloadKind Kind, llvm::StringRef File, llvm::StringRef Manif, llvm::StringRef Tgt, @@ -649,16 +653,9 @@ class BinaryWrapper { } Function *addDeclarationForNativeCPU(StringRef Name) { - static FunctionType *NativeCPUFuncTy = FunctionType::get( + static FunctionType *FTy = FunctionType::get( Type::getVoidTy(C), {PointerType::getUnqual(C), PointerType::getUnqual(C)}, false); - static FunctionType *NativeCPUBuiltinTy = FunctionType::get( - PointerType::getUnqual(C), {PointerType::getUnqual(C)}, false); - FunctionType *FTy; - if (Name.starts_with("__dpcpp_nativecpu")) - FTy = NativeCPUBuiltinTy; - else - FTy = NativeCPUFuncTy; auto FCalle = M.getOrInsertFunction( sycl::utils::addSYCLNativeCPUSuffix(Name).str(), FTy); Function *F = dyn_cast(FCalle.getCallee()); @@ -668,16 +665,27 @@ class BinaryWrapper { } Expected> - addDeclarationsForNativeCPU(StringRef EntriesFile) { + addDeclarationsForNativeCPU(StringRef EntriesFile, std::optional NativeCPUProps) { Expected MBOrErr = loadFile(EntriesFile); if (!MBOrErr) return MBOrErr.takeError(); MemoryBuffer *MB = *MBOrErr; - // the Native CPU PI Plug-in expects the BinaryStart field to point to an - // array of struct nativecpu_entry { + // the Native CPU UR adapter expects the BinaryStart field to point to + // + // struct nativecpu_program { + // nativecpu_entry *entries; + // ur_program_properties_t *properties; + // }; + // + // where "entries" is an array of: + // + // struct nativecpu_entry { // char *kernelname; // unsigned char *kernel_ptr; // }; + StructType *NCPUProgramT = StructType::create( + {PointerType::getUnqual(C), PointerType::getUnqual(C)}, + "nativecpu_program"); StructType *NCPUEntryT = StructType::create( {PointerType::getUnqual(C), PointerType::getUnqual(C)}, "__nativecpu_entry"); @@ -703,12 +711,30 @@ class BinaryWrapper { auto *GVar = new GlobalVariable(M, CA->getType(), true, GlobalVariable::InternalLinkage, CA, "__sycl_native_cpu_decls"); - auto *Begin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar, + auto *EntriesBegin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar, + getSizetConstPair(0u, 0u)); + Constant *PropValue = NullPtr; + if (NativeCPUProps.has_value()) { + auto PropsOrErr = addSYCLPropertySetToModule(*NativeCPUProps); + if (!PropsOrErr) + return PropsOrErr.takeError(); + auto *Category = addStringToModule(sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS, "SYCL_PropSetName"); + auto S = ConstantStruct::get( + getSyclPropSetTy(), Category, PropsOrErr.get().first, PropsOrErr.get().second); + auto T = addStructArrayToModule({S}, getSyclPropSetTy()); + PropValue = T.first; + } + auto *Program = ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue}); + ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1); + Constant *CPA = ConstantArray::get(ProgramATy, {Program}); + auto *ProgramGVar = new GlobalVariable(M, ProgramATy, true, + GlobalVariable::InternalLinkage, CPA, + "__sycl_native_cpu_program"); + auto *ProgramBegin = ConstantExpr::getGetElementPtr(ProgramGVar->getValueType(), ProgramGVar, getSizetConstPair(0u, 0u)); - auto *End = ConstantExpr::getGetElementPtr( - GVar->getValueType(), GVar, - getSizetConstPair(0u, NativeCPUEntries.size())); - return std::make_pair(Begin, End); + auto *ProgramEnd = ConstantExpr::getGetElementPtr(ProgramGVar->getValueType(), ProgramGVar, + getSizetConstPair(0u, 1u)); + return std::make_pair(ProgramBegin, ProgramEnd); } // Adds a global readonly variable that is initialized by given data to the @@ -941,6 +967,12 @@ class BinaryWrapper { // the PropSetsInits for (const auto &PropSet : *PropRegistry) { // create content in the rightmost column and get begin/end pointers + if (PropSet.first == sycl::PropSetRegTy::SYCL_NATIVE_CPU_PROPS) { + // We don't emit Native CPU specific properties in this section, but instead + // we emit them in the native_cpu_entry struct directly. + SYCLNativeCPUPropSet = PropSet.second; + continue; + } Expected> Props = addSYCLPropertySetToModule(PropSet.second); if (!Props) @@ -1103,7 +1135,7 @@ class BinaryWrapper { } std::pair Fbin; if (Img.Tgt == "native_cpu") { - auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile); + auto FBinOrErr = addDeclarationsForNativeCPU(Img.EntriesFile, SYCLNativeCPUPropSet); if (!FBinOrErr) return FBinOrErr.takeError(); Fbin = *FBinOrErr; diff --git a/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h new file mode 100644 index 0000000000000..9e0f63669b1d6 --- /dev/null +++ b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h @@ -0,0 +1,30 @@ +//===-- CheckNDRangeSYCLNativeCPU.h -Check if a kernel uses nd_range features--===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// A transformation pass that: +// * Handles the kernel calling convention and attributes. +// * Materializes the spirv builtins so that they can be handled by the host +// runtime. +//===----------------------------------------------------------------------===// + +#pragma once + +#include "llvm/IR/Module.h" +#include "llvm/IR/PassManager.h" + +namespace llvm { + +class ModulePass; + +class CheckNDRangeSYCLNativeCPUPass + : public PassInfoMixin { +public: + PreservedAnalyses run(Module &M, ModuleAnalysisManager &MAM); +}; + +} // namespace llvm diff --git a/llvm/include/llvm/Support/PropertySetIO.h b/llvm/include/llvm/Support/PropertySetIO.h index 13cb687f3b08b..95f883ab3bea2 100644 --- a/llvm/include/llvm/Support/PropertySetIO.h +++ b/llvm/include/llvm/Support/PropertySetIO.h @@ -210,6 +210,7 @@ class PropertySetRegistry { static constexpr char SYCL_DEVICE_REQUIREMENTS[] = "SYCL/device requirements"; static constexpr char SYCL_HOST_PIPES[] = "SYCL/host pipes"; static constexpr char SYCL_VIRTUAL_FUNCTIONS[] = "SYCL/virtual functions"; + static constexpr char SYCL_NATIVE_CPU_PROPS[] = "SYCL/native cpu properties"; /// Function for bulk addition of an entire property set in the given /// \p Category . diff --git a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp index cc287b9101fa8..3d55e883c7c98 100644 --- a/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp +++ b/llvm/lib/SYCLLowerIR/ComputeModuleRuntimeInfo.cpp @@ -307,6 +307,11 @@ PropSetRegTy computeModuleProperties(const Module &M, PropSet.add(PropSetRegTy::SYCL_PROGRAM_METADATA, MetadataNames.back(), *MaxLinearWGSize); } + + if (auto IsNDRange = getKernelSingleEltMetadata(Func, "is_nd_range")) { + MetadataNames.push_back(Func.getName().str() + "@is_nd_range"); + PropSet.add(PropSetRegTy::SYCL_NATIVE_CPU_PROPS, MetadataNames.back(), *IsNDRange); + } } // Add global_id_mapping information with mapping between device-global diff --git a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt index bbfb74f7a3529..98c6439b983af 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt +++ b/llvm/lib/SYCLNativeCPUUtils/CMakeLists.txt @@ -5,6 +5,7 @@ add_llvm_component_library(LLVMSYCLNativeCPUUtils ConvertToMuxBuiltinsSYCLNativeCPU.cpp FixABIMuxBuiltinsSYCLNativeCPU.cpp FAtomicsNativeCPU.cpp + CheckNDRangeSYCLNativeCPU.cpp ADDITIONAL_HEADER_DIRS ${LLVM_MAIN_INCLUDE_DIR}/llvm/SYCLLowerIR diff --git a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp new file mode 100644 index 0000000000000..01e1f8324fad9 --- /dev/null +++ b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp @@ -0,0 +1,75 @@ +//===------ PrepareSYCLNativeCPU.cpp - SYCL Native CPU Preparation Pass ---===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// Checks if the kernel uses features from nd_item such as: +// * local id +// * local range +// * local memory +// * work group barrier +//===----------------------------------------------------------------------===// + +#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" + +using namespace llvm; + +// TODO: add other bts +static std::array ndFunctions{ + "_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_NumWorkgroups_xv", + "_Z21__spirv_WorkgroupId_xv", "_Z27__spirv_LocalInvocationId_xv", + "_Z22__spirv_ControlBarrierjjj"}; + +static void addNDRangeMetadata(Function &F, bool Value) { + auto &Ctx = F.getContext(); + F.setMetadata("is_nd_range", + MDNode::get(Ctx, ConstantAsMetadata::get(ConstantInt::get( + Type::getInt1Ty(Ctx), Value)))); +} + +PreservedAnalyses +CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) { + bool ModuleChanged = false; + + for (auto &F : M) { + if (F.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) { + bool IsNDRange = false; + + // Check for local memory args + for (auto &A : F.args()) { + if (auto Ptr = dyn_cast(A.getType()); + Ptr && Ptr->getAddressSpace() == 3) { + IsNDRange = true; + } + } + + for (auto &BB : F) { + for (auto &I : BB) { + if (auto CI = dyn_cast(&I)) { + auto CalleeName = CI->getCalledFunction()->getName(); + if (std::find(ndFunctions.begin(), ndFunctions.end(), CalleeName) != + ndFunctions.end()) { + IsNDRange = true; + break; + } + } + } + if (IsNDRange) { + break; + } + } + + addNDRangeMetadata(F, IsNDRange); + } + } + return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all(); +} diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index b30b6c41c2b99..5de9153ca54a9 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -11,6 +11,7 @@ // When NATIVECPU_USE_OCK is set, adds passes from the oneAPI Construction Kit. // //===----------------------------------------------------------------------===// +#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/ConvertToMuxBuiltinsSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/FAtomicsNativeCPU.h" #include "llvm/SYCLLowerIR/FixABIMuxBuiltinsSYCLNativeCPU.h" diff --git a/sycl/cmake/modules/FetchUnifiedRuntime.cmake b/sycl/cmake/modules/FetchUnifiedRuntime.cmake index 72841724fa01d..43a669d7fadde 100644 --- a/sycl/cmake/modules/FetchUnifiedRuntime.cmake +++ b/sycl/cmake/modules/FetchUnifiedRuntime.cmake @@ -116,7 +116,7 @@ if(SYCL_UR_USE_FETCH_CONTENT) CACHE PATH "Path to external '${name}' adapter source dir" FORCE) endfunction() - set(UNIFIED_RUNTIME_REPO "https://github.com/oneapi-src/unified-runtime.git") + set(UNIFIED_RUNTIME_REPO "https://github.com/PietroGhg/unified-runtime.git") include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/UnifiedRuntimeTag.cmake) set(UMF_BUILD_EXAMPLES OFF CACHE INTERNAL "EXAMPLES") diff --git a/sycl/cmake/modules/UnifiedRuntimeTag.cmake b/sycl/cmake/modules/UnifiedRuntimeTag.cmake index 95a7b4dc86bce..93e680c65e25c 100644 --- a/sycl/cmake/modules/UnifiedRuntimeTag.cmake +++ b/sycl/cmake/modules/UnifiedRuntimeTag.cmake @@ -1,7 +1 @@ -# commit 9937d029c7fdcbf101e89f8515f640c145e059c5 -# Merge: 9ac6d5d9 10b0e101 -# Author: Callum Fare -# Date: Wed Nov 20 14:49:17 2024 +0000 -# Merge pull request #2258 from aarongreig/aaron/tryUseExtensionSubgroupInfo -# Use extension version of clGetKernelSubGroupInfo when necessary. -set(UNIFIED_RUNTIME_TAG 9937d029c7fdcbf101e89f8515f640c145e059c5) +set(UNIFIED_RUNTIME_TAG pietro/prop_ncpu_r_or_nd) diff --git a/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp b/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp new file mode 100644 index 0000000000000..cc146b067776d --- /dev/null +++ b/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp @@ -0,0 +1,40 @@ +// REQUIRES: native_cpu +// RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -S -o - | FileCheck %s + +#include +using namespace sycl; + +class Test; +class Test2; +class Test3; + +int main() { + sycl::queue deviceQueue; + sycl::nd_range<1> r(1, 1); + deviceQueue.submit([&](handler &h) { + h.parallel_for(r, [=](nd_item<1> it) { it.barrier(); }); + }); + // CHECK-DAG: @_ZTS4Test({{.*}} !is_nd_range [[MDID:![0-9]*]] + + int res = 0; + { + buffer buf(&res, 1); + deviceQueue.submit([&](handler &h) { + auto acc = buf.template get_access(h); + local_accessor local_acc(1, h); + h.parallel_for(r, [=](nd_item<1> it) { + local_acc[0] = 1; + acc[0] = local_acc[0]; + }); + }); + // CHECK-DAG: @_ZTS5Test2({{.*}} !is_nd_range [[MDID:![0-9]*]] + } + deviceQueue.submit([&](handler &h) { + h.parallel_for(1, [=](item<1> it) { it.get_id(); }); + }); + // CHECK-DAG: @_ZTS5Test3({{.*}} !is_nd_range [[MDNOT:![0-9]*]] + +} + +//CHECK:[[MDID]] = !{i1 true} +//CHECK:[[MDNOT]] = !{i1 false} From 21b4a94fe157b2ad9e7bdf2ded42f336ab1648e2 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Wed, 6 Nov 2024 15:52:57 +0000 Subject: [PATCH 2/5] Review comments --- clang/lib/CodeGen/BackendUtil.cpp | 6 +---- .../ClangOffloadWrapper.cpp | 7 ++++++ .../llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h | 6 +++++ .../CheckNDRangeSYCLNativeCPU.cpp | 23 +++++++++++++------ .../PipelineSYCLNativeCPU.cpp | 4 ++++ 5 files changed, 34 insertions(+), 12 deletions(-) diff --git a/clang/lib/CodeGen/BackendUtil.cpp b/clang/lib/CodeGen/BackendUtil.cpp index 72a636370aed7..a63e1675e3b3b 100644 --- a/clang/lib/CodeGen/BackendUtil.cpp +++ b/clang/lib/CodeGen/BackendUtil.cpp @@ -47,7 +47,6 @@ #include "llvm/Passes/PassPlugin.h" #include "llvm/Passes/StandardInstrumentations.h" #include "llvm/ProfileData/InstrProfCorrelator.h" -#include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" #include "llvm/SYCLLowerIR/CleanupSYCLMetadata.h" #include "llvm/SYCLLowerIR/CompileTimePropertiesPass.h" #include "llvm/SYCLLowerIR/ESIMD/ESIMDVerifier.h" @@ -1166,11 +1165,8 @@ void EmitAssemblyHelper::RunOptimizationPipeline( if (LangOpts.EnableDAEInSpirKernels) MPM.addPass(DeadArgumentEliminationSYCLPass()); - // We have to schedule the pass here because the native cpu pipeline - // is ran as part of a separate clang invocation, but we want the information - // in sycl-post-link. if (LangOpts.SYCLIsNativeCPU) - MPM.addPass(CheckNDRangeSYCLNativeCPUPass()); + llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(MPM); // Rerun aspect propagation without warning diagnostics. MPM.addPass( SYCLPropagateAspectsUsagePass(/*FP64ConvEmu=*/CodeGenOpts.FP64ConvEmu, diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index cbd88552d5d72..b6184c4494a90 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -713,6 +713,8 @@ class BinaryWrapper { "__sycl_native_cpu_decls"); auto *EntriesBegin = ConstantExpr::getGetElementPtr(GVar->getValueType(), GVar, getSizetConstPair(0u, 0u)); + + // Add Native CPU specific properties to the nativecpu_program struct Constant *PropValue = NullPtr; if (NativeCPUProps.has_value()) { auto PropsOrErr = addSYCLPropertySetToModule(*NativeCPUProps); @@ -724,6 +726,11 @@ class BinaryWrapper { auto T = addStructArrayToModule({S}, getSyclPropSetTy()); PropValue = T.first; } + + // Create the nativecpu_program struct. + // We add it to a ConstantArray of length 1 because the SYCL runtime expects a + // non-zero sized binary image, and this allows it to point the end of the binary + // image to the end of the array. auto *Program = ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue}); ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1); Constant *CPA = ConstantArray::get(ProgramATy, {Program}); diff --git a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h index d1694a7b99696..00c582854220d 100644 --- a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h @@ -11,6 +11,7 @@ //===----------------------------------------------------------------------===// #pragma once #include "llvm/ADT/Twine.h" +#include "llvm/IR/Module.h" #include "llvm/IR/PassManager.h" #include "llvm/Passes/OptimizationLevel.h" @@ -18,6 +19,11 @@ namespace llvm { namespace sycl { namespace utils { + +// Used to schedule passes in the device compiler cc1 invocation for +// Native CPU. +void addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM); + void addSYCLNativeCPUBackendPasses(ModulePassManager &MPM, ModuleAnalysisManager &MAM, OptimizationLevel OptLevel); diff --git a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp index 01e1f8324fad9..d8fbaca25a14c 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp @@ -1,4 +1,4 @@ -//===------ PrepareSYCLNativeCPU.cpp - SYCL Native CPU Preparation Pass ---===// +//- CheckNDRangeSYCLNativeCPU.cpp - Check if a kernel uses nd_range features -// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -23,10 +23,19 @@ using namespace llvm; -// TODO: add other bts -static std::array ndFunctions{ - "_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_NumWorkgroups_xv", - "_Z21__spirv_WorkgroupId_xv", "_Z27__spirv_LocalInvocationId_xv", +static std::array NdFunctions{ + "_Z23__spirv_WorkgroupSize_xv", + "_Z23__spirv_WorkgroupSize_yv", + "_Z23__spirv_WorkgroupSize_zv", + "_Z23__spirv_NumWorkgroups_xv", + "_Z23__spirv_NumWorkgroups_yv", + "_Z23__spirv_NumWorkgroups_zv", + "_Z21__spirv_WorkgroupId_xv", + "_Z21__spirv_WorkgroupId_yv", + "_Z21__spirv_WorkgroupId_zv", + "_Z27__spirv_LocalInvocationId_xv", + "_Z27__spirv_LocalInvocationId_yv", + "_Z27__spirv_LocalInvocationId_zv", "_Z22__spirv_ControlBarrierjjj"}; static void addNDRangeMetadata(Function &F, bool Value) { @@ -56,8 +65,8 @@ CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) { for (auto &I : BB) { if (auto CI = dyn_cast(&I)) { auto CalleeName = CI->getCalledFunction()->getName(); - if (std::find(ndFunctions.begin(), ndFunctions.end(), CalleeName) != - ndFunctions.end()) { + if (std::find(NdFunctions.begin(), NdFunctions.end(), CalleeName) != + NdFunctions.end()) { IsNDRange = true; break; } diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index 5de9153ca54a9..050413cdaac13 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -131,3 +131,7 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( MPM.addPass(DumpIR()); } } + +void llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM) { + MPM.addPass(CheckNDRangeSYCLNativeCPUPass()); +} From 8c344fdf91a47270bf0f4405fcb3170d5db251f3 Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Wed, 6 Nov 2024 15:53:35 +0000 Subject: [PATCH 3/5] Formatting --- .../ClangOffloadWrapper.cpp | 6 +++--- .../llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h | 1 - .../CheckNDRangeSYCLNativeCPU.cpp | 18 ++++++------------ .../PipelineSYCLNativeCPU.cpp | 2 +- 4 files changed, 10 insertions(+), 17 deletions(-) diff --git a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp index b6184c4494a90..0645c425eb386 100644 --- a/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp +++ b/clang/tools/clang-offload-wrapper/ClangOffloadWrapper.cpp @@ -728,9 +728,9 @@ class BinaryWrapper { } // Create the nativecpu_program struct. - // We add it to a ConstantArray of length 1 because the SYCL runtime expects a - // non-zero sized binary image, and this allows it to point the end of the binary - // image to the end of the array. + // We add it to a ConstantArray of length 1 because the SYCL runtime expects + // a non-zero sized binary image, and this allows it to point the end of the + // binary image to the end of the array. auto *Program = ConstantStruct::get(NCPUProgramT, {EntriesBegin, PropValue}); ArrayType *ProgramATy = ArrayType::get(NCPUProgramT, 1); Constant *CPA = ConstantArray::get(ProgramATy, {Program}); diff --git a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h index 00c582854220d..26cdfe7c7f917 100644 --- a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h @@ -19,7 +19,6 @@ namespace llvm { namespace sycl { namespace utils { - // Used to schedule passes in the device compiler cc1 invocation for // Native CPU. void addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM); diff --git a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp index d8fbaca25a14c..87a9876e96c41 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp @@ -24,18 +24,12 @@ using namespace llvm; static std::array NdFunctions{ - "_Z23__spirv_WorkgroupSize_xv", - "_Z23__spirv_WorkgroupSize_yv", - "_Z23__spirv_WorkgroupSize_zv", - "_Z23__spirv_NumWorkgroups_xv", - "_Z23__spirv_NumWorkgroups_yv", - "_Z23__spirv_NumWorkgroups_zv", - "_Z21__spirv_WorkgroupId_xv", - "_Z21__spirv_WorkgroupId_yv", - "_Z21__spirv_WorkgroupId_zv", - "_Z27__spirv_LocalInvocationId_xv", - "_Z27__spirv_LocalInvocationId_yv", - "_Z27__spirv_LocalInvocationId_zv", + "_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_WorkgroupSize_yv", + "_Z23__spirv_WorkgroupSize_zv", "_Z23__spirv_NumWorkgroups_xv", + "_Z23__spirv_NumWorkgroups_yv", "_Z23__spirv_NumWorkgroups_zv", + "_Z21__spirv_WorkgroupId_xv", "_Z21__spirv_WorkgroupId_yv", + "_Z21__spirv_WorkgroupId_zv", "_Z27__spirv_LocalInvocationId_xv", + "_Z27__spirv_LocalInvocationId_yv", "_Z27__spirv_LocalInvocationId_zv", "_Z22__spirv_ControlBarrierjjj"}; static void addNDRangeMetadata(Function &F, bool Value) { diff --git a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp index 050413cdaac13..78fece4bf5e32 100644 --- a/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/PipelineSYCLNativeCPU.cpp @@ -133,5 +133,5 @@ void llvm::sycl::utils::addSYCLNativeCPUBackendPasses( } void llvm::sycl::utils::addSYCLNativeCPUEarlyPasses(ModulePassManager &MPM) { - MPM.addPass(CheckNDRangeSYCLNativeCPUPass()); + MPM.addPass(CheckNDRangeSYCLNativeCPUPass()); } From 1d86faaf6e8ec07928ef0ab3896398f4f26e274a Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Wed, 20 Nov 2024 15:08:09 +0000 Subject: [PATCH 4/5] Check for function calls and local as global variables --- .../SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h | 9 ++- .../llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h | 2 + .../CheckNDRangeSYCLNativeCPU.cpp | 75 +++++++++++++++---- .../native_cpu/nd_range_attr.cpp | 24 +++++- 4 files changed, 86 insertions(+), 24 deletions(-) diff --git a/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h index 9e0f63669b1d6..6b2c7a67eac40 100644 --- a/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h @@ -6,10 +6,11 @@ // //===----------------------------------------------------------------------===// // -// A transformation pass that: -// * Handles the kernel calling convention and attributes. -// * Materializes the spirv builtins so that they can be handled by the host -// runtime. +// Checks if the kernel uses features from nd_item such as: +// * local id +// * local range +// * local memory +// * work group barrier //===----------------------------------------------------------------------===// #pragma once diff --git a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h index 26cdfe7c7f917..308c0cba74351 100644 --- a/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h @@ -40,6 +40,8 @@ inline bool isSYCLNativeCPU(const Module &M) { return M.getModuleFlag("is-native-cpu") != nullptr; } +constexpr unsigned SyclNativeCpuLocalAS = 3; + } // namespace utils } // namespace sycl } // namespace llvm diff --git a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp index 87a9876e96c41..5cbb201b86ed1 100644 --- a/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp +++ b/llvm/lib/SYCLNativeCPUUtils/CheckNDRangeSYCLNativeCPU.cpp @@ -14,16 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h" +#include "llvm/ADT/PriorityWorklist.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/Metadata.h" +#include "llvm/SYCLLowerIR/UtilsSYCLNativeCPU.h" +#include "llvm/Support/Casting.h" using namespace llvm; -static std::array NdFunctions{ +static std::array NdBuiltins{ "_Z23__spirv_WorkgroupSize_xv", "_Z23__spirv_WorkgroupSize_yv", "_Z23__spirv_WorkgroupSize_zv", "_Z23__spirv_NumWorkgroups_xv", "_Z23__spirv_NumWorkgroups_yv", "_Z23__spirv_NumWorkgroups_zv", @@ -42,6 +48,55 @@ static void addNDRangeMetadata(Function &F, bool Value) { PreservedAnalyses CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) { bool ModuleChanged = false; + SmallPtrSet NdFuncs; // Functions that use NDRange features + SmallPtrSet Visited; + SmallPriorityWorklist WorkList; + + // Add builtins to the set of functions that may use NDRange features + for (auto &FName : NdBuiltins) { + auto F = M.getFunction(FName); + if (F == nullptr) + continue; + WorkList.insert(F); + NdFuncs.insert(F); + } + + // Add users of local AS global var to the set of functions that may use + // NDRange features + for (auto &GV : M.globals()) { + if (GV.getAddressSpace() != sycl::utils::SyclNativeCpuLocalAS) + continue; + + for (auto U : GV.users()) { + if (auto I = dyn_cast(U)) { + auto F = I->getFunction(); + if (F != nullptr && NdFuncs.insert(F).second) { + WorkList.insert(F); + NdFuncs.insert(F); + } + } + } + } + + // Traverse the use chain to find Functions that may use NDRange features + // (or, recursively, Functions that call Functions that may use NDRange + // features) + while (!WorkList.empty()) { + auto F = WorkList.pop_back_val(); + + for (User *U : F->users()) { + if (auto CI = dyn_cast(U)) { + auto Caller = CI->getFunction(); + if (!Caller) + continue; + if (!Visited.contains(Caller)) { + WorkList.insert(Caller); + NdFuncs.insert(Caller); + } + } + } + Visited.insert(F); + } for (auto &F : M) { if (F.getCallingConv() == llvm::CallingConv::SPIR_KERNEL) { @@ -55,23 +110,11 @@ CheckNDRangeSYCLNativeCPUPass::run(Module &M, ModuleAnalysisManager &MAM) { } } - for (auto &BB : F) { - for (auto &I : BB) { - if (auto CI = dyn_cast(&I)) { - auto CalleeName = CI->getCalledFunction()->getName(); - if (std::find(NdFunctions.begin(), NdFunctions.end(), CalleeName) != - NdFunctions.end()) { - IsNDRange = true; - break; - } - } - } - if (IsNDRange) { - break; - } - } + // Check if the kernel calls one of the ND Range builtins + IsNDRange |= NdFuncs.contains(&F); addNDRangeMetadata(F, IsNDRange); + ModuleChanged = true; } } return ModuleChanged ? PreservedAnalyses::none() : PreservedAnalyses::all(); diff --git a/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp b/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp index cc146b067776d..a1ea1b5bbbd6d 100644 --- a/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp +++ b/sycl/test/check_device_code/native_cpu/nd_range_attr.cpp @@ -1,5 +1,6 @@ // REQUIRES: native_cpu // RUN: %clangxx -fsycl-device-only -fsycl-targets=native_cpu %s -S -o - | FileCheck %s +// RUN: %clangxx -fsycl-device-only -O0 -fsycl-targets=native_cpu %s -S -o - | FileCheck %s #include using namespace sycl; @@ -7,6 +8,14 @@ using namespace sycl; class Test; class Test2; class Test3; +class Test4; + +template +void use_local_acc(nd_item<1> it, AccT &acc, + const local_accessor &local_acc) { + local_acc[it.get_local_id()[0]] = 1; + acc[it.get_local_id()[0]] = local_acc[0]; +} int main() { sycl::queue deviceQueue; @@ -22,18 +31,25 @@ int main() { deviceQueue.submit([&](handler &h) { auto acc = buf.template get_access(h); local_accessor local_acc(1, h); - h.parallel_for(r, [=](nd_item<1> it) { - local_acc[0] = 1; - acc[0] = local_acc[0]; - }); + h.parallel_for( + r, [=](nd_item<1> it) { use_local_acc(it, acc, local_acc); }); }); // CHECK-DAG: @_ZTS5Test2({{.*}} !is_nd_range [[MDID:![0-9]*]] } + deviceQueue.submit([&](handler &h) { h.parallel_for(1, [=](item<1> it) { it.get_id(); }); }); // CHECK-DAG: @_ZTS5Test3({{.*}} !is_nd_range [[MDNOT:![0-9]*]] + buffer buf(&res, 1); + deviceQueue.submit([&](sycl::handler &cgh) { + auto acc = sycl::accessor(buf, cgh, sycl::write_only); + cgh.parallel_for_work_group( + range<1>(1), range<1>(1), + [=](auto group) { acc[group.get_group_id()] = 42; }); + }); + // CHECK-DAG: @_ZTS5Test4({{.*}} !is_nd_range [[MDID:![0-9]*]] } //CHECK:[[MDID]] = !{i1 true} From d02834a9ae9d5d80718824581c054ffcd2354cbc Mon Sep 17 00:00:00 2001 From: PietroGhg Date: Fri, 29 Nov 2024 11:05:36 +0000 Subject: [PATCH 5/5] Formatting --- llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h index f6a1c2ff03b17..c6b70da4d2054 100644 --- a/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h +++ b/llvm/include/llvm/SYCLLowerIR/CheckNDRangeSYCLNativeCPU.h @@ -1,5 +1,5 @@ //===-- CheckNDRangeSYCLNativeCPU.h -Check if a kernel uses nd_range -//features--===// +// features--===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information.