From bd6902f46ff78a1fff5cc69b18513e1342c46916 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 23 Jun 2025 08:56:26 -0700 Subject: [PATCH 1/6] [SYCL][NFCI] Make device binary image pointers const in more places This commit changes the use of RTDeviceBinaryImage pointers to be const in more places. Primary cases where we break this is for compressed images, where decompression happens semi-lazily, so const-casts are occassionally necessary. Signed-off-by: Larsen, Steffen --- sycl/source/detail/context_impl.cpp | 12 +- sycl/source/detail/context_impl.hpp | 3 +- sycl/source/detail/device_binary_image.hpp | 2 +- sycl/source/detail/device_global_map.hpp | 55 +++++--- .../source/detail/device_global_map_entry.cpp | 100 +++++++++++---- .../source/detail/device_global_map_entry.hpp | 14 +- sycl/source/detail/device_image_impl.hpp | 22 ++-- sycl/source/detail/helpers.cpp | 2 +- sycl/source/detail/kernel_bundle_impl.hpp | 70 ++++++++-- sycl/source/detail/memory_manager.cpp | 2 +- .../program_manager/program_manager.cpp | 120 +++++++++--------- .../program_manager/program_manager.hpp | 44 ++++--- sycl/source/detail/scheduler/commands.cpp | 2 +- sycl/source/kernel_bundle.cpp | 2 +- sycl/test-e2e/SYCLBIN/Inputs/dg.hpp | 104 +++++++++++++++ sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp | 21 +++ sycl/test-e2e/SYCLBIN/dg_executable.cpp | 25 ++++ sycl/test-e2e/SYCLBIN/dg_input.cpp | 24 ++++ sycl/test-e2e/SYCLBIN/dg_object.cpp | 24 ++++ .../no_sycl_hpp_in_e2e_tests.cpp | 2 +- sycl/unittests/program_manager/Cleanup.cpp | 10 +- 21 files changed, 493 insertions(+), 167 deletions(-) create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/dg.hpp create mode 100644 sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp create mode 100644 sycl/test-e2e/SYCLBIN/dg_executable.cpp create mode 100644 sycl/test-e2e/SYCLBIN/dg_input.cpp create mode 100644 sycl/test-e2e/SYCLBIN/dg_object.cpp diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0bf73f191a8d0..4b417f2e6d13c 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -340,9 +340,9 @@ void context_impl::addDeviceGlobalInitializer( } } -std::vector -context_impl::initializeDeviceGlobals(ur_program_handle_t NativePrg, - queue_impl &QueueImpl) { +std::vector context_impl::initializeDeviceGlobals( + ur_program_handle_t NativePrg, queue_impl &QueueImpl, + detail::kernel_bundle_impl *KernelBundleImplPtr) { if (!MDeviceGlobalNotInitializedCnt.load(std::memory_order_acquire)) return {}; @@ -396,6 +396,12 @@ context_impl::initializeDeviceGlobals(ur_program_handle_t NativePrg, detail::ProgramManager::getInstance().getDeviceGlobalEntries( DeviceGlobalIds, /*ExcludeDeviceImageScopeDecorated=*/true); + // Kernel bundles may have isolated device globals. They need to be + // initialized too. + if (KernelBundleImplPtr && KernelBundleImplPtr->getDeviceGlobalMap().size()) + KernelBundleImplPtr->getDeviceGlobalMap().getEntries( + DeviceGlobalIds, /*ExcludeDeviceImageScopeDecorated=*/true, + DeviceGlobalEntries); // If there were no device globals without device_image_scope the device // globals are trivially fully initialized and we can end early. diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 24a19f0a9c674..a38950948fdf5 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -223,7 +223,8 @@ class context_impl : public std::enable_shared_from_this { /// Initializes device globals for a program on the associated queue. std::vector - initializeDeviceGlobals(ur_program_handle_t NativePrg, queue_impl &QueueImpl); + initializeDeviceGlobals(ur_program_handle_t NativePrg, queue_impl &QueueImpl, + detail::kernel_bundle_impl *KernelBundleImplPtr); void memcpyToHostOnlyDeviceGlobal(device_impl &DeviceImpl, const void *DeviceGlobalPtr, diff --git a/sycl/source/detail/device_binary_image.hpp b/sycl/source/detail/device_binary_image.hpp index db433ea01a407..ee96ccc998d27 100644 --- a/sycl/source/detail/device_binary_image.hpp +++ b/sycl/source/detail/device_binary_image.hpp @@ -296,7 +296,7 @@ class DynRTDeviceBinaryImage : public RTDeviceBinaryImage { } static DynRTDeviceBinaryImage - merge(const std::vector &Imgs); + merge(const std::vector &Imgs); protected: DynRTDeviceBinaryImage(); diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 77bfed5c34a85..35d7ebec5b407 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -16,6 +16,7 @@ #include #include #include +#include namespace sycl { inline namespace _V1 { @@ -23,9 +24,22 @@ namespace detail { class DeviceGlobalMap { public: - void initializeEntries(RTDeviceBinaryImage *Img) { - const auto &DeviceGlobals = Img->getDeviceGlobals(); + DeviceGlobalMap(bool OwnerControlledCleanup) + : MOwnerControlledCleanup{OwnerControlledCleanup} {} + + ~DeviceGlobalMap() { + if (!MOwnerControlledCleanup) + for (auto &DeviceGlobalIt : MDeviceGlobals) + DeviceGlobalIt.second->cleanup(); + } + + void initializeEntries(const RTDeviceBinaryImage *Img) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); + initializeEntriesLockless(Img); + } + + void initializeEntriesLockless(const RTDeviceBinaryImage *Img) { + const auto &DeviceGlobals = Img->getDeviceGlobals(); for (const sycl_device_binary_property &DeviceGlobal : DeviceGlobals) { ByteArray DeviceGlobalInfo = DeviceBinaryProperty(DeviceGlobal).asByteArray(); @@ -56,6 +70,7 @@ class DeviceGlobalMap { DeviceGlobal->Name, Img, TypeSize, DeviceImageScopeDecorated); MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); } + std::cout << DeviceGlobal->Name << std::endl; } } @@ -102,9 +117,16 @@ class DeviceGlobalMap { return Entry->second; } - DeviceGlobalMapEntry *tryGetEntry(const std::string &UniqueId, - bool ExcludeDeviceImageScopeDecorated) { + DeviceGlobalMapEntry * + tryGetEntry(const std::string &UniqueId, + bool ExcludeDeviceImageScopeDecorated = false) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); + return tryGetEntryLockless(UniqueId, ExcludeDeviceImageScopeDecorated); + } + + DeviceGlobalMapEntry * + tryGetEntryLockless(const std::string &UniqueId, + bool ExcludeDeviceImageScopeDecorated = false) const { auto DeviceGlobalEntry = MDeviceGlobals.find(UniqueId); if (DeviceGlobalEntry != MDeviceGlobals.end() && (!ExcludeDeviceImageScopeDecorated || @@ -113,22 +135,17 @@ class DeviceGlobalMap { return nullptr; } - std::vector - getEntries(const std::vector &UniqueIds, - bool ExcludeDeviceImageScopeDecorated) { - std::vector FoundEntries; - FoundEntries.reserve(UniqueIds.size()); - + void getEntries(const std::vector &UniqueIds, + bool ExcludeDeviceImageScopeDecorated, + std::vector &OutVec) { std::lock_guard DeviceGlobalsGuard(MDeviceGlobalsMutex); for (const std::string &UniqueId : UniqueIds) { auto DeviceGlobalEntry = MDeviceGlobals.find(UniqueId); - assert(DeviceGlobalEntry != MDeviceGlobals.end() && - "Device global not found in map."); - if (!ExcludeDeviceImageScopeDecorated || - !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated) - FoundEntries.push_back(DeviceGlobalEntry->second.get()); + if (DeviceGlobalEntry != MDeviceGlobals.end() && + (!ExcludeDeviceImageScopeDecorated || + !DeviceGlobalEntry->second->MIsDeviceImageScopeDecorated)) + OutVec.push_back(DeviceGlobalEntry->second.get()); } - return FoundEntries; } const std::unordered_map @@ -143,6 +160,12 @@ class DeviceGlobalMap { } private: + // Indicates whether the owner will explicitly cleanup the entries. If false + // the dtor of DeviceGlobalMap will cleanup the enties. + // Note: This lets the global device global map avoid overhead at shutdown and + // instead let the contexts own the associated entries. + bool MOwnerControlledCleanup = true; + // Maps between device_global identifiers and associated information. std::unordered_map> MDeviceGlobals; diff --git a/sycl/source/detail/device_global_map_entry.cpp b/sycl/source/detail/device_global_map_entry.cpp index 78bd59a9ef795..2cb9e570a5c92 100644 --- a/sycl/source/detail/device_global_map_entry.cpp +++ b/sycl/source/detail/device_global_map_entry.cpp @@ -68,22 +68,33 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(queue_impl &QueueImpl) { { std::lock_guard Lock(NewAlloc.MInitEventMutex); ur_event_handle_t InitEvent; - // C++ guarantees members appear in memory in the order they are declared, - // so since the member variable that contains the initial contents of the - // device_global is right after the usm_ptr member variable we can do - // some pointer arithmetic to memcopy over this value to the usm_ptr. This - // value inside of the device_global will be zero-initialized if it was not - // given a value on construction. - - MemoryManager::copy_usm(reinterpret_cast( - reinterpret_cast(MDeviceGlobalPtr) + - sizeof(MDeviceGlobalPtr)), - QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, - std::vector{}, &InitEvent); + if (MDeviceGlobalPtr) { + // C++ guarantees members appear in memory in the order they are declared, + // so since the member variable that contains the initial contents of the + // device_global is right after the usm_ptr member variable we can do + // some pointer arithmetic to memcopy over this value to the usm_ptr. This + // value inside of the device_global will be zero-initialized if it was + // not given a value on construction. + MemoryManager::copy_usm( + reinterpret_cast( + reinterpret_cast(MDeviceGlobalPtr) + + sizeof(MDeviceGlobalPtr)), + QueueImpl, MDeviceGlobalTSize, NewAlloc.MPtr, + std::vector{}, &InitEvent); + } else { + // For SYCLBIN device globals we do not have a host pointer to copy from, + // so instead we fill the USM memory with 0's. + MemoryManager::fill_usm(NewAlloc.MPtr, QueueImpl, MDeviceGlobalTSize, + {static_cast(0)}, {}, &InitEvent); + } NewAlloc.MInitEvent = InitEvent; } - CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); + // Only device globals with host variables need to be registered with the + // context. The rest will be managed by their kernel bundles and cleaned up + // accordingly. + if (MDeviceGlobalPtr) + CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); return NewAlloc; } @@ -111,19 +122,32 @@ DeviceGlobalMapEntry::getOrAllocateDeviceGlobalUSM(const context &Context) { "USM allocation for device and context already happened."); DeviceGlobalUSMMem &NewAlloc = NewAllocIt.first->second; - // C++ guarantees members appear in memory in the order they are declared, - // so since the member variable that contains the initial contents of the - // device_global is right after the usm_ptr member variable we can do - // some pointer arithmetic to memcopy over this value to the usm_ptr. This - // value inside of the device_global will be zero-initialized if it was not - // given a value on construction. - MemoryManager::context_copy_usm( - reinterpret_cast( - reinterpret_cast(MDeviceGlobalPtr) + - sizeof(MDeviceGlobalPtr)), - &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr); - - CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); + if (MDeviceGlobalPtr) { + // C++ guarantees members appear in memory in the order they are declared, + // so since the member variable that contains the initial contents of the + // device_global is right after the usm_ptr member variable we can do + // some pointer arithmetic to memcopy over this value to the usm_ptr. This + // value inside of the device_global will be zero-initialized if it was not + // given a value on construction. + MemoryManager::context_copy_usm( + reinterpret_cast( + reinterpret_cast(MDeviceGlobalPtr) + + sizeof(MDeviceGlobalPtr)), + &CtxImpl, MDeviceGlobalTSize, NewAlloc.MPtr); + } else { + // For SYCLBIN device globals we do not have a host pointer to copy from, + // so instead we fill the USM memory with 0's. + std::vector ImmBuff(MDeviceGlobalTSize, + static_cast(0)); + MemoryManager::context_copy_usm(ImmBuff.data(), &CtxImpl, + MDeviceGlobalTSize, NewAlloc.MPtr); + } + + // Only device globals with host variables need to be registered with the + // context. The rest will be managed by their kernel bundles and cleaned up + // accordingly. + if (MDeviceGlobalPtr) + CtxImpl.addAssociatedDeviceGlobal(MDeviceGlobalPtr); return NewAlloc; } @@ -150,6 +174,30 @@ void DeviceGlobalMapEntry::removeAssociatedResources( } } +void DeviceGlobalMapEntry::cleanup() { + std::lock_guard Lock{MDeviceToUSMPtrMapMutex}; + assert(MDeviceGlobalPtr == nullptr && + "Entry has host variable, so it should be associated with a context " + "and should be cleaned up by its dtor."); + for (auto &USMPtrIt : MDeviceToUSMPtrMap) { + // The context should be alive through the kernel_bundle owning these + // device_global entries. + const context_impl *CtxImpl = USMPtrIt.first.second; + DeviceGlobalUSMMem &USMMem = USMPtrIt.second; + detail::usm::freeInternal(USMMem.MPtr, CtxImpl); + if (USMMem.MInitEvent.has_value()) + CtxImpl->getAdapter()->call( + *USMMem.MInitEvent); +#ifndef NDEBUG + // For debugging we set the event and memory to some recognizable values + // to allow us to check that this cleanup happens before erasure. + USMMem.MPtr = nullptr; + USMMem.MInitEvent = {}; +#endif + } + MDeviceToUSMPtrMap.clear(); +} + } // namespace detail } // namespace _V1 } // namespace sycl diff --git a/sycl/source/detail/device_global_map_entry.hpp b/sycl/source/detail/device_global_map_entry.hpp index cfa86a6639e43..3623e315ed9df 100644 --- a/sycl/source/detail/device_global_map_entry.hpp +++ b/sycl/source/detail/device_global_map_entry.hpp @@ -55,7 +55,7 @@ struct DeviceGlobalMapEntry { // Pointer to the device_global on host. const void *MDeviceGlobalPtr = nullptr; // Images device_global are used by. - std::unordered_set MImages; + std::unordered_set MImages; // The image identifiers for the images using the device_global used by in the // cache. std::set MImageIdentifiers; @@ -71,7 +71,7 @@ struct DeviceGlobalMapEntry { // Constructor for only initializing ID, type size, and device image scope // flag. The pointer to the device global will be initialized later. - DeviceGlobalMapEntry(std::string UniqueId, RTDeviceBinaryImage *Img, + DeviceGlobalMapEntry(std::string UniqueId, const RTDeviceBinaryImage *Img, std::uint32_t DeviceGlobalTSize, bool IsDeviceImageScopeDecorated) : MUniqueId(UniqueId), MImages{Img}, @@ -89,7 +89,8 @@ struct DeviceGlobalMapEntry { // Initialize the device_global's element type size and the flag signalling // if the device_global has the device_image_scope property. - void initialize(RTDeviceBinaryImage *Img, std::uint32_t DeviceGlobalTSize, + void initialize(const RTDeviceBinaryImage *Img, + std::uint32_t DeviceGlobalTSize, bool IsDeviceImageScopeDecorated) { if (MDeviceGlobalTSize != 0) { // The device global entry has already been initialized. This can happen @@ -119,6 +120,13 @@ struct DeviceGlobalMapEntry { // Removes resources for device_globals associated with the context. void removeAssociatedResources(const context_impl *CtxImpl); + // Cleans up the USM memory and intialization events associated with this + // entry. This should only be called when the device global entry is not + // owned by the program manager, as otherwise it will be bound to the lifetime + // of the owner context and will be cleaned up through + // removeAssociatedResources. + void cleanup(); + private: // Map from a device and a context to the associated USM allocation for the // device_global. This should always be empty if MIsDeviceImageScopeDecorated diff --git a/sycl/source/detail/device_image_impl.hpp b/sycl/source/detail/device_image_impl.hpp index 6be86572de356..37cbeddff7eea 100644 --- a/sycl/source/detail/device_image_impl.hpp +++ b/sycl/source/detail/device_image_impl.hpp @@ -84,20 +84,15 @@ class ManagedDeviceGlobalsRegistry { bool hasDeviceGlobalName(const std::string &Name) const noexcept { return !MDeviceGlobalNames.empty() && std::find(MDeviceGlobalNames.begin(), MDeviceGlobalNames.end(), - mangleDeviceGlobalName(Name)) != MDeviceGlobalNames.end(); + Name) != MDeviceGlobalNames.end(); } DeviceGlobalMapEntry *tryGetDeviceGlobalEntry(const std::string &Name) const { auto &PM = detail::ProgramManager::getInstance(); - return PM.tryGetDeviceGlobalEntry(MPrefix + mangleDeviceGlobalName(Name)); + return PM.tryGetDeviceGlobalEntry(MPrefix + Name); } private: - static std::string mangleDeviceGlobalName(const std::string &Name) { - // TODO: Support device globals declared in namespaces. - return "_Z" + std::to_string(Name.length()) + Name; - } - void unregisterDeviceGlobalsFromContext() { if (MDeviceGlobalNames.empty()) return; @@ -1125,11 +1120,12 @@ class device_image_impl { // imports. // TODO: Consider making a collectDeviceImageDeps variant that takes a // set reference and inserts into that instead. - std::set ImgDeps; + std::set ImgDeps; for (const device &Device : DevImgImpl->get_devices()) { - std::set DevImgDeps = PM.collectDeviceImageDeps( - *NewImage, *getSyclObjImpl(Device), - /*ErrorOnUnresolvableImport=*/State == bundle_state::executable); + std::set DevImgDeps = + PM.collectDeviceImageDeps(*NewImage, *getSyclObjImpl(Device), + /*ErrorOnUnresolvableImport=*/State == + bundle_state::executable); ImgDeps.insert(DevImgDeps.begin(), DevImgDeps.end()); } @@ -1144,13 +1140,13 @@ class device_image_impl { if (State == bundle_state::executable) { // If target is executable we bundle the image and dependencies together // and bring it into state. - for (RTDeviceBinaryImage *ImgDep : ImgDeps) + for (const RTDeviceBinaryImage *ImgDep : ImgDeps) NewImageAndDeps.push_back(PM.createDependencyImage( MContext, SupportingDevsRef, ImgDep, bundle_state::input)); } else if (State == bundle_state::object) { // If the target is object, we bring the dependencies into object state // individually and put them in the bundle. - for (RTDeviceBinaryImage *ImgDep : ImgDeps) { + for (const RTDeviceBinaryImage *ImgDep : ImgDeps) { DevImgPlainWithDeps ImgDepWithDeps{PM.createDependencyImage( MContext, SupportingDevsRef, ImgDep, bundle_state::input)}; PM.bringSYCLDeviceImageToState(ImgDepWithDeps, State); diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 14e7aca275221..45d1ee112263e 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -50,7 +50,7 @@ retrieveKernelBinary(queue_impl &Queue, KernelNameStrRefT KernelName, ProgramManager::getInstance().getRawDeviceImages(KernelIds); auto DeviceImage = std::find_if( DeviceImages.begin(), DeviceImages.end(), - [isNvidia](RTDeviceBinaryImage *DI) { + [isNvidia](const RTDeviceBinaryImage *DI) { const std::string &TargetSpec = isNvidia ? std::string("llvm_nvptx64") : std::string("llvm_amdgcn"); return DI->getFormat() == SYCL_DEVICE_BINARY_TYPE_LLVMIR_BITCODE && diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 67414a2ca197e..264629c7182bf 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -8,6 +8,7 @@ #pragma once +#include #include #include #include @@ -189,6 +190,7 @@ class kernel_bundle_impl MDevices, PropList); MDeviceImages.emplace_back(BuiltImg); MUniqueDeviceImages.emplace_back(BuiltImg); + populateDeviceGlobalsForSYCLBIN(); break; } case bundle_state::input: @@ -411,6 +413,8 @@ class kernel_bundle_impl removeDuplicateImages(); + populateDeviceGlobalsForSYCLBIN(); + for (const kernel_bundle &Bundle : ObjectBundles) { const KernelBundleImplPtr &BundlePtr = getSyclObjImpl(Bundle); for (const std::pair> @@ -489,6 +493,9 @@ class kernel_bundle_impl fillUniqueDeviceImages(); + if (get_bundle_state() == bundle_state::executable) + populateDeviceGlobalsForSYCLBIN(); + if (get_bundle_state() == bundle_state::input) { // Copy spec constants values from the device images. auto MergeSpecConstants = [this](const device_image_plain &Img) { @@ -589,6 +596,8 @@ class kernel_bundle_impl ProgramManager::getInstance().bringSYCLDeviceImagesToState(MDeviceImages, State); fillUniqueDeviceImages(); + if (State == bundle_state::executable) + populateDeviceGlobalsForSYCLBIN(); } template @@ -698,10 +707,14 @@ class kernel_bundle_impl } bool ext_oneapi_has_device_global(const std::string &Name) const { - return std::any_of( - begin(), end(), [&Name](const device_image_plain &DeviceImage) { - return getSyclObjImpl(DeviceImage)->hasDeviceGlobalName(Name); - }); + std::string MangledName = mangleDeviceGlobalName(Name); + return (MDeviceGlobals.size() && + MDeviceGlobals.tryGetEntryLockless(MangledName)) || + std::any_of(begin(), end(), + [&MangledName](const device_image_plain &DeviceImage) { + return getSyclObjImpl(DeviceImage) + ->hasDeviceGlobalName(MangledName); + }); } void *ext_oneapi_get_device_global_address(const std::string &Name, @@ -1025,28 +1038,51 @@ class kernel_bundle_impl return const_cast(this)->Base::shared_from_this(); } + DeviceGlobalMap &getDeviceGlobalMap() { return MDeviceGlobals; } + private: DeviceGlobalMapEntry *getDeviceGlobalEntry(const std::string &Name) const { - if (!hasSourceBasedImages()) { + if (!hasSourceBasedImages() && !hasSYCLBINImages()) { throw sycl::exception(make_error_code(errc::invalid), "Querying device globals by name is only available " - "in kernel_bundles successfully built from " + "in kernel_bundles created from SYCLBIN files and " + "kernel_bundles successfully built from " "kernel_bundle::ext_oneapi_source> " "with 'sycl' source language."); } - if (!ext_oneapi_has_device_global(Name)) { - throw sycl::exception(make_error_code(errc::invalid), - "device global '" + Name + - "' not found in kernel_bundle"); - } + std::string MangledName = mangleDeviceGlobalName(Name); + + if (MDeviceGlobals.size()) + if (DeviceGlobalMapEntry *Entry = + MDeviceGlobals.tryGetEntryLockless(MangledName)) + return Entry; for (const device_image_plain &DevImg : MUniqueDeviceImages) if (DeviceGlobalMapEntry *Entry = - getSyclObjImpl(DevImg)->tryGetDeviceGlobalEntry(Name)) + getSyclObjImpl(DevImg)->tryGetDeviceGlobalEntry(MangledName)) return Entry; - assert(false && "Device global should have been found."); - return nullptr; + + throw sycl::exception(make_error_code(errc::invalid), + "device global '" + Name + + "' not found in kernel_bundle"); + } + + static std::string mangleDeviceGlobalName(const std::string &Name) { + // TODO: Support device globals declared in namespaces. + return "_Z" + std::to_string(Name.length()) + Name; + } + + void populateDeviceGlobalsForSYCLBIN() { + // This should only be called from ctors, so lockless initialization is + // safe. + for (const device_image_plain &DevImg : MUniqueDeviceImages) { + const auto &DevImgImpl = getSyclObjImpl(DevImg); + if (DevImgImpl->getOriginMask() & ImageOriginSYCLBIN) + if (const RTDeviceBinaryImage *DevBinImg = + DevImgImpl->get_bin_image_ref()) + MDeviceGlobals.initializeEntriesLockless(DevBinImg); + } } void fillUniqueDeviceImages() { @@ -1084,6 +1120,12 @@ class kernel_bundle_impl // from any device image. SpecConstMapT MSpecConstValues; bundle_state MState; + + // Map for isolating device_global variables owned by the SYCLBINs in the + // kernel_bundle. This map will ensure the cleanup of its entries, unlike the + // map in program_manager which has its entry cleanup managed by the + // corresponding owner contexts. + DeviceGlobalMap MDeviceGlobals{/*OwnerControlledCleanup=*/false}; }; } // namespace detail diff --git a/sycl/source/detail/memory_manager.cpp b/sycl/source/detail/memory_manager.cpp index eef788f883fad..7736e656cc574 100644 --- a/sycl/source/detail/memory_manager.cpp +++ b/sycl/source/detail/memory_manager.cpp @@ -1145,7 +1145,7 @@ getOrBuildProgramForDeviceGlobal(queue_impl &Queue, // If there was no cached program, build one. auto Context = createSyclObjFromImpl(ContextImpl); ProgramManager &PM = ProgramManager::getInstance(); - RTDeviceBinaryImage &Img = PM.getDeviceImage( + const RTDeviceBinaryImage &Img = PM.getDeviceImage( DeviceGlobalEntry->MImages, ContextImpl, *getSyclObjImpl(Device)); device_image_plain DeviceImage = diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 455810696260d..9d6c04d95a942 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -583,7 +583,7 @@ static const char *getUrDeviceTarget(const char *URDeviceTarget) { return UR_DEVICE_BINARY_TARGET_UNKNOWN; } -static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, +static bool compatibleWithDevice(const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { auto &Adapter = DeviceImpl.getAdapter(); @@ -593,11 +593,10 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, // compatible with implementation. The function returns invalid index if no // device images are compatible. uint32_t SuitableImageID = std::numeric_limits::max(); - sycl_device_binary DevBin = - const_cast(&BinImage->getRawData()); + const sycl_device_binary_struct &DevBin = BinImage->getRawData(); ur_device_binary_t UrBinary{}; - UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin->DeviceTargetSpec); + UrBinary.pDeviceTargetSpec = getUrDeviceTarget(DevBin.DeviceTargetSpec); ur_result_t Error = Adapter->call_nocheck( URDeviceHandle, &UrBinary, @@ -611,7 +610,8 @@ static bool compatibleWithDevice(RTDeviceBinaryImage *BinImage, } // Check if the device image is a BF16 devicelib image. -bool ProgramManager::isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage) { +bool ProgramManager::isBfloat16DeviceImage( + const RTDeviceBinaryImage *BinImage) { // SYCL devicelib image. if ((m_Bfloat16DeviceLibImages[0].get() == BinImage) || m_Bfloat16DeviceLibImages[1].get() == BinImage) @@ -623,7 +623,7 @@ bool ProgramManager::isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage) { // Check if device natively support BF16 conversion and accordingly // decide whether to use fallback or native BF16 devicelib image. bool ProgramManager::shouldBF16DeviceImageBeUsed( - RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { + const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl) { // Decide whether a devicelib image should be used. int Bfloat16DeviceLibVersion = -1; if (m_Bfloat16DeviceLibImages[0].get() == BinImage) @@ -672,17 +672,17 @@ static bool checkLinkingSupport(const device_impl &DeviceImpl, return false; } -std::set +std::set ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport) { // TODO collecting dependencies for virtual functions and imported symbols // should be combined since one can lead to new unresolved dependencies for // the other. - std::set DeviceImagesToLink = + std::set DeviceImagesToLink = collectDependentDeviceImagesForVirtualFunctions(Img, Dev); - std::set ImageDeps = + std::set ImageDeps = collectDeviceImageDepsForImportedSymbols(Img, Dev, ErrorOnUnresolvableImport); DeviceImagesToLink.insert(ImageDeps.begin(), ImageDeps.end()); @@ -690,19 +690,19 @@ ProgramManager::collectDeviceImageDeps(const RTDeviceBinaryImage &Img, } static inline void -CheckAndDecompressImage([[maybe_unused]] RTDeviceBinaryImage *Img) { +CheckAndDecompressImage([[maybe_unused]] const RTDeviceBinaryImage *Img) { #ifdef SYCL_RT_ZSTD_AVAILABLE - if (auto CompImg = dynamic_cast(Img)) + if (auto CompImg = dynamic_cast(Img)) if (CompImg->IsCompressed()) - CompImg->Decompress(); + const_cast(CompImg)->Decompress(); #endif } -std::set +std::set ProgramManager::collectDeviceImageDepsForImportedSymbols( const RTDeviceBinaryImage &MainImg, const device_impl &Dev, bool ErrorOnUnresolvableImport) { - std::set DeviceImagesToLink; + std::set DeviceImagesToLink; std::set HandledSymbols; std::queue WorkList; for (const sycl_device_binary_property &ISProp : @@ -722,7 +722,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( auto Range = m_ExportedSymbolImages.equal_range(Symbol); bool Found = false; for (auto It = Range.first; It != Range.second; ++It) { - RTDeviceBinaryImage *Img = It->second; + const RTDeviceBinaryImage *Img = It->second; if (!doesDevSupportDeviceRequirements(Dev, *Img) || !compatibleWithDevice(Img, Dev)) @@ -737,8 +737,7 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( // and then check if the format matches. if (Format == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE || Img->getFormat() == SYCL_DEVICE_BINARY_TYPE_COMPRESSED_NONE) { - auto MainImgPtr = const_cast(&MainImg); - CheckAndDecompressImage(MainImgPtr); + CheckAndDecompressImage(&MainImg); CheckAndDecompressImage(Img); Format = MainImg.getFormat(); } @@ -760,18 +759,18 @@ ProgramManager::collectDeviceImageDepsForImportedSymbols( "No device image found for external symbol " + Symbol); } - DeviceImagesToLink.erase(const_cast(&MainImg)); + DeviceImagesToLink.erase(&MainImg); return DeviceImagesToLink; } -std::set +std::set ProgramManager::collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev) { // If virtual functions are used in a program, then we need to link several // device images together to make sure that vtable pointers stored in // objects are valid between different kernels (which could be in different // device images). - std::set DeviceImagesToLink; + std::set DeviceImagesToLink; // KernelA may use some set-a, which is also used by KernelB that in turn // uses set-b, meaning that this search should be recursive. The set below // is used to stop that recursion, i.e. to avoid looking at sets we have @@ -804,7 +803,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // There could be more than one device image that uses the same set // of virtual functions, or provides virtual funtions from the same // set. - for (RTDeviceBinaryImage *BinImage : m_VFSet2BinImage.at(SetName)) { + for (const RTDeviceBinaryImage *BinImage : m_VFSet2BinImage.at(SetName)) { // Here we can encounter both uses-virtual-functions-set and // virtual-functions-set properties, but their handling is the same: we // just grab all sets they reference and add them for consideration if @@ -833,7 +832,7 @@ ProgramManager::collectDependentDeviceImagesForVirtualFunctions( // We may have inserted the original image into the list as well, because it // is also a part of m_VFSet2BinImage map. No need to to return it to avoid // passing it twice to link call later. - DeviceImagesToLink.erase(const_cast(&Img)); + DeviceImagesToLink.erase(&Img); return DeviceImagesToLink; } @@ -901,11 +900,11 @@ ur_program_handle_t ProgramManager::getBuiltURProgram( checkDevSupportDeviceRequirements(RootOrSubDevImpl, Img, NDRDesc)) throw *exception; - std::set DeviceImagesToLink = + std::set DeviceImagesToLink = collectDeviceImageDeps(Img, {RootOrSubDevImpl}); // Decompress all DeviceImagesToLink - for (RTDeviceBinaryImage *BinImg : DeviceImagesToLink) + for (const RTDeviceBinaryImage *BinImg : DeviceImagesToLink) CheckAndDecompressImage(BinImg); std::vector AllImages; @@ -1500,14 +1499,10 @@ const char *getArchName(const device_impl &DeviceImpl) { return "unknown"; } -sycl_device_binary getRawImg(RTDeviceBinaryImage *Img) { - return reinterpret_cast( - const_cast(&Img->getRawData())); -} - template -RTDeviceBinaryImage *getBinImageFromMultiMap( - const std::unordered_multimap &ImagesSet, +const RTDeviceBinaryImage *getBinImageFromMultiMap( + const std::unordered_multimap + &ImagesSet, const StorageKey &Key, context_impl &ContextImpl, const device_impl &DeviceImpl) { auto [ItBegin, ItEnd] = ImagesSet.equal_range(Key); @@ -1519,7 +1514,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( // (checked using info::device::architecture) or JIT compiled. // This selection will then be passed to urDeviceSelectBinary // for final selection. - std::vector DeviceFilteredImgs; + std::vector DeviceFilteredImgs; DeviceFilteredImgs.reserve(std::distance(ItBegin, ItEnd)); for (auto It = ItBegin; It != ItEnd; ++It) { if (doesImageTargetMatchDevice(*It->second, DeviceImpl)) @@ -1538,13 +1533,14 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( std::vector UrBinaries(NumImgs); for (uint32_t BinaryCount = 0; BinaryCount < NumImgs; BinaryCount++) { - sycl_device_binary RawImg = getRawImg(DeviceFilteredImgs[BinaryCount]); + const sycl_device_binary_struct &RawImg = + DeviceFilteredImgs[BinaryCount]->getRawData(); UrBinaries[BinaryCount].pDeviceTargetSpec = - getUrDeviceTarget(RawImg->DeviceTargetSpec); + getUrDeviceTarget(RawImg.DeviceTargetSpec); if (DeviceImpl.getBackend() == backend::ext_oneapi_hip) { UrBinariesStorage.emplace_back( - RawImg->BinaryStart, - std::distance(RawImg->BinaryStart, RawImg->BinaryEnd)); + RawImg.BinaryStart, + std::distance(RawImg.BinaryStart, RawImg.BinaryEnd)); UrBinaries[BinaryCount].pNext = &UrBinariesStorage[BinaryCount]; } } @@ -1557,7 +1553,7 @@ RTDeviceBinaryImage *getBinImageFromMultiMap( return DeviceFilteredImgs[ImgInd]; } -RTDeviceBinaryImage & +const RTDeviceBinaryImage & ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, context_impl &ContextImpl, const device_impl &DeviceImpl) { @@ -1572,11 +1568,11 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, if (m_UseSpvFile) { assert(m_SpvFileImage); return getDeviceImage( - std::unordered_set({m_SpvFileImage.get()}), + std::unordered_set({m_SpvFileImage.get()}), ContextImpl, DeviceImpl); } - RTDeviceBinaryImage *Img = nullptr; + const RTDeviceBinaryImage *Img = nullptr; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); if (auto KernelId = m_KernelName2KernelIDs.find(KernelName); @@ -1604,8 +1600,8 @@ ProgramManager::getDeviceImage(KernelNameStrRefT KernelName, "No kernel named " + std::string(KernelName) + " was found"); } -RTDeviceBinaryImage &ProgramManager::getDeviceImage( - const std::unordered_set &ImageSet, +const RTDeviceBinaryImage &ProgramManager::getDeviceImage( + const std::unordered_set &ImageSet, context_impl &ContextImpl, const device_impl &DeviceImpl) { assert(ImageSet.size() > 0); @@ -1851,7 +1847,7 @@ ProgramManager::ProgramPtr ProgramManager::build( return Program; } -void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) { +void ProgramManager::cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &AssertUsedRange = Img.getAssertUsed(); if (AssertUsedRange.isAvailable()) @@ -1859,7 +1855,8 @@ void ProgramManager::cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img) { m_KernelUsesAssert.insert(Prop->Name); } -void ProgramManager::cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img) { +void ProgramManager::cacheKernelImplicitLocalArg( + const RTDeviceBinaryImage &Img) { const RTDeviceBinaryImage::PropertyRange &ImplicitLocalArgRange = Img.getImplicitLocalArg(); if (ImplicitLocalArgRange.isAvailable()) @@ -2399,7 +2396,7 @@ bool ProgramManager::hasCompatibleImage(const device_impl &DeviceImpl) { return std::any_of( m_BinImg2KernelIDs.cbegin(), m_BinImg2KernelIDs.cend(), - [&](std::pair>> Elem) { return compatibleWithDevice(Elem.first, DeviceImpl); }); } @@ -2433,9 +2430,9 @@ void ProgramManager::addOrInitDeviceGlobalEntry(const void *DeviceGlobalPtr, m_DeviceGlobals.addOrInitialize(DeviceGlobalPtr, UniqueId); } -std::set +std::set ProgramManager::getRawDeviceImages(const std::vector &KernelIDs) { - std::set BinImages; + std::set BinImages; std::lock_guard KernelIDsGuard(m_KernelIDsMutex); for (const kernel_id &KID : KernelIDs) { auto Range = m_KernelIDs2BinImage.equal_range(KID); @@ -2460,8 +2457,11 @@ ProgramManager::tryGetDeviceGlobalEntry(const std::string &UniqueId, std::vector ProgramManager::getDeviceGlobalEntries( const std::vector &UniqueIds, bool ExcludeDeviceImageScopeDecorated) { - return m_DeviceGlobals.getEntries(UniqueIds, - ExcludeDeviceImageScopeDecorated); + std::vector FoundEntries; + FoundEntries.reserve(UniqueIds.size()); + m_DeviceGlobals.getEntries(UniqueIds, ExcludeDeviceImageScopeDecorated, + FoundEntries); + return FoundEntries; } void ProgramManager::addOrInitHostPipeEntry(const void *HostPipePtr, @@ -2496,7 +2496,8 @@ HostPipeMapEntry *ProgramManager::getHostPipeEntry(const void *HostPipePtr) { } device_image_plain ProgramManager::getDeviceImageFromBinaryImage( - RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev) { + const RTDeviceBinaryImage *BinImage, const context &Ctx, + const device &Dev) { const bundle_state ImgState = getBinImageState(BinImage); assert(compatibleWithDevice(BinImage, *getSyclObjImpl(Dev).get())); @@ -2522,7 +2523,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // Collect unique raw device images taking into account kernel ids passed // TODO: Can we avoid repacking? - std::set BinImages; + std::set BinImages; if (!KernelIDs.empty()) { for (const auto &KID : KernelIDs) { bool isCompatibleWithAtLeastOneDev = @@ -2566,18 +2567,19 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( // a separate branch for that case to avoid unnecessary tracking work. struct DeviceBinaryImageInfo { std::shared_ptr> KernelIDs; - std::set Deps; + std::set Deps; bundle_state State = bundle_state::input; int RequirementCounter = 0; }; - std::unordered_map ImageInfoMap; + std::unordered_map + ImageInfoMap; for (const sycl::device &Dev : Devs) { device_impl &DevImpl = *getSyclObjImpl(Dev); // Track the highest image state for each requested kernel. using StateImagesPairT = - std::pair>; + std::pair>; using KernelImageMapT = std::map; KernelImageMapT KernelImageMap; @@ -2585,7 +2587,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const kernel_id &KernelID : KernelIDs) KernelImageMap.insert({KernelID, {}}); - for (RTDeviceBinaryImage *BinImage : BinImages) { + for (const RTDeviceBinaryImage *BinImage : BinImages) { if (!compatibleWithDevice(BinImage, DevImpl) || !doesDevSupportDeviceRequirements(DevImpl, *BinImage)) continue; @@ -2632,7 +2634,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( KernelImages.push_back(BinImage); ++ImgRequirementCounter; } else if (KernelImagesState < ImgState) { - for (RTDeviceBinaryImage *Img : KernelImages) { + for (const RTDeviceBinaryImage *Img : KernelImages) { auto It = ImageInfoMap.find(Img); assert(It != ImageInfoMap.end()); assert(It->second.RequirementCounter > 0); @@ -2656,7 +2658,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( for (const auto &ImgInfoPair : ImageInfoMap) { if (ImgInfoPair.second.RequirementCounter == 0) continue; - for (RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { + for (const RTDeviceBinaryImage *Dep : ImgInfoPair.second.Deps) { auto It = ImageInfoMap.find(Dep); if (It != ImageInfoMap.end()) It->second.RequirementCounter = 0; @@ -2673,11 +2675,11 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( ImgInfoPair.second.KernelIDs, /*PIProgram=*/nullptr); std::vector Images; - const std::set &Deps = ImgInfoPair.second.Deps; + const std::set &Deps = ImgInfoPair.second.Deps; Images.reserve(Deps.size() + 1); Images.push_back( createSyclObjFromImpl(std::move(MainImpl))); - for (RTDeviceBinaryImage *Dep : Deps) + for (const RTDeviceBinaryImage *Dep : Deps) Images.push_back( createDependencyImage(Ctx, Devs, Dep, ImgInfoPair.second.State)); SYCLDeviceImages.push_back(std::move(Images)); @@ -2688,7 +2690,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( device_image_plain ProgramManager::createDependencyImage( const context &Ctx, const std::vector &Devs, - RTDeviceBinaryImage *DepImage, bundle_state DepState) { + const RTDeviceBinaryImage *DepImage, bundle_state DepState) { std::shared_ptr> DepKernelIDs; { std::lock_guard KernelIDsGuard(m_KernelIDsMutex); diff --git a/sycl/source/detail/program_manager/program_manager.hpp b/sycl/source/detail/program_manager/program_manager.hpp index cecd2b8702942..427ca01a23245 100644 --- a/sycl/source/detail/program_manager/program_manager.hpp +++ b/sycl/source/detail/program_manager/program_manager.hpp @@ -135,12 +135,12 @@ class ProgramManager { // process. Can only be called after staticInit is done. static ProgramManager &getInstance(); - RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, - context_impl &ContextImpl, - const device_impl &DeviceImpl); + const RTDeviceBinaryImage &getDeviceImage(KernelNameStrRefT KernelName, + context_impl &ContextImpl, + const device_impl &DeviceImpl); - RTDeviceBinaryImage &getDeviceImage( - const std::unordered_set &ImagesToVerify, + const RTDeviceBinaryImage &getDeviceImage( + const std::unordered_set &ImagesToVerify, context_impl &ContextImpl, const device_impl &DeviceImpl); ur_program_handle_t createURProgram(const RTDeviceBinaryImage &Img, @@ -287,7 +287,7 @@ class ProgramManager { HostPipeMapEntry *getHostPipeEntry(const void *HostPipePtr); device_image_plain - getDeviceImageFromBinaryImage(RTDeviceBinaryImage *BinImage, + getDeviceImageFromBinaryImage(const RTDeviceBinaryImage *BinImage, const context &Ctx, const device &Dev); // The function returns a vector of SYCL device images that are compiled with @@ -299,7 +299,7 @@ class ProgramManager { // Creates a new dependency image for a given dependency binary image. device_image_plain createDependencyImage(const context &Ctx, const std::vector &Devs, - RTDeviceBinaryImage *DepImage, + const RTDeviceBinaryImage *DepImage, bundle_state DepState); // Bring image to the required state. Does it inplace @@ -377,13 +377,13 @@ class ProgramManager { KernelNameStrRefT KernelName, KernelNameBasedCacheT *KernelNameBasedCachePtr) const; - std::set + std::set getRawDeviceImages(const std::vector &KernelIDs); - std::set + std::set collectDeviceImageDeps(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport = true); - std::set + std::set collectDeviceImageDepsForImportedSymbols(const RTDeviceBinaryImage &Img, const device_impl &Dev, bool ErrorOnUnresolvableImport); @@ -408,17 +408,17 @@ class ProgramManager { void dumpImage(const RTDeviceBinaryImage &Img, uint32_t SequenceID = 0) const; /// Add info on kernels using assert into cache - void cacheKernelUsesAssertInfo(RTDeviceBinaryImage &Img); + void cacheKernelUsesAssertInfo(const RTDeviceBinaryImage &Img); /// Add info on kernels using local arg into cache - void cacheKernelImplicitLocalArg(RTDeviceBinaryImage &Img); + void cacheKernelImplicitLocalArg(const RTDeviceBinaryImage &Img); - std::set + std::set collectDependentDeviceImagesForVirtualFunctions( const RTDeviceBinaryImage &Img, const device_impl &Dev); - bool isBfloat16DeviceImage(RTDeviceBinaryImage *BinImage); - bool shouldBF16DeviceImageBeUsed(RTDeviceBinaryImage *BinImage, + bool isBfloat16DeviceImage(const RTDeviceBinaryImage *BinImage); + bool shouldBF16DeviceImageBeUsed(const RTDeviceBinaryImage *BinImage, const device_impl &DeviceImpl); protected: @@ -437,7 +437,7 @@ class ProgramManager { // in case of SPIRV + AOT. // Using shared_ptr to avoid expensive copy of the vector. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_KernelIDs2BinImage; // Maps device binary image to a vector of kernel ids in this image. @@ -445,7 +445,7 @@ class ProgramManager { // The vector is initialized in addImages function and is supposed to be // immutable afterwards. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map>> m_BinImg2KernelIDs; @@ -461,13 +461,13 @@ class ProgramManager { /// in the sycl::detail::__sycl_service_kernel__ namespace which is /// exclusively used for this purpose. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ServiceKernels; /// Caches all exported symbols to allow faster lookup when excluding these // from kernel bundles. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_multimap + std::unordered_multimap m_ExportedSymbolImages; /// Keeps all device images we are refering to during program lifetime. Used @@ -483,7 +483,7 @@ class ProgramManager { /// Caches list of device images that use or provide virtual functions from /// the same set. Used to simplify access. /// Access must be guarded by the m_KernelIDsMutex mutex. - std::unordered_map> + std::unordered_map> m_VFSet2BinImage; /// Protects built-in kernel ID cache. @@ -533,7 +533,9 @@ class ProgramManager { SanitizerType m_SanitizerFoundInImage; // Maps between device_global identifiers and associated information. - DeviceGlobalMap m_DeviceGlobals; + // The ownership of entry resources is taken to allow contexts to cleanup + // their associated entry resources when they die. + DeviceGlobalMap m_DeviceGlobals{/*OwnerControlledCleanup=*/true}; // Maps between host_pipe identifiers and associated information. std::unordered_map> diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index f159bf5e41832..4eeaf9a0270e8 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2726,7 +2726,7 @@ void enqueueImpKernel( // Initialize device globals associated with this. std::vector DeviceGlobalInitEvents = - ContextImpl->initializeDeviceGlobals(Program, Queue); + ContextImpl->initializeDeviceGlobals(Program, Queue, KernelBundleImplPtr); if (!DeviceGlobalInitEvents.empty()) { std::vector EventsWithDeviceGlobalInits; EventsWithDeviceGlobalInits.reserve(RawEvents.size() + diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 47d4878807f3b..d10078622c941 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -374,7 +374,7 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { // the device and whose target matches the device. detail::device_impl &DevImpl = *getSyclObjImpl(Dev); for (const auto &KernelID : KernelIDs) { - std::set BinImages = + std::set BinImages = detail::ProgramManager::getInstance().getRawDeviceImages({KernelID}); if (std::none_of(BinImages.begin(), BinImages.end(), diff --git a/sycl/test-e2e/SYCLBIN/Inputs/dg.hpp b/sycl/test-e2e/SYCLBIN/Inputs/dg.hpp new file mode 100644 index 0000000000000..747f6a9d4186e --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/dg.hpp @@ -0,0 +1,104 @@ +#include "common.hpp" + +#include + +static constexpr size_t NUM = 1024; +static constexpr size_t WGSIZE = 16; +static constexpr float EPS = 0.001; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + sycl::device Dev = Q.get_device(); + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto KBInput = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe1 = sycl::build(KBInput); + auto KBExe2 = sycl::build(KBInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto KBObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe1 = sycl::link(KBObj); + auto KBExe2 = sycl::link(KBObj); +#else // defined(SYCLBIN_EXECUTABLE_STATE) + auto KBExe1 = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto KBExe2 = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#endif + + sycl::kernel AddK = KBExe1.ext_oneapi_get_kernel("ff_dg_adder"); + + // Check presence of device globals. + assert(KBExe1.ext_oneapi_has_device_global("DG")); + // Querying a non-existing device global shall not crash. + assert(!KBExe1.ext_oneapi_has_device_global("bogus_DG")); + + void *DGAddr = KBExe1.ext_oneapi_get_device_global_address("DG", Dev); + size_t DGSize = KBExe1.ext_oneapi_get_device_global_size("DG"); + assert(DGSize == 4); + + int32_t Val = -1; + auto CheckVal = [&](int32_t Expected) { + Val = -1; + Q.memcpy(&Val, DGAddr, DGSize).wait(); + if (Val != Expected) { + std::cout << "Val: " << Val << " != " << Expected << '\n'; + ++Failed; + } + }; + + // Device globals are zero-initialized. + CheckVal(0); + + // Set the DG. + Val = 123; + Q.memcpy(DGAddr, &Val, DGSize).wait(); + CheckVal(123); + + // Run a kernel using it. + Val = -17; + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Val); + CGH.single_task(AddK); + }).wait(); + CheckVal(123 - 17); + + // Test that each bundle has its distinct set of globals. + DGAddr = KBExe2.ext_oneapi_get_device_global_address("DG", Dev); + CheckVal(0); + + DGAddr = KBExe1.ext_oneapi_get_device_global_address("DG", Dev); + CheckVal(123 - 17); + + // Test global with `device_image_scope`. We currently cannot read/write these + // from the host, but they should work device-only. + auto SwapK = KBExe2.ext_oneapi_get_kernel("ff_swap"); + int64_t *ValBuf = sycl::malloc_shared(1, Q); + *ValBuf = -1; + auto DoSwap = [&]() { + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(ValBuf); + CGH.single_task(SwapK); + }).wait(); + }; + + DoSwap(); + if (*ValBuf != 0) { + std::cout << "ValBuf: " << *ValBuf << " != 0"; + ++Failed; + } + DoSwap(); + if (*ValBuf != -1) { + std::cout << "ValBuf: " << *ValBuf << " != -1"; + ++Failed; + } + + sycl::free(ValBuf, Q); + + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp b/sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp new file mode 100644 index 0000000000000..fa66cf29b8d7d --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/dg_kernel.cpp @@ -0,0 +1,21 @@ +#include + +namespace syclex = sycl::ext::oneapi::experimental; + +syclex::device_global DG; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_dg_adder(int val) { + DG += val; +} + +syclex::device_global + DG_DIS; + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclex::single_task_kernel)) void ff_swap(int64_t *val) { + int64_t tmp = DG_DIS; + DG_DIS = *val; + *val = tmp; +} diff --git a/sycl/test-e2e/SYCLBIN/dg_executable.cpp b/sycl/test-e2e/SYCLBIN/dg_executable.cpp new file mode 100644 index 0000000000000..8c10d043b5d5e --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dg_executable.cpp @@ -0,0 +1,25 @@ +//==----------- dg_executable.cpp --- SYCLBIN extension tests +//-------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using device globals in SYCLBIN. + +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/dg_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_EXECUTABLE_STATE + +#include "Inputs/dg.hpp" diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp new file mode 100644 index 0000000000000..62b5fa59ff6c5 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -0,0 +1,24 @@ +//==----------- dg_input.cpp --- SYCLBIN extension tests -------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using device globals in SYCLBIN. + +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/dg_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/dg.hpp" diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp new file mode 100644 index 0000000000000..07b2f990ed0fc --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -0,0 +1,24 @@ +//==----------- dg_object.cpp --- SYCLBIN extension tests ------------------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_device_allocations + +// -- Test for using device globals in SYCLBIN. + +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/dg_kernel.cpp -o %t.syclbin +// RUN: %{build} -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/dg.hpp" diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index de2b939756ea0..5b97ae7a0cac9 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 25 +// CHECK-NUM-MATCHES: 26 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see diff --git a/sycl/unittests/program_manager/Cleanup.cpp b/sycl/unittests/program_manager/Cleanup.cpp index 6f7cca016264c..8aa1da1f2936d 100644 --- a/sycl/unittests/program_manager/Cleanup.cpp +++ b/sycl/unittests/program_manager/Cleanup.cpp @@ -13,7 +13,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { public: std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getKernelID2BinImage() { return m_KernelIDs2BinImage; } @@ -23,20 +23,20 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { return m_KernelName2KernelIDs; } - std::unordered_map>> & getBinImage2KernelId() { return m_BinImg2KernelIDs; } std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getServiceKernels() { return m_ServiceKernels; } std::unordered_multimap & + const sycl::detail::RTDeviceBinaryImage *> & getExportedSymbolImages() { return m_ExportedSymbolImages; } @@ -48,7 +48,7 @@ class ProgramManagerExposed : public sycl::detail::ProgramManager { } std::unordered_map> & + std::set> & getVFSet2BinImage() { return m_VFSet2BinImage; } From bc9e54b493398c8e467c3fc5ff3e0896472b93c0 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Thu, 26 Jun 2025 00:55:54 -0700 Subject: [PATCH 2/6] Remove debug print Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_global_map.hpp | 1 - 1 file changed, 1 deletion(-) diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 35d7ebec5b407..38d8089a0331a 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -70,7 +70,6 @@ class DeviceGlobalMap { DeviceGlobal->Name, Img, TypeSize, DeviceImageScopeDecorated); MDeviceGlobals.emplace(DeviceGlobal->Name, std::move(EntryUPtr)); } - std::cout << DeviceGlobal->Name << std::endl; } } From 1687b01d59fa7e353b8acf55ec1e40f986d9a72d Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 30 Jun 2025 03:02:42 -0700 Subject: [PATCH 3/6] Fix device_global initialization in linking Signed-off-by: Larsen, Steffen --- sycl/source/detail/program_manager/program_manager.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 9d6c04d95a942..e574704c17896 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -3043,6 +3043,10 @@ ProgramManager::link(const std::vector &Imgs, const RTDeviceBinaryImage *NewBinImg = mergeImageData( Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap, MergedImageStorage); + // With both the new program and the merged image data, initailize associated + // device_global variables. + ContextImpl.addDeviceGlobalInitializer(LinkedProg, Devs, NewBinImg); + { std::lock_guard Lock(MNativeProgramsMutex); // NativePrograms map does not intend to keep reference to program handle, From 664c7508d56f38a7de9997ae0d6c7d75a69d8f15 Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 30 Jun 2025 05:41:37 -0700 Subject: [PATCH 4/6] Disable on opencl gpu Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/dg_executable.cpp | 3 +++ sycl/test-e2e/SYCLBIN/dg_input.cpp | 3 +++ sycl/test-e2e/SYCLBIN/dg_object.cpp | 3 +++ 3 files changed, 9 insertions(+) diff --git a/sycl/test-e2e/SYCLBIN/dg_executable.cpp b/sycl/test-e2e/SYCLBIN/dg_executable.cpp index 8c10d043b5d5e..72d6ea3a053b5 100644 --- a/sycl/test-e2e/SYCLBIN/dg_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_executable.cpp @@ -16,6 +16,9 @@ // %{sycl_target_opts} should be added to the SYCLBIN generation run-line. // REQUIRES: target-spir +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + // RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp index 62b5fa59ff6c5..27029f5e1f017 100644 --- a/sycl/test-e2e/SYCLBIN/dg_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -15,6 +15,9 @@ // %{sycl_target_opts} should be added to the SYCLBIN generation run-line. // REQUIRES: target-spir +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + // RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp index 07b2f990ed0fc..74c9f59f3e7bf 100644 --- a/sycl/test-e2e/SYCLBIN/dg_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -15,6 +15,9 @@ // %{sycl_target_opts} should be added to the SYCLBIN generation run-line. // REQUIRES: target-spir +// UNSUPPORTED: opencl && gpu +// UNSUPPORTED-TRACKER: GSD-4287 + // RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin From 586aaff1844d42fe923083dd80f482380cd367ed Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Mon, 30 Jun 2025 22:09:58 -0700 Subject: [PATCH 5/6] Address comments Signed-off-by: Larsen, Steffen --- sycl/source/detail/device_global_map.hpp | 2 +- sycl/source/detail/program_manager/program_manager.cpp | 2 +- sycl/test-e2e/SYCLBIN/dg_executable.cpp | 8 +------- sycl/test-e2e/SYCLBIN/dg_input.cpp | 5 ----- sycl/test-e2e/SYCLBIN/dg_object.cpp | 5 ----- 5 files changed, 3 insertions(+), 19 deletions(-) diff --git a/sycl/source/detail/device_global_map.hpp b/sycl/source/detail/device_global_map.hpp index 38d8089a0331a..256c48066ec87 100644 --- a/sycl/source/detail/device_global_map.hpp +++ b/sycl/source/detail/device_global_map.hpp @@ -160,7 +160,7 @@ class DeviceGlobalMap { private: // Indicates whether the owner will explicitly cleanup the entries. If false - // the dtor of DeviceGlobalMap will cleanup the enties. + // the dtor of DeviceGlobalMap will cleanup the entries. // Note: This lets the global device global map avoid overhead at shutdown and // instead let the contexts own the associated entries. bool MOwnerControlledCleanup = true; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 3a6ddaa026a7c..922808835812a 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -3044,7 +3044,7 @@ ProgramManager::link(const std::vector &Imgs, const RTDeviceBinaryImage *NewBinImg = mergeImageData( Imgs, *KernelIDs, NewSpecConstBlob, NewSpecConstMap, MergedImageStorage); - // With both the new program and the merged image data, initailize associated + // With both the new program and the merged image data, initialize associated // device_global variables. ContextImpl.addDeviceGlobalInitializer(LinkedProg, Devs, NewBinImg); diff --git a/sycl/test-e2e/SYCLBIN/dg_executable.cpp b/sycl/test-e2e/SYCLBIN/dg_executable.cpp index 72d6ea3a053b5..1d3c9e2cf3259 100644 --- a/sycl/test-e2e/SYCLBIN/dg_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_executable.cpp @@ -1,5 +1,4 @@ -//==----------- dg_executable.cpp --- SYCLBIN extension tests -//-------------------==// +//==---------- dg_executable.cpp --- SYCLBIN extension tests ---------------==// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,11 +10,6 @@ // -- Test for using device globals in SYCLBIN. -// Due to the regression in https://github.com/intel/llvm/issues/18432 it will -// fail to build the SYCLBIN with nvptx targets. Once this is fixed, -// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. -// REQUIRES: target-spir - // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp index 27029f5e1f017..ddc9da758fad7 100644 --- a/sycl/test-e2e/SYCLBIN/dg_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -10,11 +10,6 @@ // -- Test for using device globals in SYCLBIN. -// Due to the regression in https://github.com/intel/llvm/issues/18432 it will -// fail to build the SYCLBIN with nvptx targets. Once this is fixed, -// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. -// REQUIRES: target-spir - // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp index 74c9f59f3e7bf..a5b4b97a11e84 100644 --- a/sycl/test-e2e/SYCLBIN/dg_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -10,11 +10,6 @@ // -- Test for using device globals in SYCLBIN. -// Due to the regression in https://github.com/intel/llvm/issues/18432 it will -// fail to build the SYCLBIN with nvptx targets. Once this is fixed, -// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. -// REQUIRES: target-spir - // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 From f67e54932af39c4bf78e50b2247b2e0c84f1aa7f Mon Sep 17 00:00:00 2001 From: "Larsen, Steffen" Date: Tue, 1 Jul 2025 02:00:11 -0700 Subject: [PATCH 6/6] Add xfail Signed-off-by: Larsen, Steffen --- sycl/test-e2e/SYCLBIN/dg_executable.cpp | 6 +++++- sycl/test-e2e/SYCLBIN/dg_input.cpp | 6 +++++- sycl/test-e2e/SYCLBIN/dg_object.cpp | 6 +++++- 3 files changed, 15 insertions(+), 3 deletions(-) diff --git a/sycl/test-e2e/SYCLBIN/dg_executable.cpp b/sycl/test-e2e/SYCLBIN/dg_executable.cpp index 1d3c9e2cf3259..c2ec644eedc6c 100644 --- a/sycl/test-e2e/SYCLBIN/dg_executable.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_executable.cpp @@ -13,7 +13,11 @@ // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// RUN: %clangxx --offload-new-driver -fsyclbin=executable %S/Inputs/dg_kernel.cpp -o %t.syclbin +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=executable %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dg_input.cpp b/sycl/test-e2e/SYCLBIN/dg_input.cpp index ddc9da758fad7..9e535e87fe71f 100644 --- a/sycl/test-e2e/SYCLBIN/dg_input.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_input.cpp @@ -13,7 +13,11 @@ // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// RUN: %clangxx --offload-new-driver -fsyclbin=input %S/Inputs/dg_kernel.cpp -o %t.syclbin +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=input %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin diff --git a/sycl/test-e2e/SYCLBIN/dg_object.cpp b/sycl/test-e2e/SYCLBIN/dg_object.cpp index a5b4b97a11e84..faa2c87070df6 100644 --- a/sycl/test-e2e/SYCLBIN/dg_object.cpp +++ b/sycl/test-e2e/SYCLBIN/dg_object.cpp @@ -13,7 +13,11 @@ // UNSUPPORTED: opencl && gpu // UNSUPPORTED-TRACKER: GSD-4287 -// RUN: %clangxx --offload-new-driver -fsyclbin=object %S/Inputs/dg_kernel.cpp -o %t.syclbin +// SYCLBIN currently only properly detects SPIR-V binaries. +// XFAIL: !target-spir +// XFAIL-TRACKER: CMPLRLLVM-68811 + +// RUN: %clangxx --offload-new-driver -fsyclbin=object %{sycl_target_opts} %S/Inputs/dg_kernel.cpp -o %t.syclbin // RUN: %{build} -o %t.out // RUN: %{l0_leak_check} %{run} %t.out %t.syclbin