From 8478dc8efc72ba123ceb8ac900b549c3a118a646 Mon Sep 17 00:00:00 2001 From: Andrei Elovikov Date: Thu, 17 Jul 2025 09:19:18 -0700 Subject: [PATCH] [NFC][SYCL] `std::shared_ptr` cleanups * Avoid unnecessary copies * Use rvalue-reference if param is getting moved from (except `device_image` ctor) * Remove `DeviceImageImplPtr` type alias (not too many uses remaining, doesn't bring much value anymore) * Inline some temporaries so that explicit `std::move` wouldn't be needed * Switch some sets to use raw `device_image_impl *` ptr * `kernel_impl::getDeviceImage` to return raw reference --- sycl/include/sycl/kernel_bundle.hpp | 14 ++++--- sycl/source/backend.cpp | 13 +++---- sycl/source/detail/helpers.cpp | 4 +- sycl/source/detail/kernel_bundle_impl.hpp | 28 +++++++------- sycl/source/detail/kernel_impl.cpp | 2 +- sycl/source/detail/kernel_impl.hpp | 6 +-- .../program_manager/program_manager.cpp | 34 +++++++---------- sycl/source/detail/scheduler/commands.cpp | 38 +++++++++---------- sycl/source/kernel_bundle.cpp | 7 ++-- sycl/test/abi/sycl_symbols_windows.dump | 1 + .../arg_mask/EliminatedArgMask.cpp | 4 +- 11 files changed, 71 insertions(+), 80 deletions(-) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index aa6cb7bc1d161..e2709a94e4be3 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -100,13 +100,15 @@ class __SYCL_EXPORT kernel_id : public detail::OwnerLessBase { namespace detail { class device_image_impl; -using DeviceImageImplPtr = std::shared_ptr; // The class is used as a base for device_image for "untemplating" public // methods. class __SYCL_EXPORT device_image_plain { public: - device_image_plain(const detail::DeviceImageImplPtr &Impl) + device_image_plain(const std::shared_ptr &Impl) + : impl(Impl) {} + + device_image_plain(std::shared_ptr &&Impl) : impl(std::move(Impl)) {} bool operator==(const device_image_plain &RHS) const { @@ -124,7 +126,7 @@ class __SYCL_EXPORT device_image_plain { ur_native_handle_t getNative() const; protected: - detail::DeviceImageImplPtr impl; + std::shared_ptr impl; template friend const decltype(Obj::impl) & @@ -191,7 +193,7 @@ class device_image : public detail::device_image_plain, #endif // _HAS_STD_BYTE private: - device_image(detail::DeviceImageImplPtr Impl) + device_image(std::shared_ptr Impl) : device_image_plain(std::move(Impl)) {} template @@ -736,7 +738,7 @@ namespace detail { // Stable selector function type for passing thru library boundaries using DevImgSelectorImpl = - std::function; + std::function &DevImgImpl)>; // Internal non-template versions of get_kernel_bundle API which is used by // public onces @@ -769,7 +771,7 @@ kernel_bundle get_kernel_bundle(const context &Ctx, std::vector UniqueDevices = detail::removeDuplicateDevices(Devs); detail::DevImgSelectorImpl SelectorWrapper = - [Selector](const detail::DeviceImageImplPtr &DevImg) { + [Selector](const std::shared_ptr &DevImg) { return Selector( detail::createSyclObjFromImpl>(DevImg)); }; diff --git a/sycl/source/backend.cpp b/sycl/source/backend.cpp index 7c8fc1505c534..dfebcc946670a 100644 --- a/sycl/source/backend.cpp +++ b/sycl/source/backend.cpp @@ -301,13 +301,12 @@ make_kernel_bundle(ur_native_handle_t NativeHandle, // this by pre-building the device image and extracting kernel info. We can't // do the same to user images, since they may contain references to undefined // symbols (e.g. when kernel_bundle is supposed to be joined with another). - auto KernelIDs = std::make_shared>(); - auto DevImgImpl = device_image_impl::create( - nullptr, TargetContext, Devices, State, KernelIDs, std::move(UrProgram), - ImageOriginInterop); - device_image_plain DevImg{DevImgImpl}; - - return kernel_bundle_impl::create(TargetContext, Devices, DevImg); + return kernel_bundle_impl::create( + TargetContext, Devices, + device_image_plain{ + device_image_impl::create(nullptr, TargetContext, Devices, State, + std::make_shared>(), + std::move(UrProgram), ImageOriginInterop)}); } // TODO: Unused. Remove when allowed. diff --git a/sycl/source/detail/helpers.cpp b/sycl/source/detail/helpers.cpp index 4322aa49ca2c4..fab0c3d78ad4f 100644 --- a/sycl/source/detail/helpers.cpp +++ b/sycl/source/detail/helpers.cpp @@ -64,12 +64,12 @@ const RTDeviceBinaryImage *retrieveKernelBinary(queue_impl &Queue, } if (KernelCG->MSyclKernel != nullptr) - return KernelCG->MSyclKernel->getDeviceImage()->get_bin_image_ref(); + return KernelCG->MSyclKernel->getDeviceImage().get_bin_image_ref(); if (auto KernelBundleImpl = KernelCG->getKernelBundle()) if (auto SyclKernelImpl = KernelBundleImpl->tryGetKernel(KernelName)) // Retrieve the device image from the kernel bundle. - return SyclKernelImpl->getDeviceImage()->get_bin_image_ref(); + return SyclKernelImpl->getDeviceImage().get_bin_image_ref(); context_impl &ContextImpl = Queue.getContextImpl(); return &detail::ProgramManager::getInstance().getDeviceImage( diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 22cd8d8fd9d72..44fa29e03a9c8 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -124,10 +124,10 @@ class kernel_bundle_impl // Interop constructor kernel_bundle_impl(context Ctx, devices_range Devs, - device_image_plain &DevImage, private_tag Tag) + device_image_plain &&DevImage, private_tag Tag) : kernel_bundle_impl(std::move(Ctx), Devs, Tag) { MDeviceImages.emplace_back(DevImage); - MUniqueDeviceImages.emplace_back(DevImage); + MUniqueDeviceImages.emplace_back(std::move(DevImage)); } // Matches sycl::build and sycl::compile @@ -161,11 +161,11 @@ class kernel_bundle_impl for (const DevImgPlainWithDeps &DevImgWithDeps : InputBundleImpl.MDeviceImages) { // Skip images which are not compatible with devices provided - if (std::none_of(get_devices().begin(), get_devices().end(), - [&DevImgWithDeps](device_impl &Dev) { - return getSyclObjImpl(DevImgWithDeps.getMain()) - ->compatible_with_device(Dev); - })) + if (none_of(get_devices(), + [&MainImg = *getSyclObjImpl(DevImgWithDeps.getMain())]( + device_impl &Dev) { + return MainImg.compatible_with_device(Dev); + })) continue; switch (TargetState) { @@ -395,11 +395,11 @@ class kernel_bundle_impl for (const DevImgPlainWithDeps *DeviceImageWithDeps : ImagesWithSpecConsts) { // Skip images which are not compatible with devices provided - if (std::none_of(get_devices().begin(), get_devices().end(), - [DeviceImageWithDeps](device_impl &Dev) { - return getSyclObjImpl(DeviceImageWithDeps->getMain()) - ->compatible_with_device(Dev); - })) + if (none_of(get_devices(), + [&MainImg = *getSyclObjImpl(DeviceImageWithDeps->getMain())]( + device_impl &Dev) { + return MainImg.compatible_with_device(Dev); + })) continue; std::vector LinkedResults = @@ -995,8 +995,8 @@ class kernel_bundle_impl SelectedImage->get_ur_program()); return std::make_shared( - Kernel, *detail::getSyclObjImpl(MContext), SelectedImage, *this, - ArgMask, SelectedImage->get_ur_program(), CacheMutex); + Kernel, *detail::getSyclObjImpl(MContext), std::move(SelectedImage), + *this, ArgMask, SelectedImage->get_ur_program(), CacheMutex); } std::shared_ptr diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 0cb679f1f0fc3..f6435c3dc7632 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -40,7 +40,7 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, context_impl &Context, } kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, context_impl &ContextImpl, - DeviceImageImplPtr DeviceImageImpl, + std::shared_ptr &&DeviceImageImpl, const kernel_bundle_impl &KernelBundleImpl, const KernelArgMask *ArgMask, ur_program_handle_t Program, std::mutex *CacheMutex) diff --git a/sycl/source/detail/kernel_impl.hpp b/sycl/source/detail/kernel_impl.hpp index 0c3c1ab0bf1e4..d30e274db94ec 100644 --- a/sycl/source/detail/kernel_impl.hpp +++ b/sycl/source/detail/kernel_impl.hpp @@ -50,7 +50,7 @@ class kernel_impl { /// \param ContextImpl is a valid SYCL context /// \param KernelBundleImpl is a valid instance of kernel_bundle_impl kernel_impl(ur_kernel_handle_t Kernel, context_impl &ContextImpl, - DeviceImageImplPtr DeviceImageImpl, + std::shared_ptr &&DeviceImageImpl, const kernel_bundle_impl &KernelBundleImpl, const KernelArgMask *ArgMask, ur_program_handle_t Program, std::mutex *CacheMutex); @@ -213,7 +213,7 @@ class kernel_impl { bool isInteropOrSourceBased() const noexcept; bool hasSYCLMetadata() const noexcept; - const DeviceImageImplPtr &getDeviceImage() const { return MDeviceImageImpl; } + device_image_impl &getDeviceImage() const { return *MDeviceImageImpl; } ur_native_handle_t getNative() const { adapter_impl &Adapter = MContext->getAdapter(); @@ -247,7 +247,7 @@ class kernel_impl { const std::shared_ptr MContext; const ur_program_handle_t MProgram = nullptr; bool MCreatedFromSource = true; - const DeviceImageImplPtr MDeviceImageImpl; + const std::shared_ptr MDeviceImageImpl; const KernelBundleImplPtr MKernelBundleImpl; bool MIsInterop = false; mutable std::mutex MNoncacheableEnqueueMutex; diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 54d24808475e3..92b2024b33785 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2463,11 +2463,9 @@ device_image_plain ProgramManager::getDeviceImageFromBinaryImage( KernelIDs = m_BinImg2KernelIDs[BinImage]; } - DeviceImageImplPtr Impl = device_image_impl::create( - BinImage, Ctx, Dev, ImgState, KernelIDs, Managed{}, - ImageOriginSYCLOffline); - - return createSyclObjFromImpl(std::move(Impl)); + return createSyclObjFromImpl(device_image_impl::create( + BinImage, Ctx, Dev, ImgState, std::move(KernelIDs), + Managed{}, ImageOriginSYCLOffline)); } std::vector @@ -2625,7 +2623,7 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( if (ImgInfoPair.second.RequirementCounter == 0) continue; - DeviceImageImplPtr MainImpl = device_image_impl::create( + std::shared_ptr MainImpl = device_image_impl::create( ImgInfoPair.first, Ctx, Devs, ImgInfoPair.second.State, ImgInfoPair.second.KernelIDs, Managed{}, ImageOriginSYCLOffline); @@ -2660,11 +2658,10 @@ ProgramManager::createDependencyImage(const context &Ctx, devices_range Devs, assert(DepState == getBinImageState(DepImage) && "State mismatch between main image and its dependency"); - DeviceImageImplPtr DepImpl = device_image_impl::create( - DepImage, Ctx, Devs, DepState, std::move(DepKernelIDs), - Managed{}, ImageOriginSYCLOffline); - return createSyclObjFromImpl(std::move(DepImpl)); + return createSyclObjFromImpl(device_image_impl::create( + DepImage, Ctx, Devs, DepState, std::move(DepKernelIDs), + Managed{}, ImageOriginSYCLOffline)); } void ProgramManager::bringSYCLDeviceImageToState( @@ -2833,7 +2830,7 @@ ProgramManager::compile(const DevImgPlainWithDeps &ImgWithDeps, std::optional RTCInfo = InputImpl.getRTCInfo(); - DeviceImageImplPtr ObjectImpl = device_image_impl::create( + std::shared_ptr ObjectImpl = device_image_impl::create( InputImpl.get_bin_image_ref(), InputImpl.get_context(), Devs, bundle_state::object, InputImpl.get_kernel_ids_ptr(), std::move(Prog), InputImpl.get_spec_const_data_ref(), @@ -3031,16 +3028,14 @@ ProgramManager::link(const std::vector &Imgs, } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); - DeviceImageImplPtr ExecutableImpl = device_image_impl::create( + // TODO: Make multiple sets of device images organized by devices they are + // compiled for. + return {createSyclObjFromImpl(device_image_impl::create( NewBinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), std::move(LinkedProg), std::move(NewSpecConstMap), std::move(NewSpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), std::move(MergedKernelNames), std::move(MergedEliminatedKernelArgMasks), - std::move(MergedImageStorage)); - - // TODO: Make multiple sets of device images organized by devices they are - // compiled for. - return {createSyclObjFromImpl(std::move(ExecutableImpl))}; + std::move(MergedImageStorage)))}; } // The function duplicates most of the code from existing getBuiltPIProgram. @@ -3114,13 +3109,12 @@ ProgramManager::build(const DevImgPlainWithDeps &DevImgWithDeps, } auto MergedRTCInfo = detail::KernelCompilerBinaryInfo::Merge(RTCInfoPtrs); - DeviceImageImplPtr ExecImpl = device_image_impl::create( + return createSyclObjFromImpl(device_image_impl::create( ResultBinImg, Context, Devs, bundle_state::executable, std::move(KernelIDs), std::move(ResProgram), std::move(SpecConstMap), std::move(SpecConstBlob), CombinedOrigins, std::move(MergedRTCInfo), std::move(MergedKernelNames), std::move(MergedEliminatedKernelArgMasks), - std::move(MergedImageStorage)); - return createSyclObjFromImpl(std::move(ExecImpl)); + std::move(MergedImageStorage))); } // When caching is enabled, the returned UrKernel will already have diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index e427d851148c1..1cb9e72662ca9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -2400,10 +2400,9 @@ static void SetArgBasedOnType( static ur_result_t SetKernelParamsAndLaunch( queue_impl &Queue, std::vector &Args, - const std::shared_ptr &DeviceImageImpl, - ur_kernel_handle_t Kernel, NDRDescT &NDRDesc, - std::vector &RawEvents, detail::event_impl *OutEventImpl, - const KernelArgMask *EliminatedArgMask, + device_image_impl *DeviceImageImpl, ur_kernel_handle_t Kernel, + NDRDescT &NDRDesc, std::vector &RawEvents, + detail::event_impl *OutEventImpl, const KernelArgMask *EliminatedArgMask, const std::function &getMemAllocationFunc, bool IsCooperative, bool KernelUsesClusterLaunch, uint32_t WorkGroupMemorySize, const RTDeviceBinaryImage *BinImage, @@ -2418,8 +2417,7 @@ static ur_result_t SetKernelParamsAndLaunch( std::vector Empty; Kernel = Scheduler::getInstance().completeSpecConstMaterialization( Queue, BinImage, KernelName, - DeviceImageImpl.get() ? DeviceImageImpl->get_spec_const_blob_ref() - : Empty); + DeviceImageImpl ? DeviceImageImpl->get_spec_const_blob_ref() : Empty); } if (KernelFuncPtr && !KernelHasSpecialCaptures) { @@ -2449,9 +2447,8 @@ static ur_result_t SetKernelParamsAndLaunch( } else { auto setFunc = [&Adapter, Kernel, &DeviceImageImpl, &getMemAllocationFunc, &Queue](detail::ArgDesc &Arg, size_t NextTrueIndex) { - SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl.get(), - getMemAllocationFunc, Queue.getContextImpl(), Arg, - NextTrueIndex); + SetArgBasedOnType(Adapter, Kernel, DeviceImageImpl, getMemAllocationFunc, + Queue.getContextImpl(), Arg, NextTrueIndex); }; applyFuncOnFilteredArgs(EliminatedArgMask, Args, setFunc); } @@ -2537,14 +2534,14 @@ static ur_result_t SetKernelParamsAndLaunch( return Error; } -static std::tuple, +static std::tuple getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, device_impl &DeviceImpl, std::vector &KernelCacheValsToRelease) { ur_kernel_handle_t UrKernel = nullptr; - std::shared_ptr DeviceImageImpl = nullptr; + device_image_impl *DeviceImageImpl = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; kernel_bundle_impl *KernelBundleImplPtr = CommandGroup.MKernelBundle.get(); @@ -2556,7 +2553,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, CommandGroup.MKernelName) : std::shared_ptr{nullptr}) { UrKernel = SyclKernelImpl->getHandleRef(); - DeviceImageImpl = SyclKernelImpl->getDeviceImage(); + DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); EliminatedArgMask = SyclKernelImpl->getKernelArgMask(); } else { FastKernelCacheValPtr FastKernelCacheVal = @@ -2568,8 +2565,7 @@ getCGKernelInfo(const CGExecKernel &CommandGroup, context_impl &ContextImpl, // To keep UrKernel valid, we return FastKernelCacheValPtr. KernelCacheValsToRelease.push_back(std::move(FastKernelCacheVal)); } - return std::make_tuple(UrKernel, std::move(DeviceImageImpl), - EliminatedArgMask); + return std::make_tuple(UrKernel, DeviceImageImpl, EliminatedArgMask); } ur_result_t enqueueImpCommandBufferKernel( @@ -2586,7 +2582,7 @@ ur_result_t enqueueImpCommandBufferKernel( std::vector FastKernelCacheValsToRelease; ur_kernel_handle_t UrKernel = nullptr; - std::shared_ptr DeviceImageImpl = nullptr; + device_image_impl *DeviceImageImpl = nullptr; const KernelArgMask *EliminatedArgMask = nullptr; context_impl &ContextImpl = *sycl::detail::getSyclObjImpl(Ctx); @@ -2610,10 +2606,10 @@ ur_result_t enqueueImpCommandBufferKernel( } adapter_impl &Adapter = ContextImpl.getAdapter(); - auto SetFunc = [&Adapter, &UrKernel, &DeviceImageImpl, &ContextImpl, - &getMemAllocationFunc](sycl::detail::ArgDesc &Arg, - size_t NextTrueIndex) { - sycl::detail::SetArgBasedOnType(Adapter, UrKernel, DeviceImageImpl.get(), + auto SetFunc = [&Adapter, &UrKernel, &ContextImpl, &getMemAllocationFunc, + DeviceImageImpl](sycl::detail::ArgDesc &Arg, + size_t NextTrueIndex) { + sycl::detail::SetArgBasedOnType(Adapter, UrKernel, DeviceImageImpl, getMemAllocationFunc, ContextImpl, Arg, NextTrueIndex); }; @@ -2695,7 +2691,7 @@ void enqueueImpKernel( const KernelArgMask *EliminatedArgMask; std::shared_ptr SyclKernelImpl; - std::shared_ptr DeviceImageImpl; + device_image_impl *DeviceImageImpl = nullptr; FastKernelCacheValPtr KernelCacheVal; if (nullptr != MSyclKernel) { @@ -2717,7 +2713,7 @@ void enqueueImpKernel( ? KernelBundleImplPtr->tryGetKernel(KernelName) : std::shared_ptr{nullptr})) { Kernel = SyclKernelImpl->getHandleRef(); - DeviceImageImpl = SyclKernelImpl->getDeviceImage(); + DeviceImageImpl = &SyclKernelImpl->getDeviceImage(); Program = DeviceImageImpl->get_ur_program(); diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index 831277b1cc818..1d844dd9517d6 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -285,11 +285,10 @@ bool has_kernel_bundle_impl(const context &Ctx, const std::vector &Devs, std::set CombinedKernelIDs; for (const DevImgPlainWithDeps &DeviceImageWithDeps : DeviceImagesWithDeps) { for (const device_image_plain &DeviceImage : DeviceImageWithDeps) { - const std::shared_ptr &DeviceImageImpl = - getSyclObjImpl(DeviceImage); + device_image_impl &DeviceImageImpl = *getSyclObjImpl(DeviceImage); - CombinedKernelIDs.insert(DeviceImageImpl->get_kernel_ids().begin(), - DeviceImageImpl->get_kernel_ids().end()); + CombinedKernelIDs.insert(DeviceImageImpl.get_kernel_ids().begin(), + DeviceImageImpl.get_kernel_ids().end()); } } diff --git a/sycl/test/abi/sycl_symbols_windows.dump b/sycl/test/abi/sycl_symbols_windows.dump index 2ec3e780667c7..748d74482a1a5 100644 --- a/sycl/test/abi/sycl_symbols_windows.dump +++ b/sycl/test/abi/sycl_symbols_windows.dump @@ -355,6 +355,7 @@ ??0device@_V1@sycl@@QEAA@PEAU_cl_device_id@@@Z ??0device@_V1@sycl@@QEAA@XZ ??0device_image_plain@detail@_V1@sycl@@QEAA@$$QEAV0123@@Z +??0device_image_plain@detail@_V1@sycl@@QEAA@$$QEAV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z ??0device_image_plain@detail@_V1@sycl@@QEAA@AEBV0123@@Z ??0device_image_plain@detail@_V1@sycl@@QEAA@AEBV?$shared_ptr@Vdevice_image_impl@detail@_V1@sycl@@@std@@@Z ??0device_selector@_V1@sycl@@QEAA@AEBV012@@Z diff --git a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp index d90c847cd9bfc..ab322bce9022b 100644 --- a/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp +++ b/sycl/unittests/program_manager/arg_mask/EliminatedArgMask.cpp @@ -185,9 +185,9 @@ const sycl::detail::KernelArgMask *getKernelArgMaskFromBundle( auto SyclKernelImpl = KernelBundleImplPtr->tryGetKernel(ExecKernel->MKernelName); EXPECT_TRUE(SyclKernelImpl != nullptr); - std::shared_ptr DeviceImageImpl = + sycl::detail::device_image_impl &DeviceImageImpl = SyclKernelImpl->getDeviceImage(); - ur_program_handle_t Program = DeviceImageImpl->get_ur_program(); + ur_program_handle_t Program = DeviceImageImpl.get_ur_program(); EXPECT_TRUE(nullptr == ExecKernel->MSyclKernel || !ExecKernel->MSyclKernel->isCreatedFromSource());