diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index 67414a2ca197e..ef2ae1ce9e6a1 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -247,21 +247,26 @@ class kernel_bundle_impl // Due to a bug in L0, specializations with conflicting IDs will overwrite // each other when linked together, so to avoid this issue we link - // regular offline-compiled SYCL device images in separation. + // images with specialization constants in separation. // TODO: Remove when spec const overwriting issue has been fixed in L0. - std::vector OfflineDeviceImages; + std::vector ImagesWithSpecConsts; std::unordered_set> - OfflineDeviceImageSet; + ImagesWithSpecConstsSet; for (const kernel_bundle &ObjectBundle : ObjectBundles) { for (const DevImgPlainWithDeps &DeviceImageWithDeps : getSyclObjImpl(ObjectBundle)->MDeviceImages) { - if (getSyclObjImpl(DeviceImageWithDeps.getMain())->getOriginMask() & - ImageOriginSYCLOffline) { - OfflineDeviceImages.push_back(&DeviceImageWithDeps); - for (const device_image_plain &DevImg : DeviceImageWithDeps) - OfflineDeviceImageSet.insert(getSyclObjImpl(DevImg)); - } + if (std::none_of(DeviceImageWithDeps.begin(), DeviceImageWithDeps.end(), + [](const device_image_plain &DevImg) { + const RTDeviceBinaryImage *BinImg = + getSyclObjImpl(DevImg)->get_bin_image_ref(); + return BinImg && BinImg->getSpecConstants().size(); + })) + continue; + + ImagesWithSpecConsts.push_back(&DeviceImageWithDeps); + for (const device_image_plain &DevImg : DeviceImageWithDeps) + ImagesWithSpecConstsSet.insert(getSyclObjImpl(DevImg)); } } @@ -273,8 +278,8 @@ class kernel_bundle_impl ObjectBundles) for (const device_image_plain &DevImg : getSyclObjImpl(ObjectBundle)->MUniqueDeviceImages) - if (OfflineDeviceImageSet.find(getSyclObjImpl(DevImg)) == - OfflineDeviceImageSet.end()) + if (ImagesWithSpecConstsSet.find(getSyclObjImpl(DevImg)) == + ImagesWithSpecConstsSet.end()) DevImagesSet.insert(getSyclObjImpl(DevImg)); DevImages.reserve(DevImagesSet.size()); for (auto It = DevImagesSet.begin(); It != DevImagesSet.end();) @@ -391,7 +396,8 @@ class kernel_bundle_impl } // ... And link the offline images in separation. (Workaround.) - for (const DevImgPlainWithDeps *DeviceImageWithDeps : OfflineDeviceImages) { + for (const DevImgPlainWithDeps *DeviceImageWithDeps : + ImagesWithSpecConsts) { // Skip images which are not compatible with devices provided if (std::none_of(MDevices.begin(), MDevices.end(), [DeviceImageWithDeps](const device &Dev) { diff --git a/sycl/source/detail/program_manager/program_manager.cpp b/sycl/source/detail/program_manager/program_manager.cpp index 455810696260d..95d68c904a6a3 100644 --- a/sycl/source/detail/program_manager/program_manager.cpp +++ b/sycl/source/detail/program_manager/program_manager.cpp @@ -2599,7 +2599,10 @@ ProgramManager::getSYCLDeviceImagesWithCompatibleState( std::lock_guard KernelIDsGuard(m_KernelIDsMutex); ImgInfo.KernelIDs = m_BinImg2KernelIDs[BinImage]; } - ImgInfo.Deps = collectDeviceImageDeps(*BinImage, {DevImpl}); + ImgInfo.Deps = + collectDeviceImageDeps(*BinImage, {DevImpl}, + /*ErrorOnUnresolvableImport=*/TargetState == + bundle_state::executable); } const bundle_state ImgState = ImgInfo.State; const std::shared_ptr> &ImageKernelIDs = diff --git a/sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.cpp b/sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.cpp new file mode 100644 index 0000000000000..ba5989f51146a --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.cpp @@ -0,0 +1,20 @@ +#include + +namespace syclext = sycl::ext::oneapi; +namespace syclexp = sycl::ext::oneapi::experimental; + +typedef void (*FuncPtrT)(size_t *); + +struct ArgsT { + size_t *Ptr; + FuncPtrT *FuncPtr; +}; + +SYCL_EXTERNAL size_t GetID() { + return syclext::this_work_item::get_nd_item<1>().get_global_id(); +} + +extern "C" SYCL_EXTERNAL SYCL_EXT_ONEAPI_FUNCTION_PROPERTY( + (syclexp::nd_range_kernel<1>)) void Kernel(ArgsT Args) { + (**Args.FuncPtr)(Args.Ptr); +} diff --git a/sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.hpp b/sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.hpp new file mode 100644 index 0000000000000..3dbbb9a070d02 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/Inputs/link_sycl_inline.hpp @@ -0,0 +1,82 @@ +#include "common.hpp" + +#include + +namespace syclexp = sycl::ext::oneapi::experimental; + +typedef void (*FuncPtrT)(size_t *); + +struct ArgsT { + size_t *Ptr; + FuncPtrT *FuncPtr; +}; + +#ifdef __SYCL_DEVICE_ONLY__ +SYCL_EXTERNAL size_t GetID(); +#else +// Host-side code to avoid linker problems. Will never be called. +SYCL_EXTERNAL size_t GetID() { return 0; } +#endif + +SYCL_EXTERNAL +void Func(size_t *Ptr) { + size_t GlobalID = GetID(); + Ptr[GlobalID] = GlobalID; +} + +SYCL_EXT_ONEAPI_FUNCTION_PROPERTY((syclexp::single_task_kernel)) +void GetFuncPtr(ArgsT Args) { *Args.FuncPtr = Func; } + +constexpr size_t N = 32; + +int main(int argc, char *argv[]) { + assert(argc == 2); + + sycl::queue Q; + + int Failed = CommonLoadCheck(Q.get_context(), argv[1]); + +#if defined(SYCLBIN_INPUT_STATE) + auto SYCLBINInput = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); + auto SYCLBINObj = sycl::compile(SYCLBINInput); +#elif defined(SYCLBIN_OBJECT_STATE) + auto SYCLBINObj = syclexp::get_kernel_bundle( + Q.get_context(), std::string{argv[1]}); +#else // defined(SYCLBIN_EXECUTABLE_STATE) +#error "Test does not work with executable state." +#endif + + auto KBObj = + syclexp::get_kernel_bundle( + Q.get_context()); + auto KBExe = sycl::link({KBObj, SYCLBINObj}); + + ArgsT Args{}; + Args.FuncPtr = sycl::malloc_shared(N, Q); + Args.Ptr = sycl::malloc_shared(N, Q); + + sycl::kernel GetFuncPtrKern = KBExe.ext_oneapi_get_kernel(); + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Args); + CGH.single_task(GetFuncPtrKern); + }).wait(); + + sycl::kernel Kern = KBExe.ext_oneapi_get_kernel("Kernel"); + Q.submit([&](sycl::handler &CGH) { + CGH.set_args(Args); + CGH.parallel_for(sycl::nd_range{{N}, {N}}, Kern); + }).wait(); + + for (size_t I = 0; I < N; ++I) { + if (Args.Ptr[I] != I) { + std::cout << Args.Ptr[I] << " != " << I << std::endl; + ++Failed; + } + } + + sycl::free(Args.FuncPtr, Q); + sycl::free(Args.Ptr, Q); + + return Failed; +} diff --git a/sycl/test-e2e/SYCLBIN/link_sycl_inline_input.cpp b/sycl/test-e2e/SYCLBIN/link_sycl_inline_input.cpp new file mode 100644 index 0000000000000..4607c85e17493 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_sycl_inline_input.cpp @@ -0,0 +1,24 @@ +//==-------- link_sycl_inline_input.cpp --- SYCLBIN extension tests --------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_shared_allocations + +// -- Test for linking between inline SYCL code and SYCLBIN code. + +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=input -fsycl-allow-device-image-dependencies -Xclang -fsycl-allow-func-ptr %S/Inputs/link_sycl_inline.cpp -o %t.syclbin +// RUN: %{build} -fsycl-allow-device-image-dependencies -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_INPUT_STATE + +#include "Inputs/link_sycl_inline.hpp" diff --git a/sycl/test-e2e/SYCLBIN/link_sycl_inline_object.cpp b/sycl/test-e2e/SYCLBIN/link_sycl_inline_object.cpp new file mode 100644 index 0000000000000..655815adeecf6 --- /dev/null +++ b/sycl/test-e2e/SYCLBIN/link_sycl_inline_object.cpp @@ -0,0 +1,25 @@ +//==-------- link_sycl_inline_object.cpp --- SYCLBIN extension tests +//--------==// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +// REQUIRES: aspect-usm_shared_allocations + +// -- Test for linking between inline SYCL code and SYCLBIN code. + +// Due to the regression in https://github.com/intel/llvm/issues/18432 it will +// fail to build the SYCLBIN with nvptx targets. Once this is fixed, +// %{sycl_target_opts} should be added to the SYCLBIN generation run-line. +// REQUIRES: target-spir + +// RUN: %clangxx --offload-new-driver -fsyclbin=object -fsycl-allow-device-image-dependencies -Xclang -fsycl-allow-func-ptr %S/Inputs/link_sycl_inline.cpp -o %t.syclbin +// RUN: %{build} -fsycl-allow-device-image-dependencies -o %t.out +// RUN: %{l0_leak_check} %{run} %t.out %t.syclbin + +#define SYCLBIN_OBJECT_STATE + +#include "Inputs/link_sycl_inline.hpp" diff --git a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp index de2b939756ea0..5b97ae7a0cac9 100644 --- a/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp +++ b/sycl/test/e2e_test_requirements/no_sycl_hpp_in_e2e_tests.cpp @@ -6,7 +6,7 @@ // CHECK-DAG: README.md // CHECK-DAG: lit.cfg.py // -// CHECK-NUM-MATCHES: 25 +// CHECK-NUM-MATCHES: 26 // // This test verifies that `` isn't used in E2E tests. Instead, // fine-grained includes should used, see diff --git a/sycl/unittests/kernel-and-program/OutOfResources.cpp b/sycl/unittests/kernel-and-program/OutOfResources.cpp index 3ffe91503e881..cc9f18ae57b21 100644 --- a/sycl/unittests/kernel-and-program/OutOfResources.cpp +++ b/sycl/unittests/kernel-and-program/OutOfResources.cpp @@ -174,7 +174,7 @@ TEST_P(OutOfResourcesTestSuite, urProgramLink) { auto b3 = sycl::link({b1, b2}); EXPECT_FALSE(outOfResourcesToggle); // one restart due to out of resources, one link per each of b1 and b2. - EXPECT_EQ(nProgramLink, 3); + EXPECT_EQ(nProgramLink, 2); // no programs should be in the cache due to out of resources. { detail::KernelProgramCache::ProgramCache &Cache =