From c0fa3329ad745524b3b7820def36f802a42eccaf Mon Sep 17 00:00:00 2001 From: Dmitriy Sobolev Date: Tue, 3 Dec 2024 12:50:29 +0000 Subject: [PATCH] Fall back to SYCL 2020 API Signed-off-by: Dmitriy Sobolev --- .../dpl/experimental/kt/internal/esimd_defs.h | 2 +- .../kt/internal/esimd_radix_sort_kernels.h | 2 +- .../dpl/internal/reduce_by_segment_impl.h | 16 ++--- .../dpl/internal/scan_by_segment_impl.h | 8 +-- .../pstl/hetero/dpcpp/parallel_backend_sycl.h | 8 +-- .../dpcpp/parallel_backend_sycl_radix_sort.h | 18 ++--- .../dpcpp/parallel_backend_sycl_reduce.h | 4 +- .../dpcpp/parallel_backend_sycl_utils.h | 11 ++- .../oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h | 67 ++++++++++--------- .../pstl/hetero/dpcpp/unseq_backend_sycl.h | 12 ++-- include/oneapi/dpl/pstl/utils.h | 2 +- 11 files changed, 75 insertions(+), 75 deletions(-) diff --git a/include/oneapi/dpl/experimental/kt/internal/esimd_defs.h b/include/oneapi/dpl/experimental/kt/internal/esimd_defs.h index f9b9cd6e95f..d2640487624 100644 --- a/include/oneapi/dpl/experimental/kt/internal/esimd_defs.h +++ b/include/oneapi/dpl/experimental/kt/internal/esimd_defs.h @@ -21,7 +21,7 @@ // https://github.com/intel/llvm/blob/sycl/sycl/doc/extensions/supported/sycl_ext_intel_esimd/sycl_ext_intel_esimd.md#static-allocation-of-slm-using-slm_init-function #define _ONEDPL_ESIMD_INLINE inline __attribute__((always_inline)) -#define _ONEDPL_ESIMD_LSC_FENCE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 70200) +#define _ONEDPL_ESIMD_LSC_FENCE_PRESENT (_ONEDPL_DPCPP_LIBSYCL_VERSION >= 70200) namespace oneapi::dpl::experimental::kt::gpu::esimd::__impl { diff --git a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_kernels.h b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_kernels.h index 78b2b81dba0..794170e3231 100644 --- a/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_kernels.h +++ b/include/oneapi/dpl/experimental/kt/internal/esimd_radix_sort_kernels.h @@ -647,7 +647,7 @@ struct __radix_sort_onesweep_kernel // should not logically be needed. Consider removing once this has been further investigated. // This preprocessor check is set to expire and needs to be reevaluated once the SYCL major version // is upgraded to 9. -#if _ONEDPL_LIBSYCL_VERSION < 90000 +#if _ONEDPL_DPCPP_LIBSYCL_VERSION < 90000 # if _ONEDPL_ESIMD_LSC_FENCE_PRESENT __dpl_esimd::__ns::fence<__dpl_esimd::__ns::memory_kind::local>(); # else diff --git a/include/oneapi/dpl/internal/reduce_by_segment_impl.h b/include/oneapi/dpl/internal/reduce_by_segment_impl.h index d0979554786..4ddce6d3473 100644 --- a/include/oneapi/dpl/internal/reduce_by_segment_impl.h +++ b/include/oneapi/dpl/internal/reduce_by_segment_impl.h @@ -281,12 +281,12 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __seg_end_identification = __exec.queue().submit([&](sycl::handler& __cgh) { oneapi::dpl::__ranges::__require_access(__cgh, __keys); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_count_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceCountKernel>( sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=]( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_count_kernel, #endif sycl::nd_item<1> __item) { @@ -319,11 +319,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __cgh.depends_on(__seg_end_identification); auto __seg_ends_acc = __seg_ends.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_offset_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceOffsetKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_offset_kernel, #endif sycl::nd_range<1>{__wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { @@ -342,11 +342,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy auto __partials_acc = __partials.template get_access(__cgh); auto __seg_ends_scan_acc = __seg_ends_scanned.template get_access(__cgh); __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_wg_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReduceWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_wg_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { @@ -465,11 +465,11 @@ __sycl_reduce_by_segment(__internal::__hetero_tag<_BackendTag>, _ExecutionPolicy __dpl_sycl::__local_accessor<__diff_type> __loc_seg_ends_acc(__wgroup_size, __cgh); __cgh.depends_on(__wg_reduce); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_reduce_prefix_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegReducePrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_reduce_prefix_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { diff --git a/include/oneapi/dpl/internal/scan_by_segment_impl.h b/include/oneapi/dpl/internal/scan_by_segment_impl.h index b895561baeb..a8199e227dd 100644 --- a/include/oneapi/dpl/internal/scan_by_segment_impl.h +++ b/include/oneapi/dpl/internal/scan_by_segment_impl.h @@ -164,11 +164,11 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_acc(2 * __wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_scan_wg_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegScanWgKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_scan_wg_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { @@ -268,11 +268,11 @@ struct __sycl_scan_by_segment_impl __dpl_sycl::__local_accessor<__val_type> __loc_partials_acc(__wgroup_size, __cgh); __dpl_sycl::__local_accessor __loc_seg_ends_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__seg_scan_prefix_kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_SegScanPrefixKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __seg_scan_prefix_kernel, #endif sycl::nd_range<1>{__n_groups * __wgroup_size, __wgroup_size}, [=](sycl::nd_item<1> __item) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index 68dd00188dd..330df59d020 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -324,11 +324,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name auto __temp_acc = __result_and_scratch.template __get_scratch_acc( __cgh, __dpl_sycl::__no_init{}); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__kernel_1.get_kernel_bundle()); #endif __cgh.parallel_for<_LocalScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel_1, #endif sycl::nd_range<1>(__n_groups * __wgroup_size, __wgroup_size), [=](sycl::nd_item<1> __item) { @@ -345,11 +345,11 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name __cgh.depends_on(__submit_event); auto __temp_acc = __result_and_scratch.template __get_scratch_acc(__cgh); __dpl_sycl::__local_accessor<_Type> __local_acc(__wgroup_size, __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__kernel_2.get_kernel_bundle()); #endif __cgh.parallel_for<_GroupScanKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel_2, #endif // TODO: try to balance work between several workgroups instead of one diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index a220b3c29ff..088df58df01 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -198,11 +198,11 @@ __radix_sort_count_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, : oneapi::dpl::__ranges::__require_access(__hdl, __val_rng, __count_rng); // an accessor per work-group with value counters from each work-item auto __count_lacc = __dpl_sycl::__local_accessor<_CountT>(__wg_size * __radix_states, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif sycl::nd_range<1>(__segments * __wg_size, __wg_size), [=](sycl::nd_item<1> __self_item) { @@ -299,11 +299,11 @@ __radix_sort_scan_submit(_ExecutionPolicy&& __exec, ::std::size_t __scan_wg_size __hdl.depends_on(__dependency_event); // access the counters for all work groups oneapi::dpl::__ranges::__require_access(__hdl, __count_rng); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif sycl::nd_range<1>(__radix_states * __scan_wg_size, __scan_wg_size), [=](sycl::nd_item<1> __self_item) { @@ -346,7 +346,7 @@ enum class __peer_prefix_algo template struct __peer_prefix_helper; -#if (_ONEDPL_LIBSYCL_VERSION >= 50700) +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50700) template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic_fetch_or> { @@ -390,7 +390,7 @@ struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::atomic return __offset; } }; -#endif // (_ONEDPL_LIBSYCL_VERSION >= 50700) +#endif // _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50700) template struct __peer_prefix_helper<__radix_states, _OffsetT, __peer_prefix_algo::scan_then_broadcast> @@ -544,11 +544,11 @@ __radix_sort_reorder_submit(_ExecutionPolicy&& __exec, ::std::size_t __segments, typename _PeerHelper::_TempStorageT __peer_temp(1, __hdl); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __hdl.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __hdl.parallel_for<_KernelName>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif //Each SYCL work group processes one data segment. @@ -728,7 +728,7 @@ struct __parallel_radix_sort_iteration { #if _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT constexpr auto __peer_algorithm = __peer_prefix_algo::subgroup_ballot; -#elif _ONEDPL_LIBSYCL_VERSION >= 50700 +#elif _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50700) constexpr auto __peer_algorithm = __peer_prefix_algo::atomic_fetch_or; #else constexpr auto __peer_algorithm = __peer_prefix_algo::scan_then_broadcast; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index edad63d2a79..a29060a9cca 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -370,11 +370,11 @@ struct __parallel_transform_reduce_impl oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size); __dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh); -#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __cgh.use_kernel_bundle(__kernel.get_kernel_bundle()); #endif __cgh.parallel_for<_ReduceKernel>( -#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_COMPILE_KERNEL && !_ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT __kernel, #endif sycl::nd_range<1>(sycl::range<1>(__n_groups * __work_group_size), diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h index f4eb557170e..824357c249b 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_utils.h @@ -108,15 +108,12 @@ __supports_sub_group_size(const _ExecutionPolicy& __exec, std::size_t __target_s // Kernel run-time information helpers //----------------------------------------------------------------------------- -// 20201214 value corresponds to Intel(R) oneAPI C++ Compiler Classic 2021.1.2 Patch release -#define _USE_KERNEL_DEVICE_SPECIFIC_API (__SYCL_COMPILER_VERSION > 20201214) || (_ONEDPL_LIBSYCL_VERSION >= 50700) - template ::std::size_t __kernel_work_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __kernel) { const sycl::device& __device = __policy.queue().get_device(); -#if _USE_KERNEL_DEVICE_SPECIFIC_API +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50700) return __kernel.template get_info(__device); #else return __kernel.template get_work_group_info(__device); @@ -130,10 +127,10 @@ __kernel_sub_group_size(const _ExecutionPolicy& __policy, const sycl::kernel& __ const sycl::device& __device = __policy.queue().get_device(); [[maybe_unused]] const ::std::size_t __wg_size = __kernel_work_group_size(__policy, __kernel); const ::std::uint32_t __sg_size = -#if _USE_KERNEL_DEVICE_SPECIFIC_API +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50700) __kernel.template get_info( __device -# if _ONEDPL_LIBSYCL_VERSION < 60000 +# if _ONEDPL_DPCPP_LIBSYCL_VERSION < 60000 , sycl::range<3> { __wg_size, 1, 1 } # endif @@ -267,7 +264,7 @@ class __kernel_compiler static_assert(__kernel_count > 0, "At least one kernel name should be provided"); public: -#if _ONEDPL_KERNEL_BUNDLE_PRESENT +#if _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT template static auto __compile(_Exec&& __exec) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h index fbe9bd24080..6965d301dbe 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/sycl_defs.h @@ -37,14 +37,16 @@ // Combine SYCL runtime library version #if defined(__LIBSYCL_MAJOR_VERSION) && defined(__LIBSYCL_MINOR_VERSION) && defined(__LIBSYCL_PATCH_VERSION) -# define _ONEDPL_LIBSYCL_VERSION \ +# define _ONEDPL_DPCPP_LIBSYCL_VERSION \ (__LIBSYCL_MAJOR_VERSION * 10000 + __LIBSYCL_MINOR_VERSION * 100 + __LIBSYCL_PATCH_VERSION) #else -# define _ONEDPL_LIBSYCL_VERSION 0 +# define _ONEDPL_DPCPP_LIBSYCL_VERSION 0 #endif +#define _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(_ONEDPL_LIBSYCL_VERSION) \ + (_ONEDPL_DPCPP_LIBSYCL_VERSION >= _ONEDPL_LIBSYCL_VERSION || _ONEDPL_DPCPP_LIBSYCL_VERSION == 0) #if _ONEDPL_FPGA_DEVICE -# if _ONEDPL_LIBSYCL_VERSION >= 50400 +# if _ONEDPL_DPCPP_LIBSYCL_VERSION >= 50400 # include # else # include @@ -52,21 +54,22 @@ #endif // Macros to check the new SYCL features -#define _ONEDPL_NO_INIT_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_KERNEL_BUNDLE_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50300) -#define _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50500) -#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && (_ONEDPL_LIBSYCL_VERSION >= 50700) -#define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_LIBSYCL_VERSION >= 60200) +#define _ONEDPL_SYCL2020_NO_INIT_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) +#define _ONEDPL_SYCL2020_KERNEL_BUNDLE_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) +#define _ONEDPL_SYCL2020_COLLECTIVES_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) +#define _ONEDPL_SYCL2020_KNOWN_IDENTITY_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) +#define _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) +#define _ONEDPL_SYCL2020_ATOMIC_REF_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50500) +#define _ONEDPL_SYCL_SUB_GROUP_MASK_PRESENT (SYCL_EXT_ONEAPI_SUB_GROUP_MASK >= 1) && \ + (_ONEDPL_DPCPP_LIBSYCL_VERSION >= 50700) +#define _ONEDPL_SYCL_PLACEHOLDER_HOST_ACCESSOR_DEPRECATED (_ONEDPL_DPCPP_LIBSYCL_VERSION >= 60200) #define _ONEDPL_SYCL_DEVICE_COPYABLE_SPECIALIZATION_BROKEN \ - (_ONEDPL_LIBSYCL_VERSION < 70100) && (_ONEDPL_LIBSYCL_VERSION != 0) + (_ONEDPL_DPCPP_LIBSYCL_VERSION < 70100) && (_ONEDPL_DPCPP_LIBSYCL_VERSION != 0) // TODO: determine which compiler configurations provide subgroup load/store #define _ONEDPL_SYCL_SUB_GROUP_LOAD_STORE_PRESENT false -#define _ONEDPL_SYCL_SUB_GROUP_PRESENT (_ONEDPL_LIBSYCL_VERSION >= 50700) +#define _ONEDPL_SYCL2020_SUB_GROUP_PRESENT _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50700) // Macro to check if we are compiling for SPIR-V devices. This macro must only be used within // SYCL kernels for determining SPIR-V compilation. Using this macro on the host may lead to incorrect behavior. @@ -78,7 +81,7 @@ # endif #endif // _ONEDPL_DETECT_SPIRV_COMPILATION -#if _ONEDPL_LIBSYCL_VERSION >= 50300 +#if _ONEDPL_DPCPP_LIBSYCL_VERSION >= 50300 # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) sycl::reqd_sub_group_size(SIZE) #else # define _ONEDPL_SYCL_REQD_SUB_GROUP_SIZE(SIZE) intel::reqd_sub_group_size(SIZE) @@ -95,7 +98,7 @@ // The unified future supporting USM memory and buffers is only supported after DPCPP 2023.1 // but not by 2023.2. -#if (_ONEDPL_LIBSYCL_VERSION >= 60100 && _ONEDPL_LIBSYCL_VERSION != 60200) +#if (_ONEDPL_DPCPP_LIBSYCL_VERSION >= 60100 && _ONEDPL_DPCPP_LIBSYCL_VERSION != 60200) # define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 1 #else # define _ONEDPL_SYCL_UNIFIED_USM_BUFFER_PRESENT 0 @@ -105,7 +108,7 @@ namespace __dpl_sycl { using __no_init = -#if _ONEDPL_NO_INIT_PRESENT +#if _ONEDPL_SYCL2020_NO_INIT_PRESENT sycl::property::no_init; #else sycl::property::noinit; @@ -118,7 +121,7 @@ using __known_identity = sycl::known_identity<_BinaryOp, _T>; template using __has_known_identity = sycl::has_known_identity<_BinaryOp, _T>; -#elif _ONEDPL_LIBSYCL_VERSION == 50200 +#elif _ONEDPL_DPCPP_LIBSYCL_VERSION == 50200 template using __known_identity = sycl::ONEAPI::known_identity<_BinaryOp, _T>; @@ -149,7 +152,7 @@ template using __minimum = sycl::ONEAPI::minimum<_T>; #endif // _ONEDPL_SYCL2020_FUNCTIONAL_OBJECTS_PRESENT -#if _ONEDPL_SYCL_SUB_GROUP_PRESENT +#if _ONEDPL_SYCL2020_SUB_GROUP_PRESENT using __sub_group = sycl::sub_group; #else using __sub_group = sycl::ONEAPI::sub_group; @@ -159,7 +162,7 @@ template constexpr auto __get_buffer_size(const _Buffer& __buffer) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) return __buffer.size(); #else return __buffer.get_count(); @@ -170,7 +173,7 @@ template constexpr auto __get_accessor_size(const _Accessor& __accessor) { -#if _ONEDPL_LIBSYCL_VERSION >= 50300 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) return __accessor.size(); #else return __accessor.get_count(); @@ -181,7 +184,7 @@ template constexpr void __group_barrier(_Item __item) { -#if 0 //_ONEDPL_LIBSYCL_VERSION >= 50300 +#if 0 //_ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) //TODO: usage of sycl::group_barrier: probably, we have to revise SYCL parallel patterns which use a group_barrier. // 1) sycl::group_barrier() implementation is not ready // 2) sycl::group_barrier and sycl::item::group_barrier are not quite equivalent @@ -335,7 +338,7 @@ __joint_none_of(_Args&&... __args) } #if _ONEDPL_FPGA_DEVICE -# if _ONEDPL_LIBSYCL_VERSION >= 60100 +# if _ONEDPL_DPCPP_LIBSYCL_VERSION >= 60100 inline auto __fpga_emulator_selector() { return sycl::ext::intel::fpga_emulator_selector_v; @@ -345,7 +348,7 @@ inline auto __fpga_selector() return sycl::ext::intel::fpga_selector_v; } -# elif _ONEDPL_LIBSYCL_VERSION >= 50300 +# elif _ONEDPL_DPCPP_LIBSYCL_VERSION >= 50300 inline auto __fpga_emulator_selector() { return sycl::ext::intel::fpga_emulator_selector{}; @@ -367,21 +370,21 @@ inline auto __fpga_selector() #endif // _ONEDPL_FPGA_DEVICE using __target = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50400) sycl::target; #else sycl::access::target; #endif constexpr __target __target_device = -#if _ONEDPL_LIBSYCL_VERSION >= 50400 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50400) __target::device; #else __target::global_buffer; #endif constexpr __target __host_target = -#if _ONEDPL_LIBSYCL_VERSION >= 60200 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50400) __target::host_task; #else __target::host_buffer; @@ -389,14 +392,14 @@ constexpr __target __host_target = template using __buffer_allocator = -#if _ONEDPL_LIBSYCL_VERSION >= 60000 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50400) sycl::buffer_allocator<_DataT>; #else sycl::buffer_allocator; #endif template -#if _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT +#if _ONEDPL_SYCL2020_ATOMIC_REF_PRESENT using __atomic_ref = sycl::atomic_ref<_AtomicType, sycl::memory_order::relaxed, sycl::memory_scope::work_group, _Space>; #else struct __atomic_ref : sycl::atomic<_AtomicType, _Space> @@ -404,11 +407,11 @@ struct __atomic_ref : sycl::atomic<_AtomicType, _Space> explicit __atomic_ref(_AtomicType& ref) : sycl::atomic<_AtomicType, _Space>(sycl::multi_ptr<_AtomicType, _Space>(&ref)){}; }; -#endif // _ONEDPL_SYCL2023_ATOMIC_REF_PRESENT +#endif // _ONEDPL_SYCL2020_ATOMIC_REF_PRESENT template using __local_accessor = -#if _ONEDPL_LIBSYCL_VERSION >= 60000 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(60000) sycl::local_accessor<_DataT, _Dimensions>; #else sycl::accessor<_DataT, _Dimensions, sycl::access::mode::read_write, __dpl_sycl::__target::local>; @@ -418,7 +421,7 @@ template auto __get_host_access(_Buf&& __buf) { -#if _ONEDPL_LIBSYCL_VERSION >= 60200 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(60200) return ::std::forward<_Buf>(__buf).get_host_access(sycl::read_only); #else return ::std::forward<_Buf>(__buf).template get_access(); @@ -429,7 +432,7 @@ template auto __get_accessor_ptr(const _Acc& __acc) { -#if _ONEDPL_LIBSYCL_VERSION >= 70000 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(70000) return __acc.template get_multi_ptr().get(); #else return __acc.get_pointer(); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h index 2caa6add318..24d1ed9b204 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/unseq_backend_sycl.h @@ -49,7 +49,7 @@ inline constexpr bool __can_use_known_identity = template using __has_known_identity = ::std::conditional_t< __can_use_known_identity<_Tp>, -# if _ONEDPL_LIBSYCL_VERSION >= 50200 +# if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50200) typename ::std::disjunction< __dpl_sycl::__has_known_identity<_BinaryOp, _Tp>, ::std::conjunction<::std::is_arithmetic<_Tp>, @@ -61,14 +61,14 @@ using __has_known_identity = ::std::conditional_t< ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__minimum>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__maximum>>>>, -# else //_ONEDPL_LIBSYCL_VERSION >= 50200 +# else //_ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50200) typename ::std::conjunction< ::std::is_arithmetic<_Tp>, ::std::disjunction<::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, ::std::plus>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus<_Tp>>, ::std::is_same<::std::decay_t<_BinaryOp>, __dpl_sycl::__plus>>>, -# endif //_ONEDPL_LIBSYCL_VERSION >= 50200 +# endif //_ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50200) ::std::false_type>; // This is for the case of __can_use_known_identity<_Tp>==false #else //_ONEDPL_USE_GROUP_ALGOS && defined(SYCL_IMPLEMENTATION_INTEL) @@ -90,11 +90,11 @@ struct __known_identity_for_plus template inline constexpr _Tp __known_identity = -#if _ONEDPL_LIBSYCL_VERSION >= 50200 +#if _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50200) __dpl_sycl::__known_identity<_BinaryOp, _Tp>::value; -#else //_ONEDPL_LIBSYCL_VERSION >= 50200 +#else //_ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50200) __known_identity_for_plus<_BinaryOp, _Tp>::value; //for plus only -#endif //_ONEDPL_LIBSYCL_VERSION >= 50200 +#endif //_ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50200) template struct walk_n diff --git a/include/oneapi/dpl/pstl/utils.h b/include/oneapi/dpl/pstl/utils.h index 8a8dfdae1bc..669b92b1d5b 100644 --- a/include/oneapi/dpl/pstl/utils.h +++ b/include/oneapi/dpl/pstl/utils.h @@ -505,7 +505,7 @@ __dpl_bit_cast(const _Src& __src) noexcept { #if __cpp_lib_bit_cast >= 201806L return ::std::bit_cast<_Dst>(__src); -#elif _ONEDPL_BACKEND_SYCL && _ONEDPL_LIBSYCL_VERSION >= 50300 +#elif _ONEDPL_BACKEND_SYCL && _ONEDPL_DPCPP_LIBSYCL_ZERO_OR_GE(50300) return sycl::bit_cast<_Dst>(__src); #elif __has_builtin(__builtin_bit_cast) return __builtin_bit_cast(_Dst, __src);