diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index d7134e557f..1224346e06 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -228,7 +228,7 @@ template struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> { template - __future + auto operator()(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const { assert(oneapi::dpl::__ranges::__get_first_range_size(__rngs...) > 0); @@ -249,7 +249,7 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>> //General version of parallel_for, one additional parameter - __count of iterations of loop __cgh.parallel_for, //for some algorithms happens that size of processing range is n, but amount of iterations is n/2. template -__future +auto __parallel_for(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) { @@ -275,7 +275,7 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name { template - __future> + auto operator()(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) const { @@ -557,7 +557,7 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W { template - __future> + auto operator()(_Policy&& __policy, _InRng&& __in_rng, _OutRng&& __out_rng, ::std::size_t __n, _InitType __init, _BinaryOperation __bin_op, _UnaryOp __unary_op, _Assign __assign) { @@ -652,7 +652,7 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W template -__future> +auto __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, ::std::size_t __n, _UnaryOperation __unary_op, _InitType __init, _BinaryOperation __binary_op, @@ -746,7 +746,7 @@ __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend template -__future> +auto __parallel_transform_scan_base(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range1&& __in_rng, _Range2&& __out_rng, _BinaryOperation __binary_op, _InitType __init, _LocalScan __local_scan, _GroupScan __group_scan, _GlobalScan __global_scan) @@ -1812,7 +1812,7 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo __internal::__optional_kernel_name<_CopyBackName...>> { template - __future + auto operator()(_BackendTag, _ExecutionPolicy&& __exec, _Range&& __rng, _Merge __merge, _Compare __comp) const { using _Tp = oneapi::dpl::__internal::__value_t<_Range>; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h index 3c73eaafdf..2ced9aaec5 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_fpga.h @@ -56,7 +56,7 @@ template struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name...>> { template - __future + auto operator()(_ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) const { auto __n = oneapi::dpl::__ranges::__get_first_range_size(__rngs...); @@ -80,7 +80,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name... }; template -__future +auto __parallel_for(oneapi::dpl::__internal::__fpga_backend_tag, _ExecutionPolicy&& __exec, _Fp __brick, _Index __count, _Ranges&&... __rngs) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h index dde5cee2d4..f3ba8672f0 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_histogram.h @@ -497,7 +497,7 @@ __histogram_general_private_global_atomics(oneapi::dpl::__internal::__device_bac template <::std::uint16_t __iters_per_work_item, typename _ExecutionPolicy, typename _Range1, typename _Range2, typename _BinHashMgr> -__future +auto __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, const sycl::event& __init_event, _Range1&& __input, _Range2&& __bins, const _BinHashMgr& __binhash_manager) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h index bfdfd8883d..b9990958e9 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge.h @@ -137,7 +137,7 @@ template struct __parallel_merge_submitter<_IdType, __internal::__optional_kernel_name<_Name...>> { template - __future + auto operator()(_ExecutionPolicy&& __exec, _Range1&& __rng1, _Range2&& __rng2, _Range3&& __rng3, _Compare __comp) const { const _IdType __n1 = __rng1.size(); diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h index ef1faf2ef7..1c0bd923a4 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_merge_sort.h @@ -212,7 +212,7 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le __internal::__optional_kernel_name<_CopyBackName...>> { template - __future + auto operator()(_ExecutionPolicy&& __exec, _Range&& __rng, _Compare __comp, _LeafSorter& __leaf_sorter) const { using _Tp = oneapi::dpl::__internal::__value_t<_Range>; diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h index 6d08927ea5..0c11355657 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_radix_sort.h @@ -766,7 +766,7 @@ struct __parallel_radix_sort_iteration // radix sort: main function //----------------------------------------------------------------------- template -__future +auto __parallel_radix_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Range&& __in_rng, _Proj __proj) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h index 2744251b2f..c0adce402e 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce.h @@ -125,7 +125,7 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize, { template - __future> + auto operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, const _Size __n, const _Size __work_group_size, const _Size __iters_per_work_item, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) const @@ -161,7 +161,7 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize, template -__future> +auto __parallel_transform_reduce_small_impl(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, const _Size __n, const _Size __work_group_size, const _Size __iters_per_work_item, _ReduceOp __reduce_op, @@ -236,8 +236,8 @@ struct __parallel_transform_reduce_work_group_kernel_submitter<_Tp, _Commutative { template - __future> - operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, sycl::event&& __reduce_event, + auto + operator()(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, sycl::event& __reduce_event, const _Size __n, const _Size __work_group_size, const _Size __iters_per_work_item, _ReduceOp __reduce_op, _InitType __init, __result_and_scratch_storage<_ExecutionPolicy2, _Tp>& __scratch_container) const { @@ -312,7 +312,7 @@ struct __parallel_transform_reduce_impl { template - static __future> + static auto submit(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _Size __n, _Size __work_group_size, const _Size __iters_per_work_item, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) @@ -439,7 +439,7 @@ struct __parallel_transform_reduce_impl // reduced in each step. template -__future> +auto __parallel_transform_reduce(oneapi::dpl::__internal::__device_backend_tag __backend_tag, _ExecutionPolicy&& __exec, _ReduceOp __reduce_op, _TransformOp __transform_op, _InitType __init, _Ranges&&... __rngs) { diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h index a7bd22bdd9..3d69cab952 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl_reduce_then_scan.h @@ -747,7 +747,7 @@ __is_gpu_with_sg_32(const _ExecutionPolicy& __exec) template -__future> +auto __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_tag, _ExecutionPolicy&& __exec, _InRng&& __in_rng, _OutRng&& __out_rng, _GenReduceInput __gen_reduce_input, _ReduceOp __reduce_op, _GenScanInput __gen_scan_input,