diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index be24619614d..9c692e38d75 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -43,13 +43,6 @@ namespace dpl namespace __internal { -template -auto -__make_thread_enumerable_storage(std::size_t __num_elements, _ValueType __init_value) -{ - return __par_backend::__thread_enumerable_storage{__num_elements, __init_value}; -} - //------------------------------------------------------------------------ // any_of //------------------------------------------------------------------------ @@ -4338,35 +4331,34 @@ __pattern_histogram(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando _DiffType __n = __last - __first; if (__n > 0) { - auto __thread_enumerable_storage = - oneapi::dpl::__internal::__make_thread_enumerable_storage(__num_bins, _HistogramValueT{0}); + __par_backend::__thread_enumerable_storage> __tls{__num_bins, _HistogramValueT{0}}; //main histogram loop //TODO: add defaulted grain-size option for __parallel_for and use larger one here to account for overhead __par_backend::__parallel_for(__backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, - [__func, &__thread_enumerable_storage](_RandomAccessIterator1 __first_local, + [__func, &__tls](_RandomAccessIterator1 __first_local, _RandomAccessIterator1 __last_local) { __internal::__brick_histogram(__first_local, __last_local, __func, - __thread_enumerable_storage.get(), _IsVector{}); + __tls.get().begin(), _IsVector{}); }); // now accumulate temporary storage into output global histogram __par_backend::__parallel_for( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __histogram_first, __histogram_first + __num_bins, - [__histogram_first, &__thread_enumerable_storage](auto __global_histogram_first, + [__histogram_first, &__tls](auto __global_histogram_first, auto __global_histogram_last) { _DiffType __local_n = __global_histogram_last - __global_histogram_first; - std::size_t __num_temporary_copies = __thread_enumerable_storage.size(); + std::size_t __num_temporary_copies = __tls.size(); _DiffType __range_begin_id = __global_histogram_first - __histogram_first; //initialize output global histogram with first local histogram via assign - __internal::__brick_walk2_n(__thread_enumerable_storage.get_with_id(0) + __range_begin_id, __local_n, + __internal::__brick_walk2_n(__tls.get_with_id(0).begin() + __range_begin_id, __local_n, __global_histogram_first, oneapi::dpl::__internal::__pstl_assign(), _IsVector{}); for (std::size_t __i = 1; __i < __num_temporary_copies; ++__i) { //accumulate into output global histogram with other local histogram via += operator __internal::__brick_walk2_n( - __thread_enumerable_storage.get_with_id(__i) + __range_begin_id, __local_n, + __tls.get_with_id(__i).begin() + __range_begin_id, __local_n, __global_histogram_first, [](_HistogramValueT __x, _HistogramValueT& __y) { __y += __x; }, _IsVector{}); } diff --git a/include/oneapi/dpl/pstl/omp/util.h b/include/oneapi/dpl/pstl/omp/util.h index 7ca0dc7b143..cf2de8f8f43 100644 --- a/include/oneapi/dpl/pstl/omp/util.h +++ b/include/oneapi/dpl/pstl/omp/util.h @@ -153,20 +153,21 @@ __process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chu __f(__first, __last); } -template +template struct __thread_enumerable_storage { - __thread_enumerable_storage(std::size_t __num_bins, _ValueType __init_value) + template + __thread_enumerable_storage(Args&&... args) { _PSTL_PRAGMA(omp parallel) _PSTL_PRAGMA(omp single nowait) { __num_threads = omp_get_num_threads(); __thread_specific_storage.resize(__num_threads); - _PSTL_PRAGMA(omp taskloop shared(__thread_specific_storage, __num_bins, __init_value)) + _PSTL_PRAGMA(omp taskloop shared(__thread_specific_storage)) for (std::size_t __tid = 0; __tid < __num_threads; ++__tid) { - __thread_specific_storage[__tid].resize(__num_bins, __init_value); + __thread_specific_storage[__tid] = std::make_unique<_StorageType>(std::forward(args)...); } } } @@ -177,19 +178,19 @@ struct __thread_enumerable_storage return __num_threads; } - auto + _StorageType& get_with_id(std::size_t __i) { - return __thread_specific_storage[__i].begin(); + return *__thread_specific_storage[__i]; } - auto + _StorageType& get() { return get_with_id(omp_get_thread_num()); } - std::vector> __thread_specific_storage; + std::vector> __thread_specific_storage; std::size_t __num_threads; }; diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index ec546bf3b91..2c92d878f87 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -42,10 +42,11 @@ __cancel_execution(oneapi::dpl::__internal::__serial_backend_tag) { } -template +template struct __thread_enumerable_storage { - __thread_enumerable_storage(std::size_t __num_bins, _ValueType __init_value) : __storage(__num_bins, __init_value) + template + __thread_enumerable_storage(Args&&... args) : __storage(std::forward(args)...) { } @@ -55,19 +56,19 @@ struct __thread_enumerable_storage return std::size_t{1}; } - auto + _StorageType& get() { - return __storage.begin(); + return __storage; } - auto + _StorageType& get_with_id(std::size_t __i) { return get(); } - std::vector<_ValueType> __storage; + _StorageType __storage; }; template diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 538a9de1701..cbb6e3b24df 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1307,11 +1307,12 @@ __parallel_for_each(oneapi::dpl::__internal::__tbb_backend_tag, _ExecutionPolicy tbb::this_task_arena::isolate([&]() { tbb::parallel_for_each(__begin, __end, __f); }); } -template +template struct __thread_enumerable_storage { - __thread_enumerable_storage(std::size_t __num_bins, _ValueType __init_value) - : __thread_specific_storage(__num_bins, __init_value) + template + __thread_enumerable_storage(Args&&... args) + : __thread_specific_storage(std::forward(args)...) { } @@ -1321,19 +1322,19 @@ struct __thread_enumerable_storage return __thread_specific_storage.size(); } - auto + _StorageType& get() { - return __thread_specific_storage.local().begin(); + return __thread_specific_storage.local(); } - auto + _StorageType& get_with_id(std::size_t __i) { - return __thread_specific_storage.begin()[__i].begin(); + return __thread_specific_storage.begin()[__i]; } - tbb::enumerable_thread_specific> __thread_specific_storage; + tbb::enumerable_thread_specific<_StorageType> __thread_specific_storage; }; } // namespace __tbb_backend