From d79cd5e03f55e5987fdb62077c1d92842c6597dc Mon Sep 17 00:00:00 2001 From: Dan Hoeflinger Date: Mon, 30 Dec 2024 11:19:43 -0500 Subject: [PATCH] comments and clarity Signed-off-by: Dan Hoeflinger --- include/oneapi/dpl/pstl/algorithm_impl.h | 3 +- include/oneapi/dpl/pstl/omp/util.h | 29 ++++++++++++------- .../oneapi/dpl/pstl/parallel_backend_serial.h | 2 +- .../oneapi/dpl/pstl/parallel_backend_tbb.h | 2 +- 4 files changed, 22 insertions(+), 14 deletions(-) diff --git a/include/oneapi/dpl/pstl/algorithm_impl.h b/include/oneapi/dpl/pstl/algorithm_impl.h index a204e11e8a..ce36636cec 100644 --- a/include/oneapi/dpl/pstl/algorithm_impl.h +++ b/include/oneapi/dpl/pstl/algorithm_impl.h @@ -4339,7 +4339,8 @@ __pattern_histogram(__parallel_tag<_IsVector>, _ExecutionPolicy&& __exec, _Rando __par_backend::__parallel_for( __backend_tag{}, ::std::forward<_ExecutionPolicy>(__exec), __first, __last, [__func, &__tls](_RandomAccessIterator1 __first_local, _RandomAccessIterator1 __last_local) { - __internal::__brick_histogram(__first_local, __last_local, __func, __tls.get().begin(), _IsVector{}); + __internal::__brick_histogram(__first_local, __last_local, __func, + __tls.get_for_current_thread().begin(), _IsVector{}); }); // now accumulate temporary storage into output global histogram __par_backend::__parallel_for( diff --git a/include/oneapi/dpl/pstl/omp/util.h b/include/oneapi/dpl/pstl/omp/util.h index d6c13bfda7..f5d9e018de 100644 --- a/include/oneapi/dpl/pstl/omp/util.h +++ b/include/oneapi/dpl/pstl/omp/util.h @@ -154,6 +154,9 @@ __process_chunk(const __chunk_metrics& __metrics, _Iterator __base, _Index __chu __f(__first, __last); } + +// abstract class to allow inclusion in __thread_enumerable_storage as member without requiring explicit template +// instantiation of param types template class __construct_by_args_base { @@ -163,6 +166,7 @@ class __construct_by_args_base construct() = 0; }; +// Helper class to allow construction of _StorageType from a stored argument pack template class __construct_by_args : public __construct_by_args_base<_StorageType> { @@ -193,34 +197,37 @@ struct __thread_enumerable_storage std::uint32_t size() const { + // only count storage which has been instantiated return __num_elements.load(); } _StorageType& get_with_id(std::uint32_t __i) { - if (__i < size()) + assert(__i < size()); + + std::uint32_t __count = 0; + std::uint32_t __j = 0; + + for (; __j < __thread_specific_storage.size() && __count <= __i; ++__j) { - std::uint32_t __count = 0; - std::uint32_t __j = 0; - for (; __j < __thread_specific_storage.size() && __count <= __i; ++__j) + // Only include storage from threads which have instantiated a storage object + if (__thread_specific_storage[__j]) { - if (__thread_specific_storage[__j]) - { - __count++; - } + __count++; } - // Need to back up one once we have found a valid element - return *__thread_specific_storage[__j - 1]; } + // Need to back up one once we have found a valid storage object + return *__thread_specific_storage[__j - 1]; } _StorageType& - get() + get_for_current_thread() { std::uint32_t __i = omp_get_thread_num(); if (!__thread_specific_storage[__i]) { + // create temporary storage on first usage to avoid extra parallel region and unnecessary instantiation __thread_specific_storage[__i] = __construct_helper->construct(); __num_elements.fetch_add(1); } diff --git a/include/oneapi/dpl/pstl/parallel_backend_serial.h b/include/oneapi/dpl/pstl/parallel_backend_serial.h index 67db9e5114..4795736fde 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_serial.h +++ b/include/oneapi/dpl/pstl/parallel_backend_serial.h @@ -57,7 +57,7 @@ struct __thread_enumerable_storage } _StorageType& - get() + get_for_current_thread() { return __storage; } diff --git a/include/oneapi/dpl/pstl/parallel_backend_tbb.h b/include/oneapi/dpl/pstl/parallel_backend_tbb.h index 8dc470fd78..de36b97d1e 100644 --- a/include/oneapi/dpl/pstl/parallel_backend_tbb.h +++ b/include/oneapi/dpl/pstl/parallel_backend_tbb.h @@ -1322,7 +1322,7 @@ struct __thread_enumerable_storage } _StorageType& - get() + get_for_current_thread() { return __thread_specific_storage.local(); }