Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

simd std::remove #6322

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ namespace hpx::parallel::detail {
sequential_find_t<ExPolicy>, Iterator first, Sentinel last,
T const& value, Proj proj = Proj())
{
return util::loop_pred<
std::decay_t<hpx::execution::sequenced_policy>>(
return util::loop_pred<ExPolicy>(
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do you need to decay the ExPolicy here (and in other places)?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we really need decay over here? ExPolicy is only being used for deducing if seq or unseq overload should be called. is_unseq<unseq_policy> should be true for both value and reference right?

Also, please note that this is branched from simd-find.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Well, you tell me. I was just asking the question without closer investigation.

Copy link
Contributor Author

@Johan511 Johan511 Aug 18, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To the best of my knowledge, I don't believe we need to decay. I am unsure why it was being decayed previously. I assume is_unseq<unseq_policy>, is_unseq<unseq_policy&> and is_unseq<unseq_policy&&> all evaluate to true

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hkaiser I have tried checking behaviour with/without std::decay_t. The correct overloads are called in both cases. I am unsure why std::decay_t is being used.

first, last, [&value, &proj](auto const& curr) {
return HPX_INVOKE(proj, *curr) == value;
});
Expand Down
18 changes: 10 additions & 8 deletions libs/core/algorithms/include/hpx/parallel/algorithms/remove.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -247,12 +247,13 @@ namespace hpx::parallel {
namespace detail {

/// \cond NOINTERNAL
template <typename Iter, typename Sent, typename Pred, typename Proj>
template <typename ExPolicy, typename Iter, typename Sent,
typename Pred, typename Proj>
constexpr Iter sequential_remove_if(
Iter first, Sent last, Pred pred, Proj proj)
ExPolicy, Iter first, Sent last, Pred pred, Proj proj)
{
first = hpx::parallel::detail::sequential_find_if<
hpx::execution::sequenced_policy>(first, last, pred, proj);
first = hpx::parallel::detail::sequential_find_if<ExPolicy>(
first, last, pred, proj);

if (first != last)
{
Expand All @@ -275,11 +276,12 @@ namespace hpx::parallel {

template <typename ExPolicy, typename Iter, typename Sent,
typename Pred, typename Proj>
static constexpr Iter sequential(
ExPolicy, Iter first, Sent last, Pred&& pred, Proj&& proj)
static constexpr Iter sequential(ExPolicy&& policy, Iter first,
Sent last, Pred&& pred, Proj&& proj)
{
return sequential_remove_if(first, last,
HPX_FORWARD(Pred, pred), HPX_FORWARD(Proj, proj));
return sequential_remove_if(HPX_FORWARD(ExPolicy, policy),
first, last, HPX_FORWARD(Pred, pred),
HPX_FORWARD(Proj, proj));
}

template <typename ExPolicy, typename Iter, typename Sent,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <type_traits>
#include <utility>

// Please use static assert and enforce Iter to be Random Access Iterator
namespace hpx::parallel::util {
/*
Compiler and Hardware should also support vector operations for IterDiff,
Expand All @@ -42,7 +41,7 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_PRAGMA_SIMD_EARLYEXIT
for (; i < n; ++i)
{
if (f(*(first + i)))
if (f(first + i))
{
break;
}
Expand All @@ -64,7 +63,7 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_VECTOR_REDUCTION(| : found_flag)
for (IterDiff j = i; j < i + num_blocks; ++j)
{
std::int32_t const t = f(*(first + j));
std::int32_t const t = f(first + j);
simd_lane[j - i] = t;
found_flag |= t;
}
Expand All @@ -88,7 +87,7 @@ namespace hpx::parallel::util {
//Keep remainder scalar
while (i != n)
{
if (f(*(first + i)))
if (f(first + i))
{
break;
}
Expand All @@ -108,7 +107,7 @@ namespace hpx::parallel::util {
// clang-format off
HPX_PRAGMA_VECTOR_UNALIGNED HPX_PRAGMA_SIMD_EARLYEXIT
for (; i < n; ++i)
if (f(*(first1 + i), *(first2 + i)))
if (f(first1 + i, first2 + i))
break;
// clang-format on

Expand All @@ -129,8 +128,8 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_VECTOR_REDUCTION(| : found_flag)
for (i = 0; i < num_blocks; ++i)
{
IterDiff const t = f(*(first1 + outer_loop_ind + i),
*(first2 + outer_loop_ind + i));
IterDiff const t = f(first1 + outer_loop_ind + i,
first2 + outer_loop_ind + i);
simd_lane[i] = t;
found_flag |= t;
}
Expand All @@ -152,7 +151,7 @@ namespace hpx::parallel::util {

//Keep remainder scalar
for (; outer_loop_ind != n; ++outer_loop_ind)
if (f(*(first1 + outer_loop_ind), *(first2 + outer_loop_ind)))
if (f(first1 + outer_loop_ind, first2 + outer_loop_ind))
break;

return std::make_pair(first1 + outer_loop_ind, first2 + outer_loop_ind);
Expand Down
14 changes: 13 additions & 1 deletion libs/core/algorithms/include/hpx/parallel/util/loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@

#include <hpx/config.hpp>
#include <hpx/assert.hpp>
#include <hpx/concepts/concepts.hpp>
#include <hpx/datastructures/tuple.hpp>
#include <hpx/execution/traits/is_execution_policy.hpp>
#include <hpx/functional/detail/invoke.hpp>
#include <hpx/functional/detail/tag_fallback_invoke.hpp>
#include <hpx/functional/invoke_result.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/parallel/unseq/simd_helpers.hpp>
#include <hpx/type_support/identity.hpp>

#include <algorithm>
Expand Down Expand Up @@ -104,7 +106,6 @@ namespace hpx::parallel::util {

///////////////////////////////////////////////////////////////////////////
namespace detail {

// Helper class to repeatedly call a function starting from a given
// iterator position till the predicate returns true.
template <typename Iterator>
Expand Down Expand Up @@ -140,6 +141,17 @@ namespace hpx::parallel::util {
}
};

template <typename Begin, typename End, typename Pred, typename ExPolicy,
HPX_CONCEPT_REQUIRES_(hpx::traits::is_random_access_iterator_v<Begin>&&
hpx::is_unsequenced_execution_policy_v<ExPolicy>)>
HPX_HOST_DEVICE HPX_FORCEINLINE Begin tag_invoke(
hpx::parallel::util::loop_pred_t<ExPolicy>, Begin HPX_RESTRICT begin,
End HPX_RESTRICT end, Pred&& pred)
{
return unseq_first_n(
begin, std::distance(begin, end), HPX_FORWARD(Pred, pred));
}

#if !defined(HPX_COMPUTE_DEVICE_CODE)
template <typename ExPolicy>
inline constexpr loop_pred_t<ExPolicy> loop_pred = loop_pred_t<ExPolicy>{};
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index)
{
first_index = first_index % length;

std::vector<T> v(length, static_cast<T>(false));
std::vector<T> v(length);
std::size_t i = 0;

std::for_each(v.begin(), v.end(), [&](T& t) {
Expand All @@ -36,7 +36,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index)
i++;
});

auto f = [](T t) { return t; };
auto f = [](auto t) { return *t; };

auto iter_test = hpx::parallel::util::unseq_first_n(
v.begin(), static_cast<T>(length), f);
Expand Down Expand Up @@ -80,7 +80,7 @@ void test_unseq_first_n2_dispatch2(std::size_t length, std::size_t first_index)
idx++;
}

auto f = [](T t1, T t2) { return t1 && t2; };
auto f = [](auto t1, auto t2) { return *t1 && *t2; };

auto iter_pair_test = hpx::parallel::util::unseq2_first_n(
v1.begin(), v2.begin(), static_cast<T>(length), f);
Expand Down
Loading