Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Unseq first bench #6325

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
8 changes: 6 additions & 2 deletions .jenkins/lsu-perftests/launch_perftests.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,8 @@ set -ex
hpx_targets=(
"foreach_report_test"
"future_overhead_report_test"
"stream_report_test")
"stream_report_test"
"unseq_first_n_report_test")
hpx_test_options=(
"--hpx:ini=hpx.thread_queue.init_threads_count=100 \
--hpx:threads=4 --vector_size=104857 --work_delay=1 \
Expand All @@ -22,7 +23,10 @@ hpx_test_options=(
--repetitions=40 --futures=207270"
"--hpx:ini=hpx.thread_queue.init_threads_count=100 \
--vector_size=518176 --hpx:threads=4 --iterations=200 \
--warmup_iterations=20")
--warmup_iterations=20"
"--hpx:ini=hpx.thread_queue.init_threads_count=100 \
--hpx:threads=4 --vector_size=1048572 --work_delay=1 \
--chunk_size=0 --test_count=200")


# "--hpx:ini=hpx.thread_queue.init_threads_count=100 \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,7 @@ namespace hpx::parallel::detail {
sequential_find_t<ExPolicy>, Iterator first, Sentinel last,
T const& value, Proj proj = Proj())
{
return util::loop_pred<
std::decay_t<hpx::execution::sequenced_policy>>(
return util::loop_pred<ExPolicy>(
first, last, [&value, &proj](auto const& curr) {
return HPX_INVOKE(proj, *curr) == value;
});
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include <type_traits>
#include <utility>

// Please use static assert and enforce Iter to be Random Access Iterator
namespace hpx::parallel::util {
/*
Compiler and Hardware should also support vector operations for IterDiff,
Expand All @@ -42,7 +41,7 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_PRAGMA_SIMD_EARLYEXIT
for (; i < n; ++i)
{
if (f(*(first + i)))
if (f(first + i))
{
break;
}
Expand All @@ -64,7 +63,7 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_VECTOR_REDUCTION(| : found_flag)
for (IterDiff j = i; j < i + num_blocks; ++j)
{
std::int32_t const t = f(*(first + j));
std::int32_t const t = f(first + j);
simd_lane[j - i] = t;
found_flag |= t;
}
Expand All @@ -88,7 +87,7 @@ namespace hpx::parallel::util {
//Keep remainder scalar
while (i != n)
{
if (f(*(first + i)))
if (f(first + i))
{
break;
}
Expand All @@ -108,7 +107,7 @@ namespace hpx::parallel::util {
// clang-format off
HPX_PRAGMA_VECTOR_UNALIGNED HPX_PRAGMA_SIMD_EARLYEXIT
for (; i < n; ++i)
if (f(*(first1 + i), *(first2 + i)))
if (f(first1 + i, first2 + i))
break;
// clang-format on

Expand All @@ -129,8 +128,8 @@ namespace hpx::parallel::util {
HPX_PRAGMA_VECTOR_UNALIGNED HPX_VECTOR_REDUCTION(| : found_flag)
for (i = 0; i < num_blocks; ++i)
{
IterDiff const t = f(*(first1 + outer_loop_ind + i),
*(first2 + outer_loop_ind + i));
IterDiff const t = f(first1 + outer_loop_ind + i,
first2 + outer_loop_ind + i);
simd_lane[i] = t;
found_flag |= t;
}
Expand All @@ -152,7 +151,7 @@ namespace hpx::parallel::util {

//Keep remainder scalar
for (; outer_loop_ind != n; ++outer_loop_ind)
if (f(*(first1 + outer_loop_ind), *(first2 + outer_loop_ind)))
if (f(first1 + outer_loop_ind, first2 + outer_loop_ind))
break;

return std::make_pair(first1 + outer_loop_ind, first2 + outer_loop_ind);
Expand Down
14 changes: 13 additions & 1 deletion libs/core/algorithms/include/hpx/parallel/util/loop.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,14 @@

#include <hpx/config.hpp>
#include <hpx/assert.hpp>
#include <hpx/concepts/concepts.hpp>
#include <hpx/datastructures/tuple.hpp>
#include <hpx/execution/traits/is_execution_policy.hpp>
#include <hpx/functional/detail/invoke.hpp>
#include <hpx/functional/detail/tag_fallback_invoke.hpp>
#include <hpx/functional/invoke_result.hpp>
#include <hpx/iterator_support/traits/is_iterator.hpp>
#include <hpx/parallel/unseq/simd_helpers.hpp>
#include <hpx/type_support/identity.hpp>

#include <algorithm>
Expand Down Expand Up @@ -104,7 +106,6 @@ namespace hpx::parallel::util {

///////////////////////////////////////////////////////////////////////////
namespace detail {

// Helper class to repeatedly call a function starting from a given
// iterator position till the predicate returns true.
template <typename Iterator>
Expand Down Expand Up @@ -140,6 +141,17 @@ namespace hpx::parallel::util {
}
};

// loop_pred overload for unsequenced execution policies on random-access
// iterators (both conditions are enforced by the HPX_CONCEPT_REQUIRES_
// constraint below).
//
// The half-open range [begin, end) is turned into a (begin, count) pair and
// passed on to unseq_first_n together with the forwarded predicate; whatever
// iterator unseq_first_n yields is returned unchanged.
template <typename Begin, typename End, typename Pred, typename ExPolicy,
    HPX_CONCEPT_REQUIRES_(hpx::traits::is_random_access_iterator_v<Begin> &&
        hpx::is_unsequenced_execution_policy_v<ExPolicy>)>
HPX_HOST_DEVICE HPX_FORCEINLINE Begin tag_invoke(
    hpx::parallel::util::loop_pred_t<ExPolicy>, Begin HPX_RESTRICT begin,
    End HPX_RESTRICT end, Pred&& pred)
{
    // Number of elements to scan.
    auto const count = std::distance(begin, end);

    return unseq_first_n(begin, count, HPX_FORWARD(Pred, pred));
}

#if !defined(HPX_COMPUTE_DEVICE_CODE)
template <typename ExPolicy>
inline constexpr loop_pred_t<ExPolicy> loop_pred = loop_pred_t<ExPolicy>{};
Expand Down
1 change: 1 addition & 0 deletions libs/core/algorithms/tests/performance/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ set(benchmarks
benchmark_unique
benchmark_unique_copy
foreach_report
unseq_first_n_report
foreach_scaling
transform_reduce_scaling
)
Expand Down
100 changes: 100 additions & 0 deletions libs/core/algorithms/tests/performance/unseq_first_n_report.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
// Copyright (c) 2021 ETH Zurich
// Copyright (c) 2021-2022 Hartmut Kaiser
// Copyright (c) 2014 Grant Mercer
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/config.hpp>

#if !defined(HPX_COMPUTE_DEVICE_CODE)
#include <hpx/algorithm.hpp>
#include <hpx/chrono.hpp>
#include <hpx/compute.hpp>
#include <hpx/execution.hpp>
#include <hpx/init.hpp>
#include <hpx/modules/testing.hpp>

#include <cstddef>
#include <cstdint>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <memory>
#include <numeric>
#include <random>
#include <stdexcept>
#include <string>
#include <type_traits>
#include <vector>

// Number of tests handed to hpx::util::perftests_report. Starts at 100 and is
// overwritten in hpx_main with the value of the --test_count option.
int test_count = 100;

// Performs one timed unit of work: a single unseq_first_n search over
// `data_representation` for the first element equal to 1.
//
// The predicate is called with an iterator (not with the element itself), so
// it dereferences its argument before comparing.
void unseq_first_n_benchmark(
    std::vector<std::size_t> const& data_representation)
{
    auto const match = hpx::parallel::util::unseq_first_n(
        data_representation.begin(), data_representation.size(),
        [](auto it) { return *it == 1; });

    // Store the outcome in a volatile object. Without an observable use of
    // the result the optimizer is free to drop (part of) the search, which
    // would make the reported timings meaningless.
    [[maybe_unused]] std::ptrdiff_t volatile found_offset =
        std::distance(data_representation.begin(), match);
}

///////////////////////////////////////////////////////////////////////////////
// HPX entry function: reads the benchmark parameters from the parsed command
// line, builds the input vector and reports timings for unseq_first_n.
//
// vm  parsed command-line options; "vector_size" and "test_count" are read.
//
// Returns -1 (after finalizing the runtime) when an option is out of range,
// otherwise the result of hpx::local::finalize().
int hpx_main(hpx::program_options::variables_map& vm)
{
    std::size_t const vector_size = vm["vector_size"].as<std::size_t>();
    test_count = vm["test_count"].as<int>();

    // verify that input is within domain of program
    if (test_count <= 0)
    {
        std::cerr << "test_count cannot be zero or negative...\n" << std::flush;
        hpx::local::finalize();
        return -1;
    }

    // An empty vector has no middle element to mark; indexing it below would
    // write out of bounds.
    if (vector_size == 0)
    {
        std::cerr << "vector_size cannot be zero...\n" << std::flush;
        hpx::local::finalize();
        return -1;
    }

    {
        // Every element is value-initialized to 0; the single 1 placed in the
        // middle is the element the benchmark searches for.
        std::vector<std::size_t> data_representation(vector_size);
        data_representation[data_representation.size() / 2] = 1;

        hpx::util::perftests_report("unseq_first_n", "scheduler_executor",
            test_count, [&data_representation]() {
                unseq_first_n_benchmark(data_representation);
            });

        hpx::util::perftests_print_times();
    }

    return hpx::local::finalize();
}

///////////////////////////////////////////////////////////////////////////////
int main(int argc, char* argv[])
{
using namespace hpx::program_options;

options_description cmdline("usage: " HPX_APPLICATION_STRING " [options]");

// clang-format off
cmdline.add_options()
("vector_size", value<std::size_t>()->default_value(1000),
"size of vector")
("work_delay", value<int>()->default_value(1),
"loop delay per element in nanoseconds")
("test_count", value<int>()->default_value(100),
"number of tests to be averaged")
("chunk_size", value<int>()->default_value(0),
"number of iterations to combine while parallelization")
("disable_stealing", "disable thread stealing")
("fast_idle_mode", "enable fast idle mode")
;
// clang-format on

hpx::local::init_params init_args;
init_args.desc_cmdline = cmdline;
init_args.cfg = {"hpx.os_threads=all"};

return hpx::local::init(hpx_main, argc, argv, init_args);
}
#endif
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index)
{
first_index = first_index % length;

std::vector<T> v(length, static_cast<T>(false));
std::vector<T> v(length);
std::size_t i = 0;

std::for_each(v.begin(), v.end(), [&](T& t) {
Expand All @@ -36,7 +36,7 @@ void test_unseq_first_n1_dispatch2(std::size_t length, std::size_t first_index)
i++;
});

auto f = [](T t) { return t; };
auto f = [](auto t) { return *t; };

auto iter_test = hpx::parallel::util::unseq_first_n(
v.begin(), static_cast<T>(length), f);
Expand Down Expand Up @@ -80,7 +80,7 @@ void test_unseq_first_n2_dispatch2(std::size_t length, std::size_t first_index)
idx++;
}

auto f = [](T t1, T t2) { return t1 && t2; };
auto f = [](auto t1, auto t2) { return *t1 && *t2; };

auto iter_pair_test = hpx::parallel::util::unseq2_first_n(
v1.begin(), v2.begin(), static_cast<T>(length), f);
Expand Down
2 changes: 1 addition & 1 deletion tools/perftests_ci/local_run-lsu.sh
100644 → 100755
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

# To execute from the build directory

src_dir=${1:-~/projects/hpx}
src_dir=${1:-~/hpx}
build_dir=$PWD

# Clean old artifacts if any
Expand Down
Loading