Adding hierarchical operation to index_queue spawning #6318

Open · wants to merge 3 commits into master
44 changes: 26 additions & 18 deletions libs/core/algorithms/tests/performance/foreach_scaling.cpp
@@ -35,7 +35,7 @@ std::uint64_t averageout_plain_for(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -52,7 +52,7 @@ std::uint64_t averageout_plain_for_iter(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -72,7 +72,7 @@ std::uint64_t averageout_parallel_foreach(
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -92,7 +92,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)

if (num_overlapping_loops <= 0)
{
-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
measure_task_foreach(data_representation, exec).wait();
@@ -103,7 +103,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
std::vector<hpx::shared_future<void>> tests;
tests.resize(num_overlapping_loops);

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
{
@@ -124,7 +124,7 @@ std::uint64_t averageout_sequential_foreach(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -142,7 +142,7 @@ std::uint64_t averageout_parallel_forloop(
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -167,7 +167,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)

if (num_overlapping_loops <= 0)
{
-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
measure_task_forloop(data_representation, exec).wait();
@@ -178,7 +178,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
std::vector<hpx::shared_future<void>> tests;
tests.resize(num_overlapping_loops);

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
{
@@ -199,7 +199,7 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -212,8 +212,8 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
int hpx_main(hpx::program_options::variables_map& vm)
{
// pull values from cmd
-std::size_t vector_size = vm["vector_size"].as<std::size_t>();
-bool csvoutput = vm.count("csv_output") != 0;
+std::size_t const vector_size = vm["vector_size"].as<std::size_t>();
+bool const csvoutput = vm.count("csv_output") != 0;
delay = vm["work_delay"].as<int>();
test_count = vm["test_count"].as<int>();
chunk_size = vm["chunk_size"].as<int>();
@@ -264,8 +264,8 @@ int hpx_main(hpx::program_options::variables_map& vm)
std::uint64_t task_time_forloop = 0;
std::uint64_t seq_time_forloop = 0;

-std::uint64_t plain_time_for = averageout_plain_for(vector_size);
-std::uint64_t plain_time_for_iter =
+std::uint64_t const plain_time_for = averageout_plain_for(vector_size);
+std::uint64_t const plain_time_for_iter =
averageout_plain_for_iter(vector_size);

if (vm["executor"].as<std::string>() == "forkjoin")
Expand Down Expand Up @@ -467,11 +467,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
<< std::left
<< "Parallel Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_foreach) / par_time_foreach) << "\n"
+<< (static_cast<double>(seq_time_foreach) /
+       par_time_foreach)
+<< "\n"
<< std::left
<< "Task Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_foreach) / task_time_foreach) << "\n"
+<< (static_cast<double>(seq_time_foreach) /
+       task_time_foreach)
+<< "\n"
<< std::flush;

std::cout << "-------------Average-(for_loop)----------------\n"
@@ -490,11 +494,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
<< std::left
<< "Parallel Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_forloop) / par_time_forloop) << "\n"
+<< (static_cast<double>(seq_time_forloop) /
+       par_time_forloop)
+<< "\n"
<< std::left
<< "Task Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_forloop) / task_time_forloop) << "\n";
+<< (static_cast<double>(seq_time_forloop) /
+       task_time_forloop)
+<< "\n";
}
}

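The recurring edit in this file const-qualifies the timing variables and replaces C-style double(...) conversions with static_cast<double>. Converting one operand before the division is what forces floating-point arithmetic on the integer counters; static_cast just states that intent explicitly. A minimal standalone sketch, with made-up timing values rather than real benchmark output:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        std::uint64_t const seq_time = 950;    // hypothetical time in ns
        std::uint64_t const par_time = 400;    // hypothetical time in ns

        // Plain integer division truncates the scale factor: 950/400 == 2.
        std::cout << (seq_time / par_time) << "\n";

        // Converting one operand first keeps the fraction: 2.375.
        std::cout << (static_cast<double>(seq_time) / par_time) << "\n";
    }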
@@ -715,7 +715,7 @@ void test_sorted_until3_seq()
std::iota(std::begin(c1), std::end(c1), 0);
std::iota(std::begin(c2), std::end(c2), 0);

-auto until1 =
+auto const until1 =
hpx::ranges::is_sorted_until(c1, std::less<int>(), [&](int x) {
if (x == 0)
{
@@ -730,7 +730,7 @@ void test_sorted_until3_seq()
return x;
}
});
-auto until2 =
+auto const until2 =
hpx::ranges::is_sorted_until(c2, std::less<int>(), [&](int x) {
if (x == static_cast<int>(c2.size()) / 3 ||
x == 2 * static_cast<int>(c2.size()) / 3)
@@ -743,8 +743,8 @@
}
});

-auto test_index1 = std::begin(c1) + 1;
-auto test_index2 = std::begin(c2) + c2.size() / 3;
+auto const test_index1 = std::begin(c1) + 1;
+auto const test_index2 = std::begin(c2) + c2.size() / 3;

HPX_TEST(until1 == test_index1);
HPX_TEST(until2 == test_index2);
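For context: is_sorted_until returns the iterator just past the longest sorted prefix, and this test uses the projection argument to inject out-of-order values at known positions. A self-contained sketch of the same idea using std::ranges::is_sorted_until, which the hpx::ranges overload tested above mirrors (the spike position here is made up for illustration):

    #include <algorithm>
    #include <cassert>
    #include <functional>
    #include <numeric>
    #include <vector>

    int main()
    {
        std::vector<int> c(100);
        std::iota(c.begin(), c.end(), 0);    // 0, 1, ..., 99: fully sorted

        // The projection reports element 42 as 1000, so the sequence
        // appears to drop from 1000 back to 43 right after it.
        auto const until = std::ranges::is_sorted_until(
            c, std::less<int>(), [](int x) { return x == 42 ? 1000 : x; });

        // The longest sorted prefix ends just after the spike at index 42.
        assert(until == c.begin() + 43);
    }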
@@ -32,7 +32,7 @@ namespace hpx::compute::host {
{
}

-explicit native_handle_type(hpx::threads::mask_type mask)
+explicit native_handle_type(hpx::threads::mask_type const& mask)
: mask_(mask)
{
}
@@ -56,7 +56,7 @@ namespace hpx::compute::host {
target() = default;

// Constructs target from a given mask of processing units
-explicit target(hpx::threads::mask_type mask)
+explicit target(hpx::threads::mask_type const& mask)
: handle_(mask)
{
}
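Taking the mask by const& avoids copying it at every call site. Whether that matters depends on the build: hpx::threads::mask_type can be configured as a plain integer or as a much wider bitset type, so the copy is not necessarily cheap. A hypothetical stand-in showing the shape of the change (the bitset width here is an assumption, not HPX's actual type):

    #include <bitset>

    // Stand-in for hpx::threads::mask_type; in HPX the real type depends
    // on the configured maximum CPU count.
    using mask_type = std::bitset<1024>;

    struct native_handle_type
    {
        // const& avoids one copy when the caller already owns a mask; the
        // member initialization still copies once, which is unavoidable.
        explicit native_handle_type(mask_type const& mask)
          : mask_(mask)
        {
        }

        mask_type mask_;
    };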
12 changes: 11 additions & 1 deletion libs/core/compute_local/src/host_target.cpp
@@ -27,16 +27,26 @@ namespace hpx::compute::host {
hpx::threads::mask_type const mask = native_handle().get_device();
std::size_t const mask_size = hpx::threads::mask_size(mask);

+bool found_one = false;
+
std::size_t num_thread = 0;
for (/**/; num_thread != num_os_threads; ++num_thread)
{
if (hpx::threads::bit_and(
mask, rp.get_pu_mask(num_thread), mask_size))
{
+found_one = true;
break;
}
}
-return std::make_pair(num_thread, hpx::threads::count(mask));
+
+if (!found_one)
+{
+    return std::make_pair(static_cast<std::size_t>(-1), 0);
+}
+
+return std::make_pair(
+    num_thread, (std::min)(num_os_threads, hpx::threads::count(mask)));
}

void target::serialize(serialization::input_archive& ar, unsigned int)
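This hunk fixes a silent fall-through: previously, when no OS thread's PU mask overlapped the target's mask, the loop ran to completion and the past-the-end num_thread was returned as if it were a valid index; the reported count could also exceed the number of OS threads actually running. Callers can now detect the failure explicitly. A hypothetical caller-side check (the function and variable names are illustrative, not from the patch):

    #include <cstddef>
    #include <utility>

    void handle(std::pair<std::size_t, std::size_t> const& pus)
    {
        // The patched code returns {size_t(-1), 0} when the target's mask
        // matches none of the runtime's processing units.
        if (pus.first == static_cast<std::size_t>(-1))
        {
            // No usable PU: fall back to a default or report an error.
            return;
        }

        // pus.first is the first matching OS thread; pus.second is the PU
        // count, now clamped to the number of OS threads.
    }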
@@ -206,15 +206,15 @@ namespace hpx::threads {
/// local thread number associated with this hint. Local thread numbers
/// are indexed from zero. It is up to the scheduler to decide how to
/// interpret thread numbers that are larger than the number of threads
-/// available to the scheduler. Typically thread numbers will wrap
+/// available to the scheduler. Typically, thread numbers will wrap
/// around when too large.
thread = 1,

/// A hint that tells the scheduler to prefer scheduling a task on the
/// NUMA domain associated with this hint. NUMA domains are indexed from
/// zero. It is up to the scheduler to decide how to interpret NUMA
/// domain indices that are larger than the number of available NUMA
-/// domains to the scheduler. Typically indices will wrap around when
+/// domains to the scheduler. Typically, indices will wrap around when
/// too large.
numa = 2,
};
@@ -295,7 +295,7 @@ namespace hpx::threads {
}

///////////////////////////////////////////////////////////////////////////
-/// \enum thread_placement_hint
+/// \enum thread_execution_hint
///
/// The type of hint given to the scheduler related to running a thread as a
/// child directly in the context of the parent thread
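The hints documented above are consumed through hpx::threads::thread_schedule_hint. A sketch of how such hints are typically constructed; the constructor shape and the umbrella include are assumptions made to keep the example short, so verify them against the headers on this branch:

    #include <hpx/hpx.hpp>    // umbrella header, used here for brevity

    void make_hints()
    {
        using hpx::threads::thread_schedule_hint;
        using hpx::threads::thread_schedule_hint_mode;

        // Prefer worker thread 2; as documented above, an out-of-range
        // index will typically wrap around.
        thread_schedule_hint const on_thread(
            thread_schedule_hint_mode::thread, 2);

        // Prefer NUMA domain 0.
        thread_schedule_hint const on_numa(
            thread_schedule_hint_mode::numa, 0);

        (void) on_thread;
        (void) on_numa;
    }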