Adding hierarchical operation to index_queue spawning #6318

Open · wants to merge 3 commits into master
44 changes: 26 additions & 18 deletions libs/core/algorithms/tests/performance/foreach_scaling.cpp
@@ -35,7 +35,7 @@ std::uint64_t averageout_plain_for(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -52,7 +52,7 @@ std::uint64_t averageout_plain_for_iter(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -72,7 +72,7 @@ std::uint64_t averageout_parallel_foreach(
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -92,7 +92,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)

if (num_overlapping_loops <= 0)
{
-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
measure_task_foreach(data_representation, exec).wait();
@@ -103,7 +103,7 @@ std::uint64_t averageout_task_foreach(std::size_t vector_size, Executor&& exec)
std::vector<hpx::shared_future<void>> tests;
tests.resize(num_overlapping_loops);

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
{
@@ -124,7 +124,7 @@ std::uint64_t averageout_sequential_foreach(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -142,7 +142,7 @@ std::uint64_t averageout_parallel_forloop(
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -167,7 +167,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)

if (num_overlapping_loops <= 0)
{
-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
measure_task_forloop(data_representation, exec).wait();
@@ -178,7 +178,7 @@ std::uint64_t averageout_task_forloop(std::size_t vector_size, Executor&& exec)
std::vector<hpx::shared_future<void>> tests;
tests.resize(num_overlapping_loops);

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

for (auto i = 0; i < test_count; i++)
{
@@ -199,7 +199,7 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
std::iota(
std::begin(data_representation), std::end(data_representation), gen());

-std::uint64_t start = hpx::chrono::high_resolution_clock::now();
+std::uint64_t const start = hpx::chrono::high_resolution_clock::now();

// average out 100 executions to avoid varying results
for (auto i = 0; i < test_count; i++)
@@ -212,8 +212,8 @@ std::uint64_t averageout_sequential_forloop(std::size_t vector_size)
int hpx_main(hpx::program_options::variables_map& vm)
{
// pull values from cmd
-std::size_t vector_size = vm["vector_size"].as<std::size_t>();
-bool csvoutput = vm.count("csv_output") != 0;
+std::size_t const vector_size = vm["vector_size"].as<std::size_t>();
+bool const csvoutput = vm.count("csv_output") != 0;
delay = vm["work_delay"].as<int>();
test_count = vm["test_count"].as<int>();
chunk_size = vm["chunk_size"].as<int>();
@@ -264,8 +264,8 @@ int hpx_main(hpx::program_options::variables_map& vm)
std::uint64_t task_time_forloop = 0;
std::uint64_t seq_time_forloop = 0;

-std::uint64_t plain_time_for = averageout_plain_for(vector_size);
-std::uint64_t plain_time_for_iter =
+std::uint64_t const plain_time_for = averageout_plain_for(vector_size);
+std::uint64_t const plain_time_for_iter =
averageout_plain_for_iter(vector_size);

if (vm["executor"].as<std::string>() == "forkjoin")
Expand Down Expand Up @@ -467,11 +467,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
<< std::left
<< "Parallel Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_foreach) / par_time_foreach) << "\n"
+<< (static_cast<double>(seq_time_foreach) /
+       par_time_foreach)
+<< "\n"
<< std::left
<< "Task Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_foreach) / task_time_foreach) << "\n"
+<< (static_cast<double>(seq_time_foreach) /
+       task_time_foreach)
+<< "\n"
<< std::flush;

std::cout << "-------------Average-(for_loop)----------------\n"
@@ -490,11 +494,15 @@ int hpx_main(hpx::program_options::variables_map& vm)
<< std::left
<< "Parallel Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_forloop) / par_time_forloop) << "\n"
+<< (static_cast<double>(seq_time_forloop) /
+       par_time_forloop)
+<< "\n"
<< std::left
<< "Task Scale : " << std::right
<< std::setw(8)
-<< (double(seq_time_forloop) / task_time_forloop) << "\n";
+<< (static_cast<double>(seq_time_forloop) /
+       task_time_forloop)
+<< "\n";
}
}

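The recurring edit in this file const-qualifies the timing variables and replaces C-style double(...) conversions with static_cast<double>. Converting one operand before the division is what forces floating-point arithmetic on the integer counters; static_cast just states that intent explicitly. A minimal standalone sketch, with made-up timing values rather than real benchmark output:

    #include <cstdint>
    #include <iostream>

    int main()
    {
        std::uint64_t const seq_time = 950;    // hypothetical time in ns
        std::uint64_t const par_time = 400;    // hypothetical time in ns

        // Plain integer division truncates the scale factor: 950/400 == 2.
        std::cout << (seq_time / par_time) << "\n";

        // Converting one operand first keeps the fraction: 2.375.
        std::cout << (static_cast<double>(seq_time) / par_time) << "\n";
    }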
@@ -715,7 +715,7 @@ void test_sorted_until3_seq()
std::iota(std::begin(c1), std::end(c1), 0);
std::iota(std::begin(c2), std::end(c2), 0);

-auto until1 =
+auto const until1 =
hpx::ranges::is_sorted_until(c1, std::less<int>(), [&](int x) {
if (x == 0)
{
@@ -730,7 +730,7 @@ void test_sorted_until3_seq()
return x;
}
});
-auto until2 =
+auto const until2 =
hpx::ranges::is_sorted_until(c2, std::less<int>(), [&](int x) {
if (x == static_cast<int>(c2.size()) / 3 ||
x == 2 * static_cast<int>(c2.size()) / 3)
@@ -743,8 +743,8 @@
}
});

-auto test_index1 = std::begin(c1) + 1;
-auto test_index2 = std::begin(c2) + c2.size() / 3;
+auto const test_index1 = std::begin(c1) + 1;
+auto const test_index2 = std::begin(c2) + c2.size() / 3;

HPX_TEST(until1 == test_index1);
HPX_TEST(until2 == test_index2);
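For context: is_sorted_until returns the iterator just past the longest sorted prefix, and this test uses the projection argument to inject out-of-order values at known positions. A self-contained sketch of the same idea using std::ranges::is_sorted_until, which the hpx::ranges overload tested above mirrors (the spike position here is made up for illustration):

    #include <algorithm>
    #include <cassert>
    #include <functional>
    #include <numeric>
    #include <vector>

    int main()
    {
        std::vector<int> c(100);
        std::iota(c.begin(), c.end(), 0);    // 0, 1, ..., 99: fully sorted

        // The projection reports element 42 as 1000, so the sequence
        // appears to drop from 1000 back to 43 right after it.
        auto const until = std::ranges::is_sorted_until(
            c, std::less<int>(), [](int x) { return x == 42 ? 1000 : x; });

        // The longest sorted prefix ends just after the spike at index 42.
        assert(until == c.begin() + 43);
    }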
@@ -32,7 +32,7 @@ namespace hpx::compute::host {
{
}

-explicit native_handle_type(hpx::threads::mask_type mask)
+explicit native_handle_type(hpx::threads::mask_type const& mask)
: mask_(mask)
{
}
@@ -56,7 +56,7 @@ namespace hpx::compute::host {
target() = default;

// Constructs target from a given mask of processing units
-explicit target(hpx::threads::mask_type mask)
+explicit target(hpx::threads::mask_type const& mask)
: handle_(mask)
{
}
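Taking the mask by const& avoids copying it at every call site. Whether that matters depends on the build: hpx::threads::mask_type can be configured as a plain integer or as a much wider bitset type, so the copy is not necessarily cheap. A hypothetical stand-in showing the shape of the change (the bitset width here is an assumption, not HPX's actual type):

    #include <bitset>

    // Stand-in for hpx::threads::mask_type; in HPX the real type depends
    // on the configured maximum CPU count.
    using mask_type = std::bitset<1024>;

    struct native_handle_type
    {
        // const& avoids one copy when the caller already owns a mask; the
        // member initialization still copies once, which is unavoidable.
        explicit native_handle_type(mask_type const& mask)
          : mask_(mask)
        {
        }

        mask_type mask_;
    };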
12 changes: 11 additions & 1 deletion libs/core/compute_local/src/host_target.cpp
@@ -27,16 +27,26 @@ namespace hpx::compute::host {
hpx::threads::mask_type const mask = native_handle().get_device();
std::size_t const mask_size = hpx::threads::mask_size(mask);

+bool found_one = false;
+
std::size_t num_thread = 0;
for (/**/; num_thread != num_os_threads; ++num_thread)
{
if (hpx::threads::bit_and(
mask, rp.get_pu_mask(num_thread), mask_size))
{
+found_one = true;
break;
}
}
-return std::make_pair(num_thread, hpx::threads::count(mask));
+
+if (!found_one)
+{
+    return std::make_pair(static_cast<std::size_t>(-1), 0);
+}
+
+return std::make_pair(
+    num_thread, (std::min)(num_os_threads, hpx::threads::count(mask)));
}

void target::serialize(serialization::input_archive& ar, unsigned int)
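This hunk fixes a silent fall-through: previously, when no OS thread's PU mask overlapped the target's mask, the loop ran to completion and the past-the-end num_thread was returned as if it were a valid index; the reported count could also exceed the number of OS threads actually running. Callers can now detect the failure explicitly. A hypothetical caller-side check (the function and variable names are illustrative, not from the patch):

    #include <cstddef>
    #include <utility>

    void handle(std::pair<std::size_t, std::size_t> const& pus)
    {
        // The patched code returns {size_t(-1), 0} when the target's mask
        // matches none of the runtime's processing units.
        if (pus.first == static_cast<std::size_t>(-1))
        {
            // No usable PU: fall back to a default or report an error.
            return;
        }

        // pus.first is the first matching OS thread; pus.second is the PU
        // count, now clamped to the number of OS threads.
    }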
@@ -206,15 +206,15 @@ namespace hpx::threads {
/// local thread number associated with this hint. Local thread numbers
/// are indexed from zero. It is up to the scheduler to decide how to
/// interpret thread numbers that are larger than the number of threads
-/// available to the scheduler. Typically thread numbers will wrap
+/// available to the scheduler. Typically, thread numbers will wrap
/// around when too large.
thread = 1,

/// A hint that tells the scheduler to prefer scheduling a task on the
/// NUMA domain associated with this hint. NUMA domains are indexed from
/// zero. It is up to the scheduler to decide how to interpret NUMA
/// domain indices that are larger than the number of available NUMA
-/// domains to the scheduler. Typically indices will wrap around when
+/// domains to the scheduler. Typically, indices will wrap around when
/// too large.
numa = 2,
};
@@ -295,7 +295,7 @@ namespace hpx::threads {
}

///////////////////////////////////////////////////////////////////////////
-/// \enum thread_placement_hint
+/// \enum thread_execution_hint
///
/// The type of hint given to the scheduler related to running a thread as a
/// child directly in the context of the parent thread
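The hints documented above are consumed through hpx::threads::thread_schedule_hint. A sketch of how such hints are typically constructed; the constructor shape and the umbrella include are assumptions made to keep the example short, so verify them against the headers on this branch:

    #include <hpx/hpx.hpp>    // umbrella header, used here for brevity

    void make_hints()
    {
        using hpx::threads::thread_schedule_hint;
        using hpx::threads::thread_schedule_hint_mode;

        // Prefer worker thread 2; as documented above, an out-of-range
        // index will typically wrap around.
        thread_schedule_hint const on_thread(
            thread_schedule_hint_mode::thread, 2);

        // Prefer NUMA domain 0.
        thread_schedule_hint const on_numa(
            thread_schedule_hint_mode::numa, 0);

        (void) on_thread;
        (void) on_numa;
    }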