From 9e270714c3b6240340a16f40a2adfd99f61c5ccd Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Fri, 3 Jan 2025 08:38:16 -0600 Subject: [PATCH 1/3] Hotfix for reduce-by-segment when key types are different For differing key types, we cannot store an input key into a variable that is the output key type. This is important for discard iterators in the output where we are met with a compile-time error if we attempt to do so. Signed-off-by: Matthew Michel --- .../oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h index c0568a312ca..6c972b48295 100644 --- a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h +++ b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h @@ -939,12 +939,10 @@ struct __write_red_by_seg using std::get; auto __out_keys = get<0>(__out_rng.tuple()); auto __out_values = get<1>(__out_rng.tuple()); - using _KeyType = oneapi::dpl::__internal::__value_t; - using _ValType = oneapi::dpl::__internal::__value_t; - const _KeyType& __next_key = get<2>(__tup); - const _KeyType& __current_key = get<3>(__tup); - const _ValType& __current_value = get<1>(get<0>(__tup)); + const auto& __next_key = get<2>(__tup); + const auto& __current_key = get<3>(__tup); + const auto& __current_value = get<1>(get<0>(__tup)); const bool __is_seg_end = get<1>(__tup); const std::size_t __out_idx = get<0>(get<0>(__tup)); From 9132e965cb6a3921db830b16fb8a39d336d10af5 Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Fri, 3 Jan 2025 10:19:13 -0600 Subject: [PATCH 2/3] Add test case with discard iterator Signed-off-by: Matthew Michel --- .../reduce_by_segment_zip.pass.cpp | 61 +++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp b/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp index dba44019d00..587570113cd 100644 --- a/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp +++ b/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp @@ -108,6 +108,65 @@ test_with_usm() EXPECT_EQ_N(exp_values1, output_values1, n, "wrong values1 from reduce_by_segment"); EXPECT_EQ_N(exp_values2, output_values2, n, "wrong values2 from reduce_by_segment"); } + +template +void +test_zip_with_discard() +{ + constexpr sycl::usm::alloc alloc_type = sycl::usm::alloc::device; + sycl::queue q = TestUtils::get_test_queue(); + + constexpr int n = 5; + + //data initialization + int keys1[n] = {1, 1, 2, 2, 3}; + int keys2[n] = {1, 1, 2, 2, 3}; + int values1[n] = {1, 1, 1, 1, 1}; + int values2[n] = {2, 2, 2, 2, 2}; + int output_keys[n] = {}; + int output_values1[n] = {}; + int output_values2[n] = {}; + + // allocate USM memory and copying data to USM shared/device memory + TestUtils::usm_data_transfer dt_helper1(q, keys1, n); + TestUtils::usm_data_transfer dt_helper2(q, keys2, n); + TestUtils::usm_data_transfer dt_helper3(q, values1, n); + TestUtils::usm_data_transfer dt_helper4(q, values2, n); + TestUtils::usm_data_transfer dt_helper5(q, output_keys, n); + TestUtils::usm_data_transfer dt_helper6(q, output_values1, n); + TestUtils::usm_data_transfer dt_helper7(q, output_values2, n); + auto d_keys1 = dt_helper1.get_data(); + auto d_keys2 = dt_helper2.get_data(); + auto d_values1 = dt_helper3.get_data(); + auto d_values2 = dt_helper4.get_data(); + auto d_output_keys = dt_helper5.get_data(); + auto d_output_values1 = dt_helper6.get_data(); + auto d_output_values2 = dt_helper7.get_data(); + + //make zip iterators + auto begin_keys_in = oneapi::dpl::make_zip_iterator(d_keys1, d_keys2); + auto end_keys_in = oneapi::dpl::make_zip_iterator(d_keys1 + n, d_keys2 + n); + auto begin_vals_in = oneapi::dpl::make_zip_iterator(d_values1, d_values2); + auto begin_keys_out = oneapi::dpl::make_zip_iterator(d_output_keys, oneapi::dpl::discard_iterator()); + auto begin_vals_out = oneapi::dpl::make_zip_iterator(d_output_values1, d_output_values2); + + //run reduce_by_segment algorithm + auto new_last = oneapi::dpl::reduce_by_segment(TestUtils::make_device_policy(q), begin_keys_in, + end_keys_in, begin_vals_in, begin_keys_out, begin_vals_out, + std::equal_to<>(), TestUtils::TupleAddFunctor()); + + //retrieve result on the host and check the result + dt_helper5.retrieve_data(output_keys); + dt_helper6.retrieve_data(output_values1); + dt_helper7.retrieve_data(output_values2); + + const int exp_keys[n] = {1, 2, 3}; + const int exp_values1[n] = {2, 2, 1}; + const int exp_values2[n] = {4, 4, 2}; + EXPECT_EQ_N(exp_keys, output_keys, n, "wrong keys from reduce_by_segment"); + EXPECT_EQ_N(exp_values1, output_values1, n, "wrong values1 from reduce_by_segment"); + EXPECT_EQ_N(exp_values2, output_values2, n, "wrong values2 from reduce_by_segment"); +} #endif //The code below for test a call of reduce_by_segment with zip iterators was kept "as is", as an example reported by a user; just "memory deallocation" added. @@ -118,6 +177,8 @@ int main() test_with_usm(); // Run tests for USM device memory test_with_usm(); + + test_zip_with_discard(); #endif return TestUtils::done(TEST_DPCPP_BACKEND_PRESENT); From dbdfe64a96cb646000aa4e91e257777b9fce345b Mon Sep 17 00:00:00 2001 From: Matthew Michel Date: Fri, 3 Jan 2025 13:10:54 -0600 Subject: [PATCH 3/3] Add a discard iterator for one of the zipped output values Signed-off-by: Matthew Michel --- .../reduce_by_segment_zip.pass.cpp | 20 +++++++------------ 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp b/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp index 587570113cd..3670eb3a910 100644 --- a/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp +++ b/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp @@ -124,8 +124,7 @@ test_zip_with_discard() int values1[n] = {1, 1, 1, 1, 1}; int values2[n] = {2, 2, 2, 2, 2}; int output_keys[n] = {}; - int output_values1[n] = {}; - int output_values2[n] = {}; + int output_values[n] = {}; // allocate USM memory and copying data to USM shared/device memory TestUtils::usm_data_transfer dt_helper1(q, keys1, n); @@ -133,22 +132,20 @@ test_zip_with_discard() TestUtils::usm_data_transfer dt_helper3(q, values1, n); TestUtils::usm_data_transfer dt_helper4(q, values2, n); TestUtils::usm_data_transfer dt_helper5(q, output_keys, n); - TestUtils::usm_data_transfer dt_helper6(q, output_values1, n); - TestUtils::usm_data_transfer dt_helper7(q, output_values2, n); + TestUtils::usm_data_transfer dt_helper6(q, output_values, n); auto d_keys1 = dt_helper1.get_data(); auto d_keys2 = dt_helper2.get_data(); auto d_values1 = dt_helper3.get_data(); auto d_values2 = dt_helper4.get_data(); auto d_output_keys = dt_helper5.get_data(); - auto d_output_values1 = dt_helper6.get_data(); - auto d_output_values2 = dt_helper7.get_data(); + auto d_output_values = dt_helper6.get_data(); //make zip iterators auto begin_keys_in = oneapi::dpl::make_zip_iterator(d_keys1, d_keys2); auto end_keys_in = oneapi::dpl::make_zip_iterator(d_keys1 + n, d_keys2 + n); auto begin_vals_in = oneapi::dpl::make_zip_iterator(d_values1, d_values2); auto begin_keys_out = oneapi::dpl::make_zip_iterator(d_output_keys, oneapi::dpl::discard_iterator()); - auto begin_vals_out = oneapi::dpl::make_zip_iterator(d_output_values1, d_output_values2); + auto begin_vals_out = oneapi::dpl::make_zip_iterator(oneapi::dpl::discard_iterator(), d_output_values); //run reduce_by_segment algorithm auto new_last = oneapi::dpl::reduce_by_segment(TestUtils::make_device_policy(q), begin_keys_in, @@ -157,15 +154,12 @@ test_zip_with_discard() //retrieve result on the host and check the result dt_helper5.retrieve_data(output_keys); - dt_helper6.retrieve_data(output_values1); - dt_helper7.retrieve_data(output_values2); + dt_helper6.retrieve_data(output_values); const int exp_keys[n] = {1, 2, 3}; - const int exp_values1[n] = {2, 2, 1}; - const int exp_values2[n] = {4, 4, 2}; + const int exp_values[n] = {4, 4, 2}; EXPECT_EQ_N(exp_keys, output_keys, n, "wrong keys from reduce_by_segment"); - EXPECT_EQ_N(exp_values1, output_values1, n, "wrong values1 from reduce_by_segment"); - EXPECT_EQ_N(exp_values2, output_values2, n, "wrong values2 from reduce_by_segment"); + EXPECT_EQ_N(exp_values, output_values, n, "wrong values from reduce_by_segment"); } #endif