uxlfoundation · mmichel11 · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025 · Jan 3, 2025
diff --git a/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h b/include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
@@ -939,12 +939,10 @@ struct __write_red_by_seg
         using std::get;
         auto __out_keys = get<0>(__out_rng.tuple());
         auto __out_values = get<1>(__out_rng.tuple());
-        using _KeyType = oneapi::dpl::__internal::__value_t<decltype(__out_keys)>;
-        using _ValType = oneapi::dpl::__internal::__value_t<decltype(__out_values)>;
 
-        const _KeyType& __next_key = get<2>(__tup);
-        const _KeyType& __current_key = get<3>(__tup);
-        const _ValType& __current_value = get<1>(get<0>(__tup));
+        const auto& __next_key = get<2>(__tup);
+        const auto& __current_key = get<3>(__tup);
+        const auto& __current_value = get<1>(get<0>(__tup));
         const bool __is_seg_end = get<1>(__tup);
         const std::size_t __out_idx = get<0>(get<0>(__tup));
 

diff --git a/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp b/test/parallel_api/numeric/numeric.ops/reduce_by_segment_zip.pass.cpp
@@ -108,6 +108,65 @@ test_with_usm()
     EXPECT_EQ_N(exp_values1, output_values1, n, "wrong values1 from reduce_by_segment");
     EXPECT_EQ_N(exp_values2, output_values2, n, "wrong values2 from reduce_by_segment");
 }
+
+template <typename KernelName>
+void
+test_zip_with_discard()
+{
+    constexpr sycl::usm::alloc alloc_type = sycl::usm::alloc::device;
+    sycl::queue q = TestUtils::get_test_queue();
+
+    constexpr int n = 5;
+
+    //data initialization
+    int keys1[n] = {1, 1, 2, 2, 3};
+    int keys2[n] = {1, 1, 2, 2, 3};
+    int values1[n] = {1, 1, 1, 1, 1};
+    int values2[n] = {2, 2, 2, 2, 2};
+    int output_keys[n] = {};
+    int output_values1[n] = {};
+    int output_values2[n] = {};
+
+    // allocate USM memory and copying data to USM shared/device memory
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper1(q, keys1, n);
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper2(q, keys2, n);
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper3(q, values1, n);
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper4(q, values2, n);
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper5(q, output_keys, n);
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper6(q, output_values1, n);
+    TestUtils::usm_data_transfer<alloc_type, int> dt_helper7(q, output_values2, n);
+    auto d_keys1 = dt_helper1.get_data();
+    auto d_keys2 = dt_helper2.get_data();
+    auto d_values1 = dt_helper3.get_data();
+    auto d_values2 = dt_helper4.get_data();
+    auto d_output_keys = dt_helper5.get_data();
+    auto d_output_values1 = dt_helper6.get_data();
+    auto d_output_values2 = dt_helper7.get_data();
+
+    //make zip iterators
+    auto begin_keys_in = oneapi::dpl::make_zip_iterator(d_keys1, d_keys2);
+    auto end_keys_in = oneapi::dpl::make_zip_iterator(d_keys1 + n, d_keys2 + n);
+    auto begin_vals_in = oneapi::dpl::make_zip_iterator(d_values1, d_values2);
+    auto begin_keys_out = oneapi::dpl::make_zip_iterator(d_output_keys, oneapi::dpl::discard_iterator());
+    auto begin_vals_out = oneapi::dpl::make_zip_iterator(d_output_values1, d_output_values2);
+
+    //run reduce_by_segment algorithm
+    auto new_last = oneapi::dpl::reduce_by_segment(TestUtils::make_device_policy<KernelName>(q), begin_keys_in,
+                                                   end_keys_in, begin_vals_in, begin_keys_out, begin_vals_out,
+                                                   std::equal_to<>(), TestUtils::TupleAddFunctor());
+
+    //retrieve result on the host and check the result
+    dt_helper5.retrieve_data(output_keys);
+    dt_helper6.retrieve_data(output_values1);
+    dt_helper7.retrieve_data(output_values2);
+
+    const int exp_keys[n] = {1, 2, 3};
+    const int exp_values1[n] = {2, 2, 1};
+    const int exp_values2[n] = {4, 4, 2};
+    EXPECT_EQ_N(exp_keys, output_keys, n, "wrong keys from reduce_by_segment");
+    EXPECT_EQ_N(exp_values1, output_values1, n, "wrong values1 from reduce_by_segment");
+    EXPECT_EQ_N(exp_values2, output_values2, n, "wrong values2 from reduce_by_segment");
+}
 #endif
 
 //The code below for test a call of reduce_by_segment with zip iterators was kept "as is", as an example reported by a user; just "memory deallocation" added.
@@ -118,6 +177,8 @@ int main()
     test_with_usm<sycl::usm::alloc::shared, class KernelName1>();
     // Run tests for USM device memory
     test_with_usm<sycl::usm::alloc::device, class KernelName2>();
+
+    test_zip_with_discard<class KernelName3>();
 #endif
 
     return TestUtils::done(TEST_DPCPP_BACKEND_PRESENT);