Skip to content

Commit

Permalink
Use full specialization of returned __future instance and move (inste…
Browse files Browse the repository at this point in the history
…ad copy) data into it

Signed-off-by: Sergey Kopienko <[email protected]>
  • Loading branch information
SergeyKopienko committed Sep 25, 2024
1 parent e3f700e commit 5ba1be4
Show file tree
Hide file tree
Showing 8 changed files with 31 additions and 22 deletions.
18 changes: 12 additions & 6 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>>
__brick(__idx, __rngs...);
});
});
return __future(__event);
return __future<sycl::event>(std::move(__event));
}
};

Expand Down Expand Up @@ -372,7 +372,8 @@ struct __parallel_scan_submitter<_CustomName, __internal::__optional_kernel_name
});
});

return __future(__final_event, __result_and_scratch);
return __future<sycl::event, __result_and_scratch_storage<_ExecutionPolicy, typename _InitType::__value_type>>{
std::move(__final_event), std::move(__result_and_scratch)};
}
};

Expand Down Expand Up @@ -644,7 +645,8 @@ struct __parallel_copy_if_static_single_group_submitter<_Size, _ElemsPerItem, _W
}
});
});
return __future(__event, __result);
return __future<sycl::event, __result_and_scratch_storage<_Policy, _Size>>{std::move(__event),
std::move(__result)};
}
};

Expand Down Expand Up @@ -700,7 +702,10 @@ __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend
/* _IsFullGroup= */ ::std::false_type, _Inclusive, _CustomName>>>()(
::std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
std::forward<_OutRng>(__out_rng), __n, __init, __binary_op, __unary_op);
return __future(__event, __dummy_result_and_scratch);

return __future<sycl::event,
__result_and_scratch_storage<_ExecutionPolicy, typename _InitType::__value_type>>{
std::move(__event), std::move(__dummy_result_and_scratch)};
};
if (__n <= 16)
return __single_group_scan_f(std::integral_constant<::std::uint16_t, 16>{});
Expand Down Expand Up @@ -734,7 +739,8 @@ __parallel_transform_scan_single_group(oneapi::dpl::__internal::__device_backend
__parallel_transform_scan_dynamic_single_group_submitter<_Inclusive::value, _DynamicGroupScanKernel>()(
std::forward<_ExecutionPolicy>(__exec), std::forward<_InRng>(__in_rng),
std::forward<_OutRng>(__out_rng), __n, __init, __binary_op, __unary_op, __max_wg_size);
return __future(__event, __dummy_result_and_scratch);
return __future<sycl::event, __result_and_scratch_storage<_ExecutionPolicy, typename _InitType::__value_type>>{
std::move(__event), std::move(__dummy_result_and_scratch)};
}
}

Expand Down Expand Up @@ -1866,7 +1872,7 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo
});
}
// return future and extend lifetime of temporary buffer
return __future(__event1);
return __future<sycl::event>(std::move(__event1));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name...
}
});
});
return __future(__event);
return __future<sycl::event>(std::move(__event));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -516,19 +516,19 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag
// if bins fit into registers, use register private accumulation
if (__num_bins <= __max_work_item_private_bins)
{
return __future(
return __future<sycl::event>(
__histogram_general_registers_local_reduction<__iters_per_work_item, __max_work_item_private_bins>(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size,
::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
__backend_tag, std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size,
std::forward<_Range1>(__input), std::forward<_Range2>(__bins), __binhash_manager));
}
// if bins fit into SLM, use local atomics
else if (__num_bins * sizeof(_local_histogram_type) +
__binhash_manager.get_required_SLM_elements() * sizeof(_extra_memory_type) <
__local_mem_size)
{
return __future(__histogram_general_local_atomics<__iters_per_work_item>(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size,
::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
return __future<sycl::event>(__histogram_general_local_atomics<__iters_per_work_item>(
__backend_tag, std::forward<_ExecutionPolicy>(__exec), __init_event, __work_group_size,
std::forward<_Range1>(__input), std::forward<_Range2>(__bins), __binhash_manager));
}
else // otherwise, use global atomics (private copies per workgroup)
{
Expand All @@ -537,9 +537,9 @@ __parallel_histogram_select_kernel(oneapi::dpl::__internal::__device_backend_tag
// suggestion which but global memory limitations may increase this value to be able to fit the workgroup
// private copies of the histogram bins in global memory. No unrolling is taken advantage of here because it
// is a runtime argument.
return __future(__histogram_general_private_global_atomics(
__backend_tag, ::std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item,
__work_group_size, ::std::forward<_Range1>(__input), ::std::forward<_Range2>(__bins), __binhash_manager));
return __future<sycl::event>(__histogram_general_private_global_atomics(
__backend_tag, std::forward<_ExecutionPolicy>(__exec), __init_event, __iters_per_work_item,
__work_group_size, std::forward<_Range1>(__input), std::forward<_Range2>(__bins), __binhash_manager));
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ struct __parallel_merge_submitter<_IdType, __internal::__optional_kernel_name<_N
__comp);
});
});
return __future(__event);
return __future<sycl::event>(std::move(__event));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -303,7 +303,7 @@ struct __parallel_sort_submitter<_IdType, __internal::__optional_kernel_name<_Le
});
}

return __future(__event1);
return __future<sycl::event>(std::move(__event1));
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -866,7 +866,7 @@ __parallel_radix_sort(oneapi::dpl::__internal::__device_backend_tag, _ExecutionP
}
}

return __future(__event);
return __future<sycl::event>(std::move(__event));
}

} // namespace __par_backend_hetero
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize,
});
});

return __future(__reduce_event, __scratch_container);
return __future<sycl::event, __result_and_scratch_storage<_ExecutionPolicy, _Tp>>{
std::move(__reduce_event), std::move(__scratch_container)};
}
}; // struct __parallel_transform_reduce_small_submitter

Expand Down Expand Up @@ -418,7 +419,8 @@ struct __parallel_transform_reduce_impl
__n_groups = oneapi::dpl::__internal::__dpl_ceiling_div(__n, __size_per_work_group);
} while (__n > 1);

return __future(__reduce_event, __scratch_container);
return __future<sycl::event, __result_and_scratch_storage<_ExecutionPolicy, _Tp>>{
std::move(__reduce_event), std::move(__scratch_container)};
}
}; // struct __parallel_transform_reduce_impl

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -863,7 +863,8 @@ __parallel_transform_reduce_then_scan(oneapi::dpl::__internal::__device_backend_
__inputs_per_item = __inputs_per_sub_group / __sub_group_size;
}
}
return __future(__event, __result_and_scratch);
return __future<sycl::event, __result_and_scratch_storage<_ExecutionPolicy, typename _InitType::__value_type>>{
std::move(__event), std::move(__result_and_scratch)};
}

} // namespace __par_backend_hetero
Expand Down

0 comments on commit 5ba1be4

Please sign in to comment.