Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Simplification of capture mode usages in submit / parallel_for calls #1959

Draft
This draft pull request wants to merge 9 commits into the base branch `main` from the author's source branch.
Draft
4 changes: 2 additions & 2 deletions include/oneapi/dpl/pstl/hetero/dpcpp/parallel_backend_sycl.h
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ struct __parallel_for_submitter<__internal::__optional_kernel_name<_Name...>>
{
assert(oneapi::dpl::__ranges::__get_first_range_size(__rngs...) > 0);
_PRINT_INFO_IN_DEBUG_MODE(__exec);
auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) {
auto __event = __exec.queue().submit([&](sycl::handler& __cgh) {
//get an access to data under SYCL buffer:
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);

Expand Down Expand Up @@ -2001,7 +2001,7 @@ struct __parallel_partial_sort_submitter<__internal::__optional_kernel_name<_Glo
sycl::event __event1;
do
{
__event1 = __exec.queue().submit([&, __data_in_temp, __k](sycl::handler& __cgh) {
__event1 = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__event1);
oneapi::dpl::__ranges::__require_access(__cgh, __rng);
auto __temp_acc = __temp.template get_access<access_mode::read_write>(__cgh);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ struct __parallel_for_fpga_submitter<__internal::__optional_kernel_name<_Name...
assert(__n > 0);

_PRINT_INFO_IN_DEBUG_MODE(__exec);
auto __event = __exec.queue().submit([&__rngs..., &__brick, __count](sycl::handler& __cgh) {
auto __event = __exec.queue().submit([&](sycl::handler& __cgh) {
//get an access to data under SYCL buffer:
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,7 @@ struct __merge_sort_global_submitter<_IndexT, __internal::__optional_kernel_name
const std::int64_t __n_iter = sycl::ctz(__n_power2) - sycl::ctz(__leaf_size);
for (std::int64_t __i = 0; __i < __n_iter; ++__i)
{
__event_chain = __q.submit([&, __event_chain, __n_sorted, __data_in_temp](sycl::handler& __cgh) {
__event_chain = __q.submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__event_chain);

oneapi::dpl::__ranges::__require_access(__cgh, __rng);
Expand Down Expand Up @@ -303,7 +303,7 @@ struct __merge_sort_copy_back_submitter<__internal::__optional_kernel_name<_Copy
sycl::event
operator()(sycl::queue& __q, _Range& __rng, _TempBuf& __temp_buf, sycl::event __event_chain) const
{
__event_chain = __q.submit([&, __event_chain](sycl::handler& __cgh) {
__event_chain = __q.submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__event_chain);
oneapi::dpl::__ranges::__require_access(__cgh, __rng);
auto __temp_acc = __temp_buf.template get_access<access_mode::read>(__cgh);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ struct __parallel_transform_reduce_small_submitter<_Tp, _Commutative, _VecSize,
using __result_and_scratch_storage_t = __result_and_scratch_storage<_ExecutionPolicy, _Tp>;
__result_and_scratch_storage_t __scratch_container{__exec, 1, 0};

sycl::event __reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) {
sycl::event __reduce_event = __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer
auto __res_acc =
__scratch_container.template __get_result_acc<sycl::access_mode::write>(__cgh, __dpl_sycl::__no_init{});
Expand Down Expand Up @@ -205,7 +205,7 @@ struct __parallel_transform_reduce_device_kernel_submitter<_Tp, _Commutative, _V
const _Size __n_groups = oneapi::dpl::__internal::__dpl_ceiling_div(__n, __size_per_work_group);
const bool __is_full = __n == __size_per_work_group * __n_groups;

return __exec.queue().submit([&, __n](sycl::handler& __cgh) {
return __exec.queue().submit([&](sycl::handler& __cgh) {
oneapi::dpl::__ranges::__require_access(__cgh, __rngs...); // get an access to data under SYCL buffer
std::size_t __local_mem_size = __reduce_pattern.local_mem_req(__work_group_size);
__dpl_sycl::__local_accessor<_Tp> __temp_local(sycl::range<1>(__local_mem_size), __cgh);
Expand Down Expand Up @@ -252,7 +252,7 @@ struct __parallel_transform_reduce_work_group_kernel_submitter<_Tp, _Commutative

using __result_and_scratch_storage_t = __result_and_scratch_storage<_ExecutionPolicy2, _Tp>;

__reduce_event = __exec.queue().submit([&, __n](sycl::handler& __cgh) {
__reduce_event = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__reduce_event);

auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::read>(__cgh);
Expand Down Expand Up @@ -358,8 +358,7 @@ struct __parallel_transform_reduce_impl
sycl::event __reduce_event;
do
{
__reduce_event = __exec.queue().submit([&, __is_first, __offset_1, __offset_2, __n,
__n_groups](sycl::handler& __cgh) {
__reduce_event = __exec.queue().submit([&](sycl::handler& __cgh) {
__cgh.depends_on(__reduce_event);
auto __temp_acc = __scratch_container.template __get_scratch_acc<sycl::access_mode::read_write>(
__cgh, __is_first ? sycl::property_list{__dpl_sycl::__no_init{}} : sycl::property_list{});
Expand Down
Loading