use ::sycl for anything from sycl

ginkgo-project · Aug 22, 2023 · 17302d4 · 17302d4
1 parent c325a59
commit 17302d4
Show file tree

Hide file tree

Showing 52 changed files with 985 additions and 936 deletions.
diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp
@@ -74,30 +74,29 @@ class SyclTimer : public Timer {
     {
         exec_->synchronize();
         // Currently, gko::SyclExecutor always use default stream.
-        start_ = exec_->get_queue()->submit([&](sycl::handler& cgh) {
-            cgh.parallel_for(1, [=](sycl::id<1> id) {});
+        start_ = exec_->get_queue()->submit([&](::sycl::handler& cgh) {
+            cgh.parallel_for(1, [=](::sycl::id<1> id) {});
         });
     }
 
     double toc_impl() override
     {
-        auto stop = exec_->get_queue()->submit([&](sycl::handler& cgh) {
-            cgh.parallel_for(1, [=](sycl::id<1> id) {});
+        auto stop = exec_->get_queue()->submit([&](::sycl::handler& cgh) {
+            cgh.parallel_for(1, [=](::sycl::id<1> id) {});
         });
         stop.wait_and_throw();
         // get the start time of stop
         auto stop_time = stop.get_profiling_info<
-            sycl::info::event_profiling::command_start>();
+            ::sycl::info::event_profiling::command_start>();
         // get the end time of start
-        auto start_time =
-            start_
-                .get_profiling_info<sycl::info::event_profiling::command_end>();
+        auto start_time = start_.get_profiling_info<
+            ::sycl::info::event_profiling::command_end>();
         return (stop_time - start_time) / double{1.0e9};
     }
 
 private:
     std::shared_ptr<const gko::SyclExecutor> exec_;
-    sycl::event start_;
+    ::sycl::event start_;
     int id_;
 };
 

diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp
@@ -149,7 +149,7 @@ namespace kernels {
 namespace sycl {
 
 
-void destroy_event(sycl::event* event) GKO_NOT_COMPILED(sycl);
+void destroy_event(::sycl::event* event) GKO_NOT_COMPILED(sycl);
 
 
 }  // namespace sycl

diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp
@@ -79,16 +79,16 @@ void scale(std::shared_ptr<const DefaultExecutor> exec,
     const auto num_batches = x_ub.num_batch_items;
     auto device = exec->get_queue()->get_device();
     auto group_size =
-        device.get_info<sycl::info::device::max_work_group_size>();
+        device.get_info<::sycl::info::device::max_work_group_size>();
 
     const dim3 block(group_size);
     const dim3 grid(num_batches);
 
     // Launch a kernel that has nbatches blocks, each block has max group size
     if (alpha->get_common_size()[1] == 1) {
-        exec->get_queue()->submit([&](sycl::handler& cgh) {
+        exec->get_queue()->submit([&](::sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) {
+                sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) {
                     auto group = item_ct1.get_group();
                     auto group_id = group.get_group_linear_id();
                     const auto alpha_b =
@@ -99,9 +99,9 @@ void scale(std::shared_ptr<const DefaultExecutor> exec,
                 });
         });
     } else {
-        exec->get_queue()->submit([&](sycl::handler& cgh) {
+        exec->get_queue()->submit([&](::sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) {
+                sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) {
                     auto group = item_ct1.get_group();
                     auto group_id = group.get_group_linear_id();
                     const auto alpha_b =
@@ -130,17 +130,17 @@ void add_scaled(std::shared_ptr<const DefaultExecutor> exec,
     const auto num_batches = x->get_num_batch_items();
     auto device = exec->get_queue()->get_device();
     auto group_size =
-        device.get_info<sycl::info::device::max_work_group_size>();
+        device.get_info<::sycl::info::device::max_work_group_size>();
 
     const dim3 block(group_size);
     const dim3 grid(num_batches);
     const auto alpha_ub = get_batch_struct(alpha);
     const auto x_ub = get_batch_struct(x);
     const auto y_ub = get_batch_struct(y);
     if (alpha->get_common_size()[1] == 1) {
-        exec->get_queue()->submit([&](sycl::handler& cgh) {
+        exec->get_queue()->submit([&](::sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) {
+                sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) {
                     auto group = item_ct1.get_group();
                     auto group_id = group.get_group_linear_id();
                     const auto alpha_b =
@@ -152,9 +152,9 @@ void add_scaled(std::shared_ptr<const DefaultExecutor> exec,
                 });
         });
     } else {
-        exec->get_queue()->submit([&](sycl::handler& cgh) {
+        exec->get_queue()->submit([&](::sycl::handler& cgh) {
             cgh.parallel_for(
-                sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) {
+                sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) {
                     auto group = item_ct1.get_group();
                     auto group_id = group.get_group_linear_id();
                     const auto alpha_b =
@@ -185,16 +185,16 @@ void compute_dot(std::shared_ptr<const DefaultExecutor> exec,
     const auto num_batches = x_ub.num_batch_items;
     auto device = exec->get_queue()->get_device();
     auto group_size =
-        device.get_info<sycl::info::device::max_work_group_size>();
+        device.get_info<::sycl::info::device::max_work_group_size>();
 
     const dim3 block(group_size);
     const dim3 grid(num_batches);
 
     // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group
-    exec->get_queue()->submit([&](sycl::handler& cgh) {
+    exec->get_queue()->submit([&](::sycl::handler& cgh) {
         cgh.parallel_for(
             sycl_nd_range(grid, block),
-            [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
+            [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
                 config::warp_size)]] {
                 auto group = item_ct1.get_group();
                 auto group_id = group.get_group_linear_id();
@@ -224,15 +224,15 @@ void compute_conj_dot(std::shared_ptr<const DefaultExecutor> exec,
     const auto num_batches = x_ub.num_batch_items;
     auto device = exec->get_queue()->get_device();
     auto group_size =
-        device.get_info<sycl::info::device::max_work_group_size>();
+        device.get_info<::sycl::info::device::max_work_group_size>();
 
     const dim3 block(group_size);
     const dim3 grid(num_batches);
 
-    exec->get_queue()->submit([&](sycl::handler& cgh) {
+    exec->get_queue()->submit([&](::sycl::handler& cgh) {
         cgh.parallel_for(
             sycl_nd_range(grid, block),
-            [=](sycl::nd_item<3> item_ct1)
+            [=](::sycl::nd_item<3> item_ct1)
                 [[sycl::reqd_sub_group_size(config::warp_size)]] {
                     auto group = item_ct1.get_group();
                     auto group_id = group.get_group_linear_id();
@@ -262,14 +262,14 @@ void compute_norm2(std::shared_ptr<const DefaultExecutor> exec,
     const auto num_batches = x_ub.num_batch_items;
     auto device = exec->get_queue()->get_device();
     auto group_size =
-        device.get_info<sycl::info::device::max_work_group_size>();
+        device.get_info<::sycl::info::device::max_work_group_size>();
 
     const dim3 block(group_size);
     const dim3 grid(num_batches);
 
-    exec->get_queue()->submit([&](sycl::handler& cgh) {
+    exec->get_queue()->submit([&](::sycl::handler& cgh) {
         cgh.parallel_for(sycl_nd_range(grid, block),
-                         [=](sycl::nd_item<3> item_ct1)
+                         [=](::sycl::nd_item<3> item_ct1)
                              [[sycl::reqd_sub_group_size(config::warp_size)]] {
                                  auto group = item_ct1.get_group();
                                  auto group_id = group.get_group_linear_id();
@@ -297,14 +297,14 @@ void copy(std::shared_ptr<const DefaultExecutor> exec,
     const auto num_batches = x_ub.num_batch_items;
     auto device = exec->get_queue()->get_device();
     auto group_size =
-        device.get_info<sycl::info::device::max_work_group_size>();
+        device.get_info<::sycl::info::device::max_work_group_size>();
 
     const dim3 block(group_size);
     const dim3 grid(num_batches);
 
-    exec->get_queue()->submit([&](sycl::handler& cgh) {
+    exec->get_queue()->submit([&](::sycl::handler& cgh) {
         cgh.parallel_for(
-            sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) {
+            sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) {
                 auto group = item_ct1.get_group();
                 auto group_id = group.get_group_linear_id();
                 const auto x_b = batch::extract_batch_item(x_ub, group_id);

diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc
@@ -34,7 +34,7 @@ template <typename ValueType, typename Mapping>
 __dpct_inline__ void scale_kernel(
     const gko::batch::multi_vector::batch_item<const ValueType>& alpha,
     const gko::batch::multi_vector::batch_item<ValueType>& x,
-    sycl::nd_item<3>& item_ct1, Mapping map)
+    ::sycl::nd_item<3>& item_ct1, Mapping map)
 {
     const int max_li = x.num_rows * x.num_rhs;
     for (int li = item_ct1.get_local_linear_id(); li < max_li;
@@ -53,7 +53,7 @@ __dpct_inline__ void add_scaled_kernel(
     const gko::batch::multi_vector::batch_item<const ValueType>& alpha,
     const gko::batch::multi_vector::batch_item<const ValueType>& x,
     const gko::batch::multi_vector::batch_item<ValueType>& y,
-    sycl::nd_item<3>& item_ct1, Mapping map)
+    ::sycl::nd_item<3>& item_ct1, Mapping map)
 {
     const int max_li = x.num_rows * x.num_rhs;
     for (int li = item_ct1.get_local_id(2); li < max_li;
@@ -72,12 +72,12 @@ __dpct_inline__ void compute_gen_dot_product_kernel(
     const gko::batch::multi_vector::batch_item<const ValueType>& x,
     const gko::batch::multi_vector::batch_item<const ValueType>& y,
     const gko::batch::multi_vector::batch_item<ValueType>& result,
-    sycl::nd_item<3>& item_ct1, Mapping conj_map)
+    ::sycl::nd_item<3>& item_ct1, Mapping conj_map)
 {
     constexpr auto tile_size = config::warp_size;
     auto subg =
         group::tiled_partition<tile_size>(group::this_thread_block(item_ct1));
-    const auto subgroup = static_cast<sycl::sub_group>(subg);
+    const auto subgroup = static_cast<::sycl::sub_group>(subg);
     const int subgroup_id = subgroup.get_group_id();
     const int subgroup_size = subgroup.get_local_range().size();
     const int num_subgroups = subgroup.get_group_range().size();
@@ -107,12 +107,12 @@ __dpct_inline__ void compute_norm2_kernel(
     const gko::batch::multi_vector::batch_item<const ValueType>& x,
     const gko::batch::multi_vector::batch_item<remove_complex<ValueType>>&
         result,
-    sycl::nd_item<3>& item_ct1)
+    ::sycl::nd_item<3>& item_ct1)
 {
     constexpr auto tile_size = config::warp_size;
     auto subg =
         group::tiled_partition<tile_size>(group::this_thread_block(item_ct1));
-    const auto subgroup = static_cast<sycl::sub_group>(subg);
+    const auto subgroup = static_cast<::sycl::sub_group>(subg);
     const int subgroup_id = subgroup.get_group_id();
     const int subgroup_size = subgroup.get_local_range().size();
     const int num_subgroups = subgroup.get_group_range().size();
@@ -140,7 +140,7 @@ template <typename ValueType>
 __dpct_inline__ void copy_kernel(
     const gko::batch::multi_vector::batch_item<const ValueType>& in,
     const gko::batch::multi_vector::batch_item<ValueType>& out,
-    sycl::nd_item<3>& item_ct1)
+    ::sycl::nd_item<3>& item_ct1)
 {
     for (int iz = item_ct1.get_local_linear_id(); iz < in.num_rows * in.num_rhs;
          iz += item_ct1.get_local_range().size()) {

diff --git a/dpcpp/base/device.hpp b/dpcpp/base/device.hpp
@@ -43,7 +43,7 @@ namespace sycl {
 
 
 /** calls delete on the given event. */
-void destroy_event(sycl::event* event);
+void destroy_event(::sycl::event* event);
 
 
 }  // namespace sycl

diff --git a/dpcpp/base/dim3.dp.hpp b/dpcpp/base/dim3.dp.hpp
@@ -66,9 +66,9 @@ struct dim3 {
      * get_range returns the range for sycl with correct ordering (reverse of
      * cuda)
      *
-     * @return sycl::range<3>
+     * @return ::sycl::range<3>
      */
-    sycl::range<3> get_range() { return sycl::range<3>(z, y, x); }
+    ::sycl::range<3> get_range() { return ::sycl::range<3>(z, y, x); }
 };
 
 
@@ -80,11 +80,11 @@ struct dim3 {
  *
  * @return sycl::nd_range<3>
  */
-inline sycl::nd_range<3> sycl_nd_range(dim3 grid, dim3 block)
+inline ::sycl::nd_range<3> sycl_nd_range(dim3 grid, dim3 block)
 {
     auto local_range = block.get_range();
     auto global_range = grid.get_range() * local_range;
-    return sycl::nd_range<3>(global_range, local_range);
+    return ::sycl::nd_range<3>(global_range, local_range);
 }
 
 

diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp
@@ -53,12 +53,12 @@ namespace detail {
 
 const std::vector<sycl::device> get_devices(std::string device_type)
 {
-    std::map<std::string, sycl::info::device_type> device_type_map{
-        {"accelerator", sycl::info::device_type::accelerator},
-        {"all", sycl::info::device_type::all},
-        {"cpu", sycl::info::device_type::cpu},
-        {"host", sycl::info::device_type::host},
-        {"gpu", sycl::info::device_type::gpu}};
+    std::map<std::string, ::sycl::info::device_type> device_type_map{
+        {"accelerator", ::sycl::info::device_type::accelerator},
+        {"all", ::sycl::info::device_type::all},
+        {"cpu", ::sycl::info::device_type::cpu},
+        {"host", ::sycl::info::device_type::host},
+        {"gpu", ::sycl::info::device_type::gpu}};
     std::for_each(device_type.begin(), device_type.end(),
                   [](char& c) { c = std::tolower(c); });
     return sycl::device::get_devices(device_type_map.at(device_type));
@@ -224,7 +224,7 @@ bool SyclExecutor::verify_memory_to(const SyclExecutor* dest_exec) const
 namespace detail {
 
 
-void delete_queue(sycl::queue* queue)
+void delete_queue(::sycl::queue* queue)
 {
     queue->wait();
     delete queue;
@@ -257,7 +257,7 @@ void SyclExecutor::set_device_property(sycl_queue_property property)
     if (!device.is_host()) {
         try {
             auto subgroup_sizes =
-                device.get_info<sycl::info::device::sub_group_sizes>();
+                device.get_info<::sycl::info::device::sub_group_sizes>();
             for (auto& i : subgroup_sizes) {
                 this->get_exec_info().subgroup_sizes.push_back(i);
             }
@@ -266,26 +266,26 @@ void SyclExecutor::set_device_property(sycl_queue_property property)
         }
     }
     this->get_exec_info().num_computing_units = static_cast<int>(
-        device.get_info<sycl::info::device::max_compute_units>());
+        device.get_info<::sycl::info::device::max_compute_units>());
     const auto subgroup_sizes = this->get_exec_info().subgroup_sizes;
     if (subgroup_sizes.size()) {
         this->get_exec_info().max_subgroup_size = static_cast<int>(
             *std::max_element(subgroup_sizes.begin(), subgroup_sizes.end()));
     }
     this->get_exec_info().max_workgroup_size = static_cast<int>(
-        device.get_info<sycl::info::device::max_work_group_size>());
+        device.get_info<::sycl::info::device::max_work_group_size>());
 // They change the max_work_item_size with template parameter Dimension after
 // major version 6 and adding the default = 3 is not in the same release.
 #if GINKGO_DPCPP_MAJOR_VERSION >= 6
     auto max_workitem_sizes =
-        device.get_info<sycl::info::device::max_work_item_sizes<3>>();
+        device.get_info<::sycl::info::device::max_work_item_sizes<3>>();
 #else
     auto max_workitem_sizes =
-        device.get_info<sycl::info::device::max_work_item_sizes>();
+        device.get_info<::sycl::info::device::max_work_item_sizes>();
 #endif
-    // Get the max dimension of a sycl::id object
+    // Get the max dimension of a ::sycl::id object
     auto max_work_item_dimensions =
-        device.get_info<sycl::info::device::max_work_item_dimensions>();
+        device.get_info<::sycl::info::device::max_work_item_dimensions>();
     for (uint32 i = 0; i < max_work_item_dimensions; i++) {
         this->get_exec_info().max_workitem_sizes.push_back(
             max_workitem_sizes[i]);
@@ -295,10 +295,10 @@ void SyclExecutor::set_device_property(sycl_queue_property property)
     if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) {
 #if GINKGO_DPCPP_MAJOR_VERSION >= 6
         this->get_exec_info().num_pu_per_cu = device.get_info<
-            sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
+            ::sycl::ext::intel::info::device::gpu_hw_threads_per_eu>();
 #else
         this->get_exec_info().num_pu_per_cu = device.get_info<
-            sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
+            ::sycl::info::device::ext_intel_gpu_hw_threads_per_eu>();
 #endif
     } else {
         // To make the usage still valid.
@@ -311,16 +311,18 @@ void SyclExecutor::set_device_property(sycl_queue_property property)
     // `wait()` would be needed after every call to a DPC++ function or kernel.
     // For example, without `in_order`, doing a copy, a kernel, and a copy, will
     // not necessarily happen in that order by default, which we need to avoid.
-    auto* queue = new sycl::queue{device, detail::get_property_list(property)};
-    queue_ = std::move(queue_manager<sycl::queue>{queue, detail::delete_queue});
+    auto* queue =
+        new ::sycl::queue{device, detail::get_property_list(property)};
+    queue_ =
+        std::move(queue_manager<::sycl::queue>{queue, detail::delete_queue});
 }
 
 
 namespace kernels {
 namespace sycl {
 
 
-void destroy_event(sycl::event* event) { delete event; }
+void destroy_event(::sycl::event* event) { delete event; }
 
 
 }  // namespace sycl