diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp index ddd5fe7a698..1aa12f9287d 100644 --- a/benchmark/utils/dpcpp_timer.dp.cpp +++ b/benchmark/utils/dpcpp_timer.dp.cpp @@ -74,30 +74,29 @@ class SyclTimer : public Timer { { exec_->synchronize(); // Currently, gko::SyclExecutor always use default stream. - start_ = exec_->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(1, [=](sycl::id<1> id) {}); + start_ = exec_->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(1, [=](::sycl::id<1> id) {}); }); } double toc_impl() override { - auto stop = exec_->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(1, [=](sycl::id<1> id) {}); + auto stop = exec_->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(1, [=](::sycl::id<1> id) {}); }); stop.wait_and_throw(); // get the start time of stop auto stop_time = stop.get_profiling_info< - sycl::info::event_profiling::command_start>(); + ::sycl::info::event_profiling::command_start>(); // get the end time of start - auto start_time = - start_ - .get_profiling_info(); + auto start_time = start_.get_profiling_info< + ::sycl::info::event_profiling::command_end>(); return (stop_time - start_time) / double{1.0e9}; } private: std::shared_ptr exec_; - sycl::event start_; + ::sycl::event start_; int id_; }; diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index 465c5cf2215..9f67d80901b 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -149,7 +149,7 @@ namespace kernels { namespace sycl { -void destroy_event(sycl::event* event) GKO_NOT_COMPILED(sycl); +void destroy_event(::sycl::event* event) GKO_NOT_COMPILED(sycl); } // namespace sycl diff --git a/dpcpp/base/batch_multi_vector_kernels.dp.cpp b/dpcpp/base/batch_multi_vector_kernels.dp.cpp index 42c6263651d..93683f228ed 100644 --- a/dpcpp/base/batch_multi_vector_kernels.dp.cpp +++ b/dpcpp/base/batch_multi_vector_kernels.dp.cpp @@ -79,16 +79,16 @@ void scale(std::shared_ptr exec, const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); const dim3 block(group_size); const dim3 grid(num_batches); // Launch a kernel that has nbatches blocks, each block has max group size if (alpha->get_common_size()[1] == 1) { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto alpha_b = @@ -99,9 +99,9 @@ void scale(std::shared_ptr exec, }); }); } else { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto alpha_b = @@ -130,7 +130,7 @@ void add_scaled(std::shared_ptr exec, const auto num_batches = x->get_num_batch_items(); auto device = exec->get_queue()->get_device(); auto group_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); const dim3 block(group_size); const dim3 grid(num_batches); @@ -138,9 +138,9 @@ 
void add_scaled(std::shared_ptr exec, const auto x_ub = get_batch_struct(x); const auto y_ub = get_batch_struct(y); if (alpha->get_common_size()[1] == 1) { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto alpha_b = @@ -152,9 +152,9 @@ void add_scaled(std::shared_ptr exec, }); }); } else { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto alpha_b = @@ -185,16 +185,16 @@ void compute_dot(std::shared_ptr exec, const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); const dim3 block(group_size); const dim3 grid(num_batches); // TODO: Remove reqd_sub_group size and use sycl::reduce_over_group - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); @@ -224,15 +224,15 @@ void compute_conj_dot(std::shared_ptr exec, const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); const dim3 block(group_size); const dim3 grid(num_batches); - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); @@ -262,14 +262,14 @@ void compute_norm2(std::shared_ptr exec, const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); const dim3 block(group_size); const dim3 grid(num_batches); - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(config::warp_size)]] { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); @@ -297,14 +297,14 @@ void copy(std::shared_ptr exec, const auto num_batches = x_ub.num_batch_items; auto device = exec->get_queue()->get_device(); auto group_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); const dim3 block(group_size); const dim3 grid(num_batches); - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), 
[=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { auto group = item_ct1.get_group(); auto group_id = group.get_group_linear_id(); const auto x_b = batch::extract_batch_item(x_ub, group_id); diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 75e1981d659..4445acb6e96 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -34,7 +34,7 @@ template __dpct_inline__ void scale_kernel( const gko::batch::multi_vector::batch_item& alpha, const gko::batch::multi_vector::batch_item& x, - sycl::nd_item<3>& item_ct1, Mapping map) + ::sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = item_ct1.get_local_linear_id(); li < max_li; @@ -53,7 +53,7 @@ __dpct_inline__ void add_scaled_kernel( const gko::batch::multi_vector::batch_item& alpha, const gko::batch::multi_vector::batch_item& x, const gko::batch::multi_vector::batch_item& y, - sycl::nd_item<3>& item_ct1, Mapping map) + ::sycl::nd_item<3>& item_ct1, Mapping map) { const int max_li = x.num_rows * x.num_rhs; for (int li = item_ct1.get_local_id(2); li < max_li; @@ -72,12 +72,12 @@ __dpct_inline__ void compute_gen_dot_product_kernel( const gko::batch::multi_vector::batch_item& x, const gko::batch::multi_vector::batch_item& y, const gko::batch::multi_vector::batch_item& result, - sycl::nd_item<3>& item_ct1, Mapping conj_map) + ::sycl::nd_item<3>& item_ct1, Mapping conj_map) { constexpr auto tile_size = config::warp_size; auto subg = group::tiled_partition(group::this_thread_block(item_ct1)); - const auto subgroup = static_cast(subg); + const auto subgroup = static_cast<::sycl::sub_group>(subg); const int subgroup_id = subgroup.get_group_id(); const int subgroup_size = subgroup.get_local_range().size(); const int num_subgroups = subgroup.get_group_range().size(); @@ -107,12 +107,12 @@ __dpct_inline__ void compute_norm2_kernel( const gko::batch::multi_vector::batch_item& x, const gko::batch::multi_vector::batch_item>& result, - sycl::nd_item<3>& item_ct1) + ::sycl::nd_item<3>& item_ct1) { constexpr auto tile_size = config::warp_size; auto subg = group::tiled_partition(group::this_thread_block(item_ct1)); - const auto subgroup = static_cast(subg); + const auto subgroup = static_cast<::sycl::sub_group>(subg); const int subgroup_id = subgroup.get_group_id(); const int subgroup_size = subgroup.get_local_range().size(); const int num_subgroups = subgroup.get_group_range().size(); @@ -140,7 +140,7 @@ template __dpct_inline__ void copy_kernel( const gko::batch::multi_vector::batch_item& in, const gko::batch::multi_vector::batch_item& out, - sycl::nd_item<3>& item_ct1) + ::sycl::nd_item<3>& item_ct1) { for (int iz = item_ct1.get_local_linear_id(); iz < in.num_rows * in.num_rhs; iz += item_ct1.get_local_range().size()) { diff --git a/dpcpp/base/device.hpp b/dpcpp/base/device.hpp index a603bf4a395..db847af323c 100644 --- a/dpcpp/base/device.hpp +++ b/dpcpp/base/device.hpp @@ -43,7 +43,7 @@ namespace sycl { /** calls delete on the given event. 
*/ -void destroy_event(sycl::event* event); +void destroy_event(::sycl::event* event); } // namespace sycl diff --git a/dpcpp/base/dim3.dp.hpp b/dpcpp/base/dim3.dp.hpp index 525fd8a3b90..8a8d93ecdfb 100644 --- a/dpcpp/base/dim3.dp.hpp +++ b/dpcpp/base/dim3.dp.hpp @@ -66,9 +66,9 @@ struct dim3 { * get_range returns the range for sycl with correct ordering (reverse of * cuda) * - * @return sycl::range<3> + * @return ::sycl::range<3> */ - sycl::range<3> get_range() { return sycl::range<3>(z, y, x); } + ::sycl::range<3> get_range() { return ::sycl::range<3>(z, y, x); } }; @@ -80,11 +80,11 @@ struct dim3 { * * @return sycl::nd_range<3> */ -inline sycl::nd_range<3> sycl_nd_range(dim3 grid, dim3 block) +inline ::sycl::nd_range<3> sycl_nd_range(dim3 grid, dim3 block) { auto local_range = block.get_range(); auto global_range = grid.get_range() * local_range; - return sycl::nd_range<3>(global_range, local_range); + return ::sycl::nd_range<3>(global_range, local_range); } diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index ff29d9f1d54..6bf3e735ba0 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -53,12 +53,12 @@ namespace detail { const std::vector get_devices(std::string device_type) { - std::map device_type_map{ - {"accelerator", sycl::info::device_type::accelerator}, - {"all", sycl::info::device_type::all}, - {"cpu", sycl::info::device_type::cpu}, - {"host", sycl::info::device_type::host}, - {"gpu", sycl::info::device_type::gpu}}; + std::map device_type_map{ + {"accelerator", ::sycl::info::device_type::accelerator}, + {"all", ::sycl::info::device_type::all}, + {"cpu", ::sycl::info::device_type::cpu}, + {"host", ::sycl::info::device_type::host}, + {"gpu", ::sycl::info::device_type::gpu}}; std::for_each(device_type.begin(), device_type.end(), [](char& c) { c = std::tolower(c); }); return sycl::device::get_devices(device_type_map.at(device_type)); @@ -224,7 +224,7 @@ bool SyclExecutor::verify_memory_to(const SyclExecutor* dest_exec) const namespace detail { -void delete_queue(sycl::queue* queue) +void delete_queue(::sycl::queue* queue) { queue->wait(); delete queue; @@ -257,7 +257,7 @@ void SyclExecutor::set_device_property(sycl_queue_property property) if (!device.is_host()) { try { auto subgroup_sizes = - device.get_info(); + device.get_info<::sycl::info::device::sub_group_sizes>(); for (auto& i : subgroup_sizes) { this->get_exec_info().subgroup_sizes.push_back(i); } @@ -266,26 +266,26 @@ void SyclExecutor::set_device_property(sycl_queue_property property) } } this->get_exec_info().num_computing_units = static_cast( - device.get_info()); + device.get_info<::sycl::info::device::max_compute_units>()); const auto subgroup_sizes = this->get_exec_info().subgroup_sizes; if (subgroup_sizes.size()) { this->get_exec_info().max_subgroup_size = static_cast( *std::max_element(subgroup_sizes.begin(), subgroup_sizes.end())); } this->get_exec_info().max_workgroup_size = static_cast( - device.get_info()); + device.get_info<::sycl::info::device::max_work_group_size>()); // They change the max_work_item_size with template parameter Dimension after // major version 6 and adding the default = 3 is not in the same release. 
#if GINKGO_DPCPP_MAJOR_VERSION >= 6 auto max_workitem_sizes = - device.get_info>(); + device.get_info<::sycl::info::device::max_work_item_sizes<3>>(); #else auto max_workitem_sizes = - device.get_info(); + device.get_info<::sycl::info::device::max_work_item_sizes>(); #endif - // Get the max dimension of a sycl::id object + // Get the max dimension of a ::sycl::id object auto max_work_item_dimensions = - device.get_info(); + device.get_info<::sycl::info::device::max_work_item_dimensions>(); for (uint32 i = 0; i < max_work_item_dimensions; i++) { this->get_exec_info().max_workitem_sizes.push_back( max_workitem_sizes[i]); @@ -295,10 +295,10 @@ void SyclExecutor::set_device_property(sycl_queue_property property) if (device.has(sycl::aspect::ext_intel_gpu_hw_threads_per_eu)) { #if GINKGO_DPCPP_MAJOR_VERSION >= 6 this->get_exec_info().num_pu_per_cu = device.get_info< - sycl::ext::intel::info::device::gpu_hw_threads_per_eu>(); + ::sycl::ext::intel::info::device::gpu_hw_threads_per_eu>(); #else this->get_exec_info().num_pu_per_cu = device.get_info< - sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); + ::sycl::info::device::ext_intel_gpu_hw_threads_per_eu>(); #endif } else { // To make the usage still valid. @@ -311,8 +311,10 @@ void SyclExecutor::set_device_property(sycl_queue_property property) // `wait()` would be needed after every call to a DPC++ function or kernel. // For example, without `in_order`, doing a copy, a kernel, and a copy, will // not necessarily happen in that order by default, which we need to avoid. - auto* queue = new sycl::queue{device, detail::get_property_list(property)}; - queue_ = std::move(queue_manager{queue, detail::delete_queue}); + auto* queue = + new ::sycl::queue{device, detail::get_property_list(property)}; + queue_ = + std::move(queue_manager<::sycl::queue>{queue, detail::delete_queue}); } @@ -320,7 +322,7 @@ namespace kernels { namespace sycl { -void destroy_event(sycl::event* event) { delete event; } +void destroy_event(::sycl::event* event) { delete event; } } // namespace sycl diff --git a/dpcpp/base/helper.dp.cpp b/dpcpp/base/helper.dp.cpp index e43fe3fd6b4..7ec4ce3da00 100644 --- a/dpcpp/base/helper.dp.cpp +++ b/dpcpp/base/helper.dp.cpp @@ -41,14 +41,14 @@ namespace kernels { namespace sycl { -bool validate(sycl::queue* queue, unsigned int workgroup_size, +bool validate(::sycl::queue* queue, unsigned int workgroup_size, unsigned int subgroup_size) { auto device = queue->get_device(); auto subgroup_size_list = - device.get_info(); + device.get_info<::sycl::info::device::sub_group_sizes>(); auto max_workgroup_size = - device.get_info(); + device.get_info<::sycl::info::device::max_work_group_size>(); bool allowed = false; for (auto& i : subgroup_size_list) { allowed |= (i == subgroup_size); diff --git a/dpcpp/base/helper.hpp b/dpcpp/base/helper.hpp index edc7695c296..830573db6c3 100644 --- a/dpcpp/base/helper.hpp +++ b/dpcpp/base/helper.hpp @@ -56,19 +56,19 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * @param name_ the name of the host function with config * @param kernel_ the kernel name */ -#define GKO_ENABLE_DEFAULT_HOST(name_, kernel_) \ - template \ - void name_(dim3 grid, dim3 block, gko::size_type, sycl::queue* queue, \ - InferredArgs... 
args) \ - { \ - queue->submit([&](sycl::handler& cgh) { \ - cgh.parallel_for( \ - sycl_nd_range(grid, block), \ - [=](sycl::nd_item<3> item_ct1) \ - [[sycl::reqd_sub_group_size(config::warp_size)]] { \ - kernel_(args..., item_ct1); \ - }); \ - }); \ +#define GKO_ENABLE_DEFAULT_HOST(name_, kernel_) \ + template \ + void name_(dim3 grid, dim3 block, gko::size_type, ::sycl::queue* queue, \ + InferredArgs... args) \ + { \ + queue->submit([&](::sycl::handler& cgh) { \ + cgh.parallel_for( \ + sycl_nd_range(grid, block), \ + [=](::sycl::nd_item<3> item_ct1) \ + [[sycl::reqd_sub_group_size(config::warp_size)]] { \ + kernel_(args..., item_ct1); \ + }); \ + }); \ } @@ -83,40 +83,40 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_ENABLE_DEFAULT_HOST_CONFIG(name_, kernel_) \ template \ inline void name_(dim3 grid, dim3 block, gko::size_type, \ - sycl::queue* queue, InferredArgs... args) \ + ::sycl::queue* queue, InferredArgs... args) \ { \ - queue->submit([&](sycl::handler& cgh) { \ + queue->submit([&](::sycl::handler& cgh) { \ if constexpr (DCFG_1D::decode<1>(encoded) > 1) { \ cgh.parallel_for(sycl_nd_range(grid, block), \ - [=](sycl::nd_item<3> item_ct1) \ + [=](::sycl::nd_item<3> item_ct1) \ [[sycl::reqd_sub_group_size( \ DCFG_1D::decode<1>(encoded))]] { \ kernel_(args..., item_ct1); \ }); \ } else { \ cgh.parallel_for(sycl_nd_range(grid, block), \ - [=](sycl::nd_item<3> item_ct1) { \ + [=](::sycl::nd_item<3> item_ct1) { \ kernel_(args..., item_ct1); \ }); \ } \ }); \ } -#define GKO_ENABLE_DEFAULT_HOST_CONFIG_TYPE(name_, kernel_) \ - template \ - inline void name_(dim3 grid, dim3 block, gko::size_type, \ - sycl::queue* queue, InferredArgs... args) \ - { \ - queue->submit([&](sycl::handler& cgh) { \ - cgh.parallel_for( \ - sycl_nd_range(grid, block), \ - [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( \ - DeviceConfig:: \ - subgroup_size)]] __WG_BOUND__(DeviceConfig:: \ - block_size) { \ - kernel_(args..., item_ct1); \ - }); \ - }); \ +#define GKO_ENABLE_DEFAULT_HOST_CONFIG_TYPE(name_, kernel_) \ + template \ + inline void name_(dim3 grid, dim3 block, gko::size_type, \ + ::sycl::queue* queue, InferredArgs... args) \ + { \ + queue->submit([&](::sycl::handler& cgh) { \ + cgh.parallel_for( \ + sycl_nd_range(grid, block), \ + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( \ + DeviceConfig:: \ + subgroup_size)]] __WG_BOUND__(DeviceConfig:: \ + block_size) { \ + kernel_(args..., item_ct1); \ + }); \ + }); \ } /** @@ -133,7 +133,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #define GKO_ENABLE_DEFAULT_CONFIG_CALL(name_, callable_, list_) \ template \ void name_(std::uint32_t desired_cfg, dim3 grid, dim3 block, \ - gko::size_type dynamic_shared_memory, sycl::queue* queue, \ + gko::size_type dynamic_shared_memory, ::sycl::queue* queue, \ InferredArgs... args) \ { \ callable_( \ @@ -195,7 +195,7 @@ namespace sycl { * * @return the given arguments are valid or not in given queue. 
*/ -bool validate(sycl::queue* queue, unsigned workgroup_size, +bool validate(::sycl::queue* queue, unsigned workgroup_size, unsigned subgroup_size); diff --git a/dpcpp/base/kernel_launch.dp.hpp b/dpcpp/base/kernel_launch.dp.hpp index 744f086dbc8..0141b38037d 100644 --- a/dpcpp/base/kernel_launch.dp.hpp +++ b/dpcpp/base/kernel_launch.dp.hpp @@ -51,11 +51,11 @@ namespace device_std = std; template -void generic_kernel_1d(sycl::handler& cgh, int64 size, KernelFunction fn, +void generic_kernel_1d(::sycl::handler& cgh, int64 size, KernelFunction fn, KernelArgs... args) { - cgh.parallel_for(sycl::range<1>{static_cast(size)}, - [=](sycl::id<1> idx_id) { + cgh.parallel_for(::sycl::range<1>{static_cast(size)}, + [=](::sycl::id<1> idx_id) { auto idx = static_cast(idx_id[0]); fn(idx, args...); }); @@ -63,11 +63,11 @@ void generic_kernel_1d(sycl::handler& cgh, int64 size, KernelFunction fn, template -void generic_kernel_2d(sycl::handler& cgh, int64 rows, int64 cols, +void generic_kernel_2d(::sycl::handler& cgh, int64 rows, int64 cols, KernelFunction fn, KernelArgs... args) { - cgh.parallel_for(sycl::range<1>{static_cast(rows * cols)}, - [=](sycl::id<1> idx) { + cgh.parallel_for(::sycl::range<1>{static_cast(rows * cols)}, + [=](::sycl::id<1> idx) { auto row = static_cast(idx[0]) / cols; auto col = static_cast(idx[0]) % cols; fn(row, col, args...); @@ -79,7 +79,7 @@ template void run_kernel(std::shared_ptr exec, KernelFunction fn, size_type size, KernelArgs&&... args) { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { generic_kernel_1d(cgh, static_cast(size), fn, map_to_device(args)...); }); @@ -89,7 +89,7 @@ template void run_kernel(std::shared_ptr exec, KernelFunction fn, dim<2> size, KernelArgs&&... 
args) { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { generic_kernel_2d(cgh, static_cast(size[0]), static_cast(size[1]), fn, map_to_device(args)...); diff --git a/dpcpp/base/kernel_launch_reduction.dp.hpp b/dpcpp/base/kernel_launch_reduction.dp.hpp index 713fc02a34f..db5f29d7ec5 100644 --- a/dpcpp/base/kernel_launch_reduction.dp.hpp +++ b/dpcpp/base/kernel_launch_reduction.dp.hpp @@ -60,7 +60,7 @@ static constexpr auto dcfg_1d_list_simple_reduction = dcfg_1d_list_t(); template -void generic_kernel_reduction_1d(sycl::handler& cgh, int64 size, +void generic_kernel_reduction_1d(::sycl::handler& cgh, int64 size, int64 num_workgroups, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* storage, @@ -69,15 +69,16 @@ void generic_kernel_reduction_1d(sycl::handler& cgh, int64 size, constexpr auto wg_size = DeviceConfig::block_size; constexpr auto sg_size = DeviceConfig::subgroup_size; constexpr auto num_partials = wg_size / sg_size; - sycl::accessor, 0, - sycl::access_mode::read_write, sycl::access::target::local> + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> subgroup_partial_acc(cgh); const auto range = sycl_nd_range(dim3(num_workgroups), dim3(wg_size)); const auto global_size = num_workgroups * wg_size; cgh.parallel_for( range, - [=](sycl::nd_item<3> idx) [[sycl::reqd_sub_group_size(sg_size)]] { + [=](::sycl::nd_item<3> idx) [[sycl::reqd_sub_group_size(sg_size)]] { auto subgroup_partial = &(*subgroup_partial_acc.get_pointer())[0]; const auto tidx = thread::get_thread_id_flat(idx); const auto local_tidx = static_cast(tidx % wg_size); @@ -91,7 +92,7 @@ void generic_kernel_reduction_1d(sycl::handler& cgh, int64 size, if (subgroup.thread_rank() == 0) { subgroup_partial[local_tidx / sg_size] = partial; } - idx.barrier(sycl::access::fence_space::local_space); + idx.barrier(::sycl::access::fence_space::local_space); if (local_tidx < sg_size) { partial = identity; for (int64 i = local_tidx; i < num_partials; i += sg_size) { @@ -109,7 +110,7 @@ void generic_kernel_reduction_1d(sycl::handler& cgh, int64 size, template -void generic_kernel_reduction_2d(sycl::handler& cgh, int64 rows, int64 cols, +void generic_kernel_reduction_2d(::sycl::handler& cgh, int64 rows, int64 cols, int64 num_workgroups, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* storage, @@ -118,15 +119,16 @@ void generic_kernel_reduction_2d(sycl::handler& cgh, int64 rows, int64 cols, constexpr auto wg_size = DeviceConfig::block_size; constexpr auto sg_size = DeviceConfig::subgroup_size; constexpr auto num_partials = wg_size / sg_size; - sycl::accessor, 0, - sycl::access_mode::read_write, sycl::access::target::local> + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> subgroup_partial_acc(cgh); const auto range = sycl_nd_range(dim3(num_workgroups), dim3(wg_size)); const auto global_size = num_workgroups * wg_size; cgh.parallel_for( range, - [=](sycl::nd_item<3> idx) [[sycl::reqd_sub_group_size(sg_size)]] { + [=](::sycl::nd_item<3> idx) [[sycl::reqd_sub_group_size(sg_size)]] { auto subgroup_partial = &(*subgroup_partial_acc.get_pointer())[0]; const auto tidx = thread::get_thread_id_flat(idx); const auto local_tidx = static_cast(tidx % wg_size); @@ -142,7 +144,7 @@ void generic_kernel_reduction_2d(sycl::handler& cgh, int64 rows, int64 cols, if (subgroup.thread_rank() == 0) { subgroup_partial[local_tidx / sg_size] = partial; } - 
idx.barrier(sycl::access::fence_space::local_space); + idx.barrier(::sycl::access::fence_space::local_space); if (local_tidx < sg_size) { partial = identity; for (int64 i = local_tidx; i < num_partials; i += sg_size) { @@ -178,13 +180,13 @@ void run_kernel_reduction_impl(std::shared_ptr exec, if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); } - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_1d( cgh, static_cast(size), num_workgroups, fn, op, [](auto v) { return v; }, identity, reinterpret_cast(tmp.get_data()), args...); }); - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_1d( cgh, static_cast(num_workgroups), 1, [](auto i, auto v) { return v[i]; }, op, finalize, identity, @@ -192,7 +194,7 @@ void run_kernel_reduction_impl(std::shared_ptr exec, reinterpret_cast(tmp.get_const_data())); }); } else { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_1d( cgh, static_cast(size), 1, fn, op, finalize, identity, result, args...); @@ -225,13 +227,13 @@ void run_kernel_reduction_impl(std::shared_ptr exec, if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); } - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_2d( cgh, rows, cols, num_workgroups, fn, op, [](auto v) { return v; }, identity, reinterpret_cast(tmp.get_data()), args...); }); - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_1d( cgh, static_cast(num_workgroups), 1, [](auto i, auto v) { return v[i]; }, op, finalize, identity, @@ -239,7 +241,7 @@ void run_kernel_reduction_impl(std::shared_ptr exec, reinterpret_cast(tmp.get_const_data())); }); } else { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_2d(cgh, rows, cols, 1, fn, op, finalize, identity, result, args...); @@ -315,10 +317,10 @@ void generic_kernel_row_reduction_2d(syn::value_list, static_assert(ssg_size <= sg_size, "ssg must be smaller than sg"); const auto num_workgroups = ceildiv(rows * col_blocks * ssg_size, wg_size); const auto range = sycl_nd_range(dim3(num_workgroups), dim3(wg_size)); - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( range, - [=](sycl::nd_item<3> id) [[sycl::reqd_sub_group_size(sg_size)]] { + [=](::sycl::nd_item<3> id) [[sycl::reqd_sub_group_size(sg_size)]] { const auto idx = thread::get_subwarp_id_flat(id); const auto row = idx % rows; @@ -359,7 +361,7 @@ template void generic_kernel_col_reduction_2d_small( - sycl::handler& cgh, int64 rows, int64 cols, int64 row_blocks, + ::sycl::handler& cgh, int64 rows, int64 cols, int64 row_blocks, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, MappedKernelArgs... 
args) { @@ -369,12 +371,14 @@ void generic_kernel_col_reduction_2d_small( constexpr auto subgroups_per_workgroup = wg_size / sg_size; // stores the subwarp_size partial sums from each warp, grouped by warp constexpr auto shared_storage = subgroups_per_workgroup * ssg_size; - sycl::accessor, 0, - sycl::access_mode::read_write, sycl::access::target::local> + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> block_partial_acc(cgh); const auto range = sycl_nd_range(dim3(row_blocks), dim3(wg_size)); cgh.parallel_for( - range, [=](sycl::nd_item<3> id) [[sycl::reqd_sub_group_size(sg_size)]] { + range, + [=](::sycl::nd_item<3> id) [[sycl::reqd_sub_group_size(sg_size)]] { auto block_partial = &(*block_partial_acc.get_pointer())[0]; const auto ssg_id = thread::get_subwarp_id_flat(id); @@ -434,7 +438,7 @@ template void generic_kernel_col_reduction_2d_blocked( - sycl::handler& cgh, int64 rows, int64 cols, int64 row_blocks, + ::sycl::handler& cgh, int64 rows, int64 cols, int64 row_blocks, int64 col_blocks, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, MappedKernelArgs... args) { @@ -442,11 +446,13 @@ void generic_kernel_col_reduction_2d_blocked( constexpr auto sg_size = cfg::subgroup_size; const auto range = sycl_nd_range(dim3(row_blocks, col_blocks), dim3(wg_size)); - sycl::accessor, 0, - sycl::access_mode::read_write, sycl::access::target::local> + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> block_partial_acc(cgh); cgh.parallel_for( - range, [=](sycl::nd_item<3> id) [[sycl::reqd_sub_group_size(sg_size)]] { + range, + [=](::sycl::nd_item<3> id) [[sycl::reqd_sub_group_size(sg_size)]] { const auto sg_id = thread::get_subwarp_id_flat(id); const auto sg_num = thread::get_subwarp_num_flat(id); @@ -483,12 +489,12 @@ void generic_kernel_col_reduction_2d_blocked( template void generic_kernel_reduction_finalize_2d( - sycl::handler& cgh, int64 num_results, int64 num_blocks, ReductionOp op, + ::sycl::handler& cgh, int64 num_results, int64 num_blocks, ReductionOp op, FinalizeOp finalize, ValueType identity, const ValueType* input, int64 result_stride, ValueType* result) { - cgh.parallel_for(sycl::range<1>{static_cast(num_results)}, - [=](sycl::id<1> id) { + cgh.parallel_for(::sycl::range<1>{static_cast(num_results)}, + [=](::sycl::id<1> id) { auto partial = identity; for (int64 block = 0; block < num_blocks; block++) { partial = op(partial, @@ -519,7 +525,7 @@ void run_generic_col_reduction_small(syn::value_list, std::min(ceildiv(rows * ssg_size, wg_size), max_workgroups); auto queue = exec->get_queue(); if (row_blocks <= 1) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_col_reduction_2d_small( cgh, rows, cols, 1, fn, op, finalize, identity, result, args...); @@ -529,13 +535,13 @@ void run_generic_col_reduction_small(syn::value_list, if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); } - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_col_reduction_2d_small( cgh, rows, cols, row_blocks, fn, op, [](auto v) { return v; }, identity, reinterpret_cast(tmp.get_data()), args...); }); - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_finalize_2d( cgh, cols, row_blocks, op, finalize, identity, reinterpret_cast(tmp.get_const_data()), 1, @@ -579,7 +585,7 @@ void 
run_kernel_row_reduction_stage1(std::shared_ptr exec, syn::value_list{}, exec, rows, cols, col_blocks, fn, op, [](auto v) { return v; }, identity, reinterpret_cast(tmp.get_data()), 1, args...); - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_finalize_2d( cgh, rows, col_blocks, op, finalize, identity, reinterpret_cast(tmp.get_const_data()), @@ -638,7 +644,7 @@ void run_kernel_col_reduction_stage1(std::shared_ptr exec, col_blocks); auto queue = exec->get_queue(); if (row_blocks <= 1) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_col_reduction_2d_blocked( cgh, rows, cols, 1, col_blocks, fn, op, finalize, identity, result, args...); @@ -648,13 +654,13 @@ void run_kernel_col_reduction_stage1(std::shared_ptr exec, if (tmp.get_num_elems() < required_storage) { tmp.resize_and_reset(required_storage); } - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_col_reduction_2d_blocked( cgh, rows, cols, row_blocks, col_blocks, fn, op, [](auto v) { return v; }, identity, reinterpret_cast(tmp.get_data()), args...); }); - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { generic_kernel_reduction_finalize_2d( cgh, cols, row_blocks, op, finalize, identity, reinterpret_cast(tmp.get_const_data()), 1, diff --git a/dpcpp/base/kernel_launch_solver.dp.hpp b/dpcpp/base/kernel_launch_solver.dp.hpp index 2c41fd5c3e6..088ac90ae7a 100644 --- a/dpcpp/base/kernel_launch_solver.dp.hpp +++ b/dpcpp/base/kernel_launch_solver.dp.hpp @@ -42,12 +42,12 @@ namespace sycl { template -void generic_kernel_2d_solver(sycl::handler& cgh, int64 rows, int64 cols, +void generic_kernel_2d_solver(::sycl::handler& cgh, int64 rows, int64 cols, int64 default_stride, KernelFunction fn, KernelArgs... args) { - cgh.parallel_for(sycl::range<1>{static_cast(rows * cols)}, - [=](sycl::id<1> idx) { + cgh.parallel_for(::sycl::range<1>{static_cast(rows * cols)}, + [=](::sycl::id<1> idx) { auto row = static_cast(idx[0] / cols); auto col = static_cast(idx[0] % cols); fn(row, col, @@ -62,7 +62,7 @@ void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, KernelArgs&&... 
args) { - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { kernels::sycl::generic_kernel_2d_solver( cgh, static_cast(size[0]), static_cast(size[1]), static_cast(default_stride), fn, diff --git a/dpcpp/base/onemkl_bindings.hpp b/dpcpp/base/onemkl_bindings.hpp index 0d0e9e70827..50ecc6f1768 100644 --- a/dpcpp/base/onemkl_bindings.hpp +++ b/dpcpp/base/onemkl_bindings.hpp @@ -95,7 +95,7 @@ struct is_supported> : std::true_type {}; #define GKO_BIND_DOT(ValueType, Name, Func) \ - inline void Name(sycl::queue& exec_queue, std::int64_t n, \ + inline void Name(::sycl::queue& exec_queue, std::int64_t n, \ const ValueType* x, std::int64_t incx, \ const ValueType* y, std::int64_t incy, ValueType* result) \ { \ diff --git a/dpcpp/base/timer.dp.cpp b/dpcpp/base/timer.dp.cpp index 874bb82dba2..96b2af24d43 100644 --- a/dpcpp/base/timer.dp.cpp +++ b/dpcpp/base/timer.dp.cpp @@ -56,7 +56,7 @@ SyclTimer::SyclTimer(std::shared_ptr exec) void SyclTimer::init_time_point(time_point& time) { time.type_ = time_point::type::sycl; - time.data_.sycl_event = new sycl::event{}; + time.data_.sycl_event = new ::sycl::event{}; } @@ -64,8 +64,8 @@ void SyclTimer::record(time_point& time) { GKO_ASSERT(time.type_ == time_point::type::sycl); *time.data_.sycl_event = - exec_->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(1, [=](sycl::id<1> id) {}); + exec_->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(1, [=](::sycl::id<1> id) {}); }); } @@ -83,12 +83,11 @@ std::chrono::nanoseconds SyclTimer::difference_async(const time_point& start, GKO_ASSERT(start.type_ == time_point::type::sycl); GKO_ASSERT(stop.type_ == time_point::type::sycl); stop.data_.sycl_event->wait_and_throw(); - auto stop_time = - stop.data_.sycl_event - ->get_profiling_info(); + auto stop_time = stop.data_.sycl_event->get_profiling_info< + ::sycl::info::event_profiling::command_start>(); auto start_time = start.data_.sycl_event - ->get_profiling_info(); + ->get_profiling_info<::sycl::info::event_profiling::command_end>(); return std::chrono::nanoseconds{static_cast(stop_time - start_time)}; } diff --git a/dpcpp/components/atomic.dp.hpp b/dpcpp/components/atomic.dp.hpp index 6fb793cf6a8..b7922875076 100644 --- a/dpcpp/components/atomic.dp.hpp +++ b/dpcpp/components/atomic.dp.hpp @@ -49,8 +49,8 @@ namespace sycl { namespace atomic { -constexpr auto local_space = sycl::access::address_space::local_space; -constexpr auto global_space = sycl::access::address_space::global_space; +constexpr auto local_space = ::sycl::access::address_space::local_space; +constexpr auto global_space = ::sycl::access::address_space::global_space; } // namespace atomic @@ -58,50 +58,52 @@ constexpr auto global_space = sycl::access::address_space::global_space; namespace { -template T atomic_compare_exchange_strong( - sycl::multi_ptr addr, T expected, T desired, - sycl::memory_order success = sycl::memory_order::relaxed, - sycl::memory_order fail = sycl::memory_order::relaxed) + ::sycl::multi_ptr addr, T expected, T desired, + ::sycl::memory_order success = ::sycl::memory_order::relaxed, + ::sycl::memory_order fail = ::sycl::memory_order::relaxed) { - sycl::atomic obj(addr); + ::sycl::atomic obj(addr); obj.compare_exchange_strong(expected, desired, success, fail); return expected; } -template T atomic_compare_exchange_strong( T* addr, T expected, T desired, - sycl::memory_order success = sycl::memory_order::relaxed, - sycl::memory_order fail = sycl::memory_order::relaxed) + ::sycl::memory_order 
success = ::sycl::memory_order::relaxed, + ::sycl::memory_order fail = ::sycl::memory_order::relaxed) { return atomic_compare_exchange_strong( - sycl::multi_ptr(addr), expected, desired, success, + ::sycl::multi_ptr(addr), expected, desired, success, fail); } -template inline T atomic_fetch_add( T* addr, T operand, - sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + ::sycl::memory_order memoryOrder = ::sycl::memory_order::relaxed) { - sycl::atomic obj((sycl::multi_ptr(addr))); - return sycl::atomic_fetch_add(obj, operand, memoryOrder); + ::sycl::atomic obj( + (::sycl::multi_ptr(addr))); + return ::sycl::atomic_fetch_add(obj, operand, memoryOrder); } -template inline T atomic_fetch_max( T* addr, T operand, - sycl::memory_order memoryOrder = sycl::memory_order::relaxed) + ::sycl::memory_order memoryOrder = ::sycl::memory_order::relaxed) { - sycl::atomic obj((sycl::multi_ptr(addr))); - return sycl::atomic_fetch_max(obj, operand, memoryOrder); + ::sycl::atomic obj( + (::sycl::multi_ptr(addr))); + return ::sycl::atomic_fetch_max(obj, operand, memoryOrder); } @@ -111,7 +113,7 @@ inline T atomic_fetch_max( namespace detail { -template struct atomic_helper { __dpct_inline__ static ValueType atomic_add(ValueType*, ValueType) @@ -124,7 +126,7 @@ struct atomic_helper { }; -template struct atomic_max_helper { __dpct_inline__ static ValueType atomic_max(ValueType*, ValueType) @@ -147,27 +149,27 @@ __dpct_inline__ ResultType reinterpret(ValueType val) } -#define GKO_BIND_ATOMIC_HELPER_STRUCTURE(CONVERTER_TYPE) \ - template \ - struct atomic_helper< \ - addressSpace, ValueType, \ - std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ - __dpct_inline__ static ValueType atomic_add( \ - ValueType* __restrict__ addr, ValueType val) \ - { \ - CONVERTER_TYPE* address_as_converter = \ - reinterpret_cast(addr); \ - CONVERTER_TYPE old = *address_as_converter; \ - CONVERTER_TYPE assumed; \ - do { \ - assumed = old; \ - old = atomic_compare_exchange_strong( \ - address_as_converter, assumed, \ - reinterpret( \ - val + reinterpret(assumed))); \ - } while (assumed != old); \ - return reinterpret(old); \ - } \ +#define GKO_BIND_ATOMIC_HELPER_STRUCTURE(CONVERTER_TYPE) \ + template <::sycl::access::address_space addressSpace, typename ValueType> \ + struct atomic_helper< \ + addressSpace, ValueType, \ + std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ + __dpct_inline__ static ValueType atomic_add( \ + ValueType* __restrict__ addr, ValueType val) \ + { \ + CONVERTER_TYPE* address_as_converter = \ + reinterpret_cast(addr); \ + CONVERTER_TYPE old = *address_as_converter; \ + CONVERTER_TYPE assumed; \ + do { \ + assumed = old; \ + old = atomic_compare_exchange_strong( \ + address_as_converter, assumed, \ + reinterpret( \ + val + reinterpret(assumed))); \ + } while (assumed != old); \ + return reinterpret(old); \ + } \ }; // Support 64-bit ATOMIC_ADD @@ -179,7 +181,7 @@ GKO_BIND_ATOMIC_HELPER_STRUCTURE(unsigned int); #undef GKO_BIND_ATOMIC_HELPER_STRUCTURE #define GKO_BIND_ATOMIC_HELPER_VALUETYPE(ValueType) \ - template \ + template <::sycl::access::address_space addressSpace> \ struct atomic_helper> { \ __dpct_inline__ static ValueType atomic_add( \ ValueType* __restrict__ addr, ValueType val) \ @@ -195,7 +197,7 @@ GKO_BIND_ATOMIC_HELPER_VALUETYPE(unsigned long long int); #undef GKO_BIND_ATOMIC_HELPER_VALUETYPE -template +template <::sycl::access::address_space addressSpace, typename ValueType> struct atomic_helper< addressSpace, ValueType, std::enable_if_t() && 
sizeof(ValueType) >= 16>> { @@ -214,28 +216,28 @@ struct atomic_helper< }; -#define GKO_BIND_ATOMIC_MAX_STRUCTURE(CONVERTER_TYPE) \ - template \ - struct atomic_max_helper< \ - addressSpace, ValueType, \ - std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ - __dpct_inline__ static ValueType atomic_max( \ - ValueType* __restrict__ addr, ValueType val) \ - { \ - CONVERTER_TYPE* address_as_converter = \ - reinterpret_cast(addr); \ - CONVERTER_TYPE old = *address_as_converter; \ - CONVERTER_TYPE assumed; \ - do { \ - assumed = old; \ - if (reinterpret(assumed) < val) { \ - old = atomic_compare_exchange_strong( \ - address_as_converter, assumed, \ - reinterpret(val)); \ - } \ - } while (assumed != old); \ - return reinterpret(old); \ - } \ +#define GKO_BIND_ATOMIC_MAX_STRUCTURE(CONVERTER_TYPE) \ + template <::sycl::access::address_space addressSpace, typename ValueType> \ + struct atomic_max_helper< \ + addressSpace, ValueType, \ + std::enable_if_t<(sizeof(ValueType) == sizeof(CONVERTER_TYPE))>> { \ + __dpct_inline__ static ValueType atomic_max( \ + ValueType* __restrict__ addr, ValueType val) \ + { \ + CONVERTER_TYPE* address_as_converter = \ + reinterpret_cast(addr); \ + CONVERTER_TYPE old = *address_as_converter; \ + CONVERTER_TYPE assumed; \ + do { \ + assumed = old; \ + if (reinterpret(assumed) < val) { \ + old = atomic_compare_exchange_strong( \ + address_as_converter, assumed, \ + reinterpret(val)); \ + } \ + } while (assumed != old); \ + return reinterpret(old); \ + } \ }; // Support 64-bit ATOMIC_ADD @@ -247,7 +249,7 @@ GKO_BIND_ATOMIC_MAX_STRUCTURE(unsigned int); #undef GKO_BIND_ATOMIC_MAX_STRUCTURE #define GKO_BIND_ATOMIC_MAX_VALUETYPE(ValueType) \ - template \ + template <::sycl::access::address_space addressSpace> \ struct atomic_max_helper> { \ __dpct_inline__ static ValueType atomic_max( \ @@ -267,7 +269,7 @@ GKO_BIND_ATOMIC_MAX_VALUETYPE(unsigned long long int); } // namespace detail -template __dpct_inline__ T atomic_add(T* __restrict__ addr, T val) { @@ -275,7 +277,7 @@ __dpct_inline__ T atomic_add(T* __restrict__ addr, T val) } -template __dpct_inline__ T atomic_max(T* __restrict__ addr, T val) { diff --git a/dpcpp/components/cooperative_groups.dp.hpp b/dpcpp/components/cooperative_groups.dp.hpp index 435419bbdf4..0ab4ee5e55d 100644 --- a/dpcpp/components/cooperative_groups.dp.hpp +++ b/dpcpp/components/cooperative_groups.dp.hpp @@ -168,8 +168,8 @@ namespace detail { * This is a limited implementation of the SYCL thread_block_tile. */ template -class thread_block_tile : public sycl::sub_group { - using sub_group = sycl::sub_group; +class thread_block_tile : public ::sycl::sub_group { + using sub_group = ::sycl::sub_group; using id_type = sub_group::id_type; using mask_type = config::lane_mask_type; @@ -239,9 +239,9 @@ class thread_block_tile : public sycl::sub_group { __dpct_inline__ mask_type ballot(int predicate) const noexcept { // todo: change it when OneAPI update the mask related api - return sycl::reduce_over_group( + return ::sycl::reduce_over_group( *this, (predicate != 0) ? 
mask_type(1) << data_.rank : mask_type(0), - sycl::plus()); + ::sycl::plus()); } /** @@ -250,7 +250,7 @@ class thread_block_tile : public sycl::sub_group { */ __dpct_inline__ bool any(int predicate) const noexcept { - return sycl::any_of_group(*this, (predicate != 0)); + return ::sycl::any_of_group(*this, (predicate != 0)); } /** @@ -259,7 +259,7 @@ class thread_block_tile : public sycl::sub_group { */ __dpct_inline__ bool all(int predicate) const noexcept { - return sycl::all_of_group(*this, (predicate != 0)); + return ::sycl::all_of_group(*this, (predicate != 0)); } @@ -390,7 +390,7 @@ struct is_communicator_group_impl> : std::true_type {}; class thread_block { - friend __dpct_inline__ thread_block this_thread_block(sycl::nd_item<3>&); + friend __dpct_inline__ thread_block this_thread_block(::sycl::nd_item<3>&); public: __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } @@ -400,7 +400,7 @@ class thread_block { __dpct_inline__ void sync() const noexcept { group_.barrier(); } private: - __dpct_inline__ thread_block(sycl::nd_item<3>& group) + __dpct_inline__ thread_block(::sycl::nd_item<3>& group) : group_{group}, data_{static_cast(group.get_local_range().size()), static_cast(group.get_local_linear_id())} @@ -410,11 +410,11 @@ class thread_block { unsigned rank; } data_; - sycl::nd_item<3>& group_; + ::sycl::nd_item<3>& group_; }; -__dpct_inline__ thread_block this_thread_block(sycl::nd_item<3>& group) +__dpct_inline__ thread_block this_thread_block(::sycl::nd_item<3>& group) { return thread_block(group); } @@ -444,7 +444,7 @@ struct is_synchronizable_group_impl : std::true_type {}; * bit block) would have to be used to represent the full space of thread ranks. */ class grid_group { - friend __dpct_inline__ grid_group this_grid(sycl::nd_item<3>&); + friend __dpct_inline__ grid_group this_grid(::sycl::nd_item<3>&); public: __dpct_inline__ unsigned size() const noexcept { return data_.size; } @@ -452,7 +452,7 @@ class grid_group { __dpct_inline__ unsigned thread_rank() const noexcept { return data_.rank; } private: - __dpct_inline__ grid_group(sycl::nd_item<3>& group) + __dpct_inline__ grid_group(::sycl::nd_item<3>& group) : data_{static_cast(group.get_global_range().size()), static_cast(group.get_global_linear_id())} {} @@ -467,7 +467,7 @@ class grid_group { // grid_group this_grid() // using cooperative_groups::this_grid; // Instead, use our limited implementation: -__dpct_inline__ grid_group this_grid(sycl::nd_item<3>& group) +__dpct_inline__ grid_group this_grid(::sycl::nd_item<3>& group) { return grid_group(group); } diff --git a/dpcpp/components/diagonal_block_manipulation.dp.hpp b/dpcpp/components/diagonal_block_manipulation.dp.hpp index 900e669ee72..bd5f4cf5670 100644 --- a/dpcpp/components/diagonal_block_manipulation.dp.hpp +++ b/dpcpp/components/diagonal_block_manipulation.dp.hpp @@ -68,7 +68,7 @@ __dpct_inline__ void extract_transposed_diag_blocks( const ValueType* __restrict__ values, const IndexType* __restrict__ block_ptrs, size_type num_blocks, ValueType* __restrict__ block_row, int increment, - ValueType* __restrict__ workspace, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ workspace, ::sycl::nd_item<3> item_ct1) { const int tid = item_ct1.get_local_id(1) * item_ct1.get_local_range().get(2) + diff --git a/dpcpp/components/intrinsics.dp.hpp b/dpcpp/components/intrinsics.dp.hpp index fd2778adcee..440713526aa 100644 --- a/dpcpp/components/intrinsics.dp.hpp +++ b/dpcpp/components/intrinsics.dp.hpp @@ -52,10 +52,10 @@ namespace sycl { * @internal * 
Returns the number of set bits in the given mask. */ -__dpct_inline__ int popcnt(uint32 mask) { return sycl::popcount(mask); } +__dpct_inline__ int popcnt(uint32 mask) { return ::sycl::popcount(mask); } /** @copydoc popcnt */ -__dpct_inline__ int popcnt(uint64 mask) { return sycl::popcount(mask); } +__dpct_inline__ int popcnt(uint64 mask) { return ::sycl::popcount(mask); } /** @@ -67,13 +67,13 @@ __dpct_inline__ int popcnt(uint64 mask) { return sycl::popcount(mask); } */ __dpct_inline__ int ffs(uint32 mask) { - return (mask == 0) ? 0 : (sycl::ext::intel::ctz(mask) + 1); + return (mask == 0) ? 0 : (::sycl::ext::intel::ctz(mask) + 1); } /** @copydoc ffs */ __dpct_inline__ int ffs(uint64 mask) { - return (mask == 0) ? 0 : (sycl::ext::intel::ctz(mask) + 1); + return (mask == 0) ? 0 : (::sycl::ext::intel::ctz(mask) + 1); } @@ -82,10 +82,10 @@ __dpct_inline__ int ffs(uint64 mask) * Returns the number of zero bits before the first set bit in the given mask, * starting from the most significant bit. */ -__dpct_inline__ int clz(uint32 mask) { return sycl::clz(mask); } +__dpct_inline__ int clz(uint32 mask) { return ::sycl::clz(mask); } /** @copydoc clz */ -__dpct_inline__ int clz(uint64 mask) { return sycl::clz(mask); } +__dpct_inline__ int clz(uint64 mask) { return ::sycl::clz(mask); } } // namespace sycl diff --git a/dpcpp/components/prefix_sum.dp.hpp b/dpcpp/components/prefix_sum.dp.hpp index 7d4cf21f22d..4b5b4c7b218 100644 --- a/dpcpp/components/prefix_sum.dp.hpp +++ b/dpcpp/components/prefix_sum.dp.hpp @@ -132,7 +132,7 @@ __dpct_inline__ void subwarp_prefix_sum(ValueType element, template void start_prefix_sum( size_type num_elements, ValueType* __restrict__ elements, - ValueType* __restrict__ block_sum, sycl::nd_item<3> item_ct1, + ValueType* __restrict__ block_sum, ::sycl::nd_item<3> item_ct1, uninitialized_array& prefix_helper) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -185,17 +185,17 @@ void start_prefix_sum( template void start_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_elements, + ::sycl::queue* queue, size_type num_elements, ValueType* elements, ValueType* block_sum) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, - 0, sycl::access::mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< + uninitialized_array, 0, + ::sycl::access::mode::read_write, ::sycl::access::target::local> prefix_helper_acc_ct1(cgh); cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { start_prefix_sum( num_elements, elements, block_sum, item_ct1, *prefix_helper_acc_ct1.get_pointer()); @@ -222,7 +222,7 @@ template void finalize_prefix_sum(size_type num_elements, ValueType* __restrict__ elements, const ValueType* __restrict__ block_sum, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -237,12 +237,12 @@ void finalize_prefix_sum(size_type num_elements, template void finalize_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_elements, + ::sycl::queue* queue, size_type num_elements, ValueType* elements, const ValueType* block_sum) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { finalize_prefix_sum( num_elements, elements, block_sum, 
item_ct1); }); diff --git a/dpcpp/components/reduction.dp.hpp b/dpcpp/components/reduction.dp.hpp index a0f4d3e8d6c..54847ff986e 100644 --- a/dpcpp/components/reduction.dp.hpp +++ b/dpcpp/components/reduction.dp.hpp @@ -173,7 +173,7 @@ void reduce(const Group& __restrict__ group, ValueType* __restrict__ data, template void reduce_array(size_type size, const ValueType* __restrict__ source, - ValueType* __restrict__ result, sycl::nd_item<3> item_ct1, + ValueType* __restrict__ result, ::sycl::nd_item<3> item_ct1, Operator reduce_op = Operator{}) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -201,7 +201,7 @@ void reduce_array(size_type size, const ValueType* __restrict__ source, template void reduce_add_array( size_type size, const ValueType* __restrict__ source, - ValueType* __restrict__ result, sycl::nd_item<3> item_ct1, + ValueType* __restrict__ result, ::sycl::nd_item<3> item_ct1, uninitialized_array& block_sum) { reduce_array( @@ -215,18 +215,18 @@ void reduce_add_array( template , typename ValueType> void reduce_add_array(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type size, + ::sycl::queue* queue, size_type size, const ValueType* source, ValueType* result) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, - 0, sycl::access::mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< + uninitialized_array, 0, + ::sycl::access::mode::read_write, ::sycl::access::target::local> block_sum_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(DeviceConfig::subgroup_size)]] { reduce_add_array( size, source, result, item_ct1, diff --git a/dpcpp/components/sorting.dp.hpp b/dpcpp/components/sorting.dp.hpp index a7ec4f26eb6..ea5f37c11be 100644 --- a/dpcpp/components/sorting.dp.hpp +++ b/dpcpp/components/sorting.dp.hpp @@ -136,13 +136,13 @@ struct bitonic_warp { // 1. for sorting, we have to reverse the sort order in the upper half // 2. for merging, we have to determine for the XOR shuffle if we are // the "smaller" thread, as this thread gets the "smaller" element. 
- __dpct_inline__ static bool upper_half(sycl::nd_item<3> item_ct1) + __dpct_inline__ static bool upper_half(::sycl::nd_item<3> item_ct1) { return bool(item_ct1.get_local_id(2) & (num_threads / 2)); } __dpct_inline__ static void merge(ValueType* els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tile = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -155,7 +155,7 @@ struct bitonic_warp { } __dpct_inline__ static void sort(ValueType* els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto new_reverse = reverse != upper_half(item_ct1); half::sort(els, new_reverse, item_ct1); @@ -167,12 +167,12 @@ template struct bitonic_warp { using local = bitonic_local; __dpct_inline__ static void merge(ValueType* els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { local::merge(els, reverse); } __dpct_inline__ static void sort(ValueType* els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { local::sort(els, reverse); } @@ -201,7 +201,8 @@ struct bitonic_global { static_assert(32 % num_groups == 0, "num_groups must be a power of two <= 32"); - __dpct_inline__ static int shared_idx(int local, sycl::nd_item<3> item_ct1) + __dpct_inline__ static int shared_idx(int local, + ::sycl::nd_item<3> item_ct1) { auto rank = group::this_thread_block(item_ct1).thread_rank(); // use the same memory-bank to avoid bank conflicts @@ -211,7 +212,7 @@ struct bitonic_global { // check if we are in the upper half of all groups in this block // this is important as for sorting, we have to reverse the sort order in // the upper half - __dpct_inline__ static bool upper_half(sycl::nd_item<3> item_ct1) + __dpct_inline__ static bool upper_half(::sycl::nd_item<3> item_ct1) { auto rank = group::this_thread_block(item_ct1).thread_rank(); return bool(rank & (num_groups * num_threads / 2)); @@ -219,7 +220,7 @@ struct bitonic_global { __dpct_inline__ static void merge(ValueType* local_els, ValueType* shared_els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { group::this_thread_block(item_ct1).sync(); auto upper_shared_els = shared_els + (num_groups * num_threads / 2); @@ -235,7 +236,7 @@ struct bitonic_global { __dpct_inline__ static void sort(ValueType* local_els, ValueType* shared_els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto new_reverse = reverse != upper_half(item_ct1); half::sort(local_els, shared_els, new_reverse, item_ct1); @@ -248,7 +249,8 @@ template { using warp = bitonic_warp; - __dpct_inline__ static int shared_idx(int local, sycl::nd_item<3> item_ct1) + __dpct_inline__ static int shared_idx(int local, + ::sycl::nd_item<3> item_ct1) { // use the indexing from the general struct return bitonic_global { __dpct_inline__ static void merge(ValueType* local_els, ValueType* shared_els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { group::this_thread_block(item_ct1).sync(); for (int i = 0; i < num_local; ++i) { @@ -271,7 +273,7 @@ struct bitonic_global { __dpct_inline__ static void sort(ValueType* local_els, ValueType* shared_els, bool reverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto rank = group::this_thread_block(item_ct1).thread_rank(); // This is the first step, so we don't need to load from shared memory @@ -316,7 +318,7 @@ struct bitonic_global { template __dpct_inline__ void bitonic_sort(ValueType* local_elements, ValueType* shared_elements, - sycl::nd_item<3> item_ct1) + 
::sycl::nd_item<3> item_ct1) { constexpr auto num_threads = num_elements / num_local; constexpr auto num_warps = num_threads / config::warp_size; diff --git a/dpcpp/components/thread_ids.dp.hpp b/dpcpp/components/thread_ids.dp.hpp index a5724d2f5d0..63973013dfe 100644 --- a/dpcpp/components/thread_ids.dp.hpp +++ b/dpcpp/components/thread_ids.dp.hpp @@ -68,7 +68,7 @@ namespace thread { * @note Assumes that grid dimensions are in cuda standard format: * `(block_group_size, first_grid_dimension, second grid_dimension)` */ -__dpct_inline__ size_type get_block_group_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_block_group_id(::sycl::nd_item<3> item_ct1) { return static_cast(item_ct1.get_group(0)) * item_ct1.get_group_range(1) + @@ -85,7 +85,7 @@ __dpct_inline__ size_type get_block_group_id(sycl::nd_item<3> item_ct1) * @note Assumes that grid dimensions are in cuda standard format: * `(block_group_size, first_grid_dimension, second grid_dimension)` */ -__dpct_inline__ size_type get_block_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_block_id(::sycl::nd_item<3> item_ct1) { return get_block_group_id(item_ct1) * item_ct1.get_group_range(2) + item_ct1.get_group(2); @@ -105,7 +105,7 @@ __dpct_inline__ size_type get_block_id(sycl::nd_item<3> item_ct1) * `(subwarp_size, config::warp_size / subwarp_size, block_size / * config::warp_size)` */ -__dpct_inline__ size_type get_local_warp_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_local_warp_id(::sycl::nd_item<3> item_ct1) { return static_cast(item_ct1.get_local_id(0)); } @@ -127,7 +127,7 @@ __dpct_inline__ size_type get_local_warp_id(sycl::nd_item<3> item_ct1) * config::warp_size)` */ template -__dpct_inline__ size_type get_local_subwarp_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_local_subwarp_id(::sycl::nd_item<3> item_ct1) { // sycl does not have subwarp. constexpr auto subwarps_per_warp = subwarp_size / subwarp_size; @@ -151,7 +151,7 @@ __dpct_inline__ size_type get_local_subwarp_id(sycl::nd_item<3> item_ct1) * config::warp_size)` */ template -__dpct_inline__ size_type get_local_thread_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_local_thread_id(::sycl::nd_item<3> item_ct1) { return get_local_subwarp_id(item_ct1) * subwarp_size + item_ct1.get_local_id(2); @@ -175,7 +175,7 @@ __dpct_inline__ size_type get_local_thread_id(sycl::nd_item<3> item_ct1) * respectively. */ template -__dpct_inline__ size_type get_warp_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_warp_id(::sycl::nd_item<3> item_ct1) { return get_block_id(item_ct1) * warps_per_block + get_local_warp_id(item_ct1); @@ -199,7 +199,7 @@ __dpct_inline__ size_type get_warp_id(sycl::nd_item<3> item_ct1) * respectively. */ template -__dpct_inline__ size_type get_subwarp_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_subwarp_id(::sycl::nd_item<3> item_ct1) { // sycl does not have subwarp constexpr auto subwarps_per_warp = subwarp_size / subwarp_size; @@ -225,7 +225,7 @@ __dpct_inline__ size_type get_subwarp_id(sycl::nd_item<3> item_ct1) * respectively. 
*/ template -__dpct_inline__ size_type get_thread_id(sycl::nd_item<3> item_ct1) +__dpct_inline__ size_type get_thread_id(::sycl::nd_item<3> item_ct1) { return get_subwarp_id(item_ct1) * subwarp_size + @@ -245,7 +245,7 @@ __dpct_inline__ size_type get_thread_id(sycl::nd_item<3> item_ct1) * @tparam IndexType the index type */ template -__dpct_inline__ IndexType get_thread_id_flat(sycl::nd_item<3> item_ct1) +__dpct_inline__ IndexType get_thread_id_flat(::sycl::nd_item<3> item_ct1) { return item_ct1.get_local_id(2) + static_cast(item_ct1.get_local_range().get(2)) * @@ -265,7 +265,7 @@ __dpct_inline__ IndexType get_thread_id_flat(sycl::nd_item<3> item_ct1) * @tparam IndexType the index type */ template -__dpct_inline__ IndexType get_thread_num_flat(sycl::nd_item<3> item_ct1) +__dpct_inline__ IndexType get_thread_num_flat(::sycl::nd_item<3> item_ct1) { return item_ct1.get_local_range().get(2) * static_cast(item_ct1.get_group_range(2)); @@ -285,7 +285,7 @@ __dpct_inline__ IndexType get_thread_num_flat(sycl::nd_item<3> item_ct1) * @tparam IndexType the index type */ template -__dpct_inline__ IndexType get_subwarp_id_flat(sycl::nd_item<3> item_ct1) +__dpct_inline__ IndexType get_subwarp_id_flat(::sycl::nd_item<3> item_ct1) { static_assert(!(subwarp_size & (subwarp_size - 1)), "subwarp_size must be a power of two"); @@ -309,7 +309,7 @@ __dpct_inline__ IndexType get_subwarp_id_flat(sycl::nd_item<3> item_ct1) * @tparam IndexType the index type */ template -__dpct_inline__ IndexType get_subwarp_num_flat(sycl::nd_item<3> item_ct1) +__dpct_inline__ IndexType get_subwarp_num_flat(::sycl::nd_item<3> item_ct1) { static_assert(!(subwarp_size & (subwarp_size - 1)), "subwarp_size must be a power of two"); diff --git a/dpcpp/factorization/cholesky_kernels.dp.cpp b/dpcpp/factorization/cholesky_kernels.dp.cpp index b23673bfff2..3bc84c4b08a 100644 --- a/dpcpp/factorization/cholesky_kernels.dp.cpp +++ b/dpcpp/factorization/cholesky_kernels.dp.cpp @@ -74,8 +74,8 @@ void symbolic_count(std::shared_ptr exec, const auto postorder_parent = forest.postorder_parents.get_const_data(); auto queue = exec->get_queue(); // build sorted postorder node list for each row - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx_id) { const auto row = idx_id[0]; const auto row_begin = row_ptrs[row]; const auto row_end = row_ptrs[row + 1]; @@ -96,8 +96,8 @@ void symbolic_count(std::shared_ptr exec, }); }); // count nonzeros per row of L - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx_id) { const auto row = idx_id[0]; const auto row_begin = row_ptrs[row]; // instead of relying on the input containing a diagonal, we @@ -143,8 +143,8 @@ void symbolic_factorize( const auto postorder_parent = forest.postorder_parents.get_const_data(); const auto out_row_ptrs = l_factor->get_const_row_ptrs(); const auto out_cols = l_factor->get_col_idxs(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx_id) { const auto row = idx_id[0]; const auto row_begin = row_ptrs[row]; // instead of relying on the input 
containing a diagonal, we diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp index c771a73cb29..264b746389c 100644 --- a/dpcpp/factorization/factorization_kernels.dp.cpp +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -120,7 +120,7 @@ void find_missing_diagonal_elements( const IndexType* __restrict__ col_idxs, const IndexType* __restrict__ row_ptrs, IndexType* __restrict__ elements_to_add_per_row, - bool* __restrict__ changes_required, sycl::nd_item<3> item_ct1) + bool* __restrict__ changes_required, ::sycl::nd_item<3> item_ct1) { const auto total_subwarp_count = thread::get_subwarp_num_flat(item_ct1); @@ -167,14 +167,14 @@ void find_missing_diagonal_elements( template void find_missing_diagonal_elements( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - IndexType num_rows, IndexType num_cols, const IndexType* col_idxs, - const IndexType* row_ptrs, IndexType* elements_to_add_per_row, - bool* changes_required) + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, IndexType num_rows, IndexType num_cols, + const IndexType* col_idxs, const IndexType* row_ptrs, + IndexType* elements_to_add_per_row, bool* changes_required) { queue->parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(SubwarpSize)]] { find_missing_diagonal_elements( num_rows, num_cols, col_idxs, row_ptrs, @@ -191,7 +191,8 @@ void add_missing_diagonal_elements( const IndexType* __restrict__ old_col_idxs, const IndexType* __restrict__ old_row_ptrs, ValueType* __restrict__ new_values, IndexType* __restrict__ new_col_idxs, - const IndexType* __restrict__ row_ptrs_addition, sycl::nd_item<3> item_ct1) + const IndexType* __restrict__ row_ptrs_addition, + ::sycl::nd_item<3> item_ct1) { // Precaution in case not enough threads were created const auto total_subwarp_count = @@ -267,14 +268,14 @@ void add_missing_diagonal_elements( template void add_missing_diagonal_elements( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - IndexType num_rows, const ValueType* old_values, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, IndexType num_rows, const ValueType* old_values, const IndexType* old_col_idxs, const IndexType* old_row_ptrs, ValueType* new_values, IndexType* new_col_idxs, const IndexType* row_ptrs_addition) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(SubwarpSize)]] { add_missing_diagonal_elements( num_rows, old_values, old_col_idxs, @@ -287,7 +288,7 @@ void add_missing_diagonal_elements( template void update_row_ptrs(IndexType num_rows, IndexType* __restrict__ row_ptrs, IndexType* __restrict__ row_ptr_addition, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto total_thread_count = thread::get_thread_num_flat(item_ct1); @@ -300,11 +301,11 @@ void update_row_ptrs(IndexType num_rows, IndexType* __restrict__ row_ptrs, template void update_row_ptrs(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, IndexType num_rows, + ::sycl::queue* queue, IndexType num_rows, IndexType* row_ptrs, IndexType* row_ptr_addition) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { update_row_ptrs(num_rows, row_ptrs, row_ptr_addition, 
item_ct1); }); } @@ -317,7 +318,7 @@ void count_nnz_per_l_u_row(size_type num_rows, const ValueType* __restrict__ values, IndexType* __restrict__ l_nnz_row, IndexType* __restrict__ u_nnz_row, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto row = thread::get_thread_id_flat(item_ct1); if (row < num_rows) { @@ -337,13 +338,14 @@ void count_nnz_per_l_u_row(size_type num_rows, template void count_nnz_per_l_u_row(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, const IndexType* row_ptrs, - const IndexType* col_idxs, const ValueType* values, - IndexType* l_nnz_row, IndexType* u_nnz_row) + size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, + const IndexType* row_ptrs, const IndexType* col_idxs, + const ValueType* values, IndexType* l_nnz_row, + IndexType* u_nnz_row) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { count_nnz_per_l_u_row(num_rows, row_ptrs, col_idxs, values, l_nnz_row, u_nnz_row, item_ct1); }); @@ -359,7 +361,8 @@ void initialize_l_u(size_type num_rows, const IndexType* __restrict__ row_ptrs, ValueType* __restrict__ l_values, const IndexType* __restrict__ u_row_ptrs, IndexType* __restrict__ u_col_idxs, - ValueType* __restrict__ u_values, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ u_values, + ::sycl::nd_item<3> item_ct1) { const auto row = thread::get_thread_id_flat(item_ct1); if (row < num_rows) { @@ -397,7 +400,7 @@ void initialize_l_u(size_type num_rows, const IndexType* __restrict__ row_ptrs, template void initialize_l_u(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, const IndexType* row_ptrs, const IndexType* col_idxs, const ValueType* values, const IndexType* l_row_ptrs, IndexType* l_col_idxs, ValueType* l_values, @@ -405,7 +408,7 @@ void initialize_l_u(dim3 grid, dim3 block, size_type dynamic_shared_memory, ValueType* u_values) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { initialize_l_u(num_rows, row_ptrs, col_idxs, values, l_row_ptrs, l_col_idxs, l_values, u_row_ptrs, u_col_idxs, u_values, item_ct1); @@ -419,7 +422,7 @@ void count_nnz_per_l_row(size_type num_rows, const IndexType* __restrict__ col_idxs, const ValueType* __restrict__ values, IndexType* __restrict__ l_nnz_row, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto row = thread::get_thread_id_flat(item_ct1); if (row < num_rows) { @@ -436,12 +439,12 @@ void count_nnz_per_l_row(size_type num_rows, template void count_nnz_per_l_row(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, const IndexType* row_ptrs, const IndexType* col_idxs, const ValueType* values, IndexType* l_nnz_row) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { count_nnz_per_l_row(num_rows, row_ptrs, col_idxs, values, l_nnz_row, item_ct1); }); @@ -455,7 +458,7 @@ void initialize_l(size_type num_rows, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ l_row_ptrs, IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_values, bool use_sqrt, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto row = 
thread::get_thread_id_flat(item_ct1); if (row < num_rows) { @@ -491,13 +494,13 @@ void initialize_l(size_type num_rows, const IndexType* __restrict__ row_ptrs, template void initialize_l(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, const IndexType* row_ptrs, const IndexType* col_idxs, const ValueType* values, const IndexType* l_row_ptrs, IndexType* l_col_idxs, ValueType* l_values, bool use_sqrt) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { initialize_l(num_rows, row_ptrs, col_idxs, values, l_row_ptrs, l_col_idxs, l_values, use_sqrt, item_ct1); }); diff --git a/dpcpp/factorization/par_ic_kernels.dp.cpp b/dpcpp/factorization/par_ic_kernels.dp.cpp index bf954e3c374..d857359b7f6 100644 --- a/dpcpp/factorization/par_ic_kernels.dp.cpp +++ b/dpcpp/factorization/par_ic_kernels.dp.cpp @@ -65,7 +65,7 @@ namespace kernel { template void ic_init(const IndexType* __restrict__ l_row_ptrs, ValueType* __restrict__ l_vals, size_type num_rows, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto row = thread::get_thread_id_flat(item_ct1); if (row >= num_rows) { @@ -82,11 +82,11 @@ void ic_init(const IndexType* __restrict__ l_row_ptrs, template void ic_init(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* l_row_ptrs, ValueType* l_vals, - size_type num_rows) + ::sycl::queue* queue, const IndexType* l_row_ptrs, + ValueType* l_vals, size_type num_rows) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { ic_init(l_row_ptrs, l_vals, num_rows, item_ct1); }); } @@ -99,7 +99,7 @@ void ic_sweep(const IndexType* __restrict__ a_row_idxs, const IndexType* __restrict__ l_row_ptrs, const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, IndexType l_nnz, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto l_nz = thread::get_thread_id_flat(item_ct1); if (l_nz >= l_nnz) { @@ -133,13 +133,13 @@ void ic_sweep(const IndexType* __restrict__ a_row_idxs, template void ic_sweep(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* a_row_idxs, + ::sycl::queue* queue, const IndexType* a_row_idxs, const IndexType* a_col_idxs, const ValueType* a_vals, const IndexType* l_row_ptrs, const IndexType* l_col_idxs, ValueType* l_vals, IndexType l_nnz) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { ic_sweep(a_row_idxs, a_col_idxs, a_vals, l_row_ptrs, l_col_idxs, l_vals, l_nnz, item_ct1); }); diff --git a/dpcpp/factorization/par_ict_kernels.dp.cpp b/dpcpp/factorization/par_ict_kernels.dp.cpp index 99f80a2c61e..704d774e0a1 100644 --- a/dpcpp/factorization/par_ict_kernels.dp.cpp +++ b/dpcpp/factorization/par_ict_kernels.dp.cpp @@ -88,7 +88,7 @@ void ict_tri_spgeam_nnz(const IndexType* __restrict__ llh_row_ptrs, const IndexType* __restrict__ a_row_ptrs, const IndexType* __restrict__ a_col_idxs, IndexType* __restrict__ l_new_row_ptrs, - IndexType num_rows, sycl::nd_item<3> item_ct1) + IndexType num_rows, ::sycl::nd_item<3> item_ct1) { auto subwarp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -120,14 +120,14 @@ void ict_tri_spgeam_nnz(const IndexType* __restrict__ llh_row_ptrs, template void ict_tri_spgeam_nnz(dim3 grid, dim3 block, size_type 
dynamic_shared_memory, - sycl::queue* queue, const IndexType* llh_row_ptrs, + ::sycl::queue* queue, const IndexType* llh_row_ptrs, const IndexType* llh_col_idxs, const IndexType* a_row_ptrs, const IndexType* a_col_idxs, IndexType* l_new_row_ptrs, IndexType num_rows) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { ict_tri_spgeam_nnz( llh_row_ptrs, llh_col_idxs, a_row_ptrs, @@ -150,7 +150,7 @@ void ict_tri_spgeam_init(const IndexType* __restrict__ llh_row_ptrs, const IndexType* __restrict__ l_new_row_ptrs, IndexType* __restrict__ l_new_col_idxs, ValueType* __restrict__ l_new_vals, IndexType num_rows, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto subwarp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -302,7 +302,7 @@ void ict_tri_spgeam_init(const IndexType* __restrict__ llh_row_ptrs, template void ict_tri_spgeam_init(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* llh_row_ptrs, + ::sycl::queue* queue, const IndexType* llh_row_ptrs, const IndexType* llh_col_idxs, const ValueType* llh_vals, const IndexType* a_row_ptrs, const IndexType* a_col_idxs, const ValueType* a_vals, @@ -313,7 +313,7 @@ void ict_tri_spgeam_init(dim3 grid, dim3 block, size_type dynamic_shared_memory, IndexType num_rows) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { ict_tri_spgeam_init( llh_row_ptrs, llh_col_idxs, llh_vals, @@ -337,7 +337,7 @@ void ict_sweep(const IndexType* __restrict__ a_row_ptrs, const IndexType* __restrict__ l_row_idxs, const IndexType* __restrict__ l_col_idxs, ValueType* __restrict__ l_vals, IndexType l_nnz, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto l_nz = thread::get_subwarp_id_flat(item_ct1); if (l_nz >= l_nnz) { @@ -398,13 +398,13 @@ void ict_sweep(const IndexType* __restrict__ a_row_ptrs, template void ict_sweep(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* a_row_ptrs, + ::sycl::queue* queue, const IndexType* a_row_ptrs, const IndexType* a_col_idxs, const ValueType* a_vals, const IndexType* l_row_ptrs, const IndexType* l_row_idxs, const IndexType* l_col_idxs, ValueType* l_vals, IndexType l_nnz) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { ict_sweep( a_row_ptrs, a_col_idxs, a_vals, l_row_ptrs, diff --git a/dpcpp/factorization/par_ilu_kernels.dp.cpp b/dpcpp/factorization/par_ilu_kernels.dp.cpp index 775b9ee8ad2..8a4ea679999 100644 --- a/dpcpp/factorization/par_ilu_kernels.dp.cpp +++ b/dpcpp/factorization/par_ilu_kernels.dp.cpp @@ -71,7 +71,7 @@ void compute_l_u_factors(size_type num_elements, const IndexType* __restrict__ u_row_ptrs, const IndexType* __restrict__ u_col_idxs, ValueType* __restrict__ u_values, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto elem_id = thread::get_thread_id_flat(item_ct1); if (elem_id < num_elements) { @@ -112,7 +112,7 @@ void compute_l_u_factors(size_type num_elements, template void compute_l_u_factors(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_elements, + ::sycl::queue* queue, size_type num_elements, const IndexType* row_idxs, const IndexType* col_idxs, const ValueType* values, const 
IndexType* l_row_ptrs, const IndexType* l_col_idxs, ValueType* l_values, @@ -120,7 +120,7 @@ void compute_l_u_factors(dim3 grid, dim3 block, size_type dynamic_shared_memory, const IndexType* u_col_idxs, ValueType* u_values) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { compute_l_u_factors(num_elements, row_idxs, col_idxs, values, l_row_ptrs, l_col_idxs, l_values, u_row_ptrs, u_col_idxs, u_values, item_ct1); diff --git a/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc b/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc index 063292a293f..1d74e3e5e6a 100644 --- a/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc +++ b/dpcpp/factorization/par_ilut_filter_kernels.hpp.inc @@ -39,7 +39,7 @@ template item_ct1) + ::sycl::nd_item<3> item_ct1) { auto subwarp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -67,7 +67,8 @@ void abstract_filter_impl(const IndexType* row_ptrs, IndexType num_rows, template void abstract_filter_nnz(const IndexType* __restrict__ row_ptrs, IndexType num_rows, Predicate pred, - IndexType* __restrict__ nnz, sycl::nd_item<3> item_ct1) + IndexType* __restrict__ nnz, + ::sycl::nd_item<3> item_ct1) { IndexType count{}; abstract_filter_impl( @@ -93,7 +94,7 @@ void abstract_filter(const IndexType* __restrict__ old_row_ptrs, IndexType* __restrict__ new_row_idxs, IndexType* __restrict__ new_col_idxs, ValueType* __restrict__ new_vals, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { IndexType count{}; IndexType new_offset{}; @@ -124,7 +125,7 @@ void threshold_filter_nnz(const IndexType* __restrict__ row_ptrs, const ValueType* vals, IndexType num_rows, remove_complex threshold, IndexType* __restrict__ nnz, bool lower, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { abstract_filter_nnz( row_ptrs, num_rows, @@ -137,18 +138,19 @@ void threshold_filter_nnz(const IndexType* __restrict__ row_ptrs, template void threshold_filter_nnz(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue* queue, + size_type dynamic_shared_memory, ::sycl::queue* queue, const IndexType* row_ptrs, const ValueType* vals, IndexType num_rows, remove_complex threshold, IndexType* nnz, bool lower) { - queue->parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { - threshold_filter_nnz( - row_ptrs, vals, num_rows, threshold, nnz, lower, item_ct1); - }); + queue->parallel_for(sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(subgroup_size)]] { + threshold_filter_nnz( + row_ptrs, vals, num_rows, threshold, nnz, + lower, item_ct1); + }); } @@ -161,7 +163,7 @@ void threshold_filter(const IndexType* __restrict__ old_row_ptrs, IndexType* __restrict__ new_row_idxs, IndexType* __restrict__ new_col_idxs, ValueType* __restrict__ new_vals, bool lower, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { abstract_filter( old_row_ptrs, old_col_idxs, old_vals, num_rows, @@ -174,20 +176,21 @@ void threshold_filter(const IndexType* __restrict__ old_row_ptrs, template void threshold_filter(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* old_row_ptrs, + ::sycl::queue* queue, const IndexType* old_row_ptrs, const IndexType* old_col_idxs, const ValueType* old_vals, IndexType num_rows, remove_complex threshold, const IndexType* new_row_ptrs, IndexType* new_row_idxs, IndexType* new_col_idxs, ValueType* 
new_vals, bool lower) { - queue->parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { - threshold_filter( - old_row_ptrs, old_col_idxs, old_vals, num_rows, threshold, - new_row_ptrs, new_row_idxs, new_col_idxs, new_vals, lower, - item_ct1); - }); + queue->parallel_for(sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(subgroup_size)]] { + threshold_filter( + old_row_ptrs, old_col_idxs, old_vals, + num_rows, threshold, new_row_ptrs, + new_row_idxs, new_col_idxs, new_vals, lower, + item_ct1); + }); } @@ -195,7 +198,7 @@ template void bucket_filter_nnz(const IndexType* __restrict__ row_ptrs, const BucketType* buckets, IndexType num_rows, BucketType bucket, IndexType* __restrict__ nnz, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { abstract_filter_nnz( row_ptrs, num_rows, @@ -207,16 +210,17 @@ void bucket_filter_nnz(const IndexType* __restrict__ row_ptrs, template void bucket_filter_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* row_ptrs, + ::sycl::queue* queue, const IndexType* row_ptrs, const BucketType* buckets, IndexType num_rows, BucketType bucket, IndexType* nnz) { - queue->parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { - bucket_filter_nnz(row_ptrs, buckets, num_rows, - bucket, nnz, item_ct1); - }); + queue->parallel_for(sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(subgroup_size)]] { + bucket_filter_nnz( + row_ptrs, buckets, num_rows, bucket, nnz, + item_ct1); + }); } @@ -230,7 +234,8 @@ void bucket_filter(const IndexType* __restrict__ old_row_ptrs, const IndexType* __restrict__ new_row_ptrs, IndexType* __restrict__ new_row_idxs, IndexType* __restrict__ new_col_idxs, - ValueType* __restrict__ new_vals, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ new_vals, + ::sycl::nd_item<3> item_ct1) { abstract_filter( old_row_ptrs, old_col_idxs, old_vals, num_rows, @@ -243,20 +248,22 @@ void bucket_filter(const IndexType* __restrict__ old_row_ptrs, template void bucket_filter(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* old_row_ptrs, + ::sycl::queue* queue, const IndexType* old_row_ptrs, const IndexType* old_col_idxs, const ValueType* old_vals, const BucketType* buckets, IndexType num_rows, BucketType bucket, const IndexType* new_row_ptrs, IndexType* new_row_idxs, IndexType* new_col_idxs, ValueType* new_vals) { - queue->parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { - bucket_filter( - old_row_ptrs, old_col_idxs, old_vals, buckets, num_rows, bucket, - new_row_ptrs, new_row_idxs, new_col_idxs, new_vals, item_ct1); - }); + queue->parallel_for(sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(subgroup_size)]] { + bucket_filter( + old_row_ptrs, old_col_idxs, old_vals, + buckets, num_rows, bucket, new_row_ptrs, + new_row_idxs, new_col_idxs, new_vals, + item_ct1); + }); } diff --git a/dpcpp/factorization/par_ilut_select_kernels.hpp.inc b/dpcpp/factorization/par_ilut_select_kernels.hpp.inc index 41fa99cc24e..006256ca7a1 100644 --- a/dpcpp/factorization/par_ilut_select_kernels.hpp.inc +++ b/dpcpp/factorization/par_ilut_select_kernels.hpp.inc @@ -54,7 +54,7 @@ constexpr auto basecase_block_size = basecase_size / basecase_local_size; 
template void build_searchtree(const ValueType* __restrict__ input, IndexType size, remove_complex* __restrict__ tree_output, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, remove_complex* sh_samples) { using AbsType = remove_complex; @@ -87,22 +87,22 @@ void build_searchtree(const ValueType* __restrict__ input, IndexType size, template void build_searchtree(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const ValueType* input, + ::sycl::queue* queue, const ValueType* input, IndexType size, remove_complex* tree_output) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 1, - sycl::access_mode::read_write, - sycl::access::target::local> - sh_samples_acc_ct1(sycl::range<1>(1024 /*sample_size*/), cgh); - - cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - build_searchtree(input, size, tree_output, item_ct1, - sh_samples_acc_ct1.get_pointer()); - }); + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 1, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> + sh_samples_acc_ct1(::sycl::range<1>(1024 /*sample_size*/), cgh); + + cgh.parallel_for(sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + build_searchtree( + input, size, tree_output, item_ct1, + sh_samples_acc_ct1.get_pointer()); + }); }); } @@ -119,7 +119,7 @@ template void count_buckets(const ValueType* __restrict__ input, IndexType size, const remove_complex* __restrict__ tree, IndexType* counter, unsigned char* oracles, - int items_per_thread, sycl::nd_item<3> item_ct1, + int items_per_thread, ::sycl::nd_item<3> item_ct1, remove_complex* sh_tree, IndexType* sh_counter) { // load tree into shared memory, initialize counters @@ -168,21 +168,22 @@ void count_buckets(const ValueType* __restrict__ input, IndexType size, template void count_buckets(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const ValueType* input, IndexType size, + ::sycl::queue* queue, const ValueType* input, IndexType size, const remove_complex* tree, IndexType* counter, unsigned char* oracles, int items_per_thread) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 1, - sycl::access_mode::read_write, - sycl::access::target::local> - sh_tree_acc_ct1(sycl::range<1>(255 /*searchtree_inner_size*/), cgh); - sycl::accessor - sh_counter_acc_ct1(sycl::range<1>(256 /*searchtree_width*/), cgh); + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 1, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> + sh_tree_acc_ct1(::sycl::range<1>(255 /*searchtree_inner_size*/), + cgh); + ::sycl::accessor + sh_counter_acc_ct1(::sycl::range<1>(256 /*searchtree_width*/), cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { count_buckets( input, size, tree, counter, oracles, items_per_thread, item_ct1, @@ -204,7 +205,7 @@ void count_buckets(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void block_prefix_sum(IndexType* __restrict__ counters, IndexType* __restrict__ totals, IndexType num_blocks, - sycl::nd_item<3> item_ct1, IndexType* warp_sums) + ::sycl::nd_item<3> item_ct1, IndexType* warp_sums) { constexpr auto num_warps = default_block_size / config::warp_size; static_assert(num_warps < config::warp_size, @@ -273,23 +274,24 @@ void block_prefix_sum(IndexType* __restrict__ 
counters, template void block_prefix_sum(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, IndexType* counters, + ::sycl::queue* queue, IndexType* counters, IndexType* totals, IndexType num_blocks) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor - warp_sums_acc_ct1(sycl::range<1>(default_block_size / - config::warp_size /*num_warps*/), + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor + warp_sums_acc_ct1(::sycl::range<1>(default_block_size / + config::warp_size /*num_warps*/), cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - block_prefix_sum(counters, totals, num_blocks, item_ct1, - (IndexType*)warp_sums_acc_ct1.get_pointer()); - }); + sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + block_prefix_sum( + counters, totals, num_blocks, item_ct1, + (IndexType*)warp_sums_acc_ct1.get_pointer()); + }); }); } @@ -305,7 +307,7 @@ void filter_bucket(const ValueType* __restrict__ input, IndexType size, unsigned char bucket, const unsigned char* oracles, const IndexType* block_offsets, remove_complex* __restrict__ output, - int items_per_thread, sycl::nd_item<3> item_ct1, + int items_per_thread, ::sycl::nd_item<3> item_ct1, IndexType* counter) { // initialize the counter with the block prefix sum. @@ -337,18 +339,18 @@ void filter_bucket(const ValueType* __restrict__ input, IndexType size, template void filter_bucket(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const ValueType* input, IndexType size, + ::sycl::queue* queue, const ValueType* input, IndexType size, unsigned char bucket, const unsigned char* oracles, const IndexType* block_offsets, remove_complex* output, int items_per_thread) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor counter_acc_ct1(cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { filter_bucket(input, size, bucket, oracles, block_offsets, output, items_per_thread, item_ct1, (IndexType*)counter_acc_ct1.get_pointer()); @@ -365,7 +367,7 @@ void filter_bucket(dim3 grid, dim3 block, size_type dynamic_shared_memory, template void basecase_select(const ValueType* __restrict__ input, IndexType size, IndexType rank, ValueType* __restrict__ out, - sycl::nd_item<3> item_ct1, ValueType* sh_local) + ::sycl::nd_item<3> item_ct1, ValueType* sh_local) { constexpr auto sentinel = std::numeric_limits::infinity(); ValueType local[basecase_local_size]; @@ -382,21 +384,21 @@ void basecase_select(const ValueType* __restrict__ input, IndexType size, template void basecase_select(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const ValueType* input, IndexType size, - IndexType rank, ValueType* out) + ::sycl::queue* queue, const ValueType* input, + IndexType size, IndexType rank, ValueType* out) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor - sh_local_acc_ct1(sycl::range<1>(1024 /*basecase_size*/), cgh); + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor + sh_local_acc_ct1(::sycl::range<1>(1024 /*basecase_size*/), cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - basecase_select(input, size, rank, out, item_ct1, - 
(ValueType*)sh_local_acc_ct1.get_pointer()); - }); + sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + basecase_select(input, size, rank, out, item_ct1, + (ValueType*)sh_local_acc_ct1.get_pointer()); + }); }); } @@ -410,7 +412,7 @@ void basecase_select(dim3 grid, dim3 block, size_type dynamic_shared_memory, */ template void find_bucket(IndexType* prefix_sum, IndexType rank, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto warp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -429,14 +431,13 @@ void find_bucket(IndexType* prefix_sum, IndexType rank, template void find_bucket(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, IndexType* prefix_sum, IndexType rank) + ::sycl::queue* queue, IndexType* prefix_sum, IndexType rank) { - queue->parallel_for( - sycl_nd_range(grid, block), [= - ](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size( - config::warp_size)]] { - find_bucket(prefix_sum, rank, item_ct1); - }); + queue->parallel_for(sycl_nd_range(grid, block), + [=](::sycl::nd_item<3> item_ct1) + [[sycl::reqd_sub_group_size(config::warp_size)]] { + find_bucket(prefix_sum, rank, item_ct1); + }); } diff --git a/dpcpp/factorization/par_ilut_spgeam_kernel.dp.cpp b/dpcpp/factorization/par_ilut_spgeam_kernel.dp.cpp index e0fe8caaf85..e754aa52397 100644 --- a/dpcpp/factorization/par_ilut_spgeam_kernel.dp.cpp +++ b/dpcpp/factorization/par_ilut_spgeam_kernel.dp.cpp @@ -88,7 +88,7 @@ void tri_spgeam_nnz(const IndexType* __restrict__ lu_row_ptrs, const IndexType* __restrict__ a_col_idxs, IndexType* __restrict__ l_new_row_ptrs, IndexType* __restrict__ u_new_row_ptrs, IndexType num_rows, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto subwarp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -123,13 +123,13 @@ void tri_spgeam_nnz(const IndexType* __restrict__ lu_row_ptrs, template void tri_spgeam_nnz(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* lu_row_ptrs, + ::sycl::queue* queue, const IndexType* lu_row_ptrs, const IndexType* lu_col_idxs, const IndexType* a_row_ptrs, const IndexType* a_col_idxs, IndexType* l_new_row_ptrs, IndexType* u_new_row_ptrs, IndexType num_rows) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { tri_spgeam_nnz( lu_row_ptrs, lu_col_idxs, a_row_ptrs, @@ -160,7 +160,7 @@ void tri_spgeam_init(const IndexType* __restrict__ lu_row_ptrs, const IndexType* __restrict__ u_new_row_ptrs, IndexType* __restrict__ u_new_col_idxs, ValueType* __restrict__ u_new_vals, IndexType num_rows, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto subwarp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -342,7 +342,7 @@ void tri_spgeam_init(const IndexType* __restrict__ lu_row_ptrs, template void tri_spgeam_init(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* lu_row_ptrs, + ::sycl::queue* queue, const IndexType* lu_row_ptrs, const IndexType* lu_col_idxs, const ValueType* lu_vals, const IndexType* a_row_ptrs, const IndexType* a_col_idxs, const ValueType* a_vals, const IndexType* l_row_ptrs, @@ -354,7 +354,7 @@ void tri_spgeam_init(dim3 grid, dim3 block, size_type dynamic_shared_memory, ValueType* u_new_vals, IndexType num_rows) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> 
item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { tri_spgeam_init( lu_row_ptrs, lu_col_idxs, lu_vals, diff --git a/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp b/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp index 4209e402d00..d62ca1f59ee 100644 --- a/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp +++ b/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp @@ -92,7 +92,7 @@ void sweep(const IndexType* __restrict__ a_row_ptrs, const IndexType* __restrict__ ut_col_ptrs, const IndexType* __restrict__ ut_row_idxs, ValueType* __restrict__ ut_vals, IndexType u_nnz, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tidx = thread::get_subwarp_id_flat(item_ct1); if (tidx >= l_nnz + u_nnz) { @@ -167,7 +167,7 @@ void sweep(const IndexType* __restrict__ a_row_ptrs, template void sweep(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* a_row_ptrs, + ::sycl::queue* queue, const IndexType* a_row_ptrs, const IndexType* a_col_idxs, const ValueType* a_vals, const IndexType* l_row_ptrs, const IndexType* l_row_idxs, const IndexType* l_col_idxs, ValueType* l_vals, IndexType l_nnz, @@ -176,7 +176,7 @@ void sweep(dim3 grid, dim3 block, size_type dynamic_shared_memory, const IndexType* ut_row_idxs, ValueType* ut_vals, IndexType u_nnz) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { sweep( a_row_ptrs, a_col_idxs, a_vals, l_row_ptrs, diff --git a/dpcpp/matrix/coo_kernels.dp.cpp b/dpcpp/matrix/coo_kernels.dp.cpp index 05fcab9627f..a98cec436b2 100644 --- a/dpcpp/matrix/coo_kernels.dp.cpp +++ b/dpcpp/matrix/coo_kernels.dp.cpp @@ -104,7 +104,7 @@ void spmv_kernel(const size_type nnz, const size_type num_lines, const IndexType* __restrict__ row, const ValueType* __restrict__ b, const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, - Closure scale, sycl::nd_item<3> item_ct1) + Closure scale, ::sycl::nd_item<3> item_ct1) { ValueType temp_val = zero(); const auto start = @@ -160,7 +160,7 @@ void abstract_spmv(const size_type nnz, const size_type num_lines, const IndexType* __restrict__ row, const ValueType* __restrict__ b, const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { spmv_kernel( nnz, num_lines, val, col, row, b, b_stride, c, c_stride, @@ -175,7 +175,7 @@ void abstract_spmv(const size_type nnz, const size_type num_lines, const IndexType* __restrict__ row, const ValueType* __restrict__ b, const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { ValueType scale_factor = alpha[0]; spmv_kernel( @@ -213,7 +213,7 @@ void spmm_kernel(const size_type nnz, const size_type num_elems, const IndexType* __restrict__ row, const size_type num_cols, const ValueType* __restrict__ b, const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, - Closure scale, sycl::nd_item<3> item_ct1) + Closure scale, ::sycl::nd_item<3> item_ct1) { ValueType temp = zero(); const auto coo_idx = @@ -251,7 +251,7 @@ void abstract_spmm(const size_type nnz, const size_type num_elems, const IndexType* __restrict__ row, const size_type num_cols, const ValueType* __restrict__ b, const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { 
spmm_kernel( nnz, num_elems, val, col, row, num_cols, b, b_stride, c, c_stride, @@ -266,7 +266,7 @@ void abstract_spmm(const size_type nnz, const size_type num_elems, const IndexType* __restrict__ row, const size_type num_cols, const ValueType* __restrict__ b, const size_type b_stride, ValueType* __restrict__ c, const size_type c_stride, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { ValueType scale_factor = alpha[0]; spmm_kernel( diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 95a240d710d..43f9a9a5aee 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -109,7 +109,7 @@ __dpct_inline__ T ceildivT(T nom, T denom) template __dpct_inline__ bool block_segment_scan_reverse( const IndexType* __restrict__ ind, ValueType* __restrict__ val, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { bool last = true; const auto reg_ind = ind[item_ct1.get_local_id(2)]; @@ -220,7 +220,7 @@ __dpct_inline__ void spmv_kernel( acc::range val, const IndexType* __restrict__ col_idxs, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, acc::range b, acc::range c, Closure scale, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using arithmetic_type = typename output_accessor::arithmetic_type; const IndexType warp_idx = @@ -266,7 +266,7 @@ void abstract_spmv(const IndexType nwarps, const IndexType num_rows, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, acc::range b, acc::range c, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using arithmetic_type = typename output_accessor::arithmetic_type; using output_type = typename output_accessor::storage_type; @@ -293,7 +293,7 @@ void abstract_spmv( acc::range val, const IndexType* __restrict__ col_idxs, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ srow, acc::range b, acc::range c, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using arithmetic_type = typename output_accessor::arithmetic_type; using output_type = typename output_accessor::storage_type; @@ -336,7 +336,7 @@ template c, - Alpha_op alpha_op, sycl::nd_item<3> item_ct1, + Alpha_op alpha_op, ::sycl::nd_item<3> item_ct1, uninitialized_array& tmp_ind, uninitialized_array& tmp_val) { @@ -384,7 +384,7 @@ void merge_path_spmv( acc::range b, acc::range c, IndexType* __restrict__ row_out, typename output_accessor::arithmetic_type* __restrict__ val_out, - Alpha_op alpha_op, Beta_op beta_op, sycl::nd_item<3> item_ct1, + Alpha_op alpha_op, Beta_op beta_op, ::sycl::nd_item<3> item_ct1, IndexType* shared_row_ptrs) { using arithmetic_type = typename output_accessor::arithmetic_type; @@ -462,7 +462,7 @@ void abstract_merge_path_spmv( acc::range b, acc::range c, IndexType* __restrict__ row_out, typename output_accessor::arithmetic_type* __restrict__ val_out, - sycl::nd_item<3> item_ct1, IndexType* shared_row_ptrs) + ::sycl::nd_item<3> item_ct1, IndexType* shared_row_ptrs) { using type = typename output_accessor::arithmetic_type; merge_path_spmv( @@ -474,20 +474,21 @@ void abstract_merge_path_spmv( template void abstract_merge_path_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const IndexType num_rows, acc::range val, - const IndexType* col_idxs, const IndexType* row_ptrs, const IndexType* srow, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const IndexType num_rows, + acc::range val, const IndexType* col_idxs, + const IndexType* row_ptrs, const 
IndexType* srow, acc::range b, acc::range c, IndexType* row_out, typename output_accessor::arithmetic_type* val_out) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor shared_row_ptrs_acc_ct1( - sycl::range<1>(spmv_block_size * items_per_thread), cgh); + ::sycl::range<1>(spmv_block_size * items_per_thread), cgh); cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { abstract_merge_path_spmv( num_rows, val, col_idxs, row_ptrs, srow, b, c, row_out, val_out, item_ct1, @@ -509,7 +510,7 @@ void abstract_merge_path_spmv( const typename output_accessor::storage_type* __restrict__ beta, acc::range c, IndexType* __restrict__ row_out, typename output_accessor::arithmetic_type* __restrict__ val_out, - sycl::nd_item<3> item_ct1, IndexType* shared_row_ptrs) + ::sycl::nd_item<3> item_ct1, IndexType* shared_row_ptrs) { using type = typename output_accessor::arithmetic_type; const type alpha_val = alpha[0]; @@ -524,8 +525,8 @@ void abstract_merge_path_spmv( template void abstract_merge_path_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const IndexType num_rows, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const IndexType num_rows, const typename matrix_accessor::storage_type* alpha, acc::range val, const IndexType* col_idxs, const IndexType* row_ptrs, const IndexType* srow, @@ -534,14 +535,14 @@ void abstract_merge_path_spmv( acc::range c, IndexType* row_out, typename output_accessor::arithmetic_type* val_out) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor shared_row_ptrs_acc_ct1( - sycl::range<1>(spmv_block_size * items_per_thread), cgh); + ::sycl::range<1>(spmv_block_size * items_per_thread), cgh); cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { abstract_merge_path_spmv( num_rows, alpha, val, col_idxs, row_ptrs, srow, b, beta, c, row_out, val_out, item_ct1, @@ -557,7 +558,7 @@ template c, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array& tmp_ind, uninitialized_array& tmp_val) { @@ -569,23 +570,23 @@ void abstract_reduce( template void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType nwarps, + ::sycl::queue* queue, const IndexType nwarps, const arithmetic_type* __restrict__ last_val, const IndexType* __restrict__ last_row, acc::range c) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> tmp_ind_acc_ct1(cgh); - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + ::sycl::accessor, + 0, ::sycl::access_mode::read_write, + ::sycl::access::target::local> tmp_val_acc_ct1(cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { abstract_reduce(nwarps, last_val, last_row, c, item_ct1, *tmp_ind_acc_ct1.get_pointer(), *tmp_val_acc_ct1.get_pointer()); @@ -600,7 +601,7 @@ void abstract_reduce( const IndexType nwarps, const arithmetic_type* __restrict__ last_val, const IndexType* __restrict__ last_row, const MatrixValueType* __restrict__ 
alpha, acc::range c, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array& tmp_ind, uninitialized_array& tmp_val) { @@ -614,23 +615,23 @@ void abstract_reduce( template void abstract_reduce(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType nwarps, + ::sycl::queue* queue, const IndexType nwarps, const arithmetic_type* last_val, const IndexType* last_row, const MatrixValueType* alpha, acc::range c) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> tmp_ind_acc_ct1(cgh); - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + ::sycl::accessor, + 0, ::sycl::access_mode::read_write, + ::sycl::access::target::local> tmp_val_acc_ct1(cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { abstract_reduce(nwarps, last_val, last_row, alpha, c, item_ct1, *tmp_ind_acc_ct1.get_pointer(), *tmp_val_acc_ct1.get_pointer()); @@ -648,7 +649,7 @@ void device_classical_spmv(const size_type num_rows, const IndexType* __restrict__ row_ptrs, acc::range b, acc::range c, Closure scale, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using arithmetic_type = typename output_accessor::arithmetic_type; auto subgroup_tile = group::tiled_partition( @@ -686,7 +687,7 @@ void abstract_classical_spmv(const size_type num_rows, const IndexType* __restrict__ row_ptrs, acc::range b, acc::range c, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using type = typename output_accessor::arithmetic_type; device_classical_spmv( @@ -696,16 +697,19 @@ void abstract_classical_spmv(const size_type num_rows, template -void abstract_classical_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const size_type num_rows, acc::range val, - const IndexType* col_idxs, const IndexType* row_ptrs, - acc::range b, acc::range c) +void abstract_classical_spmv(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + ::sycl::queue* queue, const size_type num_rows, + acc::range val, + const IndexType* col_idxs, + const IndexType* row_ptrs, + acc::range b, + acc::range c) { if (subgroup_size > 1) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { abstract_classical_spmv( num_rows, val, col_idxs, row_ptrs, b, @@ -713,9 +717,9 @@ void abstract_classical_spmv( }); }); } else { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { abstract_classical_spmv( num_rows, val, col_idxs, row_ptrs, b, c, item_ct1); }); @@ -732,7 +736,7 @@ void abstract_classical_spmv( acc::range val, const IndexType* __restrict__ col_idxs, const IndexType* __restrict__ row_ptrs, acc::range b, const typename output_accessor::storage_type* __restrict__ beta, - acc::range c, sycl::nd_item<3> item_ct1) + acc::range c, ::sycl::nd_item<3> item_ct1) { using type = typename output_accessor::arithmetic_type; const type alpha_val = alpha[0]; @@ -748,8 +752,8 @@ 
void abstract_classical_spmv( template void abstract_classical_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const size_type num_rows, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const size_type num_rows, const typename matrix_accessor::storage_type* alpha, acc::range val, const IndexType* col_idxs, const IndexType* row_ptrs, acc::range b, @@ -757,9 +761,9 @@ void abstract_classical_spmv( acc::range c) { if (subgroup_size > 1) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subgroup_size)]] { abstract_classical_spmv( num_rows, alpha, val, col_idxs, @@ -767,9 +771,9 @@ void abstract_classical_spmv( }); }); } else { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { abstract_classical_spmv( num_rows, alpha, val, col_idxs, row_ptrs, b, beta, c, item_ct1); @@ -783,7 +787,7 @@ template void fill_in_dense(size_type num_rows, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ col_idxs, const ValueType* __restrict__ values, size_type stride, - ValueType* __restrict__ result, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ result, ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num_rows) { @@ -799,7 +803,7 @@ GKO_ENABLE_DEFAULT_HOST(fill_in_dense, fill_in_dense); template void check_unsorted(const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ col_idxs, IndexType num_rows, - bool* flag, sycl::nd_item<3> item_ct1, bool* sh_flag) + bool* flag, ::sycl::nd_item<3> item_ct1, bool* sh_flag) { auto block = group::this_thread_block(item_ct1); if (block.thread_rank() == 0) { @@ -826,16 +830,16 @@ void check_unsorted(const IndexType* __restrict__ row_ptrs, template void check_unsorted(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const IndexType* row_ptrs, + ::sycl::queue* queue, const IndexType* row_ptrs, const IndexType* col_idxs, IndexType num_rows, bool* flag) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor sh_flag_acc_ct1(cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { check_unsorted(row_ptrs, col_idxs, num_rows, flag, item_ct1, sh_flag_acc_ct1.get_pointer()); }); @@ -848,7 +852,7 @@ void extract_diagonal(size_type diag_size, size_type nnz, const ValueType* __restrict__ orig_values, const IndexType* __restrict__ orig_row_ptrs, const IndexType* __restrict__ orig_col_idxs, - ValueType* __restrict__ diag, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ diag, ::sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; const auto row = thread::get_subwarp_id_flat(item_ct1); @@ -879,7 +883,7 @@ void row_ptr_permute_kernel(size_type num_rows, const IndexType* __restrict__ permutation, const IndexType* __restrict__ in_row_ptrs, IndexType* __restrict__ out_nnz, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tid = thread::get_thread_id_flat(item_ct1); if (tid >= num_rows) { @@ -898,7 +902,7 @@ void inv_row_ptr_permute_kernel(size_type num_rows, const IndexType* 
__restrict__ permutation, const IndexType* __restrict__ in_row_ptrs, IndexType* __restrict__ out_nnz, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tid = thread::get_thread_id_flat(item_ct1); if (tid >= num_rows) { @@ -921,7 +925,7 @@ void row_permute_kernel(size_type num_rows, const IndexType* __restrict__ out_row_ptrs, IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tid = thread::get_subwarp_id_flat(item_ct1); if (tid >= num_rows) { @@ -941,15 +945,15 @@ void row_permute_kernel(size_type num_rows, template void row_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, const IndexType* permutation, const IndexType* in_row_ptrs, const IndexType* in_cols, const ValueType* in_vals, const IndexType* out_row_ptrs, IndexType* out_cols, ValueType* out_vals) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { row_permute_kernel( num_rows, permutation, in_row_ptrs, in_cols, in_vals, out_row_ptrs, out_cols, out_vals, item_ct1); @@ -967,7 +971,7 @@ void inv_row_permute_kernel(size_type num_rows, const IndexType* __restrict__ out_row_ptrs, IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tid = thread::get_subwarp_id_flat(item_ct1); if (tid >= num_rows) { @@ -987,16 +991,17 @@ void inv_row_permute_kernel(size_type num_rows, template void inv_row_permute_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, const IndexType* permutation, + size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, + const IndexType* permutation, const IndexType* in_row_ptrs, const IndexType* in_cols, const ValueType* in_vals, const IndexType* out_row_ptrs, IndexType* out_cols, ValueType* out_vals) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { inv_row_permute_kernel( num_rows, permutation, in_row_ptrs, in_cols, in_vals, out_row_ptrs, out_cols, out_vals, item_ct1); @@ -1014,7 +1019,7 @@ void inv_symm_permute_kernel(size_type num_rows, const IndexType* __restrict__ out_row_ptrs, IndexType* __restrict__ out_cols, ValueType* __restrict__ out_vals, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { auto tid = thread::get_subwarp_id_flat(item_ct1); if (tid >= num_rows) { @@ -1035,16 +1040,16 @@ void inv_symm_permute_kernel(size_type num_rows, template void inv_symm_permute_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, const IndexType* permutation, const IndexType* in_row_ptrs, const IndexType* in_cols, const ValueType* in_vals, const IndexType* out_row_ptrs, IndexType* out_cols, ValueType* out_vals) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { inv_symm_permute_kernel( num_rows, permutation, in_row_ptrs, 
in_cols, in_vals, out_row_ptrs, out_cols, out_vals, item_ct1); @@ -1461,7 +1466,7 @@ void calc_nnz_in_span(const span row_span, const span col_span, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ col_idxs, IndexType* __restrict__ nnz_per_row, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1) + row_span.begin; if (tidx < row_span.end) { @@ -1488,7 +1493,7 @@ void compute_submatrix_idxs_and_vals(size_type num_rows, size_type num_cols, const IndexType* __restrict__ res_row_ptrs, IndexType* __restrict__ res_col_idxs, ValueType* __restrict__ res_values, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < num_rows) { @@ -1795,8 +1800,8 @@ void spgemm(std::shared_ptr exec, reinterpret_cast*>(heap); // first sweep: count nnz for each row - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); c_row_ptrs[a_row] = spgemm_multiway_merge( a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, @@ -1819,8 +1824,8 @@ void spgemm(std::shared_ptr exec, auto c_col_idxs = c_col_idxs_array.get_data(); auto c_vals = c_vals_array.get_data(); - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); spgemm_multiway_merge( a_row, a_row_ptrs, a_cols, a_vals, b_row_ptrs, b_cols, b_vals, @@ -1880,8 +1885,8 @@ void advanced_spgemm(std::shared_ptr exec, reinterpret_cast*>(heap); // first sweep: count nnz for each row - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); auto d_nz = d_row_ptrs[a_row]; const auto d_end = d_row_ptrs[a_row + 1]; @@ -1918,8 +1923,8 @@ void advanced_spgemm(std::shared_ptr exec, auto c_col_idxs = c_col_idxs_array.get_data(); auto c_vals = c_vals_array.get_data(); - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto a_row = static_cast(idx[0]); auto d_nz = d_row_ptrs[a_row]; const auto d_end = d_row_ptrs[a_row + 1]; @@ -1997,8 +2002,8 @@ void spgeam(std::shared_ptr exec, auto queue = exec->get_queue(); // count number of non-zeros per row - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); auto a_idx = a_row_ptrs[row]; const auto a_end = a_row_ptrs[row + 1]; @@ -2034,8 +2039,8 @@ void spgeam(std::shared_ptr exec, const auto beta_vals = beta->get_const_values(); // count number of non-zeros per row - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> 
idx) { const auto row = static_cast(idx[0]); auto a_idx = a_row_ptrs[row]; const auto a_end = a_row_ptrs[row + 1]; @@ -2115,8 +2120,8 @@ void generic_transpose(std::shared_ptr exec, auto out_vals = trans->get_values(); components::fill_array(exec, tmp_counts, num_cols, IndexType{}); - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; const auto end = row_ptrs[row + 1]; @@ -2129,8 +2134,8 @@ void generic_transpose(std::shared_ptr exec, components::prefix_sum_nonnegative(exec, tmp_counts, num_cols + 1); exec->copy(num_cols + 1, tmp_counts, out_row_ptrs); - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; const auto end = row_ptrs[row + 1]; @@ -2256,8 +2261,8 @@ void sort_by_column_index(std::shared_ptr exec, const auto row_ptrs = to_sort->get_const_row_ptrs(); auto cols = to_sort->get_col_idxs(); auto vals = to_sort->get_values(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; auto size = row_ptrs[row + 1] - begin; @@ -2316,8 +2321,8 @@ void is_sorted_by_column_index( const auto row_ptrs = to_check->get_const_row_ptrs(); const auto cols = to_check->get_const_col_idxs(); auto is_sorted_device = is_sorted_device_array.get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; const auto end = row_ptrs[row + 1]; @@ -2473,8 +2478,8 @@ void build_lookup(std::shared_ptr exec, const IndexType* storage_offsets, int64* row_desc, int32* storage) { - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto row_begin = row_ptrs[row]; const auto row_len = row_ptrs[row + 1] - row_begin; diff --git a/dpcpp/matrix/dense_kernels.dp.cpp b/dpcpp/matrix/dense_kernels.dp.cpp index 565c25e82de..4f89dbbf9e6 100644 --- a/dpcpp/matrix/dense_kernels.dp.cpp +++ b/dpcpp/matrix/dense_kernels.dp.cpp @@ -87,7 +87,7 @@ template void transpose(const size_type nrows, const size_type ncols, const ValueType* __restrict__ in, const size_type in_stride, ValueType* __restrict__ out, const size_type out_stride, - Closure op, sycl::nd_item<3> item_ct1, + Closure op, ::sycl::nd_item<3> item_ct1, uninitialized_array& space) { auto local_x = item_ct1.get_local_id(2); @@ -98,7 +98,7 @@ void transpose(const size_type nrows, const size_type ncols, space[local_y * (sg_size + 1) + local_x] = op(in[y * in_stride + x]); } - item_ct1.barrier(sycl::access::fence_space::local_space); + 
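// ---------------------------------------------------------------------------
// Sketch (illustration, not part of the patch): spgemm, spgeam and
// generic_transpose above all follow the same two-sweep pattern: sweep 1
// counts output entries per row (per column for the transpose), a prefix sum
// turns the counts into row pointers, and sweep 2 writes indices and values at
// the reserved offsets. A sequential host sketch of that pattern (COO -> CSR,
// names illustrative only):
#include <cstddef>
#include <numeric>
#include <vector>

struct coo_entry {
    int row;
    int col;
    double val;
};

void coo_to_csr(const std::vector<coo_entry>& entries, int num_rows,
                std::vector<int>& row_ptrs, std::vector<int>& cols,
                std::vector<double>& vals)
{
    row_ptrs.assign(num_rows + 1, 0);
    for (const auto& e : entries) {
        row_ptrs[e.row + 1]++;  // sweep 1: count entries per row
    }
    // prefix sum: counts -> row pointers
    std::partial_sum(row_ptrs.begin(), row_ptrs.end(), row_ptrs.begin());
    cols.resize(entries.size());
    vals.resize(entries.size());
    std::vector<int> offset(row_ptrs.begin(), row_ptrs.end() - 1);
    for (const auto& e : entries) {  // sweep 2: fill at reserved offsets
        const auto idx = offset[e.row]++;
        cols[idx] = e.col;
        vals[idx] = e.val;
    }
}
// ---------------------------------------------------------------------------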
item_ct1.barrier(::sycl::access::fence_space::local_space); x = item_ct1.get_group(1) * sg_size + local_x; y = item_ct1.get_group(2) * sg_size + local_y; if (y < ncols && x < nrows) { @@ -111,7 +111,7 @@ void transpose( const size_type nrows, const size_type ncols, const ValueType* __restrict__ in, const size_type in_stride, ValueType* __restrict__ out, const size_type out_stride, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array& space) { @@ -121,7 +121,7 @@ void transpose( } template -void transpose(sycl::queue* queue, const matrix::Dense* orig, +void transpose(::sycl::queue* queue, const matrix::Dense* orig, matrix::Dense* trans) { auto size = orig->get_size(); @@ -129,10 +129,10 @@ void transpose(sycl::queue* queue, const matrix::Dense* orig, dim3 grid(ceildiv(size[1], sg_size), ceildiv(size[0], sg_size)); dim3 block(sg_size, sg_size); - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, + 0, ::sycl::access_mode::read_write, + ::sycl::access::target::local> space_acc_ct1(cgh); // Can not pass the member to device function directly auto in = orig->get_const_values(); @@ -140,7 +140,7 @@ void transpose(sycl::queue* queue, const matrix::Dense* orig, auto out = trans->get_values(); auto out_stride = trans->get_stride(); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { transpose(size[0], size[1], in, in_stride, out, out_stride, item_ct1, *space_acc_ct1.get_pointer()); @@ -157,7 +157,7 @@ void conj_transpose( const size_type nrows, const size_type ncols, const ValueType* __restrict__ in, const size_type in_stride, ValueType* __restrict__ out, const size_type out_stride, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array& space) { @@ -168,21 +168,21 @@ void conj_transpose( template void conj_transpose(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const size_type nrows, + ::sycl::queue* queue, const size_type nrows, const size_type ncols, const ValueType* in, const size_type in_stride, ValueType* out, const size_type out_stride) { constexpr auto sg_size = DeviceConfig::subgroup_size; - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, + 0, ::sycl::access_mode::read_write, + ::sycl::access::target::local> space_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) __WG_BOUND__(sg_size, sg_size) { + [=](::sycl::nd_item<3> item_ct1) __WG_BOUND__(sg_size, sg_size) { conj_transpose(nrows, ncols, in, in_stride, out, out_stride, item_ct1, *space_acc_ct1.get_pointer()); @@ -308,8 +308,8 @@ void convert_to_coo(std::shared_ptr exec, auto cols = result->get_col_idxs(); auto vals = result->get_values(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(num_rows, [=](sycl::item<1> item) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(num_rows, [=](::sycl::item<1> item) { const auto row = static_cast(item[0]); auto write_to = row_ptrs[row]; @@ -343,8 +343,8 @@ void convert_to_csr(std::shared_ptr exec, auto cols = result->get_col_idxs(); auto vals = result->get_values(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(num_rows, 
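// ---------------------------------------------------------------------------
// Sketch (illustration, not part of the patch): the device query behind the
// transpose/conj_transpose config selection above. A tile of
// sg_size x (sg_size + 1) values must fit into work-group local memory; the
// extra column is the usual padding to reduce local-memory bank conflicts.
#include <cstddef>
#include <sycl/sycl.hpp>

bool tile_fits_in_local_memory(::sycl::queue& q, std::size_t sg_size,
                               std::size_t value_bytes)
{
    const auto local_mem =
        q.get_device().get_info<::sycl::info::device::local_mem_size>();
    return sg_size * (sg_size + 1) * value_bytes <= local_mem;
}
// ---------------------------------------------------------------------------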
[=](sycl::item<1> item) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(num_rows, [=](::sycl::item<1> item) { const auto row = static_cast(item[0]); auto write_to = row_ptrs[row]; @@ -378,8 +378,8 @@ void convert_to_ell(std::shared_ptr exec, auto vals = result->get_values(); const auto stride = result->get_stride(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(num_rows, [=](sycl::item<1> item) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(num_rows, [=](::sycl::item<1> item) { const auto row = static_cast(item[0]); size_type col_idx = 0; for (size_type col = 0; col < num_cols; col++) { @@ -440,8 +440,8 @@ void convert_to_hybrid(std::shared_ptr exec, auto coo_cols = result->get_coo_col_idxs(); auto coo_vals = result->get_coo_values(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(num_rows, [=](sycl::item<1> item) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(num_rows, [=](::sycl::item<1> item) { const auto row = static_cast(item[0]); size_type ell_count = 0; size_type col = 0; @@ -493,8 +493,8 @@ void convert_to_sellp(std::shared_ptr exec, auto vals = result->get_values(); auto col_idxs = result->get_col_idxs(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(num_rows, [=](sycl::item<1> item) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(num_rows, [=](::sycl::item<1> item) { const auto row = static_cast(item[0]); const auto local_row = row % slice_size; const auto slice = row / slice_size; @@ -534,8 +534,8 @@ void convert_to_sparsity_csr(std::shared_ptr exec, const auto row_ptrs = result->get_const_row_ptrs(); auto cols = result->get_col_idxs(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(num_rows, [=](sycl::item<1> item) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(num_rows, [=](::sycl::item<1> item) { const auto row = static_cast(item[0]); auto write_to = row_ptrs[row]; @@ -566,7 +566,7 @@ void transpose(std::shared_ptr exec, return validate(queue, cfg.block_size, sg_size) && sg_size * (sg_size + 1) * sizeof(ValueType) <= queue->get_device() - .get_info(); + .get_info<::sycl::info::device::local_mem_size>(); }, queue, orig, trans); } @@ -588,7 +588,7 @@ void conj_transpose(std::shared_ptr exec, return validate(queue, DCFG_1D::decode<0>(cfg), sg_size) && sg_size * (sg_size + 1) * sizeof(ValueType) <= queue->get_device() - .get_info(); + .get_info<::sycl::info::device::local_mem_size>(); }); const auto sg_size = DCFG_1D::decode<1>(cfg); dim3 grid(ceildiv(size[1], sg_size), ceildiv(size[0], sg_size)); diff --git a/dpcpp/matrix/diagonal_kernels.dp.cpp b/dpcpp/matrix/diagonal_kernels.dp.cpp index 60bc52c18d2..f44eede1dc7 100644 --- a/dpcpp/matrix/diagonal_kernels.dp.cpp +++ b/dpcpp/matrix/diagonal_kernels.dp.cpp @@ -68,7 +68,7 @@ template void apply_to_csr(size_type num_rows, const ValueType* __restrict__ diag, const IndexType* __restrict__ row_ptrs, ValueType* __restrict__ result_values, bool inverse, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { constexpr auto warp_size = config::warp_size; auto warp_tile = diff --git a/dpcpp/matrix/ell_kernels.dp.cpp b/dpcpp/matrix/ell_kernels.dp.cpp index 88e2bf40efa..703e92056f6 100644 --- a/dpcpp/matrix/ell_kernels.dp.cpp +++ b/dpcpp/matrix/ell_kernels.dp.cpp @@ -119,7 +119,7 @@ void spmv_kernel( acc::range val, const IndexType* __restrict__ col, const size_type stride, const 
size_type num_stored_elements_per_row, acc::range b, OutputValueType* __restrict__ c, - const size_type c_stride, Closure op, sycl::nd_item<3> item_ct1, + const size_type c_stride, Closure op, ::sycl::nd_item<3> item_ct1, uninitialized_array& storage) { @@ -153,7 +153,7 @@ void spmv_kernel( storage[item_ct1.get_local_id(2)] = 0; } - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); auto temp = zero(); if (runnable) { for (size_type idx = @@ -171,7 +171,7 @@ void spmv_kernel( temp); } - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); if (runnable && idx_in_worker == 0) { const auto c_ind = x * c_stride + column_id; if (atomic) { @@ -192,7 +192,7 @@ void spmv( acc::range val, const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, OutputValueType* __restrict__ c, - const size_type c_stride, sycl::nd_item<3> item_ct1, + const size_type c_stride, ::sycl::nd_item<3> item_ct1, uninitialized_array& storage) { @@ -206,21 +206,21 @@ void spmv( template void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const size_type num_rows, + ::sycl::queue* queue, const size_type num_rows, const int num_worker_per_row, acc::range val, const IndexType* col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, OutputValueType* c, const size_type c_stride) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> storage_acc_ct1(cgh); cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { spmv( num_rows, num_worker_per_row, val, col, stride, num_stored_elements_per_row, b, c, c_stride, @@ -238,7 +238,7 @@ void spmv( const IndexType* __restrict__ col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, const OutputValueType* __restrict__ beta, OutputValueType* __restrict__ c, - const size_type c_stride, sycl::nd_item<3> item_ct1, + const size_type c_stride, ::sycl::nd_item<3> item_ct1, uninitialized_array& storage) { @@ -272,22 +272,22 @@ void spmv( template void spmv(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const size_type num_rows, + ::sycl::queue* queue, const size_type num_rows, const int num_worker_per_row, acc::range alpha, acc::range val, const IndexType* col, const size_type stride, const size_type num_stored_elements_per_row, acc::range b, const OutputValueType* beta, OutputValueType* c, const size_type c_stride) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> storage_acc_ct1(cgh); cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { spmv( num_rows, num_worker_per_row, alpha, val, col, stride, num_stored_elements_per_row, b, beta, c, c_stride, item_ct1, diff --git a/dpcpp/matrix/sellp_kernels.dp.cpp b/dpcpp/matrix/sellp_kernels.dp.cpp index 85300bb531a..bd16dbb4d75 100644 
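// ---------------------------------------------------------------------------
// Sketch (illustration, not part of the patch): the Dense/ELL kernels above
// stage partial results in work-group local memory through a local accessor
// and synchronize with a work-group barrier. A self-contained sketch of that
// pattern, a per-work-group sum; n is assumed to be a multiple of wg.
#include <cstddef>
#include <sycl/sycl.hpp>

void group_sums(::sycl::queue& q, const float* in, float* out, std::size_t n)
{
    constexpr std::size_t wg = 256;
    q.submit([&](::sycl::handler& cgh) {
        // one scratch array per work-group, allocated in local memory
        ::sycl::accessor<float, 1, ::sycl::access_mode::read_write,
                         ::sycl::access::target::local>
            scratch(::sycl::range<1>{wg}, cgh);
        cgh.parallel_for(
            ::sycl::nd_range<1>{{n}, {wg}}, [=](::sycl::nd_item<1> item) {
                const auto lid = item.get_local_id(0);
                scratch[lid] = in[item.get_global_id(0)];
                item.barrier(::sycl::access::fence_space::local_space);
                if (lid == 0) {
                    float sum = 0.0f;
                    for (std::size_t i = 0; i < wg; ++i) {
                        sum += scratch[i];
                    }
                    out[item.get_group(0)] = sum;
                }
            });
    });
}
// ---------------------------------------------------------------------------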
--- a/dpcpp/matrix/sellp_kernels.dp.cpp +++ b/dpcpp/matrix/sellp_kernels.dp.cpp @@ -76,7 +76,7 @@ void spmv_kernel(size_type num_rows, size_type num_right_hand_sides, const ValueType* __restrict__ a, const IndexType* __restrict__ cols, const ValueType* __restrict__ b, ValueType* __restrict__ c, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto row = thread::get_thread_id_flat(item_ct1); const auto slice_id = row / slice_size; @@ -99,16 +99,14 @@ GKO_ENABLE_DEFAULT_HOST(spmv_kernel, spmv_kernel); template -void advanced_spmv_kernel(size_type num_rows, size_type num_right_hand_sides, - size_type b_stride, size_type c_stride, - size_type slice_size, - const size_type* __restrict__ slice_sets, - const ValueType* __restrict__ alpha, - const ValueType* __restrict__ a, - const IndexType* __restrict__ cols, - const ValueType* __restrict__ b, - const ValueType* __restrict__ beta, - ValueType* __restrict__ c, sycl::nd_item<3> item_ct1) +void advanced_spmv_kernel( + size_type num_rows, size_type num_right_hand_sides, size_type b_stride, + size_type c_stride, size_type slice_size, + const size_type* __restrict__ slice_sets, + const ValueType* __restrict__ alpha, const ValueType* __restrict__ a, + const IndexType* __restrict__ cols, const ValueType* __restrict__ b, + const ValueType* __restrict__ beta, ValueType* __restrict__ c, + ::sycl::nd_item<3> item_ct1) { const auto row = thread::get_thread_id_flat(item_ct1); const auto slice_id = row / slice_size; diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 29ddfc7bb40..6bd3eb11b6e 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -80,7 +80,7 @@ void device_classical_spmv(const size_type num_rows, const IndexType* __restrict__ row_ptrs, acc::range b, acc::range c, Closure scale, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using arithmetic_type = typename output_accessor::arithmetic_type; auto subgroup_tile = group::tiled_partition( @@ -119,7 +119,7 @@ void abstract_classical_spmv(const size_type num_rows, const IndexType* __restrict__ row_ptrs, acc::range b, acc::range c, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { using type = typename output_accessor::arithmetic_type; device_classical_spmv( @@ -130,14 +130,14 @@ void abstract_classical_spmv(const size_type num_rows, template void abstract_classical_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const size_type num_rows, const MatrixValueType* val, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const size_type num_rows, const MatrixValueType* val, const IndexType* col_idxs, const IndexType* row_ptrs, acc::range b, acc::range c) { // only subgroup = 1, so does not need sycl::reqd_sub_group_size queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { abstract_classical_spmv(num_rows, val, col_idxs, row_ptrs, b, c, item_ct1); }); @@ -152,7 +152,7 @@ void abstract_classical_spmv( const IndexType* __restrict__ col_idxs, const IndexType* __restrict__ row_ptrs, acc::range b, const typename output_accessor::storage_type* __restrict__ beta, - acc::range c, sycl::nd_item<3> item_ct1) + acc::range c, ::sycl::nd_item<3> item_ct1) { using type = typename output_accessor::arithmetic_type; const type alpha_val = static_cast(alpha[0]); @@ -167,17 +167,20 @@ void abstract_classical_spmv( 
template -void abstract_classical_spmv( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const size_type num_rows, const MatrixValueType* alpha, - const MatrixValueType* val, const IndexType* col_idxs, - const IndexType* row_ptrs, acc::range b, - const typename output_accessor::storage_type* beta, - acc::range c) +void abstract_classical_spmv(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + ::sycl::queue* queue, const size_type num_rows, + const MatrixValueType* alpha, + const MatrixValueType* val, + const IndexType* col_idxs, + const IndexType* row_ptrs, + acc::range b, + const typename output_accessor::storage_type* beta, + acc::range c) { // only subgroup = 1, so does not need sycl::reqd_sub_group_size queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { abstract_classical_spmv( num_rows, alpha, val, col_idxs, row_ptrs, b, beta, c, item_ct1); }); @@ -309,8 +312,8 @@ void sort_by_column_index(std::shared_ptr exec, const auto cols = to_sort->get_col_idxs(); auto queue = exec->get_queue(); // build sorted postorder node list for each row - queue->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx_id) { + queue->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx_id) { const auto row = idx_id[0]; const auto row_begin = row_ptrs[row]; const auto row_end = row_ptrs[row + 1]; @@ -337,8 +340,8 @@ void is_sorted_by_column_index( const auto row_ptrs = to_check->get_const_row_ptrs(); const auto cols = to_check->get_const_col_idxs(); auto is_sorted_device = gpu_array.get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{num_rows}, [=](sycl::id<1> idx) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{num_rows}, [=](::sycl::id<1> idx) { const auto row = static_cast(idx[0]); const auto begin = row_ptrs[row]; const auto end = row_ptrs[row + 1]; diff --git a/dpcpp/preconditioner/isai_kernels.dp.cpp b/dpcpp/preconditioner/isai_kernels.dp.cpp index f3e1c2c58ac..fa1bb7a4582 100644 --- a/dpcpp/preconditioner/isai_kernels.dp.cpp +++ b/dpcpp/preconditioner/isai_kernels.dp.cpp @@ -91,7 +91,7 @@ __dpct_inline__ void generic_generate( const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, IndexType* __restrict__ excess_rhs_sizes, IndexType* __restrict__ excess_nnz, Callable direct_solve, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array* storage) { @@ -225,7 +225,7 @@ void generate_l_inverse( const IndexType* __restrict__ i_row_ptrs, const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, IndexType* __restrict__ excess_rhs_sizes, - IndexType* __restrict__ excess_nnz, sycl::nd_item<3> item_ct1, + IndexType* __restrict__ excess_nnz, ::sycl::nd_item<3> item_ct1, uninitialized_array* storage) { @@ -259,23 +259,23 @@ void generate_l_inverse( template void generate_l_inverse(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, IndexType num_rows, + ::sycl::queue* queue, IndexType num_rows, const IndexType* m_row_ptrs, const IndexType* m_col_idxs, const ValueType* m_values, const IndexType* i_row_ptrs, const IndexType* i_col_idxs, ValueType* i_values, IndexType* excess_rhs_sizes, IndexType* excess_nnz) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& 
cgh) { + ::sycl::accessor< uninitialized_array, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> storage_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { generate_l_inverse( num_rows, m_row_ptrs, m_col_idxs, m_values, i_row_ptrs, @@ -295,7 +295,7 @@ void generate_u_inverse( const IndexType* __restrict__ i_row_ptrs, const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, IndexType* __restrict__ excess_rhs_sizes, - IndexType* __restrict__ excess_nnz, sycl::nd_item<3> item_ct1, + IndexType* __restrict__ excess_nnz, ::sycl::nd_item<3> item_ct1, uninitialized_array* storage) { @@ -329,23 +329,23 @@ void generate_u_inverse( template void generate_u_inverse(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, IndexType num_rows, + ::sycl::queue* queue, IndexType num_rows, const IndexType* m_row_ptrs, const IndexType* m_col_idxs, const ValueType* m_values, const IndexType* i_row_ptrs, const IndexType* i_col_idxs, ValueType* i_values, IndexType* excess_rhs_sizes, IndexType* excess_nnz) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> storage_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { generate_u_inverse( num_rows, m_row_ptrs, m_col_idxs, m_values, i_row_ptrs, @@ -365,7 +365,7 @@ void generate_general_inverse( const IndexType* __restrict__ i_row_ptrs, const IndexType* __restrict__ i_col_idxs, ValueType* __restrict__ i_values, IndexType* __restrict__ excess_rhs_sizes, - IndexType* __restrict__ excess_nnz, bool spd, sycl::nd_item<3> item_ct1, + IndexType* __restrict__ excess_nnz, bool spd, ::sycl::nd_item<3> item_ct1, uninitialized_array* storage) { @@ -411,23 +411,23 @@ void generate_general_inverse( template void generate_general_inverse( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - IndexType num_rows, const IndexType* m_row_ptrs, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, IndexType num_rows, const IndexType* m_row_ptrs, const IndexType* m_col_idxs, const ValueType* m_values, const IndexType* i_row_ptrs, const IndexType* i_col_idxs, ValueType* i_values, IndexType* excess_rhs_sizes, IndexType* excess_nnz, bool spd) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> storage_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { generate_general_inverse( num_rows, m_row_ptrs, m_col_idxs, m_values, i_row_ptrs, @@ -452,7 +452,7 @@ void generate_excess_system(IndexType num_rows, ValueType* __restrict__ excess_values, ValueType* __restrict__ excess_rhs, size_type e_start, size_type e_end, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto row = 
thread::get_subwarp_id_flat(item_ct1) + @@ -517,8 +517,8 @@ void generate_excess_system(IndexType num_rows, template void generate_excess_system( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - IndexType num_rows, const IndexType* m_row_ptrs, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, IndexType num_rows, const IndexType* m_row_ptrs, const IndexType* m_col_idxs, const ValueType* m_values, const IndexType* i_row_ptrs, const IndexType* i_col_idxs, const IndexType* excess_rhs_ptrs, const IndexType* excess_nz_ptrs, @@ -527,7 +527,7 @@ void generate_excess_system( size_type e_end) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { generate_excess_system( num_rows, m_row_ptrs, m_col_idxs, m_values, @@ -542,7 +542,7 @@ template void scale_excess_solution(const IndexType* __restrict__ excess_block_ptrs, ValueType* __restrict__ excess_solution, size_type e_start, size_type e_end, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto warp_id = thread::get_subwarp_id_flat(item_ct1); @@ -572,13 +572,14 @@ void scale_excess_solution(const IndexType* __restrict__ excess_block_ptrs, template void scale_excess_solution(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue* queue, + size_type dynamic_shared_memory, + ::sycl::queue* queue, const IndexType* excess_block_ptrs, ValueType* excess_solution, size_type e_start, size_type e_end) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { scale_excess_solution( excess_block_ptrs, excess_solution, e_start, @@ -592,7 +593,7 @@ void copy_excess_solution(IndexType num_rows, const IndexType* __restrict__ excess_rhs_ptrs, const ValueType* __restrict__ excess_solution, ValueType* __restrict__ i_values, size_type e_start, - size_type e_end, sycl::nd_item<3> item_ct1) + size_type e_end, ::sycl::nd_item<3> item_ct1) { const auto excess_row = thread::get_subwarp_id_flat(item_ct1); @@ -621,14 +622,14 @@ void copy_excess_solution(IndexType num_rows, template void copy_excess_solution(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue* queue, + size_type dynamic_shared_memory, ::sycl::queue* queue, IndexType num_rows, const IndexType* i_row_ptrs, const IndexType* excess_rhs_ptrs, const ValueType* excess_solution, ValueType* i_values, size_type e_start, size_type e_end) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { copy_excess_solution( num_rows, i_row_ptrs, excess_rhs_ptrs, diff --git a/dpcpp/preconditioner/jacobi_advanced_apply_instantiate.inc.dp.cpp b/dpcpp/preconditioner/jacobi_advanced_apply_instantiate.inc.dp.cpp index 2f5b4c70991..8957c93b7c5 100644 --- a/dpcpp/preconditioner/jacobi_advanced_apply_instantiate.inc.dp.cpp +++ b/dpcpp/preconditioner/jacobi_advanced_apply_instantiate.inc.dp.cpp @@ -71,7 +71,7 @@ void advanced_apply( const IndexType* __restrict__ block_ptrs, size_type num_blocks, const ValueType* __restrict__ alpha, const ValueType* __restrict__ b, int32 b_stride, ValueType* __restrict__ x, int32 x_stride, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto block_id = thread::get_subwarp_id(item_ct1); @@ -98,15 +98,15 @@ void advanced_apply( template void advanced_apply( - 
dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const ValueType* blocks, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const ValueType* blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* block_ptrs, size_type num_blocks, const ValueType* alpha, const ValueType* b, int32 b_stride, ValueType* x, int32 x_stride) { queue->parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { advanced_apply( blocks, storage_scheme, block_ptrs, num_blocks, alpha, b, @@ -124,7 +124,7 @@ void advanced_adaptive_apply( const IndexType* __restrict__ block_ptrs, size_type num_blocks, const ValueType* __restrict__ alpha, const ValueType* __restrict__ b, int32 b_stride, ValueType* __restrict__ x, int32 x_stride, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto block_id = thread::get_subwarp_id(item_ct1); @@ -156,8 +156,8 @@ void advanced_adaptive_apply( template void advanced_adaptive_apply( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const ValueType* blocks, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const ValueType* blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const precision_reduction* block_precisions, const IndexType* block_ptrs, size_type num_blocks, const ValueType* alpha, const ValueType* b, @@ -165,7 +165,7 @@ void advanced_adaptive_apply( { queue->parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { advanced_adaptive_apply( diff --git a/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp b/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp index b1109c5e0fb..1c3d412495a 100644 --- a/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp +++ b/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp @@ -123,7 +123,7 @@ void generate( const ValueType* __restrict__ values, ValueType* __restrict__ block_data, preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* __restrict__ block_ptrs, size_type num_blocks, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array* workspace) { const auto block_id = @@ -152,21 +152,21 @@ void generate( template void generate( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, const IndexType* row_ptrs, const IndexType* col_idxs, - const ValueType* values, ValueType* block_data, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, const IndexType* row_ptrs, + const IndexType* col_idxs, const ValueType* values, ValueType* block_data, preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* block_ptrs, size_type num_blocks) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, 0, - sycl::access_mode::read_write, sycl::access::target::local> + ::sycl::access_mode::read_write, ::sycl::access::target::local> workspace_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { generate( num_rows, row_ptrs, col_idxs, values, block_data, @@ -239,7 +239,7 @@ void 
adaptive_generate( remove_complex* __restrict__ conditioning, precision_reduction* __restrict__ block_precisions, const IndexType* __restrict__ block_ptrs, size_type num_blocks, - sycl::nd_item<3> item_ct1, + ::sycl::nd_item<3> item_ct1, uninitialized_array* workspace) { // extract blocks @@ -334,23 +334,23 @@ void adaptive_generate( template void adaptive_generate( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, const IndexType* row_ptrs, const IndexType* col_idxs, - const ValueType* values, remove_complex accuracy, - ValueType* block_data, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, const IndexType* row_ptrs, + const IndexType* col_idxs, const ValueType* values, + remove_complex accuracy, ValueType* block_data, preconditioner::block_interleaved_storage_scheme storage_scheme, remove_complex* conditioning, precision_reduction* block_precisions, const IndexType* block_ptrs, size_type num_blocks) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, 0, - sycl::access_mode::read_write, sycl::access::target::local> + ::sycl::access_mode::read_write, ::sycl::access::target::local> workspace_acc_ct1(cgh); cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { adaptive_generate( diff --git a/dpcpp/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/preconditioner/jacobi_kernels.dp.cpp index 13bc822d036..9a3cf4141e1 100644 --- a/dpcpp/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/preconditioner/jacobi_kernels.dp.cpp @@ -72,7 +72,7 @@ constexpr int default_grid_size = 32 * 32 * 128; void duplicate_array(const precision_reduction* __restrict__ source, size_type source_size, precision_reduction* __restrict__ dest, - size_type dest_size, sycl::nd_item<3> item_ct1) + size_type dest_size, ::sycl::nd_item<3> item_ct1) { auto grid = group::this_grid(item_ct1); if (grid.thread_rank() >= dest_size) { @@ -84,12 +84,12 @@ void duplicate_array(const precision_reduction* __restrict__ source, } void duplicate_array(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, const precision_reduction* source, + ::sycl::queue* queue, const precision_reduction* source, size_type source_size, precision_reduction* dest, size_type dest_size) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { duplicate_array(source, source_size, dest, dest_size, item_ct1); }); } @@ -100,7 +100,7 @@ void compare_adjacent_rows(size_type num_rows, int32 max_block_size, const IndexType* __restrict__ row_ptrs, const IndexType* __restrict__ col_idx, bool* __restrict__ matching_next_row, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto warp = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -142,13 +142,13 @@ void compare_adjacent_rows(size_type num_rows, int32 max_block_size, template void compare_adjacent_rows(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, int32 max_block_size, - const IndexType* row_ptrs, const IndexType* col_idx, - bool* matching_next_row) + size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, + int32 max_block_size, const IndexType* row_ptrs, + const IndexType* col_idx, bool* matching_next_row) 
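// ---------------------------------------------------------------------------
// Sketch (illustration, not part of the patch): the renaming only affects
// types and functions (::sycl::queue, ::sycl::nd_item, ...). The attribute
// [[sycl::reqd_sub_group_size(N)]] keeps its spelling, because attribute
// namespaces are independent of C++ name lookup: they cannot collide with
// gko::kernels::sycl and cannot be written with a leading "::".
// subgroup_size below is an assumed value for the sketch.
#include <sycl/sycl.hpp>

constexpr int subgroup_size = 16;

void launch(::sycl::queue& q, ::sycl::nd_range<3> range)
{
    q.parallel_for(range, [=](::sycl::nd_item<3> item)
                              [[sycl::reqd_sub_group_size(subgroup_size)]] {
        // every sub-group in this kernel has exactly `subgroup_size`
        // work-items
        (void)item;
    });
}
// ---------------------------------------------------------------------------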
{ queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(config::warp_size)]] { compare_adjacent_rows( num_rows, max_block_size, row_ptrs, col_idx, @@ -184,13 +184,16 @@ void generate_natural_block_pointer(size_type num_rows, int32 max_block_size, } template -void generate_natural_block_pointer( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, int32 max_block_size, const bool* matching_next_row, - IndexType* block_ptrs, size_type* num_blocks_arr) +void generate_natural_block_pointer(dim3 grid, dim3 block, + size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, + int32 max_block_size, + const bool* matching_next_row, + IndexType* block_ptrs, + size_type* num_blocks_arr) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { generate_natural_block_pointer(num_rows, max_block_size, matching_next_row, block_ptrs, num_blocks_arr); @@ -225,15 +228,13 @@ void agglomerate_supervariables_kernel(int32 max_block_size, } template -void agglomerate_supervariables_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, - sycl::queue* queue, int32 max_block_size, - size_type num_natural_blocks, - IndexType* block_ptrs, - size_type* num_blocks_arr) +void agglomerate_supervariables_kernel( + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, int32 max_block_size, size_type num_natural_blocks, + IndexType* block_ptrs, size_type* num_blocks_arr) { queue->parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { agglomerate_supervariables_kernel( max_block_size, num_natural_blocks, block_ptrs, num_blocks_arr); }); @@ -246,7 +247,7 @@ void transpose_jacobi( const ValueType* __restrict__ blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* __restrict__ block_ptrs, size_type num_blocks, - ValueType* __restrict__ out_blocks, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ out_blocks, ::sycl::nd_item<3> item_ct1) { const auto block_id = thread::get_subwarp_id(item_ct1); @@ -272,13 +273,13 @@ void transpose_jacobi( template void transpose_jacobi( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const ValueType* blocks, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const ValueType* blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* block_ptrs, size_type num_blocks, ValueType* out_blocks) { queue->parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { transpose_jacobi( @@ -295,7 +296,7 @@ void adaptive_transpose_jacobi( preconditioner::block_interleaved_storage_scheme storage_scheme, const precision_reduction* __restrict__ block_precisions, const IndexType* __restrict__ block_ptrs, size_type num_blocks, - ValueType* __restrict__ out_blocks, sycl::nd_item<3> item_ct1) + ValueType* __restrict__ out_blocks, ::sycl::nd_item<3> item_ct1) { const auto block_id = thread::get_subwarp_id(item_ct1); @@ -330,15 +331,15 @@ void adaptive_transpose_jacobi( template void adaptive_transpose_jacobi( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const ValueType* blocks, + dim3 grid, 
dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const ValueType* blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const precision_reduction* block_precisions, const IndexType* block_ptrs, size_type num_blocks, ValueType* out_blocks) { queue->parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { adaptive_transpose_jacobi( diff --git a/dpcpp/preconditioner/jacobi_simple_apply_instantiate.inc.dp.cpp b/dpcpp/preconditioner/jacobi_simple_apply_instantiate.inc.dp.cpp index 39e78f37d1c..4dde844b589 100644 --- a/dpcpp/preconditioner/jacobi_simple_apply_instantiate.inc.dp.cpp +++ b/dpcpp/preconditioner/jacobi_simple_apply_instantiate.inc.dp.cpp @@ -72,7 +72,7 @@ void apply( preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* __restrict__ block_ptrs, size_type num_blocks, const ValueType* __restrict__ b, int32 b_stride, ValueType* __restrict__ x, - int32 x_stride, sycl::nd_item<3> item_ct1) + int32 x_stride, ::sycl::nd_item<3> item_ct1) { const auto block_id = thread::get_subwarp_id(item_ct1); @@ -98,15 +98,15 @@ void apply( template void apply( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const ValueType* blocks, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const ValueType* blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const IndexType* block_ptrs, size_type num_blocks, const ValueType* b, int32 b_stride, ValueType* x, int32 x_stride) { queue->parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { apply( blocks, storage_scheme, block_ptrs, num_blocks, b, b_stride, @@ -123,7 +123,7 @@ void adaptive_apply( const precision_reduction* __restrict__ block_precisions, const IndexType* __restrict__ block_ptrs, size_type num_blocks, const ValueType* __restrict__ b, int32 b_stride, ValueType* __restrict__ x, - int32 x_stride, sycl::nd_item<3> item_ct1) + int32 x_stride, ::sycl::nd_item<3> item_ct1) { const auto block_id = thread::get_subwarp_id(item_ct1); @@ -153,8 +153,8 @@ void adaptive_apply( template void adaptive_apply( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - const ValueType* blocks, + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, const ValueType* blocks, preconditioner::block_interleaved_storage_scheme storage_scheme, const precision_reduction* block_precisions, const IndexType* block_ptrs, size_type num_blocks, const ValueType* b, int32 b_stride, ValueType* x, @@ -162,7 +162,7 @@ void adaptive_apply( { queue->parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(subwarp_size)]] { adaptive_apply( blocks, storage_scheme, block_precisions, block_ptrs, diff --git a/dpcpp/solver/cb_gmres_kernels.dp.cpp b/dpcpp/solver/cb_gmres_kernels.dp.cpp index fb5633999d1..3c1e5223025 100644 --- a/dpcpp/solver/cb_gmres_kernels.dp.cpp +++ b/dpcpp/solver/cb_gmres_kernels.dp.cpp @@ -82,7 +82,7 @@ constexpr int default_dot_size = default_dot_dim * default_dot_dim; template void zero_matrix_kernel(size_type m, size_type n, size_type stride, ValueType* __restrict__ array, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); if (tidx < n) 
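// ---------------------------------------------------------------------------
// Sketch (illustration, not part of the patch): a host-side view of what the
// block-Jacobi apply kernels above compute. For every diagonal block, the
// stored (already inverted) dense block is multiplied with the matching
// segment of b. The real kernels map one sub-warp per block and read the
// block through storage_scheme with per-block strides; both are simplified
// away here and all names are illustrative.
#include <cstddef>
#include <vector>

void block_jacobi_apply(const std::vector<int>& block_ptrs,  // num_blocks + 1
                        const std::vector<std::vector<double>>& blocks,
                        const std::vector<double>& b, std::vector<double>& x)
{
    for (std::size_t k = 0; k + 1 < block_ptrs.size(); ++k) {
        const auto begin = block_ptrs[k];
        const auto size = block_ptrs[k + 1] - begin;
        const auto& block = blocks[k];  // row-major size x size block
        for (int row = 0; row < size; ++row) {
            double sum = 0.0;
            for (int col = 0; col < size; ++col) {
                sum += block[row * size + col] * b[begin + col];
            }
            x[begin + row] = sum;
        }
    }
}
// ---------------------------------------------------------------------------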
{ @@ -102,7 +102,7 @@ template void restart_1_kernel(size_type num_rows, size_type num_rhs, size_type krylov_dim, Accessor3d krylov_bases, ValueType* __restrict__ residual_norm_collection, - size_type stride_residual_nc, sycl::nd_item<3> item_ct1) + size_type stride_residual_nc, ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto krylov_stride = @@ -130,14 +130,15 @@ void restart_1_kernel(size_type num_rows, size_type num_rhs, template void restart_1_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, size_type num_rhs, - size_type krylov_dim, Accessor3d krylov_bases, + ::sycl::queue* queue, size_type num_rows, + size_type num_rhs, size_type krylov_dim, + Accessor3d krylov_bases, ValueType* residual_norm_collection, size_type stride_residual_nc) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { restart_1_kernel( num_rows, num_rhs, krylov_dim, krylov_bases, residual_norm_collection, stride_residual_nc, item_ct1); @@ -154,7 +155,7 @@ void restart_2_kernel( const remove_complex* __restrict__ residual_norm, ValueType* __restrict__ residual_norm_collection, Accessor3d krylov_bases, ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, - size_type* __restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) + size_type* __restrict__ final_iter_nums, ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto krylov_stride = @@ -178,16 +179,17 @@ void restart_2_kernel( template void restart_2_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, size_type num_rhs, - const ValueType* residual, size_type stride_residual, + ::sycl::queue* queue, size_type num_rows, + size_type num_rhs, const ValueType* residual, + size_type stride_residual, const remove_complex* residual_norm, ValueType* residual_norm_collection, Accessor3d krylov_bases, ValueType* next_krylov_basis, size_type stride_next_krylov, size_type* final_iter_nums) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { restart_2_kernel( num_rows, num_rhs, residual, stride_residual, residual_norm, residual_norm_collection, krylov_bases, next_krylov_basis, @@ -200,7 +202,7 @@ void restart_2_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, void increase_final_iteration_numbers_kernel( size_type* __restrict__ final_iter_nums, const stopping_status* __restrict__ stop_status, size_type total_number, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); if (global_id < total_number) { @@ -217,7 +219,8 @@ void multinorm2_kernel( size_type num_rows, size_type num_cols, const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, remove_complex* __restrict__ norms, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1, uninitialized_array, default_dot_dim*(default_dot_dim + 1)>* reduction_helper_array) @@ -261,22 +264,22 @@ void multinorm2_kernel( template void 
multinorm2_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, size_type num_cols, const ValueType* next_krylov_basis, size_type stride_next_krylov, remove_complex* norms, const stopping_status* stop_status) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, default_dot_dim*(default_dot_dim + 1)>, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(default_dot_dim)]] { multinorm2_kernel( num_rows, num_cols, next_krylov_basis, @@ -292,7 +295,7 @@ void multinorminf_without_stop_kernel( size_type num_rows, size_type num_cols, const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, remove_complex* __restrict__ norms, - size_type stride_norms, sycl::nd_item<3> item_ct1, + size_type stride_norms, ::sycl::nd_item<3> item_ct1, uninitialized_array, default_dot_dim*(default_dot_dim + 1)>* reduction_helper_array) @@ -339,21 +342,21 @@ void multinorminf_without_stop_kernel( template void multinorminf_without_stop_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, size_type num_cols, const ValueType* next_krylov_basis, - size_type stride_next_krylov, remove_complex* norms, - size_type stride_norms) + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, size_type num_cols, + const ValueType* next_krylov_basis, size_type stride_next_krylov, + remove_complex* norms, size_type stride_norms) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, default_dot_dim*(default_dot_dim + 1)>, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(default_dot_dim)]] { multinorminf_without_stop_kernel( num_rows, num_cols, next_krylov_basis, @@ -372,7 +375,8 @@ void multinorm2_inf_kernel( size_type stride_next_krylov, remove_complex* __restrict__ norms1, remove_complex* __restrict__ norms2, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1, uninitialized_array, (1 + compute_inf) * default_dot_dim*(default_dot_dim + 1)>* @@ -441,22 +445,23 @@ void multinorm2_inf_kernel( template void multinorm2_inf_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_rows, size_type num_cols, const ValueType* next_krylov_basis, - size_type stride_next_krylov, remove_complex* norms1, - remove_complex* norms2, const stopping_status* stop_status) + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_rows, size_type num_cols, + const ValueType* next_krylov_basis, size_type stride_next_krylov, + remove_complex* norms1, remove_complex* norms2, + const stopping_status* stop_status) { - queue->submit([&](sycl::handler& cgh) { - 
sycl::accessor< + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor< uninitialized_array, (1 + compute_inf) * default_dot_dim*(default_dot_dim + 1)>, - 0, sycl::access_mode::read_write, sycl::access::target::local> + 0, ::sycl::access_mode::read_write, ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(default_dot_dim)]] { multinorm2_inf_kernel( num_rows, num_cols, next_krylov_basis, @@ -473,7 +478,8 @@ void multidot_kernel( const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1, uninitialized_array& reduction_helper_array) { /* @@ -533,22 +539,22 @@ void multidot_kernel( template void multidot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, size_type num_cols, - const ValueType* next_krylov_basis, + ::sycl::queue* queue, size_type num_rows, + size_type num_cols, const ValueType* next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, ValueType* hessenberg_iter, size_type stride_hessenberg, const stopping_status* stop_status) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(dot_dim)]] { multidot_kernel( num_rows, num_cols, next_krylov_basis, @@ -565,7 +571,8 @@ void singledot_kernel( size_type num_rows, const ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1, + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1, uninitialized_array& reduction_helper_array) { /* @@ -615,22 +622,22 @@ void singledot_kernel( template void singledot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, const ValueType* next_krylov_basis, size_type stride_next_krylov, const Accessor3d krylov_bases, ValueType* hessenberg_iter, size_type stride_hessenberg, const stopping_status* stop_status) { - queue->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + queue->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(config::warp_size)]] { singledot_kernel( num_rows, next_krylov_basis, stride_next_krylov, @@ -645,12 +652,15 @@ void singledot_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, // Must be called with at least `num_rows * stride_next_krylov` threads 
in // total. template -void update_next_krylov_kernel( - size_type num_iters, size_type num_rows, size_type num_cols, - ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, - const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) +void update_next_krylov_kernel(size_type num_iters, size_type num_rows, + size_type num_cols, + ValueType* __restrict__ next_krylov_basis, + size_type stride_next_krylov, + const Accessor3d krylov_bases, + const ValueType* __restrict__ hessenberg_iter, + size_type stride_hessenberg, + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_next_krylov; @@ -672,15 +682,16 @@ void update_next_krylov_kernel( template void update_next_krylov_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_iters, size_type num_rows, size_type num_cols, - ValueType* next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, const ValueType* hessenberg_iter, - size_type stride_hessenberg, const stopping_status* stop_status) + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_iters, size_type num_rows, + size_type num_cols, ValueType* next_krylov_basis, + size_type stride_next_krylov, const Accessor3d krylov_bases, + const ValueType* hessenberg_iter, size_type stride_hessenberg, + const stopping_status* stop_status) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { update_next_krylov_kernel( num_iters, num_rows, num_cols, next_krylov_basis, stride_next_krylov, krylov_bases, hessenberg_iter, @@ -700,7 +711,7 @@ void update_next_krylov_and_add_kernel( size_type stride_hessenberg, const ValueType* __restrict__ buffer_iter, size_type stride_buffer, const stopping_status* __restrict__ stop_status, const stopping_status* __restrict__ reorth_status, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_next_krylov; @@ -726,17 +737,17 @@ void update_next_krylov_and_add_kernel( template void update_next_krylov_and_add_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type num_iters, size_type num_rows, size_type num_cols, - ValueType* next_krylov_basis, size_type stride_next_krylov, - const Accessor3d krylov_bases, ValueType* hessenberg_iter, - size_type stride_hessenberg, const ValueType* buffer_iter, - size_type stride_buffer, const stopping_status* stop_status, - const stopping_status* reorth_status) + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type num_iters, size_type num_rows, + size_type num_cols, ValueType* next_krylov_basis, + size_type stride_next_krylov, const Accessor3d krylov_bases, + ValueType* hessenberg_iter, size_type stride_hessenberg, + const ValueType* buffer_iter, size_type stride_buffer, + const stopping_status* stop_status, const stopping_status* reorth_status) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), 
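// ---------------------------------------------------------------------------
// Sketch (illustration, not part of the patch): the Krylov update kernels
// above require at least num_rows * stride work-items in total. The usual way
// such a launch is sized, mirroring the ceildiv helper used elsewhere in this
// patch; names are illustrative.
#include <cstddef>

constexpr std::size_t ceildiv(std::size_t num, std::size_t den)
{
    return (num + den - 1) / den;
}

inline std::size_t num_work_groups(std::size_t num_rows, std::size_t stride,
                                   std::size_t block_size)
{
    // block_size work-items per group, enough groups to cover every entry
    return ceildiv(num_rows * stride, block_size);
}
// ---------------------------------------------------------------------------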
[=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { update_next_krylov_and_add_kernel( num_iters, num_rows, num_cols, next_krylov_basis, stride_next_krylov, krylov_bases, hessenberg_iter, @@ -755,7 +766,7 @@ void check_arnoldi_norms( size_type stride_hessenberg, size_type iter, Accessor3d krylov_bases, const stopping_status* __restrict__ stop_status, stopping_status* __restrict__ reorth_status, - size_type* __restrict__ num_reorth, sycl::nd_item<3> item_ct1) + size_type* __restrict__ num_reorth, ::sycl::nd_item<3> item_ct1) { const remove_complex eta_squared = 1.0 / 2.0; const auto col_idx = thread::get_thread_id_flat(item_ct1); @@ -783,7 +794,7 @@ void check_arnoldi_norms( template void check_arnoldi_norms(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rhs, + ::sycl::queue* queue, size_type num_rhs, remove_complex* arnoldi_norm, size_type stride_norm, ValueType* hessenberg_iter, size_type stride_hessenberg, size_type iter, @@ -791,9 +802,9 @@ void check_arnoldi_norms(dim3 grid, dim3 block, size_type dynamic_shared_memory, const stopping_status* stop_status, stopping_status* reorth_status, size_type* num_reorth) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { check_arnoldi_norms( num_rhs, arnoldi_norm, stride_norm, hessenberg_iter, stride_hessenberg, iter, krylov_bases, stop_status, @@ -809,7 +820,7 @@ void set_scalar_kernel(size_type num_rhs, size_type num_blocks, size_type stride_residual, const RealValueType* __restrict__ arnoldi_inf, size_type stride_inf, Accessor3d krylov_bases, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { static_assert(!is_complex_s::value, "ValueType must not be complex!"); @@ -836,15 +847,15 @@ void set_scalar_kernel(size_type num_rhs, size_type num_blocks, template void set_scalar_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rhs, + ::sycl::queue* queue, size_type num_rhs, size_type num_blocks, const RealValueType* residual_norm, size_type stride_residual, const RealValueType* arnoldi_inf, size_type stride_inf, Accessor3d krylov_bases) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { set_scalar_kernel( num_rhs, num_blocks, residual_norm, stride_residual, arnoldi_inf, stride_inf, krylov_bases, item_ct1); @@ -861,7 +872,8 @@ void update_krylov_next_krylov_kernel( ValueType* __restrict__ next_krylov_basis, size_type stride_next_krylov, Accessor3d krylov_bases, const ValueType* __restrict__ hessenberg_iter, size_type stride_hessenberg, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_idx = global_id / stride_next_krylov; @@ -883,15 +895,16 @@ void update_krylov_next_krylov_kernel( template void update_krylov_next_krylov_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue* queue, - size_type iter, size_type num_rows, size_type num_cols, - ValueType* next_krylov_basis, size_type stride_next_krylov, - Accessor3d krylov_bases, const 
ValueType* hessenberg_iter, - size_type stride_hessenberg, const stopping_status* stop_status) + dim3 grid, dim3 block, size_type dynamic_shared_memory, + ::sycl::queue* queue, size_type iter, size_type num_rows, + size_type num_cols, ValueType* next_krylov_basis, + size_type stride_next_krylov, Accessor3d krylov_bases, + const ValueType* hessenberg_iter, size_type stride_hessenberg, + const stopping_status* stop_status) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { update_krylov_next_krylov_kernel( iter, num_rows, num_cols, next_krylov_basis, stride_next_krylov, krylov_bases, hessenberg_iter, @@ -910,7 +923,7 @@ void calculate_Qy_kernel(size_type num_rows, size_type num_cols, ValueType* __restrict__ before_preconditioner, size_type stride_preconditioner, const size_type* __restrict__ final_iter_nums, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row_id = global_id / stride_preconditioner; @@ -927,16 +940,16 @@ void calculate_Qy_kernel(size_type num_rows, size_type num_cols, template void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue* queue, size_type num_rows, + ::sycl::queue* queue, size_type num_rows, size_type num_cols, const Accessor3d krylov_bases, const ValueType* y, size_type stride_y, ValueType* before_preconditioner, size_type stride_preconditioner, const size_type* final_iter_nums) { - queue->submit([&](sycl::handler& cgh) { + queue->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { calculate_Qy_kernel( num_rows, num_cols, krylov_bases, y, stride_y, before_preconditioner, stride_preconditioner, diff --git a/dpcpp/solver/common_gmres_kernels.dp.inc b/dpcpp/solver/common_gmres_kernels.dp.inc index e376d5d5521..10a10cda15a 100644 --- a/dpcpp/solver/common_gmres_kernels.dp.inc +++ b/dpcpp/solver/common_gmres_kernels.dp.inc @@ -40,7 +40,7 @@ void initialize_kernel( ValueType *__restrict__ residual, size_type stride_residual, ValueType *__restrict__ givens_sin, size_type stride_sin, ValueType *__restrict__ givens_cos, size_type stride_cos, - stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + stopping_status *__restrict__ stop_status, ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -67,7 +67,7 @@ void initialize_kernel( template void initialize_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, - sycl::queue *queue, size_type num_rows, + ::sycl::queue *queue, size_type num_rows, size_type num_cols, size_type krylov_dim, const ValueType *b, size_type stride_b, ValueType *residual, size_type stride_residual, @@ -75,9 +75,9 @@ void initialize_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, ValueType *givens_cos, size_type stride_cos, stopping_status *stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](::sycl::handler &cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { initialize_kernel( num_rows, num_cols, krylov_dim, b, stride_b, residual, stride_residual, givens_sin, stride_sin, givens_cos, @@ -146,7 +146,7 @@ void givens_rotation_kernel( remove_complex 
*__restrict__ residual_norm, ValueType *__restrict__ residual_norm_collection, size_type stride_residual_norm_collection, - const stopping_status *__restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status *__restrict__ stop_status, ::sycl::nd_item<3> item_ct1) { const auto col_idx = thread::get_thread_id_flat(item_ct1); @@ -195,7 +195,7 @@ void givens_rotation_kernel( template void givens_rotation_kernel(dim3 grid, dim3 block, - size_type dynamic_shared_memory, sycl::queue *queue, + size_type dynamic_shared_memory, ::sycl::queue *queue, size_type num_rows, size_type num_cols, size_type iter, ValueType *hessenberg_iter, size_type stride_hessenberg, ValueType *givens_sin, @@ -206,9 +206,9 @@ void givens_rotation_kernel(dim3 grid, dim3 block, size_type stride_residual_norm_collection, const stopping_status *stop_status) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](::sycl::handler &cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { givens_rotation_kernel( num_rows, num_cols, iter, hessenberg_iter, stride_hessenberg, givens_sin, stride_sin, givens_cos, @@ -227,7 +227,7 @@ void solve_upper_triangular_kernel( size_type stride_residual_norm_collection, const ValueType *__restrict__ hessenberg, size_type stride_hessenberg, ValueType *__restrict__ y, size_type stride_y, - const size_type *__restrict__ final_iter_nums, sycl::nd_item<3> item_ct1) + const size_type *__restrict__ final_iter_nums, ::sycl::nd_item<3> item_ct1) { const auto col_idx = thread::get_thread_id_flat(item_ct1); @@ -253,16 +253,16 @@ void solve_upper_triangular_kernel( template void solve_upper_triangular_kernel( - dim3 grid, dim3 block, size_type dynamic_shared_memory, sycl::queue *queue, + dim3 grid, dim3 block, size_type dynamic_shared_memory, ::sycl::queue *queue, size_type num_cols, size_type num_rhs, const ValueType *residual_norm_collection, size_type stride_residual_norm_collection, const ValueType *hessenberg, size_type stride_hessenberg, ValueType *y, size_type stride_y, const size_type *final_iter_nums) { - queue->submit([&](sycl::handler &cgh) { + queue->submit([&](::sycl::handler &cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { solve_upper_triangular_kernel( num_cols, num_rhs, residual_norm_collection, stride_residual_norm_collection, hessenberg, diff --git a/dpcpp/solver/idr_kernels.dp.cpp b/dpcpp/solver/idr_kernels.dp.cpp index 5549992f990..8ba72595234 100644 --- a/dpcpp/solver/idr_kernels.dp.cpp +++ b/dpcpp/solver/idr_kernels.dp.cpp @@ -75,7 +75,7 @@ template void initialize_m_kernel(size_type subspace_dim, size_type nrhs, ValueType* __restrict__ m_values, size_type m_stride, stopping_status* __restrict__ stop_status, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row = global_id / m_stride; @@ -93,16 +93,16 @@ void initialize_m_kernel(size_type subspace_dim, size_type nrhs, template void initialize_m_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue* stream, size_type subspace_dim, + ::sycl::queue* stream, size_type subspace_dim, size_type nrhs, ValueType* m_values, size_type m_stride, stopping_status* stop_status) { if (nrhs == 0) { return; } - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - 
sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { initialize_m_kernel(subspace_dim, nrhs, m_values, m_stride, stop_status, item_ct1); }); @@ -113,7 +113,7 @@ void initialize_m_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, template void orthonormalize_subspace_vectors_kernel( size_type num_rows, size_type num_cols, ValueType* __restrict__ values, - size_type stride, sycl::nd_item<3> item_ct1, + size_type stride, ::sycl::nd_item<3> item_ct1, uninitialized_array& reduction_helper_array) { const auto tidx = thread::get_thread_id_flat(item_ct1); @@ -131,12 +131,12 @@ void orthonormalize_subspace_vectors_kernel( } // Ensure already finish reading this shared memory - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); reduction_helper[tidx] = dot; ::gko::kernels::sycl::reduce( group::this_thread_block(item_ct1), reduction_helper, [](const ValueType& a, const ValueType& b) { return a + b; }); - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); dot = reduction_helper[0]; for (size_type j = tidx; j < num_cols; j += block_size) { @@ -150,13 +150,13 @@ void orthonormalize_subspace_vectors_kernel( } // Ensure already finish reading this shared memory - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); reduction_helper_real[tidx] = norm; ::gko::kernels::sycl::reduce( group::this_thread_block(item_ct1), reduction_helper_real, [](const remove_complex& a, const remove_complex& b) { return a + b; }); - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); norm = std::sqrt(reduction_helper_real[0]); for (size_type j = tidx; j < num_cols; j += block_size) { @@ -167,18 +167,18 @@ void orthonormalize_subspace_vectors_kernel( template void orthonormalize_subspace_vectors_kernel( - dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue* stream, + dim3 grid, dim3 block, size_t dynamic_shared_memory, ::sycl::queue* stream, size_type num_rows, size_type num_cols, ValueType* values, size_type stride) { - stream->submit([&](sycl::handler& cgh) { - sycl::accessor, 0, - sycl::access_mode::read_write, - sycl::access::target::local> + stream->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, 0, + ::sycl::access_mode::read_write, + ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(config::warp_size)]] { orthonormalize_subspace_vectors_kernel( num_rows, num_cols, values, stride, item_ct1, @@ -194,7 +194,8 @@ void solve_lower_triangular_kernel( const ValueType* __restrict__ m_values, size_type m_stride, const ValueType* __restrict__ f_values, size_type f_stride, ValueType* __restrict__ c_values, size_type c_stride, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -217,14 +218,14 @@ void solve_lower_triangular_kernel( template void solve_lower_triangular_kernel( - dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue* stream, + dim3 grid, dim3 block, size_t dynamic_shared_memory, ::sycl::queue* stream, size_type 
subspace_dim, size_type nrhs, const ValueType* m_values, size_type m_stride, const ValueType* f_values, size_type f_stride, ValueType* c_values, size_type c_stride, const stopping_status* stop_status) { - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { solve_lower_triangular_kernel( subspace_dim, nrhs, m_values, m_stride, f_values, f_stride, c_values, c_stride, stop_status, item_ct1); @@ -242,7 +243,7 @@ void step_1_kernel(size_type k, size_type num_rows, size_type subspace_dim, const ValueType* __restrict__ g_values, size_type g_stride, ValueType* __restrict__ v_values, size_type v_stride, const stopping_status* __restrict__ stop_status, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row = global_id / nrhs; @@ -264,7 +265,7 @@ void step_1_kernel(size_type k, size_type num_rows, size_type subspace_dim, template void step_1_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue* stream, size_type k, size_type num_rows, + ::sycl::queue* stream, size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, const ValueType* residual_values, size_type residual_stride, const ValueType* c_values, size_type c_stride, @@ -275,9 +276,9 @@ void step_1_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, if (nrhs == 0) { return; } - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { step_1_kernel(k, num_rows, subspace_dim, nrhs, residual_values, residual_stride, c_values, c_stride, g_values, g_stride, v_values, v_stride, stop_status, @@ -294,7 +295,7 @@ void step_2_kernel(size_type k, size_type num_rows, size_type subspace_dim, const ValueType* __restrict__ c_values, size_type c_stride, ValueType* __restrict__ u_values, size_type u_stride, const stopping_status* __restrict__ stop_status, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row = global_id / nrhs; @@ -316,7 +317,7 @@ void step_2_kernel(size_type k, size_type num_rows, size_type subspace_dim, template void step_2_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue* stream, size_type k, size_type num_rows, + ::sycl::queue* stream, size_type k, size_type num_rows, size_type subspace_dim, size_type nrhs, const ValueType* omega_values, const ValueType* v_values, size_type v_stride, const ValueType* c_values, @@ -326,9 +327,9 @@ void step_2_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, if (nrhs == 0) { return; } - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { step_2_kernel(k, num_rows, subspace_dim, nrhs, omega_values, v_values, v_stride, c_values, c_stride, u_values, u_stride, stop_status, item_ct1); @@ -342,7 +343,8 @@ void multidot_kernel( size_type num_rows, size_type nrhs, const ValueType* __restrict__ p_i, const ValueType* __restrict__ g_k, size_type g_k_stride, ValueType* __restrict__ alpha, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> 
item_ct1, + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1, uninitialized_array& reduction_helper_array) { @@ -367,7 +369,7 @@ void multidot_kernel( } } reduction_helper[tidx * (default_dot_dim + 1) + tidy] = local_res; - item_ct1.barrier(sycl::access::fence_space::local_space); + item_ct1.barrier(::sycl::access::fence_space::local_space); local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; const auto tile_block = group::tiled_partition( group::this_thread_block(item_ct1)); @@ -382,21 +384,21 @@ void multidot_kernel( template void multidot_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue* stream, size_type num_rows, size_type nrhs, + ::sycl::queue* stream, size_type num_rows, size_type nrhs, const ValueType* p_i, const ValueType* g_k, size_type g_k_stride, ValueType* alpha, const stopping_status* stop_status) { - stream->submit([&](sycl::handler& cgh) { - sycl::accessor, - 0, sycl::access_mode::read_write, - sycl::access::target::local> + stream->submit([&](::sycl::handler& cgh) { + ::sycl::accessor, + 0, ::sycl::access_mode::read_write, + ::sycl::access::target::local> reduction_helper_array_acc_ct1(cgh); cgh.parallel_for( sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) + [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(default_dot_dim)]] { multidot_kernel( num_rows, nrhs, p_i, g_k, g_k_stride, alpha, @@ -414,7 +416,8 @@ void update_g_k_and_u_kernel( size_type m_stride, const ValueType* __restrict__ g_values, size_type g_stride, ValueType* __restrict__ g_k_values, size_type g_k_stride, ValueType* __restrict__ u_values, size_type u_stride, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); const auto row = tidx / g_k_stride; @@ -434,22 +437,20 @@ void update_g_k_and_u_kernel( } template -void update_g_k_and_u_kernel(dim3 grid, dim3 block, - size_t dynamic_shared_memory, sycl::queue* stream, - size_type k, size_type i, size_type size, - size_type nrhs, const ValueType* alpha, - const ValueType* m_values, size_type m_stride, - const ValueType* g_values, size_type g_stride, - ValueType* g_k_values, size_type g_k_stride, - ValueType* u_values, size_type u_stride, - const stopping_status* stop_status) +void update_g_k_and_u_kernel( + dim3 grid, dim3 block, size_t dynamic_shared_memory, ::sycl::queue* stream, + size_type k, size_type i, size_type size, size_type nrhs, + const ValueType* alpha, const ValueType* m_values, size_type m_stride, + const ValueType* g_values, size_type g_stride, ValueType* g_k_values, + size_type g_k_stride, ValueType* u_values, size_type u_stride, + const stopping_status* stop_status) { if (g_k_stride == 0) { return; } - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for(sycl_nd_range(grid, block), - [=](sycl::nd_item<3> item_ct1) { + [=](::sycl::nd_item<3> item_ct1) { update_g_k_and_u_kernel( k, i, size, nrhs, alpha, m_values, m_stride, g_values, g_stride, g_k_values, g_k_stride, @@ -465,7 +466,7 @@ void update_g_kernel(size_type k, size_type size, size_type nrhs, size_type g_k_stride, ValueType* __restrict__ g_values, size_type g_stride, const stopping_status* __restrict__ stop_status, - sycl::nd_item<3> item_ct1) + ::sycl::nd_item<3> item_ct1) { const auto tidx = thread::get_thread_id_flat(item_ct1); const auto row = tidx / g_k_stride; @@ -483,7 +484,7 @@ 
void update_g_kernel(size_type k, size_type size, size_type nrhs, template void update_g_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue* stream, size_type k, size_type size, + ::sycl::queue* stream, size_type k, size_type size, size_type nrhs, const ValueType* g_k_values, size_type g_k_stride, ValueType* g_values, size_type g_stride, const stopping_status* stop_status) @@ -491,9 +492,9 @@ void update_g_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, if (g_k_stride == 0) { return; } - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { update_g_kernel(k, size, nrhs, g_k_values, g_k_stride, g_values, g_stride, stop_status, item_ct1); @@ -511,7 +512,8 @@ void update_x_r_and_f_kernel( ValueType* __restrict__ f_values, size_type f_stride, ValueType* __restrict__ r_values, size_type r_stride, ValueType* __restrict__ x_values, size_type x_stride, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); const auto row = global_id / x_stride; @@ -538,7 +540,7 @@ void update_x_r_and_f_kernel( template void update_x_r_and_f_kernel( - dim3 grid, dim3 block, size_t dynamic_shared_memory, sycl::queue* stream, + dim3 grid, dim3 block, size_t dynamic_shared_memory, ::sycl::queue* stream, size_type k, size_type size, size_type subspace_dim, size_type nrhs, const ValueType* m_values, size_type m_stride, const ValueType* g_values, size_type g_stride, const ValueType* u_values, size_type u_stride, @@ -546,9 +548,9 @@ void update_x_r_and_f_kernel( size_type r_stride, ValueType* x_values, size_type x_stride, const stopping_status* stop_status) { - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { update_x_r_and_f_kernel( k, size, subspace_dim, nrhs, m_values, m_stride, g_values, g_stride, u_values, u_stride, f_values, f_stride, r_values, @@ -564,7 +566,8 @@ void compute_omega_kernel( const ValueType* __restrict__ tht, const remove_complex* __restrict__ residual_norm, ValueType* __restrict__ omega, - const stopping_status* __restrict__ stop_status, sycl::nd_item<3> item_ct1) + const stopping_status* __restrict__ stop_status, + ::sycl::nd_item<3> item_ct1) { const auto global_id = thread::get_thread_id_flat(item_ct1); @@ -586,15 +589,15 @@ void compute_omega_kernel( template void compute_omega_kernel(dim3 grid, dim3 block, size_t dynamic_shared_memory, - sycl::queue* stream, size_type nrhs, + ::sycl::queue* stream, size_type nrhs, const remove_complex kappa, const ValueType* tht, const remove_complex* residual_norm, ValueType* omega, const stopping_status* stop_status) { - stream->submit([&](sycl::handler& cgh) { + stream->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl_nd_range(grid, block), [=](sycl::nd_item<3> item_ct1) { + sycl_nd_range(grid, block), [=](::sycl::nd_item<3> item_ct1) { compute_omega_kernel(nrhs, kappa, tht, residual_norm, omega, stop_status, item_ct1); }); @@ -632,8 +635,8 @@ void initialize_subspace_vectors(std::shared_ptr exec, auto n = subspace_vectors->get_size()[0] * subspace_vectors->get_stride(); n = is_complex() ? 
2 * n : n; - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>(n), [=](sycl::item<1> idx) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>(n), [=](::sycl::item<1> idx) { std::uint64_t offset = idx.get_linear_id(); oneapi::dpl::minstd_rand engine(seed, offset); oneapi::dpl::normal_distribution> diff --git a/dpcpp/stop/criterion_kernels.dp.cpp b/dpcpp/stop/criterion_kernels.dp.cpp index 3a24092ecac..9b8c90fa93a 100644 --- a/dpcpp/stop/criterion_kernels.dp.cpp +++ b/dpcpp/stop/criterion_kernels.dp.cpp @@ -56,8 +56,8 @@ void set_all_statuses(std::shared_ptr exec, { auto size = stop_status->get_num_elems(); stopping_status* __restrict__ stop_status_ptr = stop_status->get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{size}, [=](sycl::id<1> idx_id) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{size}, [=](::sycl::id<1> idx_id) { const auto idx = idx_id[0]; stop_status_ptr[idx].stop(stoppingId, setFinalized); }); diff --git a/dpcpp/stop/residual_norm_kernels.dp.cpp b/dpcpp/stop/residual_norm_kernels.dp.cpp index 76295864840..34249c7cab2 100644 --- a/dpcpp/stop/residual_norm_kernels.dp.cpp +++ b/dpcpp/stop/residual_norm_kernels.dp.cpp @@ -68,8 +68,8 @@ void residual_norm(std::shared_ptr exec, static_assert(is_complex_s::value == false, "ValueType must not be complex in this function!"); auto device_storage_val = device_storage->get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{1}, [=](::sycl::id<1>) { device_storage_val[0] = true; device_storage_val[1] = false; }); @@ -78,9 +78,9 @@ void residual_norm(std::shared_ptr exec, auto orig_tau_val = orig_tau->get_const_values(); auto tau_val = tau->get_const_values(); auto stop_status_val = stop_status->get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { + ::sycl::range<1>{tau->get_size()[1]}, [=](::sycl::id<1> idx_id) { const auto tidx = idx_id[0]; if (tau_val[tidx] < rel_residual_goal * orig_tau_val[tidx]) { stop_status_val[tidx].converge(stoppingId, setFinalized); @@ -124,8 +124,8 @@ void implicit_residual_norm( array* device_storage, bool* all_converged, bool* one_changed) { auto device_storage_val = device_storage->get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{1}, [=](sycl::id<1>) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{1}, [=](::sycl::id<1>) { device_storage_val[0] = true; device_storage_val[1] = false; }); @@ -134,9 +134,9 @@ void implicit_residual_norm( auto orig_tau_val = orig_tau->get_const_values(); auto tau_val = tau->get_const_values(); auto stop_status_val = stop_status->get_data(); - exec->get_queue()->submit([&](sycl::handler& cgh) { + exec->get_queue()->submit([&](::sycl::handler& cgh) { cgh.parallel_for( - sycl::range<1>{tau->get_size()[1]}, [=](sycl::id<1> idx_id) { + ::sycl::range<1>{tau->get_size()[1]}, [=](::sycl::id<1> idx_id) { const auto tidx = idx_id[0]; if (std::sqrt(std::abs(tau_val[tidx])) < rel_residual_goal * orig_tau_val[tidx]) { diff --git a/dpcpp/test/base/executor.dp.cpp b/dpcpp/test/base/executor.dp.cpp index 
19f03039c73..976102a217b 100644 --- a/dpcpp/test/base/executor.dp.cpp +++ b/dpcpp/test/base/executor.dp.cpp @@ -115,7 +115,8 @@ TEST_F(SyclExecutor, CanGetExecInfo) TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeAll) { - auto count = sycl::device::get_devices(sycl::info::device_type::all).size(); + auto count = + sycl::device::get_devices(::sycl::info::device_type::all).size(); auto num_devices = gko::SyclExecutor::get_num_devices("all"); @@ -125,7 +126,8 @@ TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeAll) TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeCPU) { - auto count = sycl::device::get_devices(sycl::info::device_type::cpu).size(); + auto count = + sycl::device::get_devices(::sycl::info::device_type::cpu).size(); auto num_devices = gko::SyclExecutor::get_num_devices("cpu"); @@ -135,7 +137,8 @@ TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeCPU) TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeGPU) { - auto count = sycl::device::get_devices(sycl::info::device_type::gpu).size(); + auto count = + sycl::device::get_devices(::sycl::info::device_type::gpu).size(); auto num_devices = gko::SyclExecutor::get_num_devices("gpu"); @@ -146,7 +149,8 @@ TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeGPU) TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeAccelerator) { auto count = - sycl::device::get_devices(sycl::info::device_type::accelerator).size(); + sycl::device::get_devices(::sycl::info::device_type::accelerator) + .size(); auto num_devices = gko::SyclExecutor::get_num_devices("accelerator"); @@ -197,7 +201,7 @@ TEST_F(SyclExecutor, CopiesDataToCPU) is_set.set_executor(sycl); ASSERT_NO_THROW(sycl->synchronize()); - ASSERT_NO_THROW(sycl->get_queue()->submit([&](sycl::handler& cgh) { + ASSERT_NO_THROW(sycl->get_queue()->submit([&](::sycl::handler& cgh) { auto* is_set_ptr = is_set.get_data(); cgh.single_task([=]() { check_data(copy, is_set_ptr); }); })); @@ -217,7 +221,7 @@ TEST_F(SyclExecutor, CopiesDataFromCPU) { int copy[2]; auto orig = sycl->alloc(2); - sycl->get_queue()->submit([&](sycl::handler& cgh) { + sycl->get_queue()->submit([&](::sycl::handler& cgh) { cgh.single_task([=]() { init_data(orig); }); }); @@ -238,7 +242,7 @@ TEST_F(SyclExecutor, CopiesDataFromSyclToSycl) int copy[2]; gko::array is_set(ref, 1); auto orig = sycl->alloc(2); - sycl->get_queue()->submit([&](sycl::handler& cgh) { + sycl->get_queue()->submit([&](::sycl::handler& cgh) { cgh.single_task([=]() { init_data(orig); }); }); @@ -246,7 +250,7 @@ TEST_F(SyclExecutor, CopiesDataFromSyclToSycl) sycl2->copy_from(sycl, 2, orig, copy_sycl2); // Check that the data is really on GPU is_set.set_executor(sycl2); - ASSERT_NO_THROW(sycl2->get_queue()->submit([&](sycl::handler& cgh) { + ASSERT_NO_THROW(sycl2->get_queue()->submit([&](::sycl::handler& cgh) { auto* is_set_ptr = is_set.get_data(); cgh.single_task([=]() { check_data(copy_sycl2, is_set_ptr); }); })); @@ -278,9 +282,9 @@ TEST_F(SyclExecutor, FreeAfterKernel) gko::array y(sycl, length); auto x_val = x.get_data(); auto y_val = y.get_data(); - sycl->get_queue()->submit([&](sycl::handler& cgh) { - cgh.parallel_for(sycl::range<1>{length}, - [=](sycl::id<1> i) { y_val[i] += x_val[i]; }); + sycl->get_queue()->submit([&](::sycl::handler& cgh) { + cgh.parallel_for(::sycl::range<1>{length}, + [=](::sycl::id<1> i) { y_val[i] += x_val[i]; }); }); } // to ensure everything on queue is finished. 
diff --git a/dpcpp/test/components/cooperative_groups.dp.cpp b/dpcpp/test/components/cooperative_groups.dp.cpp
index d48d3a0d6a9..35d53eac9e1 100644
--- a/dpcpp/test/components/cooperative_groups.dp.cpp
+++ b/dpcpp/test/components/cooperative_groups.dp.cpp
@@ -113,7 +113,7 @@ class CooperativeGroups : public testing::TestWithParam {
 // kernel implementation
 template
-void cg_shuffle(bool* s, sycl::nd_item<3> item_ct1)
+void cg_shuffle(bool* s, ::sycl::nd_item<3> item_ct1)
 {
     constexpr auto sg_size = DeviceConfig::subgroup_size;
     auto group =
@@ -130,13 +130,13 @@ void cg_shuffle(bool* s, sycl::nd_item<3> item_ct1)
 // group all kernel things together
 template
 void cg_shuffle_host(dim3 grid, dim3 block,
-                     gko::size_type dynamic_shared_memory, sycl::queue* queue,
+                     gko::size_type dynamic_shared_memory, ::sycl::queue* queue,
                      bool* s)
 {
-    queue->submit([&](sycl::handler& cgh) {
+    queue->submit([&](::sycl::handler& cgh) {
         cgh.parallel_for(
             sycl_nd_range(grid, block),
-            [=](sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
+            [=](::sycl::nd_item<3> item_ct1) [[sycl::reqd_sub_group_size(
                 DeviceConfig::subgroup_size)]] __WG_BOUND__(DeviceConfig::
                                                                 block_size) {
                 cg_shuffle(s, item_ct1);
@@ -151,7 +151,7 @@ GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION_TOTYPE(cg_shuffle_config,
 // the call
 void cg_shuffle_config_call(std::uint32_t desired_cfg, dim3 grid, dim3 block,
                             gko::size_type dynamic_shared_memory,
-                            sycl::queue* queue, bool* s)
+                            ::sycl::queue* queue, bool* s)
 {
     cg_shuffle_config(
         default_config_list,
@@ -169,7 +169,7 @@ TEST_P(CooperativeGroups, Shuffle)
 template
-void cg_all(bool* s, sycl::nd_item<3> item_ct1)
+void cg_all(bool* s, ::sycl::nd_item<3> item_ct1)
 {
     constexpr auto sg_size = DeviceConfig::subgroup_size;
     auto group =
@@ -190,7 +190,7 @@ TEST_P(CooperativeGroups, All) { test_all_subgroup(cg_all_call); }
 template
-void cg_any(bool* s, sycl::nd_item<3> item_ct1)
+void cg_any(bool* s, ::sycl::nd_item<3> item_ct1)
 {
     constexpr auto sg_size = DeviceConfig::subgroup_size;
     auto group =
@@ -210,7 +210,7 @@ TEST_P(CooperativeGroups, Any) { test_all_subgroup(cg_any_call); }
 template
-void cg_ballot(bool* s, sycl::nd_item<3> item_ct1)
+void cg_ballot(bool* s, ::sycl::nd_item<3> item_ct1)
 {
     constexpr auto sg_size = cfg::subgroup_size;
     auto group =
diff --git a/dpcpp/test_dpcpp.dp.cpp b/dpcpp/test_dpcpp.dp.cpp
index 80627b6498b..f9984698af4 100644
--- a/dpcpp/test_dpcpp.dp.cpp
+++ b/dpcpp/test_dpcpp.dp.cpp
@@ -36,6 +36,6 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 int main()
 {
     // Use the queue property `in_order` which is DPC++ only
-    sycl::queue myQueue{sycl::property::queue::in_order{}};
+    ::sycl::queue myQueue{sycl::property::queue::in_order{}};
     return 0;
 }
diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp
index 7f91c08db0d..53a5e01ad83 100644
--- a/include/ginkgo/core/base/executor.hpp
+++ b/include/ginkgo/core/base/executor.hpp
@@ -1969,7 +1969,7 @@ class SyclExecutor : public detail::ExecutorBase,
         return this->get_exec_info().device_id;
     }
 
-    sycl::queue* get_queue() const { return queue_.get(); }
+    ::sycl::queue* get_queue() const { return queue_.get(); }
 
     /**
      * Get the number of devices present on the system.
@@ -2088,7 +2088,7 @@ class SyclExecutor : public detail::ExecutorBase,
     template
     using queue_manager = std::unique_ptr>;
 
-    queue_manager queue_;
+    queue_manager<::sycl::queue> queue_;
 };
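
Editorial note (not part of the patch): the mechanical `sycl::` to `::sycl::` change above is needed because these kernels live inside the nested namespace `gko::kernels::sycl` (visible in the `::gko::kernels::sycl::reduce` calls), where an unqualified `sycl::` resolves to the enclosing Ginkgo namespace instead of the global SYCL runtime namespace. A minimal sketch of the ambiguity, using a hypothetical `fill_ones` kernel that is not part of Ginkgo:

```cpp
#include <cstddef>
#include <sycl/sycl.hpp>

namespace gko {
namespace kernels {
namespace sycl {

// Inside this namespace, plain `sycl::handler` would be looked up as
// `gko::kernels::sycl::handler` (which does not exist), so the global
// namespace has to be spelled out as `::sycl::`.
void fill_ones(::sycl::queue* queue, int* data, std::size_t n)
{
    queue->submit([&](::sycl::handler& cgh) {
        cgh.parallel_for(::sycl::range<1>{n},
                         [=](::sycl::id<1> idx) { data[idx[0]] = 1; });
    });
}

}  // namespace sycl
}  // namespace kernels
}  // namespace gko
```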