diff --git a/benchmark/utils/dpcpp_linops.dp.cpp b/benchmark/utils/dpcpp_linops.dp.cpp index 1775035365c..efd48f0213f 100644 --- a/benchmark/utils/dpcpp_linops.dp.cpp +++ b/benchmark/utils/dpcpp_linops.dp.cpp @@ -65,9 +65,9 @@ class OnemklBase : public gko::LinOp { return this->mat_handle_.get(); } - std::shared_ptr get_device_exec() const + std::shared_ptr get_device_exec() const { - return std::dynamic_pointer_cast( + return std::dynamic_pointer_cast( this->get_executor()); } diff --git a/benchmark/utils/dpcpp_timer.dp.cpp b/benchmark/utils/dpcpp_timer.dp.cpp index 35f2dc8f3e5..ddd5fe7a698 100644 --- a/benchmark/utils/dpcpp_timer.dp.cpp +++ b/benchmark/utils/dpcpp_timer.dp.cpp @@ -40,25 +40,25 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. /** - * DpcppTimer uses dpcpp executor and event to measure the timing. + * SyclTimer uses sycl executor and event to measure the timing. */ -class DpcppTimer : public Timer { +class SyclTimer : public Timer { public: /** - * Create a DpcppTimer. + * Create a SyclTimer. * - * @param exec Executor which should be a DpcppExecutor + * @param exec Executor which should be a SyclExecutor */ - DpcppTimer(std::shared_ptr exec) - : DpcppTimer(std::dynamic_pointer_cast(exec)) + SyclTimer(std::shared_ptr exec) + : SyclTimer(std::dynamic_pointer_cast(exec)) {} /** - * Create a DpcppTimer. + * Create a SyclTimer. * - * @param exec DpcppExecutor associated to the timer + * @param exec SyclExecutor associated to the timer */ - DpcppTimer(std::shared_ptr exec) : Timer() + SyclTimer(std::shared_ptr exec) : Timer() { assert(exec != nullptr); if (!exec->get_queue() @@ -73,7 +73,7 @@ class DpcppTimer : public Timer { void tic_impl() override { exec_->synchronize(); - // Currently, gko::DpcppExecutor always use default stream. + // Currently, gko::SyclExecutor always use default stream. 
start_ = exec_->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(1, [=](sycl::id<1> id) {}); }); @@ -96,14 +96,14 @@ class DpcppTimer : public Timer { } private: - std::shared_ptr exec_; + std::shared_ptr exec_; sycl::event start_; int id_; }; -std::shared_ptr get_dpcpp_timer( - std::shared_ptr exec) +std::shared_ptr get_sycl_timer( + std::shared_ptr exec) { - return std::make_shared(exec); + return std::make_shared(exec); } diff --git a/benchmark/utils/formats.hpp b/benchmark/utils/formats.hpp index 6b024b16d1c..a1cb07a6a0c 100644 --- a/benchmark/utils/formats.hpp +++ b/benchmark/utils/formats.hpp @@ -177,9 +177,8 @@ std::shared_ptr create_gpu_strategy( return std::make_shared(cuda->shared_from_this()); } else if (auto hip = dynamic_cast(exec.get())) { return std::make_shared(hip->shared_from_this()); - } else if (auto dpcpp = - dynamic_cast(exec.get())) { - return std::make_shared(dpcpp->shared_from_this()); + } else if (auto sycl = dynamic_cast(exec.get())) { + return std::make_shared(sycl->shared_from_this()); } else { return std::make_shared(); } diff --git a/benchmark/utils/general.hpp b/benchmark/utils/general.hpp index b7ec0e72cf1..e138f6f2107 100644 --- a/benchmark/utils/general.hpp +++ b/benchmark/utils/general.hpp @@ -389,13 +389,13 @@ const std::map(bool)>> gko::OmpExecutor::create(), create_hip_allocator()); }}, - {"dpcpp", [](bool use_gpu_timer) { - auto property = dpcpp_queue_property::in_order; + {"sycl", [](bool use_gpu_timer) { + auto property = gko::sycl_queue_property::in_order; if (use_gpu_timer) { - property = dpcpp_queue_property::in_order | - dpcpp_queue_property::enable_profiling; + property = gko::sycl_queue_property::in_order | + gko::sycl_queue_property::enable_profiling; } - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( FLAGS_device_id, gko::OmpExecutor::create(), "all", property); }}}; @@ -425,20 +425,20 @@ const std::map get_hip_timer( #ifdef HAS_DPCPP_TIMER -std::shared_ptr get_dpcpp_timer( - 
std::shared_ptr exec); +std::shared_ptr get_sycl_timer( + std::shared_ptr exec); #endif // HAS_DPCPP_TIMER @@ -278,13 +278,13 @@ inline std::shared_ptr get_timer( #endif // HAS_HIP_TIMER #ifdef HAS_DPCPP_TIMER - if (auto dpcpp = - std::dynamic_pointer_cast(exec)) { - return get_dpcpp_timer(dpcpp); + if (auto sycl = + std::dynamic_pointer_cast(exec)) { + return get_sycl_timer(sycl); } #endif // HAS_DPCPP_TIMER } - // No cuda/hip/dpcpp executor available or no gpu_timer used + // No cuda/hip/sycl executor available or no gpu_timer used return std::make_shared(exec); } diff --git a/cmake/create_test.cmake b/cmake/create_test.cmake index 054c683e5ee..e1bb60b8642 100644 --- a/cmake/create_test.cmake +++ b/cmake/create_test.cmake @@ -236,7 +236,7 @@ function(ginkgo_create_common_device_test test_name) cmake_parse_arguments(PARSE_ARGV 1 common_device_test "" "${gko_test_single_args}" "${gko_test_multi_args}") ginkgo_build_test_name(${test_name} test_target_name) if(GINKGO_BUILD_SYCL) - ginkgo_create_common_test_internal(${test_name} DpcppExecutor dpcpp ${ARGN}) + ginkgo_create_common_test_internal(${test_name} SyclExecutor dpcpp ${ARGN}) target_compile_features(${test_target_name}_dpcpp PRIVATE cxx_std_17) target_compile_options(${test_target_name}_dpcpp PRIVATE ${GINKGO_DPCPP_FLAGS}) # We need to use a new file to avoid sycl setting in other backends because add_sycl_to_target will change the source property. 
diff --git a/common/unified/base/kernel_launch.hpp b/common/unified/base/kernel_launch.hpp index 0f4ddf594b5..736b1999319 100644 --- a/common/unified/base/kernel_launch.hpp +++ b/common/unified/base/kernel_launch.hpp @@ -99,7 +99,7 @@ GKO_INLINE GKO_ATTRIBUTES constexpr unpack_member_type unpack_member(T value) #elif defined(GKO_COMPILING_DPCPP) -#define GKO_DEVICE_NAMESPACE dpcpp +#define GKO_DEVICE_NAMESPACE sycl #define GKO_KERNEL diff --git a/common/unified/distributed/partition_helpers_kernels.cpp b/common/unified/distributed/partition_helpers_kernels.cpp index 3c041dd7e4b..c8b2681638a 100644 --- a/common/unified/distributed/partition_helpers_kernels.cpp +++ b/common/unified/distributed/partition_helpers_kernels.cpp @@ -50,7 +50,7 @@ void check_consecutive_ranges(std::shared_ptr exec, { array result_uint32{exec, 1}; auto num_ranges = range_start_ends.get_num_elems() / 2; - // need additional guard because DPCPP doesn't return the initial value for + // need additional guard because SYCL doesn't return the initial value for // empty inputs if (num_ranges > 1) { run_kernel_reduction( diff --git a/core/base/executor.cpp b/core/base/executor.cpp index d49705221de..ac1f8d9a9d0 100644 --- a/core/base/executor.cpp +++ b/core/base/executor.cpp @@ -53,7 +53,7 @@ void Operation::run(std::shared_ptr executor) const GKO_NOT_IMPLEMENTED; -void Operation::run(std::shared_ptr executor) const +void Operation::run(std::shared_ptr executor) const GKO_NOT_IMPLEMENTED; diff --git a/core/base/noop_scoped_device_id_guard.hpp b/core/base/noop_scoped_device_id_guard.hpp index fc3bad581c6..af2d9a9f279 100644 --- a/core/base/noop_scoped_device_id_guard.hpp +++ b/core/base/noop_scoped_device_id_guard.hpp @@ -44,7 +44,7 @@ namespace detail { /** * An implementation of generic_scoped_device_id_guard that does nothing. 
* - * This is used for OmpExecutor and DpcppExecutor, since they don't require + * This is used for OmpExecutor and SyclExecutor, since they don't require * setting a device id. */ class noop_scoped_device_id_guard : public generic_scoped_device_id_guard {}; diff --git a/core/base/timer.cpp b/core/base/timer.cpp index bc1c7169b3d..a34b98ce306 100644 --- a/core/base/timer.cpp +++ b/core/base/timer.cpp @@ -65,8 +65,8 @@ time_point::~time_point() case type::hip: kernels::hip::destroy_event(data_.hip_event); break; - case type::dpcpp: - kernels::dpcpp::destroy_event(data_.dpcpp_event); + case type::sycl: + kernels::sycl::destroy_event(data_.sycl_event); break; case type::cpu: default: @@ -137,9 +137,9 @@ std::unique_ptr Timer::create_for_executor( } else if (auto hip_exec = std::dynamic_pointer_cast(exec)) { return std::make_unique(hip_exec); - } else if (auto dpcpp_exec = - std::dynamic_pointer_cast(exec)) { - return std::make_unique(dpcpp_exec); + } else if (auto sycl_exec = + std::dynamic_pointer_cast(exec)) { + return std::make_unique(sycl_exec); } else { return std::make_unique(); } diff --git a/core/base/version.cpp b/core/base/version.cpp index 534dbff488d..f2178cefbe4 100644 --- a/core/base/version.cpp +++ b/core/base/version.cpp @@ -66,8 +66,8 @@ std::ostream& operator<<(std::ostream& os, const version_info& ver_info) print_version(os, ver_info.cuda_version); os << "\n the HIP module is "; print_version(os, ver_info.hip_version); - os << "\n the DPCPP module is "; - print_version(os, ver_info.dpcpp_version); + os << "\n the SYCL module is "; + print_version(os, ver_info.sycl_version); return os; } diff --git a/core/device_hooks/cuda_hooks.cpp b/core/device_hooks/cuda_hooks.cpp index ff644a5f05f..93890ea3a83 100644 --- a/core/device_hooks/cuda_hooks.cpp +++ b/core/device_hooks/cuda_hooks.cpp @@ -165,7 +165,7 @@ void CudaExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, GKO_NOT_COMPILED(cuda); -void CudaExecutor::raw_copy_to(const DpcppExecutor*, 
size_type num_bytes, +void CudaExecutor::raw_copy_to(const SyclExecutor*, size_type num_bytes, const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(cuda); diff --git a/core/device_hooks/dpcpp_hooks.cpp b/core/device_hooks/dpcpp_hooks.cpp index efca9737725..465c5cf2215 100644 --- a/core/device_hooks/dpcpp_hooks.cpp +++ b/core/device_hooks/dpcpp_hooks.cpp @@ -45,7 +45,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -version version_info::get_dpcpp_version() noexcept +version version_info::get_sycl_version() noexcept { // We just return the version with a special "not compiled" tag in // placeholder modules. @@ -53,27 +53,27 @@ version version_info::get_dpcpp_version() noexcept } -std::shared_ptr DpcppExecutor::create( +std::shared_ptr SyclExecutor::create( int device_id, std::shared_ptr master, std::string device_type, - dpcpp_queue_property property) + sycl_queue_property property) { - return std::shared_ptr( - new DpcppExecutor(device_id, std::move(master), device_type, property)); + return std::shared_ptr( + new SyclExecutor(device_id, std::move(master), device_type, property)); } -void DpcppExecutor::populate_exec_info(const machine_topology* mach_topo) +void SyclExecutor::populate_exec_info(const machine_topology* mach_topo) { // This method is always called, so cannot throw when not compiled. 
} -void OmpExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, +void OmpExecutor::raw_copy_to(const SyclExecutor*, size_type num_bytes, const void* src_ptr, void* dest_ptr) const - GKO_NOT_COMPILED(dpcpp); + GKO_NOT_COMPILED(sycl); -bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const +bool OmpExecutor::verify_memory_to(const SyclExecutor* dest_exec) const { // Dummy check auto dev_type = dest_exec->get_device_type(); @@ -81,7 +81,7 @@ bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const } -void DpcppExecutor::raw_free(void* ptr) const noexcept +void SyclExecutor::raw_free(void* ptr) const noexcept { // Free must never fail, as it can be called in destructors. // If the nvidia module was not compiled, the library couldn't have @@ -89,51 +89,50 @@ void DpcppExecutor::raw_free(void* ptr) const noexcept } -void* DpcppExecutor::raw_alloc(size_type num_bytes) const - GKO_NOT_COMPILED(dpcpp); +void* SyclExecutor::raw_alloc(size_type num_bytes) const GKO_NOT_COMPILED(sycl); -void DpcppExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const - GKO_NOT_COMPILED(dpcpp); +void SyclExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const + GKO_NOT_COMPILED(sycl); -void DpcppExecutor::raw_copy_to(const CudaExecutor*, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const - GKO_NOT_COMPILED(dpcpp); +void SyclExecutor::raw_copy_to(const CudaExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const + GKO_NOT_COMPILED(sycl); -void DpcppExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const - GKO_NOT_COMPILED(dpcpp); +void SyclExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const + GKO_NOT_COMPILED(sycl); -void DpcppExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, - const void* 
src_ptr, void* dest_ptr) const - GKO_NOT_COMPILED(dpcpp); +void SyclExecutor::raw_copy_to(const SyclExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const + GKO_NOT_COMPILED(sycl); -void DpcppExecutor::synchronize() const GKO_NOT_COMPILED(dpcpp); +void SyclExecutor::synchronize() const GKO_NOT_COMPILED(sycl); -scoped_device_id_guard DpcppExecutor::get_scoped_device_id_guard() const - GKO_NOT_COMPILED(dpcpp); +scoped_device_id_guard SyclExecutor::get_scoped_device_id_guard() const + GKO_NOT_COMPILED(sycl); -int DpcppExecutor::get_num_devices(std::string) { return 0; } +int SyclExecutor::get_num_devices(std::string) { return 0; } -void DpcppExecutor::set_device_property(dpcpp_queue_property property) {} +void SyclExecutor::set_device_property(sycl_queue_property property) {} -bool DpcppExecutor::verify_memory_to(const OmpExecutor* dest_exec) const +bool SyclExecutor::verify_memory_to(const OmpExecutor* dest_exec) const { // Dummy check return this->get_device_type() == "cpu" || this->get_device_type() == "host"; } -bool DpcppExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const +bool SyclExecutor::verify_memory_to(const SyclExecutor* dest_exec) const { // Dummy check return dest_exec->get_device_type() == this->get_device_type() && @@ -141,43 +140,43 @@ bool DpcppExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const } -scoped_device_id_guard::scoped_device_id_guard(const DpcppExecutor* exec, +scoped_device_id_guard::scoped_device_id_guard(const SyclExecutor* exec, int device_id) - GKO_NOT_COMPILED(dpcpp); + GKO_NOT_COMPILED(sycl); namespace kernels { namespace sycl { -void destroy_event(sycl::event* event) GKO_NOT_COMPILED(dpcpp); +void destroy_event(sycl::event* event) GKO_NOT_COMPILED(sycl); } // namespace sycl } // namespace kernels -DpcppTimer::DpcppTimer(std::shared_ptr exec) - GKO_NOT_COMPILED(dpcpp); +SyclTimer::SyclTimer(std::shared_ptr exec) + GKO_NOT_COMPILED(sycl); -void DpcppTimer::init_time_point(time_point&) 
GKO_NOT_COMPILED(dpcpp); +void SyclTimer::init_time_point(time_point&) GKO_NOT_COMPILED(sycl); -void DpcppTimer::record(time_point&) GKO_NOT_COMPILED(dpcpp); +void SyclTimer::record(time_point&) GKO_NOT_COMPILED(sycl); -void DpcppTimer::wait(time_point& time) GKO_NOT_COMPILED(dpcpp); +void SyclTimer::wait(time_point& time) GKO_NOT_COMPILED(sycl); -std::chrono::nanoseconds DpcppTimer::difference_async(const time_point& start, - const time_point& stop) - GKO_NOT_COMPILED(dpcpp); +std::chrono::nanoseconds SyclTimer::difference_async(const time_point& start, + const time_point& stop) + GKO_NOT_COMPILED(sycl); } // namespace gko -#define GKO_HOOK_MODULE dpcpp +#define GKO_HOOK_MODULE sycl #include "core/device_hooks/common_kernels.inc.cpp" #undef GKO_HOOK_MODULE diff --git a/core/device_hooks/hip_hooks.cpp b/core/device_hooks/hip_hooks.cpp index 521b2590626..6908aa696ce 100644 --- a/core/device_hooks/hip_hooks.cpp +++ b/core/device_hooks/hip_hooks.cpp @@ -164,7 +164,7 @@ void HipExecutor::raw_copy_to(const HipExecutor*, size_type num_bytes, GKO_NOT_COMPILED(hip); -void HipExecutor::raw_copy_to(const DpcppExecutor*, size_type num_bytes, +void HipExecutor::raw_copy_to(const SyclExecutor*, size_type num_bytes, const void* src_ptr, void* dest_ptr) const GKO_NOT_COMPILED(hip); diff --git a/core/log/profiler_hook.cpp b/core/log/profiler_hook.cpp index 468f8aa83d3..29d150a9b0a 100644 --- a/core/log/profiler_hook.cpp +++ b/core/log/profiler_hook.cpp @@ -424,7 +424,7 @@ std::shared_ptr ProfilerHook::create_for_executor( return create_roctx(); } #endif - if (std::dynamic_pointer_cast(exec)) { + if (std::dynamic_pointer_cast(exec)) { return create_vtune(); } return create_tau(); diff --git a/core/solver/multigrid.cpp b/core/solver/multigrid.cpp index 303106fa4f6..4d06bdbaed3 100644 --- a/core/solver/multigrid.cpp +++ b/core/solver/multigrid.cpp @@ -564,8 +564,8 @@ void Multigrid::generate() // default coarse grid solver, direct LU // TODO: maybe remove fixed index type auto 
gen_default_solver = [&]() -> std::unique_ptr { - // TODO: unify when dpcpp supports direct solver - if (dynamic_cast(exec.get())) { + // TODO: unify when sycl supports direct solver + if (dynamic_cast(exec.get())) { using absolute_value_type = remove_complex; return solver::Gmres::build() .with_criteria( diff --git a/core/test/base/executor.cpp b/core/test/base/executor.cpp index a331d8f3485..9a3e0c4f01e 100644 --- a/core/test/base/executor.cpp +++ b/core/test/base/executor.cpp @@ -282,21 +282,21 @@ TEST(HipExecutor, KnowsItsDeviceId) } -TEST(DpcppExecutor, KnowsItsMaster) +TEST(SyclExecutor, KnowsItsMaster) { auto ref = gko::ReferenceExecutor::create(); - exec_ptr dpcpp = gko::DpcppExecutor::create(0, ref); + exec_ptr sycl = gko::SyclExecutor::create(0, ref); - ASSERT_EQ(ref, dpcpp->get_master()); + ASSERT_EQ(ref, sycl->get_master()); } -TEST(DpcppExecutor, KnowsItsDeviceId) +TEST(SyclExecutor, KnowsItsDeviceId) { auto ref = gko::ReferenceExecutor::create(); - auto dpcpp = gko::DpcppExecutor::create(0, ref); + auto sycl = gko::SyclExecutor::create(0, ref); - ASSERT_EQ(0, dpcpp->get_device_id()); + ASSERT_EQ(0, sycl->get_device_id()); } @@ -311,23 +311,23 @@ TEST(Executor, CanVerifyMemory) auto cuda2 = gko::CudaExecutor::create(0, ref); auto hip_1 = gko::HipExecutor::create(1, ref); auto cuda_1 = gko::CudaExecutor::create(1, ref); - std::shared_ptr host_dpcpp; - std::shared_ptr cpu_dpcpp; - std::shared_ptr gpu_dpcpp; - std::shared_ptr host_dpcpp_dup; - std::shared_ptr cpu_dpcpp_dup; - std::shared_ptr gpu_dpcpp_dup; - if (gko::DpcppExecutor::get_num_devices("host")) { - host_dpcpp = gko::DpcppExecutor::create(0, ref, "host"); - host_dpcpp_dup = gko::DpcppExecutor::create(0, ref, "host"); + std::shared_ptr host_sycl; + std::shared_ptr cpu_sycl; + std::shared_ptr gpu_sycl; + std::shared_ptr host_sycl_dup; + std::shared_ptr cpu_sycl_dup; + std::shared_ptr gpu_sycl_dup; + if (gko::SyclExecutor::get_num_devices("host")) { + host_sycl = gko::SyclExecutor::create(0, ref, 
"host"); + host_sycl_dup = gko::SyclExecutor::create(0, ref, "host"); } - if (gko::DpcppExecutor::get_num_devices("cpu")) { - cpu_dpcpp = gko::DpcppExecutor::create(0, ref, "cpu"); - cpu_dpcpp_dup = gko::DpcppExecutor::create(0, ref, "cpu"); + if (gko::SyclExecutor::get_num_devices("cpu")) { + cpu_sycl = gko::SyclExecutor::create(0, ref, "cpu"); + cpu_sycl_dup = gko::SyclExecutor::create(0, ref, "cpu"); } - if (gko::DpcppExecutor::get_num_devices("gpu")) { - gpu_dpcpp = gko::DpcppExecutor::create(0, ref, "gpu"); - gpu_dpcpp_dup = gko::DpcppExecutor::create(0, ref, "gpu"); + if (gko::SyclExecutor::get_num_devices("gpu")) { + gpu_sycl = gko::SyclExecutor::create(0, ref, "gpu"); + gpu_sycl_dup = gko::SyclExecutor::create(0, ref, "gpu"); } ASSERT_EQ(false, ref->memory_accessible(omp)); @@ -340,29 +340,29 @@ TEST(Executor, CanVerifyMemory) ASSERT_EQ(false, cuda->memory_accessible(ref)); ASSERT_EQ(false, omp->memory_accessible(cuda)); ASSERT_EQ(false, cuda->memory_accessible(omp)); - if (gko::DpcppExecutor::get_num_devices("host")) { - ASSERT_EQ(false, host_dpcpp->memory_accessible(ref)); - ASSERT_EQ(false, ref->memory_accessible(host_dpcpp)); - ASSERT_EQ(true, host_dpcpp->memory_accessible(omp)); - ASSERT_EQ(true, omp->memory_accessible(host_dpcpp)); - ASSERT_EQ(true, host_dpcpp->memory_accessible(host_dpcpp_dup)); - ASSERT_EQ(true, host_dpcpp_dup->memory_accessible(host_dpcpp)); + if (gko::SyclExecutor::get_num_devices("host")) { + ASSERT_EQ(false, host_sycl->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(host_sycl)); + ASSERT_EQ(true, host_sycl->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(host_sycl)); + ASSERT_EQ(true, host_sycl->memory_accessible(host_sycl_dup)); + ASSERT_EQ(true, host_sycl_dup->memory_accessible(host_sycl)); } - if (gko::DpcppExecutor::get_num_devices("cpu")) { - ASSERT_EQ(false, ref->memory_accessible(cpu_dpcpp)); - ASSERT_EQ(false, cpu_dpcpp->memory_accessible(ref)); - ASSERT_EQ(true, 
cpu_dpcpp->memory_accessible(omp)); - ASSERT_EQ(true, omp->memory_accessible(cpu_dpcpp)); - ASSERT_EQ(true, cpu_dpcpp->memory_accessible(cpu_dpcpp_dup)); - ASSERT_EQ(true, cpu_dpcpp_dup->memory_accessible(cpu_dpcpp)); + if (gko::SyclExecutor::get_num_devices("cpu")) { + ASSERT_EQ(false, ref->memory_accessible(cpu_sycl)); + ASSERT_EQ(false, cpu_sycl->memory_accessible(ref)); + ASSERT_EQ(true, cpu_sycl->memory_accessible(omp)); + ASSERT_EQ(true, omp->memory_accessible(cpu_sycl)); + ASSERT_EQ(true, cpu_sycl->memory_accessible(cpu_sycl_dup)); + ASSERT_EQ(true, cpu_sycl_dup->memory_accessible(cpu_sycl)); } - if (gko::DpcppExecutor::get_num_devices("gpu")) { - ASSERT_EQ(false, gpu_dpcpp->memory_accessible(ref)); - ASSERT_EQ(false, ref->memory_accessible(gpu_dpcpp)); - ASSERT_EQ(false, gpu_dpcpp->memory_accessible(omp)); - ASSERT_EQ(false, omp->memory_accessible(gpu_dpcpp)); - ASSERT_EQ(false, gpu_dpcpp->memory_accessible(gpu_dpcpp_dup)); - ASSERT_EQ(false, gpu_dpcpp_dup->memory_accessible(gpu_dpcpp)); + if (gko::SyclExecutor::get_num_devices("gpu")) { + ASSERT_EQ(false, gpu_sycl->memory_accessible(ref)); + ASSERT_EQ(false, ref->memory_accessible(gpu_sycl)); + ASSERT_EQ(false, gpu_sycl->memory_accessible(omp)); + ASSERT_EQ(false, omp->memory_accessible(gpu_sycl)); + ASSERT_EQ(false, gpu_sycl->memory_accessible(gpu_sycl_dup)); + ASSERT_EQ(false, gpu_sycl_dup->memory_accessible(gpu_sycl)); } #if GINKGO_HIP_PLATFORM_NVCC ASSERT_EQ(true, hip->memory_accessible(cuda)); @@ -534,7 +534,7 @@ class ExampleOperation : public gko::Operation { void run(std::shared_ptr) const override {} void run(std::shared_ptr) const override {} void run(std::shared_ptr) const override {} - void run(std::shared_ptr) const override {} + void run(std::shared_ptr) const override {} void run(std::shared_ptr) const override {} }; diff --git a/core/test/log/profiler_hook.cpp b/core/test/log/profiler_hook.cpp index 281eed2d70b..9fda18bfc19 100644 --- a/core/test/log/profiler_hook.cpp +++ 
b/core/test/log/profiler_hook.cpp @@ -71,7 +71,7 @@ class DummyOperation : public gko::Operation { void run(std::shared_ptr) const override {} - void run(std::shared_ptr) const override {} + void run(std::shared_ptr) const override {} void run(std::shared_ptr) const override {} diff --git a/cuda/base/executor.cpp b/cuda/base/executor.cpp index f296fb9da86..aae6e7d83cf 100644 --- a/cuda/base/executor.cpp +++ b/cuda/base/executor.cpp @@ -177,7 +177,7 @@ void CudaExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, } -void CudaExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, +void CudaExecutor::raw_copy_to(const SyclExecutor* dest, size_type num_bytes, const void* src_ptr, void* dest_ptr) const { GKO_NOT_SUPPORTED(dest); diff --git a/cuda/test/base/cuda_executor.cu b/cuda/test/base/cuda_executor.cu index 83cfd1827ad..b37367ec7ca 100644 --- a/cuda/test/base/cuda_executor.cu +++ b/cuda/test/base/cuda_executor.cu @@ -71,7 +71,7 @@ public: value = -3; } - void run(std::shared_ptr) const override + void run(std::shared_ptr) const override { value = -4; } diff --git a/devices/dpcpp/executor.cpp b/devices/dpcpp/executor.cpp index 663e2d43aa9..2992e7410db 100644 --- a/devices/dpcpp/executor.cpp +++ b/devices/dpcpp/executor.cpp @@ -44,13 +44,13 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -std::shared_ptr DpcppExecutor::get_master() noexcept +std::shared_ptr SyclExecutor::get_master() noexcept { return master_; } -std::shared_ptr DpcppExecutor::get_master() const noexcept +std::shared_ptr SyclExecutor::get_master() const noexcept { return master_; } diff --git a/doc/headers/dpcpp_executor.hpp b/doc/headers/dpcpp_executor.hpp index 50e8d1b3e53..57c14cac4ac 100644 --- a/doc/headers/dpcpp_executor.hpp +++ b/doc/headers/dpcpp_executor.hpp @@ -31,9 +31,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*************************************************************/ /** - * @defgroup exec_dpcpp DPC++ Executor + * @defgroup exec_sycl SYCL Executor * - * @brief A module dedicated to the implementation and usage of the DPC++ + * @brief A module dedicated to the implementation and usage of the SYCL * executor in Ginkgo. * * @ingroup Executor diff --git a/doc/headers/executors.hpp b/doc/headers/executors.hpp index 6a31010e2d0..72e311f4706 100644 --- a/doc/headers/executors.hpp +++ b/doc/headers/executors.hpp @@ -53,7 +53,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. * operations executed on the NVIDIA GPU accelerator; * + @ref exec_hip uses the HIP library to compile code for either NVIDIA or * AMD GPU accelerator; - * + @ref exec_dpcpp uses the DPC++ compiler for any DPC++ supported hardware + * + @ref exec_sycl uses the SYCL compiler for any SYCL supported hardware * (e.g. Intel CPUs, GPU, FPGAs, ...); * + @ref exec_ref executes a non-optimized reference implementation, * which can be used to debug the library. 
diff --git a/doc/joss/figures/ginkgo-hierarchy.tex b/doc/joss/figures/ginkgo-hierarchy.tex index d72b7839e7d..185590fb368 100644 --- a/doc/joss/figures/ginkgo-hierarchy.tex +++ b/doc/joss/figures/ginkgo-hierarchy.tex @@ -352,7 +352,7 @@ }; \node (dpcpp) [abstract, rectangle, below=of cuda] { - \textbf{DpcppExecutor} + \textbf{SyclExecutor} }; \node (hip) [abstract, rectangle, below=of dpcpp] { diff --git a/dpcpp/base/batch_multi_vector_kernels.hpp.inc b/dpcpp/base/batch_multi_vector_kernels.hpp.inc index 22d00d780f9..75e1981d659 100644 --- a/dpcpp/base/batch_multi_vector_kernels.hpp.inc +++ b/dpcpp/base/batch_multi_vector_kernels.hpp.inc @@ -92,7 +92,7 @@ __dpct_inline__ void compute_gen_dot_product_kernel( y.values[r * y.stride + rhs_index]; } - val = ::gko::kernels::dpcpp::reduce( + val = ::gko::kernels::sycl::reduce( subg, val, [](ValueType a, ValueType b) { return a + b; }); if (subgroup.get_local_id() == 0) { @@ -126,7 +126,7 @@ __dpct_inline__ void compute_norm2_kernel( r += subgroup_size) val += squared_norm(x.values[r * x.stride + rhs_index]); - val = ::gko::kernels::dpcpp::reduce( + val = ::gko::kernels::sycl::reduce( subg, val, [](real_type a, real_type b) { return a + b; }); if (subgroup.get_local_id() == 0) { diff --git a/dpcpp/base/batch_struct.hpp b/dpcpp/base/batch_struct.hpp index 6637cef6930..0259a37a45b 100644 --- a/dpcpp/base/batch_struct.hpp +++ b/dpcpp/base/batch_struct.hpp @@ -50,7 +50,7 @@ namespace sycl { /** @file batch_struct.hpp * * Helper functions to generate a batch struct from a batch LinOp, - * while also shallow-casting to the required DPCPP scalar type. + * while also shallow-casting to the required SYCL scalar type. * * A specialization is needed for every format of every kind of linear algebra * object. These are intended to be called on the host. 
diff --git a/dpcpp/base/config.hpp b/dpcpp/base/config.hpp index 466ae9b1027..dd048abb8c1 100644 --- a/dpcpp/base/config.hpp +++ b/dpcpp/base/config.hpp @@ -54,7 +54,7 @@ struct config { using lane_mask_type = uint64; /** - * The number of threads within a Dpcpp subgroup. + * The number of threads within a SYCL subgroup. */ static constexpr uint32 warp_size = 32; @@ -70,7 +70,7 @@ struct config { static constexpr uint32 min_warps_per_block = 4; /** - * The default maximal number of threads allowed in DPCPP group + * The default maximal number of threads allowed in SYCL group */ static constexpr uint32 max_block_size = 256; }; diff --git a/dpcpp/base/executor.dp.cpp b/dpcpp/base/executor.dp.cpp index 6c1ff739de9..ff29d9f1d54 100644 --- a/dpcpp/base/executor.dp.cpp +++ b/dpcpp/base/executor.dp.cpp @@ -68,7 +68,7 @@ const std::vector get_devices(std::string device_type) } // namespace detail -void OmpExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, +void OmpExecutor::raw_copy_to(const SyclExecutor* dest, size_type num_bytes, const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { @@ -77,7 +77,7 @@ void OmpExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, } -bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const +bool OmpExecutor::verify_memory_to(const SyclExecutor* dest_exec) const { auto device = detail::get_devices( dest_exec->get_device_type())[dest_exec->get_device_id()]; @@ -85,23 +85,23 @@ bool OmpExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const } -std::shared_ptr DpcppExecutor::create( +std::shared_ptr SyclExecutor::create( int device_id, std::shared_ptr master, std::string device_type, - dpcpp_queue_property property) + sycl_queue_property property) { - return std::shared_ptr( - new DpcppExecutor(device_id, std::move(master), device_type, property)); + return std::shared_ptr( + new SyclExecutor(device_id, std::move(master), device_type, property)); } -void 
DpcppExecutor::populate_exec_info(const machine_topology* mach_topo) +void SyclExecutor::populate_exec_info(const machine_topology* mach_topo) { // Closest CPUs, NUMA node can be updated when there is a way to identify // the device itself, which is currently not available with DPC++. } -void DpcppExecutor::raw_free(void* ptr) const noexcept +void SyclExecutor::raw_free(void* ptr) const noexcept { // the free function may synchronize execution or not, which depends on // implementation or backend, so it is not guaranteed. @@ -112,7 +112,7 @@ void DpcppExecutor::raw_free(void* ptr) const noexcept } catch (sycl::exception& err) { #if GKO_VERBOSE_LEVEL >= 1 // Unfortunately, if memory free fails, there's not much we can do - std::cerr << "Unrecoverable Dpcpp error on device " + std::cerr << "Unrecoverable Sycl error on device " << this->get_device_id() << " in " << __func__ << ": " << err.what() << std::endl << "Exiting program" << std::endl; @@ -120,7 +120,7 @@ void DpcppExecutor::raw_free(void* ptr) const noexcept // OpenCL error code use 0 for CL_SUCCESS and negative number for others // error. if the error is not from OpenCL, it will return CL_SUCCESS. int err_code = err.get_cl_code(); - // if return CL_SUCCESS, exit 1 as DPCPP error. + // if return CL_SUCCESS, exit 1 as SYCL error. 
if (err_code == 0) { err_code = 1; } @@ -129,7 +129,7 @@ void DpcppExecutor::raw_free(void* ptr) const noexcept } -void* DpcppExecutor::raw_alloc(size_type num_bytes) const +void* SyclExecutor::raw_alloc(size_type num_bytes) const { void* dev_ptr = sycl::malloc_device(num_bytes, *queue_.get()); GKO_ENSURE_ALLOCATED(dev_ptr, "DPC++", num_bytes); @@ -137,8 +137,8 @@ void* DpcppExecutor::raw_alloc(size_type num_bytes) const } -void DpcppExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const +void SyclExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { queue_->memcpy(dest_ptr, src_ptr, num_bytes).wait(); @@ -146,8 +146,8 @@ void DpcppExecutor::raw_copy_to(const OmpExecutor*, size_type num_bytes, } -void DpcppExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const +void SyclExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { // TODO: later when possible, if we have DPC++ with a CUDA backend // support/compiler, we could maybe support native copies? 
@@ -155,15 +155,15 @@ void DpcppExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, } -void DpcppExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const +void SyclExecutor::raw_copy_to(const HipExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { GKO_NOT_SUPPORTED(dest); } -void DpcppExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, - const void* src_ptr, void* dest_ptr) const +void SyclExecutor::raw_copy_to(const SyclExecutor* dest, size_type num_bytes, + const void* src_ptr, void* dest_ptr) const { if (num_bytes > 0) { // If the queue is different and is not cpu/host, the queue can not @@ -185,28 +185,28 @@ void DpcppExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, } -void DpcppExecutor::synchronize() const { queue_->wait_and_throw(); } +void SyclExecutor::synchronize() const { queue_->wait_and_throw(); } -scoped_device_id_guard DpcppExecutor::get_scoped_device_id_guard() const +scoped_device_id_guard SyclExecutor::get_scoped_device_id_guard() const { return {this, this->get_device_id()}; } -int DpcppExecutor::get_num_devices(std::string device_type) +int SyclExecutor::get_num_devices(std::string device_type) { return detail::get_devices(device_type).size(); } -bool DpcppExecutor::verify_memory_to(const OmpExecutor* dest_exec) const +bool SyclExecutor::verify_memory_to(const OmpExecutor* dest_exec) const { auto device = detail::get_devices( get_exec_info().device_type)[get_exec_info().device_id]; return device.is_host() || device.is_cpu(); } -bool DpcppExecutor::verify_memory_to(const DpcppExecutor* dest_exec) const +bool SyclExecutor::verify_memory_to(const SyclExecutor* dest_exec) const { // If the queue is different and is not cpu/host, the queue can not access // the data from another queue (on the same device) @@ -231,12 +231,12 @@ void delete_queue(sycl::queue* queue) } -sycl::property_list 
get_property_list(dpcpp_queue_property property) +sycl::property_list get_property_list(sycl_queue_property property) { - if (property == dpcpp_queue_property::in_order) { + if (property == sycl_queue_property::in_order) { return {sycl::property::queue::in_order{}}; - } else if (property == (dpcpp_queue_property::in_order | - dpcpp_queue_property::enable_profiling)) { + } else if (property == (sycl_queue_property::in_order | + sycl_queue_property::enable_profiling)) { return {sycl::property::queue::in_order{}, sycl::property::queue::enable_profiling{}}; } else { @@ -248,10 +248,10 @@ sycl::property_list get_property_list(dpcpp_queue_property property) } // namespace detail -void DpcppExecutor::set_device_property(dpcpp_queue_property property) +void SyclExecutor::set_device_property(sycl_queue_property property) { assert(this->get_exec_info().device_id < - DpcppExecutor::get_num_devices(this->get_exec_info().device_type)); + SyclExecutor::get_num_devices(this->get_exec_info().device_type)); auto device = detail::get_devices( this->get_exec_info().device_type)[this->get_exec_info().device_id]; if (!device.is_host()) { diff --git a/dpcpp/base/index_set_kernels.dp.cpp b/dpcpp/base/index_set_kernels.dp.cpp index c9dd46a7824..08536242f56 100644 --- a/dpcpp/base/index_set_kernels.dp.cpp +++ b/dpcpp/base/index_set_kernels.dp.cpp @@ -44,9 +44,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { namespace kernels { /** - * @brief The Dpcpp namespace. + * @brief The Sycl namespace. 
* - * @ingroup dpcpp + * @ingroup sycl */ namespace sycl { /** diff --git a/dpcpp/base/kernel_launch.dp.hpp b/dpcpp/base/kernel_launch.dp.hpp index 0a179c0b382..744f086dbc8 100644 --- a/dpcpp/base/kernel_launch.dp.hpp +++ b/dpcpp/base/kernel_launch.dp.hpp @@ -76,7 +76,7 @@ void generic_kernel_2d(sycl::handler& cgh, int64 rows, int64 cols, template -void run_kernel(std::shared_ptr exec, KernelFunction fn, +void run_kernel(std::shared_ptr exec, KernelFunction fn, size_type size, KernelArgs&&... args) { exec->get_queue()->submit([&](sycl::handler& cgh) { @@ -86,7 +86,7 @@ void run_kernel(std::shared_ptr exec, KernelFunction fn, } template -void run_kernel(std::shared_ptr exec, KernelFunction fn, +void run_kernel(std::shared_ptr exec, KernelFunction fn, dim<2> size, KernelArgs&&... args) { exec->get_queue()->submit([&](sycl::handler& cgh) { diff --git a/dpcpp/base/kernel_launch_reduction.dp.hpp b/dpcpp/base/kernel_launch_reduction.dp.hpp index f9fa63f1ac2..713fc02a34f 100644 --- a/dpcpp/base/kernel_launch_reduction.dp.hpp +++ b/dpcpp/base/kernel_launch_reduction.dp.hpp @@ -87,7 +87,7 @@ void generic_kernel_reduction_1d(sycl::handler& cgh, int64 size, for (int64 i = tidx; i < size; i += global_size) { partial = op(partial, fn(i, args...)); } - partial = ::gko::kernels::dpcpp::reduce(subgroup, partial, op); + partial = ::gko::kernels::sycl::reduce(subgroup, partial, op); if (subgroup.thread_rank() == 0) { subgroup_partial[local_tidx / sg_size] = partial; } @@ -97,7 +97,7 @@ void generic_kernel_reduction_1d(sycl::handler& cgh, int64 size, for (int64 i = local_tidx; i < num_partials; i += sg_size) { partial = op(partial, subgroup_partial[i]); } - partial = ::gko::kernels::dpcpp::reduce(subgroup, partial, op); + partial = ::gko::kernels::sycl::reduce(subgroup, partial, op); if (subgroup.thread_rank() == 0) { storage[tidx / wg_size] = finalize(partial); } @@ -138,7 +138,7 @@ void generic_kernel_reduction_2d(sycl::handler& cgh, int64 rows, int64 cols, const auto col = i % 
cols; partial = op(partial, fn(row, col, args...)); } - partial = ::gko::kernels::dpcpp::reduce(subgroup, partial, op); + partial = ::gko::kernels::sycl::reduce(subgroup, partial, op); if (subgroup.thread_rank() == 0) { subgroup_partial[local_tidx / sg_size] = partial; } @@ -148,7 +148,7 @@ void generic_kernel_reduction_2d(sycl::handler& cgh, int64 rows, int64 cols, for (int64 i = local_tidx; i < num_partials; i += sg_size) { partial = op(partial, subgroup_partial[i]); } - partial = ::gko::kernels::dpcpp::reduce(subgroup, partial, op); + partial = ::gko::kernels::sycl::reduce(subgroup, partial, op); if (subgroup.thread_rank() == 0) { storage[tidx / wg_size] = finalize(partial); } @@ -160,7 +160,7 @@ void generic_kernel_reduction_2d(sycl::handler& cgh, int64 rows, int64 cols, template -void run_kernel_reduction_impl(std::shared_ptr exec, +void run_kernel_reduction_impl(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, size_type size, @@ -204,7 +204,7 @@ void run_kernel_reduction_impl(std::shared_ptr exec, template -void run_kernel_reduction_impl(std::shared_ptr exec, +void run_kernel_reduction_impl(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, dim<2> size, array& tmp, @@ -254,7 +254,7 @@ GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION_TOTYPE(select_run_kernel_reduction, template -void run_kernel_reduction_cached(std::shared_ptr exec, +void run_kernel_reduction_cached(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, dim<2> size, @@ -276,7 +276,7 @@ void run_kernel_reduction_cached(std::shared_ptr exec, template -void run_kernel_reduction_cached(std::shared_ptr exec, +void run_kernel_reduction_cached(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, size_type size, @@ -303,7 +303,7 @@ template void 
generic_kernel_row_reduction_2d(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, int64 rows, int64 cols, int64 col_blocks, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, @@ -503,7 +503,7 @@ template void run_generic_col_reduction_small(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, int64 max_workgroups, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, @@ -551,7 +551,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_generic_col_reduction_small, template -void run_kernel_row_reduction_stage1(std::shared_ptr exec, +void run_kernel_row_reduction_stage1(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, size_type result_stride, @@ -606,7 +606,7 @@ GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION_TOTYPE( template -void run_kernel_col_reduction_stage1(std::shared_ptr exec, +void run_kernel_col_reduction_stage1(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, dim<2> size, @@ -674,7 +674,7 @@ GKO_ENABLE_IMPLEMENTATION_CONFIG_SELECTION_TOTYPE( template -void run_kernel_row_reduction_cached(std::shared_ptr exec, +void run_kernel_row_reduction_cached(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, size_type result_stride, @@ -698,7 +698,7 @@ void run_kernel_row_reduction_cached(std::shared_ptr exec, template -void run_kernel_col_reduction_cached(std::shared_ptr exec, +void run_kernel_col_reduction_cached(std::shared_ptr exec, KernelFunction fn, ReductionOp op, FinalizeOp finalize, ValueType identity, ValueType* result, dim<2> size, diff --git a/dpcpp/base/kernel_launch_solver.dp.hpp b/dpcpp/base/kernel_launch_solver.dp.hpp index 95bf7eb5da5..2c41fd5c3e6 100644 --- a/dpcpp/base/kernel_launch_solver.dp.hpp +++ b/dpcpp/base/kernel_launch_solver.dp.hpp @@ -58,15 +58,15 @@ void 
generic_kernel_2d_solver(sycl::handler& cgh, int64 rows, int64 cols, template -void run_kernel_solver(std::shared_ptr exec, +void run_kernel_solver(std::shared_ptr exec, KernelFunction fn, dim<2> size, size_type default_stride, KernelArgs&&... args) { exec->get_queue()->submit([&](sycl::handler& cgh) { - kernels::dpcpp::generic_kernel_2d_solver( + kernels::sycl::generic_kernel_2d_solver( cgh, static_cast(size[0]), static_cast(size[1]), static_cast(default_stride), fn, - kernels::dpcpp::map_to_device(args)...); + kernels::sycl::map_to_device(args)...); }); } diff --git a/dpcpp/base/onedpl.hpp b/dpcpp/base/onedpl.hpp index db4052ee9b0..32ca1ee3303 100644 --- a/dpcpp/base/onedpl.hpp +++ b/dpcpp/base/onedpl.hpp @@ -47,7 +47,7 @@ namespace kernels { namespace sycl { -inline auto onedpl_policy(std::shared_ptr exec) +inline auto onedpl_policy(std::shared_ptr exec) { return oneapi::dpl::execution::make_device_policy(*exec->get_queue()); } diff --git a/dpcpp/base/scoped_device_id.dp.cpp b/dpcpp/base/scoped_device_id.dp.cpp index c7cb149f6ba..8aa67cdaf23 100644 --- a/dpcpp/base/scoped_device_id.dp.cpp +++ b/dpcpp/base/scoped_device_id.dp.cpp @@ -40,7 +40,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -scoped_device_id_guard::scoped_device_id_guard(const DpcppExecutor* exec, +scoped_device_id_guard::scoped_device_id_guard(const SyclExecutor* exec, int device_id) : scope_(std::make_unique()) {} diff --git a/dpcpp/base/timer.dp.cpp b/dpcpp/base/timer.dp.cpp index 63dcd2e9dac..874bb82dba2 100644 --- a/dpcpp/base/timer.dp.cpp +++ b/dpcpp/base/timer.dp.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace gko { -DpcppTimer::DpcppTimer(std::shared_ptr exec) +SyclTimer::SyclTimer(std::shared_ptr exec) : exec_{std::move(exec)} { if (!exec_->get_queue() @@ -53,41 +53,41 @@ DpcppTimer::DpcppTimer(std::shared_ptr exec) } -void DpcppTimer::init_time_point(time_point& time) +void SyclTimer::init_time_point(time_point& time) { - time.type_ = time_point::type::dpcpp; - time.data_.dpcpp_event = new sycl::event{}; + time.type_ = time_point::type::sycl; + time.data_.sycl_event = new sycl::event{}; } -void DpcppTimer::record(time_point& time) +void SyclTimer::record(time_point& time) { - GKO_ASSERT(time.type_ == time_point::type::dpcpp); - *time.data_.dpcpp_event = + GKO_ASSERT(time.type_ == time_point::type::sycl); + *time.data_.sycl_event = exec_->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(1, [=](sycl::id<1> id) {}); }); } -void DpcppTimer::wait(time_point& time) +void SyclTimer::wait(time_point& time) { - GKO_ASSERT(time.type_ == time_point::type::dpcpp); - time.data_.dpcpp_event->wait_and_throw(); + GKO_ASSERT(time.type_ == time_point::type::sycl); + time.data_.sycl_event->wait_and_throw(); } -std::chrono::nanoseconds DpcppTimer::difference_async(const time_point& start, - const time_point& stop) +std::chrono::nanoseconds SyclTimer::difference_async(const time_point& start, + const time_point& stop) { - GKO_ASSERT(start.type_ == time_point::type::dpcpp); - GKO_ASSERT(stop.type_ == time_point::type::dpcpp); - stop.data_.dpcpp_event->wait_and_throw(); + GKO_ASSERT(start.type_ == time_point::type::sycl); + GKO_ASSERT(stop.type_ == time_point::type::sycl); + stop.data_.sycl_event->wait_and_throw(); auto stop_time = - stop.data_.dpcpp_event + stop.data_.sycl_event ->get_profiling_info(); auto start_time = - start.data_.dpcpp_event + start.data_.sycl_event ->get_profiling_info(); return std::chrono::nanoseconds{static_cast(stop_time - start_time)}; } diff --git a/dpcpp/base/version.dp.cpp b/dpcpp/base/version.dp.cpp index ca3399c1f02..604401798de 
100644 --- a/dpcpp/base/version.dp.cpp +++ b/dpcpp/base/version.dp.cpp @@ -36,7 +36,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { -version version_info::get_dpcpp_version() noexcept +version version_info::get_sycl_version() noexcept { // When compiling the module, the header version is the same as the library // version. Mismatch between the header and the module versions may happen diff --git a/dpcpp/components/cooperative_groups.dp.hpp b/dpcpp/components/cooperative_groups.dp.hpp index f8ae4039e00..435419bbdf4 100644 --- a/dpcpp/components/cooperative_groups.dp.hpp +++ b/dpcpp/components/cooperative_groups.dp.hpp @@ -52,7 +52,7 @@ namespace sycl { /** * Ginkgo uses cooperative groups to handle communication among the threads. * - * However, DPCPP's implementation of cooperative groups is still quite limited + * However, SYCL's implementation of cooperative groups is still quite limited * in functionality, and some parts are not supported on all hardware * interesting for Ginkgo. For this reason, Ginkgo exposes only a part of the * original functionality, and possibly extends it if it is required. Thus, @@ -61,7 +61,7 @@ namespace sycl { * by Ginkgo's implementation is equivalent to the standard interface, with some * useful extensions. * - * A cooperative group (both from standard DPCPP and from Ginkgo) is not a + * A cooperative group (both from standard SYCL and from Ginkgo) is not a * specific type, but a concept. That is, any type satisfying the interface * imposed by the cooperative groups API is considered a cooperative * group (a.k.a. "duck typing"). To maximize the generality of components that @@ -74,7 +74,7 @@ namespace sycl { * Instead, use the thread_rank() method of the group to distinguish between * distinct threads of a group. 
* - * The original DPCPP implementation does not provide ways to verify if a + * The original SYCL implementation does not provide ways to verify if a * certain type represents a cooperative group. Ginkgo's implementation provides * metafunctions which do that. Additionally, not all cooperative groups have * equivalent functionality, so Ginkgo splits the cooperative group concept into @@ -113,7 +113,7 @@ namespace sycl { * to existing cooperative groups, or create new groups if the existing * groups do not cover your use-case. For an example, see the * enable_extended_shuffle mixin, which adds extended shuffles support - * to built-in DPCPP cooperative groups. + * to built-in SYCL cooperative groups. */ namespace group { @@ -165,7 +165,7 @@ namespace detail { /** - * This is a limited implementation of the DPCPP thread_block_tile. + * This is a limited implementation of the SYCL thread_block_tile. */ template class thread_block_tile : public sycl::sub_group { @@ -435,7 +435,7 @@ struct is_synchronizable_group_impl : std::true_type {}; /** - * This is a limited implementation of the DPCPP grid_group that works even on + * This is a limited implementation of the SYCL grid_group that works even on * devices that do not support device-wide synchronization and without special * kernel launch syntax. 
* @@ -489,7 +489,7 @@ namespace detail { template struct is_sub_group< - ::gko::kernels::dpcpp::group::detail::thread_block_tile> + ::gko::kernels::sycl::group::detail::thread_block_tile> : std::true_type {}; @@ -501,7 +501,7 @@ struct group_scope; template struct group_scope< - ::gko::kernels::dpcpp::group::detail::thread_block_tile> { + ::gko::kernels::sycl::group::detail::thread_block_tile> { static constexpr __spv::Scope::Flag value = __spv::Scope::Flag::Subgroup; }; diff --git a/dpcpp/components/format_conversion.dp.hpp b/dpcpp/components/format_conversion.dp.hpp index 31f17ead068..9ba7b983403 100644 --- a/dpcpp/components/format_conversion.dp.hpp +++ b/dpcpp/components/format_conversion.dp.hpp @@ -68,10 +68,10 @@ namespace host_kernel { * architecture and the number of stored elements. */ template -size_type calculate_nwarps(std::shared_ptr exec, +size_type calculate_nwarps(std::shared_ptr exec, const size_type nnz) { - size_type nsgs_in_dpcpp = exec->get_num_subgroups(); + size_type nsgs_in_sycl = exec->get_num_subgroups(); size_type multiple = 8; if (nnz >= 2e8) { multiple = 256; @@ -83,7 +83,7 @@ size_type calculate_nwarps(std::shared_ptr exec, multiple = _tuned_value; } #endif // GINKGO_BENCHMARK_ENABLE_TUNING - return std::min(multiple * nsgs_in_dpcpp, + return std::min(multiple * nsgs_in_sycl, size_type(ceildiv(nnz, subgroup_size))); } diff --git a/dpcpp/components/prefix_sum_kernels.dp.cpp b/dpcpp/components/prefix_sum_kernels.dp.cpp index 01f6e62c283..b832cfdae83 100644 --- a/dpcpp/components/prefix_sum_kernels.dp.cpp +++ b/dpcpp/components/prefix_sum_kernels.dp.cpp @@ -66,7 +66,7 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(finalize_prefix_sum_call, finalize_prefix_sum, template -void prefix_sum_nonnegative(std::shared_ptr exec, +void prefix_sum_nonnegative(std::shared_ptr exec, IndexType* counts, size_type num_entries) { // prefix_sum should only be performed on a valid array diff --git a/dpcpp/components/reduction.dp.hpp b/dpcpp/components/reduction.dp.hpp 
index 1b25f77186f..a0f4d3e8d6c 100644 --- a/dpcpp/components/reduction.dp.hpp +++ b/dpcpp/components/reduction.dp.hpp @@ -110,7 +110,7 @@ __dpct_inline__ int choose_pivot(const Group& group, ValueType local_data, { using real = remove_complex; real lmag = is_pivoted ? -one() : abs(local_data); - const auto pivot = ::gko::kernels::dpcpp::reduce( + const auto pivot = ::gko::kernels::sycl::reduce( group, group.thread_rank(), [&](int lidx, int ridx) { const auto rmag = group.shfl(lmag, ridx); if (rmag > lmag) { @@ -155,8 +155,8 @@ void reduce(const Group& __restrict__ group, ValueType* __restrict__ data, if (warp_id > 0) { return; } - auto result = ::gko::kernels::dpcpp::reduce(warp, data[warp.thread_rank()], - reduce_op); + auto result = + ::gko::kernels::sycl::reduce(warp, data[warp.thread_rank()], reduce_op); if (warp.thread_rank() == 0) { data[0] = result; } @@ -252,7 +252,7 @@ GKO_ENABLE_DEFAULT_CONFIG_CALL(reduce_add_array_call, reduce_add_array_config, * @return the reduction result */ template -ValueType reduce_add_array(std::shared_ptr exec, +ValueType reduce_add_array(std::shared_ptr exec, size_type size, const ValueType* source) { auto block_results_val = source; diff --git a/dpcpp/components/thread_ids.dp.hpp b/dpcpp/components/thread_ids.dp.hpp index 358b7b495f8..a5724d2f5d0 100644 --- a/dpcpp/components/thread_ids.dp.hpp +++ b/dpcpp/components/thread_ids.dp.hpp @@ -45,16 +45,16 @@ namespace gko { namespace kernels { namespace sycl { /** - * @brief The DPCPP thread namespace. + * @brief The SYCL thread namespace. * - * @ingroup dpcpp_thread + * @ingroup sycl_thread */ namespace thread { // TODO: porting - need to refine functions and their name in this file // the grid/block description uses the cuda dim3 to represent. i.e. using dim3 -// to launch dpcpp kernel, the kernel will reverse the ordering to keep the same +// to launch sycl kernel, the kernel will reverse the ordering to keep the same // linear memory usage as cuda. 
@@ -129,7 +129,7 @@ __dpct_inline__ size_type get_local_warp_id(sycl::nd_item<3> item_ct1) template __dpct_inline__ size_type get_local_subwarp_id(sycl::nd_item<3> item_ct1) { - // dpcpp does not have subwarp. + // sycl does not have subwarp. constexpr auto subwarps_per_warp = subwarp_size / subwarp_size; return get_local_warp_id(item_ct1) * subwarps_per_warp + item_ct1.get_local_id(1); @@ -201,7 +201,7 @@ __dpct_inline__ size_type get_warp_id(sycl::nd_item<3> item_ct1) template __dpct_inline__ size_type get_subwarp_id(sycl::nd_item<3> item_ct1) { - // dpcpp does not have subwarp + // sycl does not have subwarp constexpr auto subwarps_per_warp = subwarp_size / subwarp_size; return get_warp_id(item_ct1) * subwarps_per_warp + item_ct1.get_local_id(1); diff --git a/dpcpp/components/uninitialized_array.hpp b/dpcpp/components/uninitialized_array.hpp index a44d61e275b..7326984780f 100644 --- a/dpcpp/components/uninitialized_array.hpp +++ b/dpcpp/components/uninitialized_array.hpp @@ -107,8 +107,8 @@ class uninitialized_array { } private: - // if dpcpp uses char to represent data in char, compiling gives error. - // Thanksfully, dpcpp support complex data allocation directly. + // if sycl uses char to represent data in char, compiling gives error. + // Thankfully, sycl supports complex data allocation directly.
ValueType data_[size]; }; diff --git a/dpcpp/components/warp_blas.dp.hpp b/dpcpp/components/warp_blas.dp.hpp index cbe2ba8623b..496994dc920 100644 --- a/dpcpp/components/warp_blas.dp.hpp +++ b/dpcpp/components/warp_blas.dp.hpp @@ -348,7 +348,7 @@ __dpct_inline__ void multiply_transposed_vec( mtx_elem = static_cast(mtx_row[i * mtx_increment]); } - const auto out = ::gko::kernels::dpcpp::reduce( + const auto out = ::gko::kernels::sycl::reduce( group, mtx_elem * vec, [](VectorValueType x, VectorValueType y) { return x + y; }); if (group.thread_rank() == 0) { @@ -460,7 +460,7 @@ __dpct_inline__ remove_complex compute_infinity_norm( } } } - return ::gko::kernels::dpcpp::reduce( + return ::gko::kernels::sycl::reduce( group, sum, [](result_type x, result_type y) { return max(x, y); }); } diff --git a/dpcpp/factorization/factorization_kernels.dp.cpp b/dpcpp/factorization/factorization_kernels.dp.cpp index 292cf13917a..c771a73cb29 100644 --- a/dpcpp/factorization/factorization_kernels.dp.cpp +++ b/dpcpp/factorization/factorization_kernels.dp.cpp @@ -508,7 +508,7 @@ void initialize_l(dim3 grid, dim3 block, size_type dynamic_shared_memory, template -void add_diagonal_elements(std::shared_ptr exec, +void add_diagonal_elements(std::shared_ptr exec, matrix::Csr* mtx, bool is_sorted) { @@ -526,10 +526,10 @@ void add_diagonal_elements(std::shared_ptr exec, array needs_change_device{exec, 1}; needs_change_device = needs_change_host; - auto dpcpp_old_values = mtx->get_const_values(); - auto dpcpp_old_col_idxs = mtx->get_const_col_idxs(); - auto dpcpp_old_row_ptrs = mtx->get_row_ptrs(); - auto dpcpp_row_ptrs_add = row_ptrs_addition.get_data(); + auto sycl_old_values = mtx->get_const_values(); + auto sycl_old_col_idxs = mtx->get_const_col_idxs(); + auto sycl_old_row_ptrs = mtx->get_row_ptrs(); + auto sycl_row_ptrs_add = row_ptrs_addition.get_data(); const dim3 block_dim{default_block_size, 1, 1}; const dim3 grid_dim{ @@ -538,12 +538,12 @@ void add_diagonal_elements(std::shared_ptr 
exec, if (is_sorted) { kernel::find_missing_diagonal_elements( grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, - dpcpp_old_col_idxs, dpcpp_old_row_ptrs, dpcpp_row_ptrs_add, + sycl_old_col_idxs, sycl_old_row_ptrs, sycl_row_ptrs_add, needs_change_device.get_data()); } else { kernel::find_missing_diagonal_elements( grid_dim, block_dim, 0, exec->get_queue(), num_rows, num_cols, - dpcpp_old_col_idxs, dpcpp_old_row_ptrs, dpcpp_row_ptrs_add, + sycl_old_col_idxs, sycl_old_row_ptrs, sycl_row_ptrs_add, needs_change_device.get_data()); } needs_change_host = needs_change_device; @@ -551,30 +551,30 @@ void add_diagonal_elements(std::shared_ptr exec, return; } - components::prefix_sum_nonnegative(exec, dpcpp_row_ptrs_add, row_ptrs_size); + components::prefix_sum_nonnegative(exec, sycl_row_ptrs_add, row_ptrs_size); exec->synchronize(); auto total_additions = - exec->copy_val_to_host(dpcpp_row_ptrs_add + row_ptrs_size - 1); + exec->copy_val_to_host(sycl_row_ptrs_add + row_ptrs_size - 1); size_type new_num_elems = static_cast(total_additions) + mtx->get_num_stored_elements(); array new_values{exec, new_num_elems}; array new_col_idxs{exec, new_num_elems}; - auto dpcpp_new_values = new_values.get_data(); - auto dpcpp_new_col_idxs = new_col_idxs.get_data(); + auto sycl_new_values = new_values.get_data(); + auto sycl_new_col_idxs = new_col_idxs.get_data(); kernel::add_missing_diagonal_elements( - grid_dim, block_dim, 0, exec->get_queue(), num_rows, dpcpp_old_values, - dpcpp_old_col_idxs, dpcpp_old_row_ptrs, dpcpp_new_values, - dpcpp_new_col_idxs, dpcpp_row_ptrs_add); + grid_dim, block_dim, 0, exec->get_queue(), num_rows, sycl_old_values, + sycl_old_col_idxs, sycl_old_row_ptrs, sycl_new_values, + sycl_new_col_idxs, sycl_row_ptrs_add); const dim3 grid_dim_row_ptrs_update{ static_cast(ceildiv(num_rows, block_dim.x)), 1, 1}; kernel::update_row_ptrs(grid_dim_row_ptrs_update, block_dim, 0, - exec->get_queue(), num_rows + 1, dpcpp_old_row_ptrs, - dpcpp_row_ptrs_add); + 
exec->get_queue(), num_rows + 1, sycl_old_row_ptrs, + sycl_row_ptrs_add); matrix::CsrBuilder mtx_builder{mtx}; mtx_builder.get_value_array() = std::move(new_values); @@ -587,7 +587,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l_u( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* system_matrix, IndexType* l_row_ptrs, IndexType* u_row_ptrs) { @@ -613,7 +613,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void initialize_l_u(std::shared_ptr exec, +void initialize_l_u(std::shared_ptr exec, const matrix::Csr* system_matrix, matrix::Csr* csr_l, matrix::Csr* csr_u) @@ -639,7 +639,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void initialize_row_ptrs_l( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* system_matrix, IndexType* l_row_ptrs) { @@ -663,7 +663,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void initialize_l(std::shared_ptr exec, +void initialize_l(std::shared_ptr exec, const matrix::Csr* system_matrix, matrix::Csr* csr_l, bool diag_sqrt) { diff --git a/dpcpp/factorization/par_ict_kernels.dp.cpp b/dpcpp/factorization/par_ict_kernels.dp.cpp index 31aef75edde..99f80a2c61e 100644 --- a/dpcpp/factorization/par_ict_kernels.dp.cpp +++ b/dpcpp/factorization/par_ict_kernels.dp.cpp @@ -383,7 +383,7 @@ void ict_sweep(const IndexType* __restrict__ a_row_ptrs, return true; }); // accumulate result from all threads - sum = ::gko::kernels::dpcpp::reduce( + sum = ::gko::kernels::sycl::reduce( subwarp, sum, [](ValueType a, ValueType b) { return a + b; }); if (subwarp.thread_rank() == 0) { diff --git a/dpcpp/factorization/par_ilu_kernels.dp.cpp b/dpcpp/factorization/par_ilu_kernels.dp.cpp index 351f2129216..775b9ee8ad2 100644 --- a/dpcpp/factorization/par_ilu_kernels.dp.cpp +++ b/dpcpp/factorization/par_ilu_kernels.dp.cpp @@ -132,7 +132,7 @@ void compute_l_u_factors(dim3 grid, dim3 block, size_type dynamic_shared_memory, template -void 
compute_l_u_factors(std::shared_ptr exec, +void compute_l_u_factors(std::shared_ptr exec, size_type iterations, const matrix::Coo* system_matrix, matrix::Csr* l_factor, diff --git a/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp b/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp index 0373d77bbe5..4209e402d00 100644 --- a/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp +++ b/dpcpp/factorization/par_ilut_sweep_kernel.dp.cpp @@ -146,7 +146,7 @@ void sweep(const IndexType* __restrict__ a_row_ptrs, return true; }); // accumulate result from all threads - sum = ::gko::kernels::dpcpp::reduce( + sum = ::gko::kernels::sycl::reduce( subwarp, sum, [](ValueType a, ValueType b) { return a + b; }); if (subwarp.thread_rank() == 0) { diff --git a/dpcpp/matrix/coo_kernels.dp.cpp b/dpcpp/matrix/coo_kernels.dp.cpp index 6168491ee97..05fcab9627f 100644 --- a/dpcpp/matrix/coo_kernels.dp.cpp +++ b/dpcpp/matrix/coo_kernels.dp.cpp @@ -57,9 +57,9 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace gko { namespace kernels { /** - * @brief The DPCPP namespace. + * @brief The SYCL namespace. 
* - * @ingroup dpcpp + * @ingroup sycl */ namespace sycl { /** @@ -282,7 +282,7 @@ GKO_ENABLE_DEFAULT_HOST(abstract_spmm, abstract_spmm); template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::Coo* a, const matrix::Dense* b, matrix::Dense* c) { @@ -294,7 +294,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV_KERNEL); template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Coo* a, const matrix::Dense* b, @@ -310,7 +310,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void spmv2(std::shared_ptr exec, +void spmv2(std::shared_ptr exec, const matrix::Coo* a, const matrix::Dense* b, matrix::Dense* c) { @@ -346,7 +346,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_COO_SPMV2_KERNEL); template -void advanced_spmv2(std::shared_ptr exec, +void advanced_spmv2(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Coo* a, const matrix::Dense* b, diff --git a/dpcpp/matrix/csr_kernels.dp.cpp b/dpcpp/matrix/csr_kernels.dp.cpp index 39b78e6c639..95a240d710d 100644 --- a/dpcpp/matrix/csr_kernels.dp.cpp +++ b/dpcpp/matrix/csr_kernels.dp.cpp @@ -664,7 +664,7 @@ void device_classical_spmv(const size_type num_rows, ind += subgroup_size) { temp_val += val(ind) * b(col_idxs[ind], column_id); } - auto subgroup_result = ::gko::kernels::dpcpp::reduce( + auto subgroup_result = ::gko::kernels::sycl::reduce( subgroup_tile, temp_val, [](const arithmetic_type& a, const arithmetic_type& b) { return a + b; @@ -1058,7 +1058,7 @@ namespace host_kernel { template void merge_path_spmv(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -1126,7 +1126,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_merge_path_spmv, merge_path_spmv); template -int compute_items_per_thread(std::shared_ptr exec) +int compute_items_per_thread(std::shared_ptr exec) { int 
num_item = 6; // Ensure that the following is satisfied: @@ -1142,7 +1142,7 @@ int compute_items_per_thread(std::shared_ptr exec) template void classical_spmv(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -1191,7 +1191,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template -void load_balance_spmv(std::shared_ptr exec, +void load_balance_spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -1238,7 +1238,7 @@ void load_balance_spmv(std::shared_ptr exec, template -bool try_general_sparselib_spmv(std::shared_ptr exec, +bool try_general_sparselib_spmv(std::shared_ptr exec, const ValueType host_alpha, const matrix::Csr* a, const matrix::Dense* b, @@ -1280,7 +1280,7 @@ template ::value || !std::is_same::value>> -bool try_sparselib_spmv(std::shared_ptr exec, +bool try_sparselib_spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -1292,7 +1292,7 @@ bool try_sparselib_spmv(std::shared_ptr exec, } template -bool try_sparselib_spmv(std::shared_ptr exec, +bool try_sparselib_spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c, @@ -1316,7 +1316,7 @@ bool try_sparselib_spmv(std::shared_ptr exec, template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::Csr* a, const matrix::Dense* b, matrix::Dense* c) @@ -1383,7 +1383,7 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Csr* a, const matrix::Dense* b, @@ -1772,7 +1772,7 @@ auto spgemm_multiway_merge(size_type row, template -void spgemm(std::shared_ptr exec, +void spgemm(std::shared_ptr exec, const matrix::Csr* a, const matrix::Csr* b, matrix::Csr* c) @@ -1846,7 +1846,7 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEMM_KERNEL); template -void advanced_spgemm(std::shared_ptr exec, +void advanced_spgemm(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Csr* a, const matrix::Csr* b, @@ -1980,7 +1980,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void spgeam(std::shared_ptr exec, +void spgeam(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Csr* a, const matrix::Dense* beta, @@ -2065,7 +2065,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_SPGEAM_KERNEL); template -void fill_in_dense(std::shared_ptr exec, +void fill_in_dense(std::shared_ptr exec, const matrix::Csr* source, matrix::Dense* result) { @@ -2097,7 +2097,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void generic_transpose(std::shared_ptr exec, +void generic_transpose(std::shared_ptr exec, const matrix::Csr* orig, matrix::Csr* trans) { @@ -2148,7 +2148,7 @@ void generic_transpose(std::shared_ptr exec, template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Csr* orig, matrix::Csr* trans) { @@ -2159,7 +2159,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_TRANSPOSE_KERNEL); template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Csr* orig, matrix::Csr* trans) { @@ -2171,7 +2171,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void inv_symm_permute(std::shared_ptr exec, +void inv_symm_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Csr* orig, matrix::Csr* permuted) @@ -2197,7 +2197,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void row_permute(std::shared_ptr exec, +void row_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Csr* orig, matrix::Csr* row_permuted) @@ -2223,7 +2223,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void inverse_row_permute(std::shared_ptr exec, +void 
inverse_row_permute(std::shared_ptr exec, const IndexType* perm, const matrix::Csr* orig, matrix::Csr* row_permuted) @@ -2249,7 +2249,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void sort_by_column_index(std::shared_ptr exec, +void sort_by_column_index(std::shared_ptr exec, matrix::Csr* to_sort) { const auto num_rows = to_sort->get_size()[0]; @@ -2308,7 +2308,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* to_check, bool* is_sorted) { array is_sorted_device_array{exec, {true}}; @@ -2339,7 +2339,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void extract_diagonal(std::shared_ptr exec, +void extract_diagonal(std::shared_ptr exec, const matrix::Csr* orig, matrix::Diagonal* diag) { @@ -2363,7 +2363,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_CSR_EXTRACT_DIAGONAL); template void check_diagonal_entries_exist( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Csr* const mtx, bool& has_all_diags) GKO_NOT_IMPLEMENTED; @@ -2372,7 +2372,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void add_scaled_identity(std::shared_ptr exec, +void add_scaled_identity(std::shared_ptr exec, const matrix::Dense* const alpha, const matrix::Dense* const beta, matrix::Csr* const mtx) @@ -2467,7 +2467,7 @@ void csr_lookup_build_hash(IndexType row_len, IndexType available_storage, template -void build_lookup(std::shared_ptr exec, +void build_lookup(std::shared_ptr exec, const IndexType* row_ptrs, const IndexType* col_idxs, size_type num_rows, matrix::csr::sparsity_type allowed, const IndexType* storage_offsets, int64* row_desc, diff --git a/dpcpp/matrix/diagonal_kernels.dp.cpp b/dpcpp/matrix/diagonal_kernels.dp.cpp index acb1f138bf7..60bc52c18d2 100644 --- a/dpcpp/matrix/diagonal_kernels.dp.cpp +++ b/dpcpp/matrix/diagonal_kernels.dp.cpp @@ -95,7 +95,7 @@ GKO_ENABLE_DEFAULT_HOST(apply_to_csr, apply_to_csr); 
template -void apply_to_csr(std::shared_ptr exec, +void apply_to_csr(std::shared_ptr exec, const matrix::Diagonal* a, const matrix::Csr* b, matrix::Csr* c, bool inverse) diff --git a/dpcpp/matrix/ell_kernels.dp.cpp b/dpcpp/matrix/ell_kernels.dp.cpp index 32d167c6de2..88e2bf40efa 100644 --- a/dpcpp/matrix/ell_kernels.dp.cpp +++ b/dpcpp/matrix/ell_kernels.dp.cpp @@ -308,7 +308,7 @@ template void abstract_spmv(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, int num_worker_per_row, const matrix::Ell* a, const matrix::Dense* b, @@ -370,7 +370,7 @@ GKO_ENABLE_IMPLEMENTATION_TWO_SELECTION(select_abstract_spmv, abstract_spmv); template std::array compute_thread_worker_and_atomicity( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Ell* a) { int num_thread_per_worker = 16; @@ -379,7 +379,7 @@ std::array compute_thread_worker_and_atomicity( const auto nrows = a->get_size()[0]; const auto ell_ncols = a->get_num_stored_elements_per_row(); - // TODO: num_threads_per_core should be tuned for Dpcpp + // TODO: num_threads_per_core should be tuned for Sycl const auto nwarps = 16 * num_threads_per_core; // Use multithreads to perform the reduction on each row when the matrix is @@ -413,7 +413,7 @@ std::array compute_thread_worker_and_atomicity( template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::Ell* a, const matrix::Dense* b, matrix::Dense* c) @@ -424,7 +424,7 @@ void spmv(std::shared_ptr exec, const int num_worker_per_row = std::get<2>(data); /** - * info is the parameter for selecting the dpcpp kernel. + * info is the parameter for selecting the sycl kernel. 
* for info == 0, it uses the kernel by warp_size threads with atomic * operation for other value, it uses the kernel without atomic_add */ @@ -447,7 +447,7 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Ell* a, const matrix::Dense* b, @@ -460,7 +460,7 @@ void advanced_spmv(std::shared_ptr exec, const int num_worker_per_row = std::get<2>(data); /** - * info is the parameter for selecting the dpcpp kernel. + * info is the parameter for selecting the sycl kernel. * for info == 0, it uses the kernel by warp_size threads with atomic * operation for other value, it uses the kernel without atomic_add */ diff --git a/dpcpp/matrix/fbcsr_kernels.dp.cpp b/dpcpp/matrix/fbcsr_kernels.dp.cpp index 977bfec653f..9cc11ea2e90 100644 --- a/dpcpp/matrix/fbcsr_kernels.dp.cpp +++ b/dpcpp/matrix/fbcsr_kernels.dp.cpp @@ -58,7 +58,7 @@ namespace fbcsr { template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::Fbcsr* a, const matrix::Dense* b, matrix::Dense* c) GKO_NOT_IMPLEMENTED; @@ -67,7 +67,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_FBCSR_SPMV_KERNEL); template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Fbcsr* a, const matrix::Dense* b, @@ -90,7 +90,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void fill_in_dense(std::shared_ptr exec, +void fill_in_dense(std::shared_ptr exec, const matrix::Fbcsr* source, matrix::Dense* result) GKO_NOT_IMPLEMENTED; @@ -99,7 +99,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void convert_to_csr(const std::shared_ptr exec, +void convert_to_csr(const std::shared_ptr exec, const matrix::Fbcsr* const source, matrix::Csr* const result) GKO_NOT_IMPLEMENTED; @@ -109,7 +109,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void 
transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::Fbcsr* orig, matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; @@ -118,7 +118,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void conj_transpose(std::shared_ptr exec, +void conj_transpose(std::shared_ptr exec, const matrix::Fbcsr* orig, matrix::Fbcsr* trans) GKO_NOT_IMPLEMENTED; @@ -129,7 +129,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Fbcsr* to_check, bool* is_sorted) GKO_NOT_IMPLEMENTED; @@ -138,7 +138,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void sort_by_column_index(const std::shared_ptr exec, +void sort_by_column_index(const std::shared_ptr exec, matrix::Fbcsr* const to_sort) GKO_NOT_IMPLEMENTED; @@ -147,7 +147,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void extract_diagonal(std::shared_ptr exec, +void extract_diagonal(std::shared_ptr exec, const matrix::Fbcsr* orig, matrix::Diagonal* diag) GKO_NOT_IMPLEMENTED; diff --git a/dpcpp/matrix/sellp_kernels.dp.cpp b/dpcpp/matrix/sellp_kernels.dp.cpp index aefb958323a..85300bb531a 100644 --- a/dpcpp/matrix/sellp_kernels.dp.cpp +++ b/dpcpp/matrix/sellp_kernels.dp.cpp @@ -135,7 +135,7 @@ GKO_ENABLE_DEFAULT_HOST(advanced_spmv_kernel, advanced_spmv_kernel); template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::Sellp* a, const matrix::Dense* b, matrix::Dense* c) { @@ -154,7 +154,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE(GKO_DECLARE_SELLP_SPMV_KERNEL); template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::Sellp* a, const matrix::Dense* b, diff --git a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp index 8e7fd4a7ee9..29ddfc7bb40 100644 --- a/dpcpp/matrix/sparsity_csr_kernels.dp.cpp +++ 
b/dpcpp/matrix/sparsity_csr_kernels.dp.cpp @@ -97,7 +97,7 @@ void device_classical_spmv(const size_type num_rows, ind += subgroup_size) { temp_val += value * b(col_idxs[ind], column_id); } - auto subgroup_result = ::gko::kernels::dpcpp::reduce( + auto subgroup_result = ::gko::kernels::sycl::reduce( subgroup_tile, temp_val, [](const arithmetic_type& a, const arithmetic_type& b) { return a + b; @@ -193,7 +193,7 @@ namespace host_kernel { template void classical_spmv(syn::value_list, - std::shared_ptr exec, + std::shared_ptr exec, const matrix::SparsityCsr* a, const matrix::Dense* b, matrix::Dense* c, @@ -258,7 +258,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_classical_spmv, classical_spmv); template -void spmv(std::shared_ptr exec, +void spmv(std::shared_ptr exec, const matrix::SparsityCsr* a, const matrix::Dense* b, matrix::Dense* c) @@ -274,7 +274,7 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template -void advanced_spmv(std::shared_ptr exec, +void advanced_spmv(std::shared_ptr exec, const matrix::Dense* alpha, const matrix::SparsityCsr* a, const matrix::Dense* b, @@ -291,7 +291,7 @@ GKO_INSTANTIATE_FOR_EACH_MIXED_VALUE_AND_INDEX_TYPE( template -void transpose(std::shared_ptr exec, +void transpose(std::shared_ptr exec, const matrix::SparsityCsr* orig, matrix::SparsityCsr* trans) GKO_NOT_IMPLEMENTED; @@ -301,7 +301,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template -void sort_by_column_index(std::shared_ptr exec, +void sort_by_column_index(std::shared_ptr exec, matrix::SparsityCsr* to_sort) { const auto num_rows = to_sort->get_size()[0]; @@ -327,7 +327,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( template void is_sorted_by_column_index( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::SparsityCsr* to_check, bool* is_sorted) { *is_sorted = true; diff --git a/dpcpp/multigrid/pgm_kernels.dp.cpp b/dpcpp/multigrid/pgm_kernels.dp.cpp index 6ee2fae4a10..b395e5ac422 100644 --- a/dpcpp/multigrid/pgm_kernels.dp.cpp +++ 
b/dpcpp/multigrid/pgm_kernels.dp.cpp @@ -105,7 +105,7 @@ void compute_coarse_coo(std::shared_ptr exec, const IndexType* col_idxs, const ValueType* vals, matrix::Coo* coarse_coo) { - // WORKAROUND: reduce_by_segment needs unique policy. Otherwise, dpcpp + // WORKAROUND: reduce_by_segment needs unique policy. Otherwise, sycl // throws same mangled name error. Related: // https://github.com/oneapi-src/oneDPL/issues/507 auto policy = oneapi::dpl::execution::make_device_policy< diff --git a/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp b/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp index b0c82a273c3..b952e94476e 100644 --- a/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp +++ b/dpcpp/preconditioner/jacobi_advanced_apply_kernel.dp.cpp @@ -56,7 +56,7 @@ template void advanced_apply( syn::value_list, - std::shared_ptr exec, size_type num_blocks, + std::shared_ptr exec, size_type num_blocks, const precision_reduction* block_precisions, const IndexType* block_pointers, const ValueType* blocks, const preconditioner::block_interleaved_storage_scheme& @@ -68,7 +68,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_advanced_apply, advanced_apply); template -void apply(std::shared_ptr exec, size_type num_blocks, +void apply(std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, const preconditioner::block_interleaved_storage_scheme& storage_scheme, diff --git a/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp b/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp index 31c8693abb7..b1109c5e0fb 100644 --- a/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp +++ b/dpcpp/preconditioner/jacobi_generate_instantiate.inc.dp.cpp @@ -188,7 +188,7 @@ namespace detail { */ template -GKO_ATTRIBUTES GKO_INLINE uint32 get_supported_storage_reductions_dpcpp( +GKO_ATTRIBUTES GKO_INLINE uint32 get_supported_storage_reductions_sycl( AccuracyType accuracy, CondType cond, Predicate1 verificator1, Predicate2 verificator2, Predicate3 
verificator3) { @@ -274,8 +274,8 @@ void adaptive_generate( preconditioner::detail::precision_reduction_descriptor::singleton( prec); if (prec == precision_reduction::autodetect()) { - using detail::get_supported_storage_reductions_dpcpp; - prec_descriptor = get_supported_storage_reductions_dpcpp( + using detail::get_supported_storage_reductions_sycl; + prec_descriptor = get_supported_storage_reductions_sycl( accuracy, block_cond, [&subwarp, &block_size, &row, &block_data, &storage_scheme, &block_id] { @@ -314,8 +314,8 @@ void adaptive_generate( // make sure all blocks in the group have the same precision const auto warp = group::tiled_partition(block); const auto prec = preconditioner::detail::get_optimal_storage_reduction( - ::gko::kernels::dpcpp::reduce( - warp, prec_descriptor, [](uint32 x, uint32 y) { return x & y; })); + ::gko::kernels::sycl::reduce(warp, prec_descriptor, + [](uint32 x, uint32 y) { return x & y; })); // store the block back into memory if (block_id < num_blocks) { diff --git a/dpcpp/preconditioner/jacobi_generate_kernel.dp.cpp b/dpcpp/preconditioner/jacobi_generate_kernel.dp.cpp index 7ed6b16a6ec..8bfdc557d82 100644 --- a/dpcpp/preconditioner/jacobi_generate_kernel.dp.cpp +++ b/dpcpp/preconditioner/jacobi_generate_kernel.dp.cpp @@ -69,7 +69,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_generate, generate); template -void generate(std::shared_ptr exec, +void generate(std::shared_ptr exec, const matrix::Csr* system_matrix, size_type num_blocks, uint32 max_block_size, remove_complex accuracy, diff --git a/dpcpp/preconditioner/jacobi_kernels.dp.cpp b/dpcpp/preconditioner/jacobi_kernels.dp.cpp index 52128c6bda1..13bc822d036 100644 --- a/dpcpp/preconditioner/jacobi_kernels.dp.cpp +++ b/dpcpp/preconditioner/jacobi_kernels.dp.cpp @@ -63,7 +63,7 @@ namespace { // a total of 8 32-subgroup (256 threads) constexpr int default_num_warps = 8; -// TODO: get a default_grid_size for dpcpp +// TODO: get a default_grid_size for sycl // with current 
architectures, at most 32 warps can be scheduled per SM (and // current GPUs have at most 84 SMs) constexpr int default_grid_size = 32 * 32 * 128; diff --git a/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp b/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp index 0303e430fac..d49d5ae038d 100644 --- a/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp +++ b/dpcpp/preconditioner/jacobi_simple_apply_kernel.dp.cpp @@ -68,7 +68,7 @@ GKO_ENABLE_IMPLEMENTATION_SELECTION(select_apply, apply); template void simple_apply( - std::shared_ptr exec, size_type num_blocks, + std::shared_ptr exec, size_type num_blocks, uint32 max_block_size, const preconditioner::block_interleaved_storage_scheme& storage_scheme, diff --git a/dpcpp/reorder/rcm_kernels.dp.cpp b/dpcpp/reorder/rcm_kernels.dp.cpp index f773cf7b1be..202f0bf1023 100644 --- a/dpcpp/reorder/rcm_kernels.dp.cpp +++ b/dpcpp/reorder/rcm_kernels.dp.cpp @@ -56,7 +56,7 @@ namespace rcm { template -void get_degree_of_nodes(std::shared_ptr exec, +void get_degree_of_nodes(std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, IndexType* const degrees) GKO_NOT_IMPLEMENTED; @@ -66,7 +66,7 @@ GKO_INSTANTIATE_FOR_EACH_INDEX_TYPE(GKO_DECLARE_RCM_GET_DEGREE_OF_NODES_KERNEL); template void get_permutation( - std::shared_ptr exec, const IndexType num_vertices, + std::shared_ptr exec, const IndexType num_vertices, const IndexType* const row_ptrs, const IndexType* const col_idxs, const IndexType* const degrees, IndexType* const permutation, IndexType* const inv_permutation, diff --git a/dpcpp/solver/cb_gmres_kernels.dp.cpp b/dpcpp/solver/cb_gmres_kernels.dp.cpp index 618159151ca..fb5633999d1 100644 --- a/dpcpp/solver/cb_gmres_kernels.dp.cpp +++ b/dpcpp/solver/cb_gmres_kernels.dp.cpp @@ -248,7 +248,7 @@ void multinorm2_kernel( local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; const auto tile_block = group::tiled_partition( group::this_thread_block(item_ct1)); - const auto sum = 
::gko::kernels::dpcpp::reduce( + const auto sum = ::gko::kernels::sycl::reduce( tile_block, local_res, [](const rc_vtype& a, const rc_vtype& b) { return a + b; }); const auto new_col_idx = item_ct1.get_group(2) * default_dot_dim + tidy; @@ -326,7 +326,7 @@ void multinorminf_without_stop_kernel( local_max = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; const auto tile_block = group::tiled_partition( group::this_thread_block(item_ct1)); - const auto value = ::gko::kernels::dpcpp::reduce( + const auto value = ::gko::kernels::sycl::reduce( tile_block, local_max, [](const rc_vtype& a, const rc_vtype& b) { return ((a >= b) ? a : b); }); @@ -417,13 +417,13 @@ void multinorm2_inf_kernel( local_res = reduction_helper_add[tidy * (default_dot_dim + 1) + tidx]; const auto tile_block = group::tiled_partition( group::this_thread_block(item_ct1)); - const auto sum = ::gko::kernels::dpcpp::reduce( + const auto sum = ::gko::kernels::sycl::reduce( tile_block, local_res, [](const rc_vtype& a, const rc_vtype& b) { return a + b; }); rc_vtype reduced_max{}; if (compute_inf) { local_max = reduction_helper_max[tidy * (default_dot_dim + 1) + tidx]; - reduced_max = ::gko::kernels::dpcpp::reduce( + reduced_max = ::gko::kernels::sycl::reduce( tile_block, local_max, [](const rc_vtype& a, const rc_vtype& b) { return ((a >= b) ? 
a : b); }); @@ -521,7 +521,7 @@ void multidot_kernel( const auto new_col_idx = item_ct1.get_group(2) * item_ct1.get_local_range().get(2) + tidy; const auto tile_block = group::tiled_partition(thread_block); - const auto sum = ::gko::kernels::dpcpp::reduce( + const auto sum = ::gko::kernels::sycl::reduce( tile_block, local_res, [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0 && new_col_idx < num_cols && @@ -604,7 +604,7 @@ void singledot_kernel( reduction_helper[tidx] = local_res; auto thread_block = group::this_thread_block(item_ct1); thread_block.sync(); - ::gko::kernels::dpcpp::reduce( + ::gko::kernels::sycl::reduce( thread_block, reduction_helper, [](const ValueType& a, const ValueType& b) { return a + b; }); if (tidx == 0 && !stop_status[col_idx].has_stopped()) { @@ -947,7 +947,7 @@ void calculate_Qy_kernel(dim3 grid, dim3 block, size_type dynamic_shared_memory, template -void zero_matrix(std::shared_ptr exec, size_type m, +void zero_matrix(std::shared_ptr exec, size_type m, size_type n, size_type stride, ValueType* array) { const dim3 block_size(default_block_size, 1, 1); @@ -958,7 +958,7 @@ void zero_matrix(std::shared_ptr exec, size_type m, template -void initialize(std::shared_ptr exec, +void initialize(std::shared_ptr exec, const matrix::Dense* b, matrix::Dense* residual, matrix::Dense* givens_sin, @@ -984,7 +984,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_CB_GMRES_INITIALIZE_KERNEL); template -void restart(std::shared_ptr exec, +void restart(std::shared_ptr exec, const matrix::Dense* residual, matrix::Dense>* residual_norm, matrix::Dense* residual_norm_collection, @@ -1012,8 +1012,8 @@ void restart(std::shared_ptr exec, residual->get_size()[1], krylov_dim, krylov_bases, residual_norm_collection->get_values(), residual_norm_collection->get_stride()); - kernels::dpcpp::dense::compute_norm2_dispatch(exec, residual, residual_norm, - reduction_tmp); + kernels::sycl::dense::compute_norm2_dispatch(exec, residual, 
residual_norm, + reduction_tmp); if (use_scalar) { components::fill_array(exec, @@ -1054,7 +1054,7 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_RESTART_KERNEL); template -void finish_arnoldi_CGS(std::shared_ptr exec, +void finish_arnoldi_CGS(std::shared_ptr exec, matrix::Dense* next_krylov_basis, Accessor3dim krylov_bases, matrix::Dense* hessenberg_iter, @@ -1215,7 +1215,7 @@ void finish_arnoldi_CGS(std::shared_ptr exec, } template -void givens_rotation(std::shared_ptr exec, +void givens_rotation(std::shared_ptr exec, matrix::Dense* givens_sin, matrix::Dense* givens_cos, matrix::Dense* hessenberg_iter, @@ -1242,7 +1242,7 @@ void givens_rotation(std::shared_ptr exec, template -void arnoldi(std::shared_ptr exec, +void arnoldi(std::shared_ptr exec, matrix::Dense* next_krylov_basis, matrix::Dense* givens_sin, matrix::Dense* givens_cos, @@ -1274,7 +1274,7 @@ GKO_INSTANTIATE_FOR_EACH_CB_GMRES_TYPE(GKO_DECLARE_CB_GMRES_ARNOLDI_KERNEL); template void solve_upper_triangular( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Dense* residual_norm_collection, const matrix::Dense* hessenberg, matrix::Dense* y, const array* final_iter_nums) @@ -1296,7 +1296,7 @@ void solve_upper_triangular( template -void calculate_qy(std::shared_ptr exec, +void calculate_qy(std::shared_ptr exec, ConstAccessor3d krylov_bases, size_type num_krylov_bases, const matrix::Dense* y, matrix::Dense* before_preconditioner, @@ -1326,7 +1326,7 @@ void calculate_qy(std::shared_ptr exec, template -void solve_krylov(std::shared_ptr exec, +void solve_krylov(std::shared_ptr exec, const matrix::Dense* residual_norm_collection, ConstAccessor3d krylov_bases, const matrix::Dense* hessenberg, diff --git a/dpcpp/solver/idr_kernels.dp.cpp b/dpcpp/solver/idr_kernels.dp.cpp index 00e36972c86..5549992f990 100644 --- a/dpcpp/solver/idr_kernels.dp.cpp +++ b/dpcpp/solver/idr_kernels.dp.cpp @@ -133,7 +133,7 @@ void orthonormalize_subspace_vectors_kernel( // Ensure already finish reading this 
shared memory item_ct1.barrier(sycl::access::fence_space::local_space); reduction_helper[tidx] = dot; - ::gko::kernels::dpcpp::reduce( + ::gko::kernels::sycl::reduce( group::this_thread_block(item_ct1), reduction_helper, [](const ValueType& a, const ValueType& b) { return a + b; }); item_ct1.barrier(sycl::access::fence_space::local_space); @@ -152,7 +152,7 @@ void orthonormalize_subspace_vectors_kernel( // Ensure already finish reading this shared memory item_ct1.barrier(sycl::access::fence_space::local_space); reduction_helper_real[tidx] = norm; - ::gko::kernels::dpcpp::reduce( + ::gko::kernels::sycl::reduce( group::this_thread_block(item_ct1), reduction_helper_real, [](const remove_complex& a, const remove_complex& b) { return a + b; }); @@ -371,7 +371,7 @@ void multidot_kernel( local_res = reduction_helper[tidy * (default_dot_dim + 1) + tidx]; const auto tile_block = group::tiled_partition( group::this_thread_block(item_ct1)); - const auto sum = ::gko::kernels::dpcpp::reduce( + const auto sum = ::gko::kernels::sycl::reduce( tile_block, local_res, [](const ValueType& a, const ValueType& b) { return a + b; }); const auto new_rhs = item_ct1.get_group(2) * default_dot_dim + tidy; @@ -606,7 +606,7 @@ namespace { template -void initialize_m(std::shared_ptr exec, +void initialize_m(std::shared_ptr exec, const size_type nrhs, matrix::Dense* m, array* stop_status) { @@ -621,7 +621,7 @@ void initialize_m(std::shared_ptr exec, template -void initialize_subspace_vectors(std::shared_ptr exec, +void initialize_subspace_vectors(std::shared_ptr exec, matrix::Dense* subspace_vectors, bool deterministic) { @@ -648,7 +648,7 @@ void initialize_subspace_vectors(std::shared_ptr exec, template -void orthonormalize_subspace_vectors(std::shared_ptr exec, +void orthonormalize_subspace_vectors(std::shared_ptr exec, matrix::Dense* subspace_vectors) { orthonormalize_subspace_vectors_kernel( @@ -659,7 +659,7 @@ void orthonormalize_subspace_vectors(std::shared_ptr exec, template -void 
solve_lower_triangular(std::shared_ptr exec, +void solve_lower_triangular(std::shared_ptr exec, const size_type nrhs, const matrix::Dense* m, const matrix::Dense* f, @@ -678,7 +678,7 @@ void solve_lower_triangular(std::shared_ptr exec, template -void update_g_and_u(std::shared_ptr exec, +void update_g_and_u(std::shared_ptr exec, const size_type nrhs, const size_type k, const matrix::Dense* p, const matrix::Dense* m, @@ -723,7 +723,7 @@ void update_g_and_u(std::shared_ptr exec, template -void update_m(std::shared_ptr exec, const size_type nrhs, +void update_m(std::shared_ptr exec, const size_type nrhs, const size_type k, const matrix::Dense* p, const matrix::Dense* g_k, matrix::Dense* m, const array* stop_status) @@ -755,7 +755,7 @@ void update_m(std::shared_ptr exec, const size_type nrhs, template -void update_x_r_and_f(std::shared_ptr exec, +void update_x_r_and_f(std::shared_ptr exec, const size_type nrhs, const size_type k, const matrix::Dense* m, const matrix::Dense* g, @@ -784,7 +784,7 @@ void update_x_r_and_f(std::shared_ptr exec, template -void initialize(std::shared_ptr exec, const size_type nrhs, +void initialize(std::shared_ptr exec, const size_type nrhs, matrix::Dense* m, matrix::Dense* subspace_vectors, bool deterministic, array* stop_status) @@ -798,7 +798,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_INITIALIZE_KERNEL); template -void step_1(std::shared_ptr exec, const size_type nrhs, +void step_1(std::shared_ptr exec, const size_type nrhs, const size_type k, const matrix::Dense* m, const matrix::Dense* f, const matrix::Dense* residual, @@ -824,7 +824,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_1_KERNEL); template -void step_2(std::shared_ptr exec, const size_type nrhs, +void step_2(std::shared_ptr exec, const size_type nrhs, const size_type k, const matrix::Dense* omega, const matrix::Dense* preconditioned_vector, const matrix::Dense* c, matrix::Dense* u, @@ -849,7 +849,7 @@ 
GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_2_KERNEL); template -void step_3(std::shared_ptr exec, const size_type nrhs, +void step_3(std::shared_ptr exec, const size_type nrhs, const size_type k, const matrix::Dense* p, matrix::Dense* g, matrix::Dense* g_k, matrix::Dense* u, matrix::Dense* m, @@ -867,7 +867,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_TYPE(GKO_DECLARE_IDR_STEP_3_KERNEL); template void compute_omega( - std::shared_ptr exec, const size_type nrhs, + std::shared_ptr exec, const size_type nrhs, const remove_complex kappa, const matrix::Dense* tht, const matrix::Dense>* residual_norm, matrix::Dense* omega, const array* stop_status) diff --git a/dpcpp/solver/lower_trs_kernels.dp.cpp b/dpcpp/solver/lower_trs_kernels.dp.cpp index ce09c3ac0e3..d96a257c669 100644 --- a/dpcpp/solver/lower_trs_kernels.dp.cpp +++ b/dpcpp/solver/lower_trs_kernels.dp.cpp @@ -59,7 +59,7 @@ namespace sycl { namespace lower_trs { -void should_perform_transpose(std::shared_ptr exec, +void should_perform_transpose(std::shared_ptr exec, bool& do_transpose) { do_transpose = false; @@ -67,7 +67,7 @@ void should_perform_transpose(std::shared_ptr exec, template -void generate(std::shared_ptr exec, +void generate(std::shared_ptr exec, const matrix::Csr* matrix, std::shared_ptr& solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, @@ -82,7 +82,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( * versions <=9.1 due to a limitation in the cssrsm_solve algorithm */ template -void solve(std::shared_ptr exec, +void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, diff --git a/dpcpp/solver/upper_trs_kernels.dp.cpp b/dpcpp/solver/upper_trs_kernels.dp.cpp index 12cdcc72183..5e1bf9fdc47 100644 --- a/dpcpp/solver/upper_trs_kernels.dp.cpp +++ b/dpcpp/solver/upper_trs_kernels.dp.cpp @@ -59,7 +59,7 @@ namespace sycl { namespace upper_trs { -void 
should_perform_transpose(std::shared_ptr exec, +void should_perform_transpose(std::shared_ptr exec, bool& do_transpose) { do_transpose = false; @@ -67,7 +67,7 @@ void should_perform_transpose(std::shared_ptr exec, template -void generate(std::shared_ptr exec, +void generate(std::shared_ptr exec, const matrix::Csr* matrix, std::shared_ptr& solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, @@ -82,7 +82,7 @@ GKO_INSTANTIATE_FOR_EACH_VALUE_AND_INDEX_TYPE( * versions <=9.1 due to a limitation in the cssrsm_solve algorithm */ template -void solve(std::shared_ptr exec, +void solve(std::shared_ptr exec, const matrix::Csr* matrix, const solver::SolveStruct* solve_struct, bool unit_diag, const solver::trisolve_algorithm algorithm, diff --git a/dpcpp/stop/criterion_kernels.dp.cpp b/dpcpp/stop/criterion_kernels.dp.cpp index 5fd78cc2115..3a24092ecac 100644 --- a/dpcpp/stop/criterion_kernels.dp.cpp +++ b/dpcpp/stop/criterion_kernels.dp.cpp @@ -50,7 +50,7 @@ namespace sycl { namespace set_all_statuses { -void set_all_statuses(std::shared_ptr exec, +void set_all_statuses(std::shared_ptr exec, uint8 stoppingId, bool setFinalized, array* stop_status) { diff --git a/dpcpp/stop/residual_norm_kernels.dp.cpp b/dpcpp/stop/residual_norm_kernels.dp.cpp index 6b351e8a421..76295864840 100644 --- a/dpcpp/stop/residual_norm_kernels.dp.cpp +++ b/dpcpp/stop/residual_norm_kernels.dp.cpp @@ -57,7 +57,7 @@ namespace residual_norm { template -void residual_norm(std::shared_ptr exec, +void residual_norm(std::shared_ptr exec, const matrix::Dense* tau, const matrix::Dense* orig_tau, ValueType rel_residual_goal, uint8 stoppingId, @@ -116,7 +116,7 @@ namespace implicit_residual_norm { template void implicit_residual_norm( - std::shared_ptr exec, + std::shared_ptr exec, const matrix::Dense* tau, const matrix::Dense>* orig_tau, remove_complex rel_residual_goal, uint8 stoppingId, diff --git a/dpcpp/synthesizer/implementation_selection.hpp 
b/dpcpp/synthesizer/implementation_selection.hpp index 10963f09c6e..e8c35f675eb 100644 --- a/dpcpp/synthesizer/implementation_selection.hpp +++ b/dpcpp/synthesizer/implementation_selection.hpp @@ -138,7 +138,7 @@ namespace syn { { \ if (is_eligible(K)) { \ _callable(K), _dcfg::decode<1>(K)>>( \ std::forward(args)...); \ } else { \ @@ -214,8 +214,8 @@ namespace syn { { \ if (is_eligible(K)) { \ _callable(kernel_args, kernel_is_eligible, \ - ::gko::kernels::dpcpp::device_config(), \ - bool_args, int_args, size_args, type_args, \ + ::gko::kernels::sycl::device_config(), bool_args, \ + int_args, size_args, type_args, \ std::forward(args)...); \ } else { \ _name(::gko::syn::value_list(), \ diff --git a/dpcpp/test/base/dim3.dp.cpp b/dpcpp/test/base/dim3.dp.cpp index bba2a4755e1..538cbbf2d04 100644 --- a/dpcpp/test/base/dim3.dp.cpp +++ b/dpcpp/test/base/dim3.dp.cpp @@ -42,7 +42,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -using namespace gko::kernels::dpcpp; +using namespace gko::kernels::sycl; TEST(DpcppDim3, CanGenerate1DRange) diff --git a/dpcpp/test/base/executor.dp.cpp b/dpcpp/test/base/executor.dp.cpp index 2819c23793c..19f03039c73 100644 --- a/dpcpp/test/base/executor.dp.cpp +++ b/dpcpp/test/base/executor.dp.cpp @@ -52,23 +52,23 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
namespace { -class DpcppExecutor : public ::testing::Test { +class SyclExecutor : public ::testing::Test { protected: - DpcppExecutor() - : ref(gko::ReferenceExecutor::create()), dpcpp(nullptr), dpcpp2(nullptr) + SyclExecutor() + : ref(gko::ReferenceExecutor::create()), sycl(nullptr), sycl2(nullptr) {} void SetUp() { - if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - dpcpp = gko::DpcppExecutor::create(0, ref, "gpu"); - if (gko::DpcppExecutor::get_num_devices("gpu") > 1) { - dpcpp2 = gko::DpcppExecutor::create(1, ref, "gpu"); + if (gko::SyclExecutor::get_num_devices("gpu") > 0) { + sycl = gko::SyclExecutor::create(0, ref, "gpu"); + if (gko::SyclExecutor::get_num_devices("gpu") > 1) { + sycl2 = gko::SyclExecutor::create(1, ref, "gpu"); } - } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - dpcpp = gko::DpcppExecutor::create(0, ref, "cpu"); - if (gko::DpcppExecutor::get_num_devices("cpu") > 1) { - dpcpp2 = gko::DpcppExecutor::create(1, ref, "cpu"); + } else if (gko::SyclExecutor::get_num_devices("cpu") > 0) { + sycl = gko::SyclExecutor::create(0, ref, "cpu"); + if (gko::SyclExecutor::get_num_devices("cpu") > 1) { + sycl2 = gko::SyclExecutor::create(1, ref, "cpu"); } } else { GKO_NOT_IMPLEMENTED; @@ -78,104 +78,104 @@ class DpcppExecutor : public ::testing::Test { void TearDown() { // ensure that previous calls finished and didn't throw an error - ASSERT_NO_THROW(dpcpp->synchronize()); - if (dpcpp2 != nullptr) { - ASSERT_NO_THROW(dpcpp2->synchronize()); + ASSERT_NO_THROW(sycl->synchronize()); + if (sycl2 != nullptr) { + ASSERT_NO_THROW(sycl2->synchronize()); } } std::shared_ptr ref{}; - std::shared_ptr dpcpp{}; - std::shared_ptr dpcpp2{}; + std::shared_ptr sycl{}; + std::shared_ptr sycl2{}; }; -TEST_F(DpcppExecutor, CanInstantiateTwoExecutorsOnOneDevice) +TEST_F(SyclExecutor, CanInstantiateTwoExecutorsOnOneDevice) { - auto dpcpp = gko::DpcppExecutor::create(0, ref); - if (dpcpp2 != nullptr) { - auto dpcpp2 = gko::DpcppExecutor::create(0, ref); + auto 
sycl = gko::SyclExecutor::create(0, ref); + if (sycl2 != nullptr) { + auto sycl2 = gko::SyclExecutor::create(0, ref); } // We want automatic deinitialization to not create any error } -TEST_F(DpcppExecutor, CanGetExecInfo) +TEST_F(SyclExecutor, CanGetExecInfo) { - dpcpp = gko::DpcppExecutor::create(0, ref); + sycl = gko::SyclExecutor::create(0, ref); - ASSERT_TRUE(dpcpp->get_num_computing_units() > 0); - ASSERT_TRUE(dpcpp->get_subgroup_sizes().size() > 0); - ASSERT_TRUE(dpcpp->get_max_workitem_sizes().size() > 0); - ASSERT_TRUE(dpcpp->get_max_workgroup_size() > 0); - ASSERT_TRUE(dpcpp->get_max_subgroup_size() > 0); + ASSERT_TRUE(sycl->get_num_computing_units() > 0); + ASSERT_TRUE(sycl->get_subgroup_sizes().size() > 0); + ASSERT_TRUE(sycl->get_max_workitem_sizes().size() > 0); + ASSERT_TRUE(sycl->get_max_workgroup_size() > 0); + ASSERT_TRUE(sycl->get_max_subgroup_size() > 0); } -TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeAll) +TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeAll) { auto count = sycl::device::get_devices(sycl::info::device_type::all).size(); - auto num_devices = gko::DpcppExecutor::get_num_devices("all"); + auto num_devices = gko::SyclExecutor::get_num_devices("all"); ASSERT_EQ(count, num_devices); } -TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeCPU) +TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeCPU) { auto count = sycl::device::get_devices(sycl::info::device_type::cpu).size(); - auto num_devices = gko::DpcppExecutor::get_num_devices("cpu"); + auto num_devices = gko::SyclExecutor::get_num_devices("cpu"); ASSERT_EQ(count, num_devices); } -TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeGPU) +TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeGPU) { auto count = sycl::device::get_devices(sycl::info::device_type::gpu).size(); - auto num_devices = gko::DpcppExecutor::get_num_devices("gpu"); + auto num_devices = gko::SyclExecutor::get_num_devices("gpu"); ASSERT_EQ(count, num_devices); } -TEST_F(DpcppExecutor, KnowsNumberOfDevicesOfTypeAccelerator) 
+TEST_F(SyclExecutor, KnowsNumberOfDevicesOfTypeAccelerator) { auto count = sycl::device::get_devices(sycl::info::device_type::accelerator).size(); - auto num_devices = gko::DpcppExecutor::get_num_devices("accelerator"); + auto num_devices = gko::SyclExecutor::get_num_devices("accelerator"); ASSERT_EQ(count, num_devices); } -TEST_F(DpcppExecutor, AllocatesAndFreesMemory) +TEST_F(SyclExecutor, AllocatesAndFreesMemory) { int* ptr = nullptr; - ASSERT_NO_THROW(ptr = dpcpp->alloc(2)); - ASSERT_NO_THROW(dpcpp->free(ptr)); + ASSERT_NO_THROW(ptr = sycl->alloc(2)); + ASSERT_NO_THROW(sycl->free(ptr)); } -TEST_F(DpcppExecutor, FailsWhenOverallocating) +TEST_F(SyclExecutor, FailsWhenOverallocating) { const gko::size_type num_elems = 1ll << 50; // 4PB of integers int* ptr = nullptr; ASSERT_THROW( { - ptr = dpcpp->alloc(num_elems); - dpcpp->synchronize(); + ptr = sycl->alloc(num_elems); + sycl->synchronize(); }, gko::AllocationError); - dpcpp->free(ptr); + sycl->free(ptr); } @@ -187,24 +187,24 @@ void check_data(int* data, bool* result) } } -TEST_F(DpcppExecutor, CopiesDataToCPU) +TEST_F(SyclExecutor, CopiesDataToCPU) { int orig[] = {3, 8}; - auto* copy = dpcpp->alloc(2); + auto* copy = sycl->alloc(2); gko::array is_set(ref, 1); - dpcpp->copy_from(ref, 2, orig, copy); + sycl->copy_from(ref, 2, orig, copy); - is_set.set_executor(dpcpp); - ASSERT_NO_THROW(dpcpp->synchronize()); - ASSERT_NO_THROW(dpcpp->get_queue()->submit([&](sycl::handler& cgh) { + is_set.set_executor(sycl); + ASSERT_NO_THROW(sycl->synchronize()); + ASSERT_NO_THROW(sycl->get_queue()->submit([&](sycl::handler& cgh) { auto* is_set_ptr = is_set.get_data(); cgh.single_task([=]() { check_data(copy, is_set_ptr); }); })); is_set.set_executor(ref); ASSERT_EQ(*is_set.get_data(), true); - ASSERT_NO_THROW(dpcpp->synchronize()); - dpcpp->free(copy); + ASSERT_NO_THROW(sycl->synchronize()); + sycl->free(copy); } void init_data(int* data) @@ -213,79 +213,78 @@ void init_data(int* data) data[1] = 8; } -TEST_F(DpcppExecutor, 
CopiesDataFromCPU) +TEST_F(SyclExecutor, CopiesDataFromCPU) { int copy[2]; - auto orig = dpcpp->alloc(2); - dpcpp->get_queue()->submit([&](sycl::handler& cgh) { + auto orig = sycl->alloc(2); + sycl->get_queue()->submit([&](sycl::handler& cgh) { cgh.single_task([=]() { init_data(orig); }); }); - ref->copy_from(dpcpp, 2, orig, copy); + ref->copy_from(sycl, 2, orig, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); - dpcpp->free(orig); + sycl->free(orig); } -TEST_F(DpcppExecutor, CopiesDataFromDpcppToDpcpp) +TEST_F(SyclExecutor, CopiesDataFromSyclToSycl) { - if (dpcpp2 == nullptr) { + if (sycl2 == nullptr) { GTEST_SKIP(); } int copy[2]; gko::array is_set(ref, 1); - auto orig = dpcpp->alloc(2); - dpcpp->get_queue()->submit([&](sycl::handler& cgh) { + auto orig = sycl->alloc(2); + sycl->get_queue()->submit([&](sycl::handler& cgh) { cgh.single_task([=]() { init_data(orig); }); }); - auto copy_dpcpp2 = dpcpp2->alloc(2); - dpcpp2->copy_from(dpcpp, 2, orig, copy_dpcpp2); + auto copy_sycl2 = sycl2->alloc(2); + sycl2->copy_from(sycl, 2, orig, copy_sycl2); // Check that the data is really on GPU - is_set.set_executor(dpcpp2); - ASSERT_NO_THROW(dpcpp2->get_queue()->submit([&](sycl::handler& cgh) { + is_set.set_executor(sycl2); + ASSERT_NO_THROW(sycl2->get_queue()->submit([&](sycl::handler& cgh) { auto* is_set_ptr = is_set.get_data(); - cgh.single_task([=]() { check_data(copy_dpcpp2, is_set_ptr); }); + cgh.single_task([=]() { check_data(copy_sycl2, is_set_ptr); }); })); is_set.set_executor(ref); ASSERT_EQ(*is_set.get_data(), true); // Put the results on OpenMP and run CPU side assertions - ref->copy_from(dpcpp2, 2, copy_dpcpp2, copy); + ref->copy_from(sycl2, 2, copy_sycl2, copy); EXPECT_EQ(3, copy[0]); ASSERT_EQ(8, copy[1]); - dpcpp2->free(copy_dpcpp2); - dpcpp->free(orig); + sycl2->free(copy_sycl2); + sycl->free(orig); } -TEST_F(DpcppExecutor, Synchronizes) +TEST_F(SyclExecutor, Synchronizes) { // Todo design a proper unit test once we support streams - 
ASSERT_NO_THROW(dpcpp->synchronize()); + ASSERT_NO_THROW(sycl->synchronize()); } -TEST_F(DpcppExecutor, FreeAfterKernel) +TEST_F(SyclExecutor, FreeAfterKernel) { size_t length = 10000; - auto dpcpp = - gko::DpcppExecutor::create(0, gko::ReferenceExecutor::create()); + auto sycl = gko::SyclExecutor::create(0, gko::ReferenceExecutor::create()); { - gko::array x(dpcpp, length); - gko::array y(dpcpp, length); + gko::array x(sycl, length); + gko::array y(sycl, length); auto x_val = x.get_data(); auto y_val = y.get_data(); - dpcpp->get_queue()->submit([&](sycl::handler& cgh) { + sycl->get_queue()->submit([&](sycl::handler& cgh) { cgh.parallel_for(sycl::range<1>{length}, [=](sycl::id<1> i) { y_val[i] += x_val[i]; }); }); } // to ensure everything on queue is finished. - dpcpp->synchronize(); + sycl->synchronize(); } diff --git a/dpcpp/test/base/kernel_launch.dp.cpp b/dpcpp/test/base/kernel_launch.dp.cpp index 4bf383eeed3..263eea7c4b3 100644 --- a/dpcpp/test/base/kernel_launch.dp.cpp +++ b/dpcpp/test/base/kernel_launch.dp.cpp @@ -97,9 +97,9 @@ class KernelLaunch : public ::testing::Test { using Mtx = gko::matrix::Dense; KernelLaunch() - : exec(gko::DpcppExecutor::create( + : exec(gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create(), - gko::DpcppExecutor::get_num_devices("gpu") > 0 ? "gpu" : "cpu")), + gko::SyclExecutor::get_num_devices("gpu") > 0 ? 
"gpu" : "cpu")), zero_array(exec->get_master(), 16), iota_array(exec->get_master(), 16), iota_transp_array(exec->get_master(), 16), @@ -123,7 +123,7 @@ class KernelLaunch : public ::testing::Test { iota_transp_array.set_executor(exec); } - std::shared_ptr exec; + std::shared_ptr exec; gko::array zero_array; gko::array iota_array; gko::array iota_transp_array; @@ -136,7 +136,7 @@ class KernelLaunch : public ::testing::Test { TEST_F(KernelLaunch, Runs1D) { - gko::kernels::dpcpp::run_kernel( + gko::kernels::sycl::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto dummy) { static_assert(is_same::value, "index"); @@ -152,7 +152,7 @@ TEST_F(KernelLaunch, Runs1D) TEST_F(KernelLaunch, Runs1DArray) { - gko::kernels::dpcpp::run_kernel( + gko::kernels::sycl::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto d_ptr, auto dummy) { static_assert(is_same::value, "index"); @@ -174,7 +174,7 @@ TEST_F(KernelLaunch, Runs1DArray) TEST_F(KernelLaunch, Runs1DDense) { - gko::kernels::dpcpp::run_kernel( + gko::kernels::sycl::run_kernel( exec, [] GKO_KERNEL(auto i, auto d, auto d2, auto d_ptr, auto dummy) { static_assert(is_same::value, "index"); @@ -207,7 +207,7 @@ TEST_F(KernelLaunch, Runs1DDense) TEST_F(KernelLaunch, Runs2D) { - gko::kernels::dpcpp::run_kernel( + gko::kernels::sycl::run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto dummy) { static_assert(is_same::value, "index"); @@ -224,7 +224,7 @@ TEST_F(KernelLaunch, Runs2D) TEST_F(KernelLaunch, Runs2DArray) { - gko::kernels::dpcpp::run_kernel( + gko::kernels::sycl::run_kernel( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto d_ptr, auto dummy) { static_assert(is_same::value, "index"); @@ -246,7 +246,7 @@ TEST_F(KernelLaunch, Runs2DArray) TEST_F(KernelLaunch, Runs2DDense) { - gko::kernels::dpcpp::run_kernel_solver( + gko::kernels::sycl::run_kernel_solver( exec, [] GKO_KERNEL(auto i, auto j, auto d, auto d2, auto d_ptr, auto d3, auto d4, auto d2_ptr, auto d3_ptr, auto dummy) { @@ -283,8 +283,8 @@ TEST_F(KernelLaunch, 
Runs2DDense) dim<2>{4, 4}, zero_dense->get_stride(), zero_dense2.get(), static_cast(zero_dense2.get()), zero_dense2->get_const_values(), - gko::kernels::dpcpp::default_stride(zero_dense.get()), - gko::kernels::dpcpp::row_vector(vec_dense.get()), + gko::kernels::sycl::default_stride(zero_dense.get()), + gko::kernels::sycl::row_vector(vec_dense.get()), zero_dense->get_values(), vec_dense->get_values(), move_only_val); GKO_ASSERT_MTX_NEAR(zero_dense2, iota_dense, 0.0); @@ -295,7 +295,7 @@ TEST_F(KernelLaunch, Reduction1D) { gko::array output{exec, 1}; - gko::kernels::dpcpp::run_kernel_reduction( + gko::kernels::sycl::run_kernel_reduction( exec, [] GKO_KERNEL(auto i, auto a, auto dummy) { static_assert(is_same::value, "index"); @@ -317,7 +317,7 @@ TEST_F(KernelLaunch, Reduction1D) // 2 * sum i=0...99999 (i+1) EXPECT_EQ(exec->copy_val_to_host(output.get_const_data()), 10000100000LL); - gko::kernels::dpcpp::run_kernel_reduction( + gko::kernels::sycl::run_kernel_reduction( exec, [] GKO_KERNEL(auto i, auto a, auto dummy) { static_assert(is_same::value, "index"); @@ -345,7 +345,7 @@ TEST_F(KernelLaunch, Reduction2D) { gko::array output{exec, 1}; - gko::kernels::dpcpp::run_kernel_reduction( + gko::kernels::sycl::run_kernel_reduction( exec, [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { static_assert(is_same::value, "index"); @@ -369,7 +369,7 @@ TEST_F(KernelLaunch, Reduction2D) // 4 * sum i=0...999 sum j=0...99 of (i+1)*(j+1) EXPECT_EQ(exec->copy_val_to_host(output.get_const_data()), 10110100000LL); - gko::kernels::dpcpp::run_kernel_reduction( + gko::kernels::sycl::run_kernel_reduction( exec, [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { static_assert(is_same::value, "index"); @@ -411,7 +411,7 @@ TEST_F(KernelLaunch, ReductionRow2D) static_cast(num_cols) * (num_cols + 1) * (i + 1); } - gko::kernels::dpcpp::run_kernel_row_reduction( + gko::kernels::sycl::run_kernel_row_reduction( exec, [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { 
static_assert(is_same::value, "index"); @@ -451,7 +451,7 @@ TEST_F(KernelLaunch, ReductionCol2D) static_cast(num_rows) * (num_rows + 1) * (i + 1); } - gko::kernels::dpcpp::run_kernel_col_reduction( + gko::kernels::sycl::run_kernel_col_reduction( exec, [] GKO_KERNEL(auto i, auto j, auto a, auto dummy) { static_assert(is_same::value, "index"); diff --git a/dpcpp/test/components/cooperative_groups.dp.cpp b/dpcpp/test/components/cooperative_groups.dp.cpp index dd3a21ac3cd..d48d3a0d6a9 100644 --- a/dpcpp/test/components/cooperative_groups.dp.cpp +++ b/dpcpp/test/components/cooperative_groups.dp.cpp @@ -60,7 +60,7 @@ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. namespace { -using namespace gko::kernels::dpcpp; +using namespace gko::kernels::sycl; constexpr auto default_config_list = dcfg_1sg_list_t(); @@ -68,11 +68,11 @@ class CooperativeGroups : public testing::TestWithParam { protected: CooperativeGroups() : ref(gko::ReferenceExecutor::create()), - dpcpp(gko::DpcppExecutor::create(0, ref)), + sycl(gko::SyclExecutor::create(0, ref)), test_case(3), max_num(test_case * 64), result(ref, max_num), - dresult(dpcpp) + dresult(sycl) { for (int i = 0; i < max_num; i++) { result.get_data()[i] = false; @@ -84,15 +84,14 @@ class CooperativeGroups : public testing::TestWithParam { void test_all_subgroup(Kernel kernel) { auto subgroup_size = GetParam(); - auto queue = dpcpp->get_queue(); - if (gko::kernels::dpcpp::validate(queue, subgroup_size, - subgroup_size)) { + auto queue = sycl->get_queue(); + if (gko::kernels::sycl::validate(queue, subgroup_size, subgroup_size)) { const auto cfg = DCFG_1D::encode(subgroup_size, subgroup_size); for (int i = 0; i < test_case * subgroup_size; i++) { result.get_data()[i] = true; } - kernel(cfg, 1, subgroup_size, 0, dpcpp->get_queue(), + kernel(cfg, 1, subgroup_size, 0, sycl->get_queue(), dresult.get_data()); // each subgreoup size segment for one test @@ -106,7 +105,7 @@ class CooperativeGroups : public 
testing::TestWithParam { int test_case; int max_num; std::shared_ptr ref; - std::shared_ptr dpcpp; + std::shared_ptr sycl; gko::array result; gko::array dresult; }; diff --git a/dpcpp/test/matrix/fbcsr_kernels.cpp b/dpcpp/test/matrix/fbcsr_kernels.cpp index c6d25bd999f..1182bc92586 100644 --- a/dpcpp/test/matrix/fbcsr_kernels.cpp +++ b/dpcpp/test/matrix/fbcsr_kernels.cpp @@ -54,20 +54,20 @@ class Fbcsr : public ::testing::Test { void SetUp() { - ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ASSERT_GT(gko::SyclExecutor::get_num_devices("all"), 0); ref = gko::ReferenceExecutor::create(); - dpcpp = gko::DpcppExecutor::create(0, ref); + sycl = gko::SyclExecutor::create(0, ref); } void TearDown() { - if (dpcpp != nullptr) { - ASSERT_NO_THROW(dpcpp->synchronize()); + if (sycl != nullptr) { + ASSERT_NO_THROW(sycl->synchronize()); } } std::shared_ptr ref; - std::shared_ptr dpcpp; + std::shared_ptr sycl; std::unique_ptr mtx; }; @@ -80,14 +80,14 @@ TEST_F(Fbcsr, CanWriteFromMatrixOnDevice) using MatData = gko::matrix_data; gko::testing::FbcsrSample sample(ref); auto refmat = sample.generate_fbcsr(); - auto dpcppmat = gko::clone(dpcpp, refmat); + auto syclmat = gko::clone(sycl, refmat); MatData refdata; - MatData dpcppdata; + MatData sycldata; refmat->write(refdata); - dpcppmat->write(dpcppdata); + syclmat->write(sycldata); - ASSERT_TRUE(refdata.nonzeros == dpcppdata.nonzeros); + ASSERT_TRUE(refdata.nonzeros == sycldata.nonzeros); } diff --git a/dpcpp/test/preconditioner/jacobi_kernels.cpp b/dpcpp/test/preconditioner/jacobi_kernels.cpp index aae15245357..73013a6c0f7 100644 --- a/dpcpp/test/preconditioner/jacobi_kernels.cpp +++ b/dpcpp/test/preconditioner/jacobi_kernels.cpp @@ -71,15 +71,15 @@ class Jacobi : public ::testing::Test { void SetUp() { - ASSERT_GT(gko::DpcppExecutor::get_num_devices("all"), 0); + ASSERT_GT(gko::SyclExecutor::get_num_devices("all"), 0); ref = gko::ReferenceExecutor::create(); - dpcpp = gko::DpcppExecutor::create(0, ref); + sycl = 
gko::SyclExecutor::create(0, ref); } void TearDown() { - if (dpcpp != nullptr) { - ASSERT_NO_THROW(dpcpp->synchronize()); + if (sycl != nullptr) { + ASSERT_NO_THROW(sycl->synchronize()); } } @@ -121,7 +121,7 @@ class Jacobi : public ::testing::Test { .with_max_block_size(max_block_size) .with_block_pointers(block_ptrs) .with_skip_sorting(skip_sorting) - .on(dpcpp); + .on(sycl); } else { bj_factory = Bj::build() .with_max_block_size(max_block_size) @@ -136,16 +136,16 @@ class Jacobi : public ::testing::Test { .with_storage_optimization(block_prec) .with_accuracy(accuracy) .with_skip_sorting(skip_sorting) - .on(dpcpp); + .on(sycl); } b = gko::test::generate_random_matrix( dim, num_rhs, std::uniform_int_distribution<>(num_rhs, num_rhs), std::normal_distribution(0.0, 1.0), engine, ref); - d_b = gko::clone(dpcpp, b); + d_b = gko::clone(sycl, b); x = gko::test::generate_random_matrix( dim, num_rhs, std::uniform_int_distribution<>(num_rhs, num_rhs), std::normal_distribution(0.0, 1.0), engine, ref); - d_x = gko::clone(dpcpp, x); + d_x = gko::clone(sycl, x); } const gko::precision_reduction dp{}; @@ -157,7 +157,7 @@ class Jacobi : public ::testing::Test { const gko::precision_reduction ap{gko::precision_reduction::autodetect()}; std::shared_ptr ref; - std::shared_ptr dpcpp; + std::shared_ptr sycl; std::shared_ptr mtx; std::unique_ptr x; std::unique_ptr b; @@ -169,7 +169,7 @@ class Jacobi : public ::testing::Test { }; -TEST_F(Jacobi, DpcppFindNaturalBlocksEquivalentToRef) +TEST_F(Jacobi, SyclFindNaturalBlocksEquivalentToRef) { /* example matrix: 1 1 @@ -189,14 +189,14 @@ TEST_F(Jacobi, DpcppFindNaturalBlocksEquivalentToRef) {3, 2, 1.0}}}); auto bj = Bj::build().with_max_block_size(3u).on(ref)->generate(mtx); - auto d_bj = Bj::build().with_max_block_size(3u).on(dpcpp)->generate(mtx); + auto d_bj = Bj::build().with_max_block_size(3u).on(sycl)->generate(mtx); ASSERT_EQ(d_bj->get_num_blocks(), bj->get_num_blocks()); // TODO: actually check if the results are the same } 
-TEST_F(Jacobi, DpcppExecutesSupervariableAgglomerationEquivalentToRef) +TEST_F(Jacobi, SyclExecutesSupervariableAgglomerationEquivalentToRef) { /* example matrix: 1 1 @@ -218,14 +218,14 @@ TEST_F(Jacobi, DpcppExecutesSupervariableAgglomerationEquivalentToRef) {4, 4, 1.0}}}); auto bj = Bj::build().with_max_block_size(3u).on(ref)->generate(mtx); - auto d_bj = Bj::build().with_max_block_size(3u).on(dpcpp)->generate(mtx); + auto d_bj = Bj::build().with_max_block_size(3u).on(sycl)->generate(mtx); ASSERT_EQ(d_bj->get_num_blocks(), bj->get_num_blocks()); // TODO: actually check if the results are the same } -TEST_F(Jacobi, DpcppFindNaturalBlocksInLargeMatrixEquivalentToRef) +TEST_F(Jacobi, SyclFindNaturalBlocksInLargeMatrixEquivalentToRef) { /* example matrix: 1 1 @@ -245,7 +245,7 @@ TEST_F(Jacobi, DpcppFindNaturalBlocksInLargeMatrixEquivalentToRef) {1.0, 0.0, 1.0, 0.0, 0.0, 0.0}})); auto bj = Bj::build().with_max_block_size(3u).on(ref)->generate(mtx); - auto d_bj = Bj::build().with_max_block_size(3u).on(dpcpp)->generate(mtx); + auto d_bj = Bj::build().with_max_block_size(3u).on(sycl)->generate(mtx); ASSERT_EQ(d_bj->get_num_blocks(), bj->get_num_blocks()); // TODO: actually check if the results are the same @@ -253,7 +253,7 @@ TEST_F(Jacobi, DpcppFindNaturalBlocksInLargeMatrixEquivalentToRef) TEST_F(Jacobi, - DpcppExecutesSupervariableAgglomerationInLargeMatrixEquivalentToRef) + SyclExecutesSupervariableAgglomerationInLargeMatrixEquivalentToRef) { /* example matrix: 1 1 @@ -271,7 +271,7 @@ TEST_F(Jacobi, {0.0, 0.0, 0.0, 0.0, 1.0}})); auto bj = Bj::build().with_max_block_size(3u).on(ref)->generate(mtx); - auto d_bj = Bj::build().with_max_block_size(3u).on(dpcpp)->generate(mtx); + auto d_bj = Bj::build().with_max_block_size(3u).on(sycl)->generate(mtx); ASSERT_EQ(d_bj->get_num_blocks(), bj->get_num_blocks()); // TODO: actually check if the results are the same @@ -279,7 +279,7 @@ TEST_F(Jacobi, TEST_F(Jacobi, - 
DpcppExecutesSupervarAgglomerationEquivalentToRefFor150NonzerowsPerRow) + SyclExecutesSupervarAgglomerationEquivalentToRefFor150NonzerowsPerRow) { /* example matrix duplicated 50 times: 1 1 1 @@ -299,14 +299,14 @@ TEST_F(Jacobi, auto bj = Bj::build().with_max_block_size(3u).on(ref)->generate(mtx); - auto d_bj = Bj::build().with_max_block_size(3u).on(dpcpp)->generate(mtx); + auto d_bj = Bj::build().with_max_block_size(3u).on(sycl)->generate(mtx); ASSERT_EQ(d_bj->get_num_blocks(), bj->get_num_blocks()); // TODO: actually check if the results are the same } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithBlockSize32Sorted) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithBlockSize32Sorted) { initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 110); @@ -318,7 +318,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithBlockSize32Sorted) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithBlockSize32Unsorted) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithBlockSize32Unsorted) { std::default_random_engine engine(42); initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 110, 1, 0.1, false); @@ -332,7 +332,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithBlockSize32Unsorted) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithDifferentBlockSize) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithDifferentBlockSize) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); @@ -345,7 +345,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithDifferentBlockSize) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithMPW) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithMPW) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); @@ -358,7 +358,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithMPW) } -TEST_F(Jacobi, DpcppTransposedPreconditionerEquivalentToRefWithMPW) +TEST_F(Jacobi, SyclTransposedPreconditionerEquivalentToRefWithMPW) { initialize_data({0, 11, 
24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); @@ -372,7 +372,7 @@ TEST_F(Jacobi, DpcppTransposedPreconditionerEquivalentToRefWithMPW) } -TEST_F(Jacobi, DpcppConjTransposedPreconditionerEquivalentToRefWithMPW) +TEST_F(Jacobi, SyclConjTransposedPreconditionerEquivalentToRefWithMPW) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); @@ -386,7 +386,7 @@ TEST_F(Jacobi, DpcppConjTransposedPreconditionerEquivalentToRefWithMPW) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithBlockSize32) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithBlockSize32) { initialize_data({0, 32, 64, 96, 128}, {}, {}, 32, 100, 111); auto bj = bj_factory->generate(mtx); @@ -399,7 +399,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithBlockSize32) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithDifferentBlockSize) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithDifferentBlockSize) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); @@ -413,7 +413,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithDifferentBlockSize) } -TEST_F(Jacobi, DpcppApplyEquivalentToRef) +TEST_F(Jacobi, SyclApplyEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); @@ -427,7 +427,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRef) } -TEST_F(Jacobi, DpcppScalarApplyEquivalentToRef) +TEST_F(Jacobi, SyclScalarApplyEquivalentToRef) { gko::size_type dim = 313; std::default_random_engine engine(42); @@ -445,14 +445,14 @@ TEST_F(Jacobi, DpcppScalarApplyEquivalentToRef) std::normal_distribution(0.0, 1.0), engine, ref)); auto sx = Vec::create(ref, sb->get_size()); - auto d_smtx = gko::share(Mtx::create(dpcpp)); - auto d_sb = gko::share(Vec::create(dpcpp)); - auto d_sx = gko::share(Vec::create(dpcpp, sb->get_size())); + auto d_smtx = gko::share(Mtx::create(sycl)); + auto d_sb = gko::share(Vec::create(sycl)); + auto d_sx = gko::share(Vec::create(sycl, sb->get_size())); d_smtx->copy_from(smtx); d_sb->copy_from(sb); auto sj = 
Bj::build().with_max_block_size(1u).on(ref)->generate(smtx); - auto d_sj = Bj::build().with_max_block_size(1u).on(dpcpp)->generate(d_smtx); + auto d_sj = Bj::build().with_max_block_size(1u).on(sycl)->generate(d_smtx); sj->apply(sb, sx); d_sj->apply(d_sb, d_sx); @@ -461,14 +461,14 @@ TEST_F(Jacobi, DpcppScalarApplyEquivalentToRef) } -TEST_F(Jacobi, DpcppLinearCombinationApplyEquivalentToRef) +TEST_F(Jacobi, SyclLinearCombinationApplyEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99); auto alpha = gko::initialize({2.0}, ref); - auto d_alpha = gko::initialize({2.0}, dpcpp); + auto d_alpha = gko::initialize({2.0}, sycl); auto beta = gko::initialize({-1.0}, ref); - auto d_beta = gko::initialize({-1.0}, dpcpp); + auto d_beta = gko::initialize({-1.0}, sycl); auto bj = bj_factory->generate(mtx); auto d_bj = d_bj_factory->generate(mtx); @@ -479,7 +479,7 @@ TEST_F(Jacobi, DpcppLinearCombinationApplyEquivalentToRef) } -TEST_F(Jacobi, DpcppScalarLinearCombinationApplyEquivalentToRef) +TEST_F(Jacobi, SyclScalarLinearCombinationApplyEquivalentToRef) { gko::size_type dim = 313; std::default_random_engine engine(42); @@ -501,16 +501,16 @@ TEST_F(Jacobi, DpcppScalarLinearCombinationApplyEquivalentToRef) std::normal_distribution(0.0, 1.0), engine, ref, gko::dim<2>(dim, 3), 4)); - auto d_smtx = gko::share(gko::clone(dpcpp, smtx)); - auto d_sb = gko::share(gko::clone(dpcpp, sb)); - auto d_sx = gko::share(gko::clone(dpcpp, sx)); + auto d_smtx = gko::share(gko::clone(sycl, smtx)); + auto d_sb = gko::share(gko::clone(sycl, sb)); + auto d_sx = gko::share(gko::clone(sycl, sx)); auto alpha = gko::initialize({2.0}, ref); - auto d_alpha = gko::initialize({2.0}, dpcpp); + auto d_alpha = gko::initialize({2.0}, sycl); auto beta = gko::initialize({-1.0}, ref); - auto d_beta = gko::initialize({-1.0}, dpcpp); + auto d_beta = gko::initialize({-1.0}, sycl); auto sj = Bj::build().with_max_block_size(1u).on(ref)->generate(smtx); - auto d_sj = 
Bj::build().with_max_block_size(1u).on(dpcpp)->generate(d_smtx); + auto d_sj = Bj::build().with_max_block_size(1u).on(sycl)->generate(d_smtx); sj->apply(alpha, sb, beta, sx); d_sj->apply(d_alpha, d_sb, d_beta, d_sx); @@ -519,7 +519,7 @@ TEST_F(Jacobi, DpcppScalarLinearCombinationApplyEquivalentToRef) } -TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRef) +TEST_F(Jacobi, SyclApplyToMultipleVectorsEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99, 5); @@ -533,14 +533,14 @@ TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRef) } -TEST_F(Jacobi, DpcppLinearCombinationApplyToMultipleVectorsEquivalentToRef) +TEST_F(Jacobi, SyclLinearCombinationApplyToMultipleVectorsEquivalentToRef) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {}, {}, 32, 97, 99, 5); auto alpha = gko::initialize({2.0}, ref); - auto d_alpha = gko::initialize({2.0}, dpcpp); + auto d_alpha = gko::initialize({2.0}, sycl); auto beta = gko::initialize({-1.0}, ref); - auto d_beta = gko::initialize({-1.0}, dpcpp); + auto d_beta = gko::initialize({-1.0}, sycl); auto bj = bj_factory->generate(mtx); auto d_bj = d_bj_factory->generate(mtx); @@ -590,7 +590,7 @@ TEST_F(Jacobi, SelectsTheSamePrecisionsAsRef) TEST_F(Jacobi, AvoidsPrecisionsThatOverflow) { - auto mtx = gko::matrix::Csr::create(dpcpp); + auto mtx = gko::matrix::Csr::create(sycl); // clang-format off mtx->read(mtx_data::diag({ // perfectly conditioned block, small value difference, @@ -607,13 +607,13 @@ TEST_F(Jacobi, AvoidsPrecisionsThatOverflow) auto bj = Bj::build() .with_max_block_size(32u) - .with_block_pointers(gko::array(dpcpp, {0, 2, 4})) + .with_block_pointers(gko::array(sycl, {0, 2, 4})) .with_storage_optimization(gko::precision_reduction::autodetect()) .with_accuracy(value_type{0.1}) - .on(dpcpp) + .on(sycl) ->generate(give(mtx)); - // dpcpp considers all block separately + // sycl considers all block separately auto h_bj = clone(ref, bj); auto prec = 
h_bj->get_parameters().storage_optimization.block_wise.get_const_data(); @@ -627,7 +627,7 @@ TEST_F(Jacobi, AvoidsPrecisionsThatOverflow) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithFullPrecision) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithFullPrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -640,7 +640,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithFullPrecision) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithReducedPrecision) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithReducedPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {sp, sp, sp, sp, sp, sp, sp, sp, sp, sp, sp}, {}, 32, 97, @@ -653,7 +653,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithReducedPrecision) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithCustomReducedPrecision) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithCustomReducedPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {tp, tp, tp, tp, tp, tp, tp, tp, tp, tp, tp}, {}, 32, 97, @@ -666,7 +666,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithCustomReducedPrecision) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithQuarteredPrecision) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithQuarteredPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {hp, hp, hp, hp, hp, hp, hp, hp, hp, hp, hp}, {}, 32, 97, @@ -679,7 +679,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithQuarteredPrecision) } -TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithCustomQuarteredPrecision) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithCustomQuarteredPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {qp, qp, qp, qp, qp, qp, qp, qp, qp, qp, qp}, {}, 32, 97, @@ -692,7 +692,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithCustomQuarteredPrecision) } -TEST_F(Jacobi, 
DpcppPreconditionerEquivalentToRefWithAdaptivePrecision) +TEST_F(Jacobi, SyclPreconditionerEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -706,8 +706,7 @@ TEST_F(Jacobi, DpcppPreconditionerEquivalentToRefWithAdaptivePrecision) } -TEST_F(Jacobi, - DpcppTransposedPreconditionerEquivalentToRefWithAdaptivePrecision) +TEST_F(Jacobi, SyclTransposedPreconditionerEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -724,7 +723,7 @@ TEST_F(Jacobi, TEST_F(Jacobi, - DpcppConjTransposedPreconditionerEquivalentToRefWithAdaptivePrecision) + SyclConjTransposedPreconditionerEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -740,7 +739,7 @@ TEST_F(Jacobi, } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithFullPrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithFullPrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -756,7 +755,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithFullPrecision) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithReducedPrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithReducedPrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -772,7 +771,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithReducedPrecision) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithCustomReducedPrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithCustomReducedPrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -788,7 +787,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithCustomReducedPrecision) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithQuarteredPrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithQuarteredPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {hp, hp, hp, hp, hp, hp, hp, hp, hp, 
hp, hp}, {}, 32, 97, @@ -803,7 +802,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithQuarteredPrecision) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithCustomReducedAndReducedPrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithCustomReducedAndReducedPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {up, up, up, up, up, up, up, up, up, up, up}, {}, 32, 97, @@ -818,7 +817,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithCustomReducedAndReducedPrecision) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithCustomQuarteredPrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithCustomQuarteredPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {qp, qp, qp, qp, qp, qp, qp, qp, qp, qp, qp}, {}, 32, 97, @@ -833,7 +832,7 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithCustomQuarteredPrecision) } -TEST_F(Jacobi, DpcppApplyEquivalentToRefWithAdaptivePrecision) +TEST_F(Jacobi, SyclApplyEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -849,16 +848,16 @@ TEST_F(Jacobi, DpcppApplyEquivalentToRefWithAdaptivePrecision) } -TEST_F(Jacobi, DpcppLinearCombinationApplyEquivalentToRefWithAdaptivePrecision) +TEST_F(Jacobi, SyclLinearCombinationApplyEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {sp, dp, dp, sp, sp, sp, dp, dp, sp, dp, sp}, {}, 32, 97, 99); auto alpha = gko::initialize({2.0}, ref); - auto d_alpha = gko::initialize({2.0}, dpcpp); + auto d_alpha = gko::initialize({2.0}, sycl); auto beta = gko::initialize({-1.0}, ref); - auto d_beta = gko::initialize({-1.0}, dpcpp); + auto d_beta = gko::initialize({-1.0}, sycl); auto bj = bj_factory->generate(mtx); auto d_bj = d_bj_factory->generate(mtx); @@ -869,7 +868,7 @@ TEST_F(Jacobi, DpcppLinearCombinationApplyEquivalentToRefWithAdaptivePrecision) } -TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRefWithFullPrecision) +TEST_F(Jacobi, 
SyclApplyToMultipleVectorsEquivalentToRefWithFullPrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -885,7 +884,7 @@ TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRefWithFullPrecision) } -TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRefWithReducedPrecision) +TEST_F(Jacobi, SyclApplyToMultipleVectorsEquivalentToRefWithReducedPrecision) { initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {sp, sp, sp, sp, sp, sp, sp, sp, sp, sp, sp}, {}, 32, 97, @@ -900,7 +899,7 @@ TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRefWithReducedPrecision) } -TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRefWithAdaptivePrecision) +TEST_F(Jacobi, SyclApplyToMultipleVectorsEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, @@ -918,16 +917,16 @@ TEST_F(Jacobi, DpcppApplyToMultipleVectorsEquivalentToRefWithAdaptivePrecision) TEST_F( Jacobi, - DpcppLinearCombinationApplyToMultipleVectorsEquivalentToRefWithAdaptivePrecision) + SyclLinearCombinationApplyToMultipleVectorsEquivalentToRefWithAdaptivePrecision) { SKIP_IF_SINGLE_MODE; initialize_data({0, 11, 24, 33, 45, 55, 67, 70, 80, 92, 100}, {sp, dp, dp, sp, sp, sp, dp, dp, sp, dp, sp}, {}, 32, 97, 99, 5); auto alpha = gko::initialize({2.0}, ref); - auto d_alpha = gko::initialize({2.0}, dpcpp); + auto d_alpha = gko::initialize({2.0}, sycl); auto beta = gko::initialize({-1.0}, ref); - auto d_beta = gko::initialize({-1.0}, dpcpp); + auto d_beta = gko::initialize({-1.0}, sycl); auto bj = bj_factory->generate(mtx); auto d_bj = d_bj_factory->generate(mtx); diff --git a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp index 79b197aacc8..4c616e080c6 100644 --- a/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp +++ 
b/examples/adaptiveprecision-blockjacobi/adaptiveprecision-blockjacobi.cpp @@ -75,10 +75,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/cb-gmres/cb-gmres.cpp b/examples/cb-gmres/cb-gmres.cpp index b096e48c71a..46522346e15 100644 --- a/examples/cb-gmres/cb-gmres.cpp +++ b/examples/cb-gmres/cb-gmres.cpp @@ -115,10 +115,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/custom-logger/custom-logger.cpp b/examples/custom-logger/custom-logger.cpp index 7e6cf531edd..fdbb64860e4 100644 --- a/examples/custom-logger/custom-logger.cpp +++ b/examples/custom-logger/custom-logger.cpp @@ -256,10 +256,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/custom-matrix-format/custom-matrix-format.cpp b/examples/custom-matrix-format/custom-matrix-format.cpp index 4610413fe9c..95ce6a81071 100644 --- a/examples/custom-matrix-format/custom-matrix-format.cpp +++ b/examples/custom-matrix-format/custom-matrix-format.cpp @@ -262,10 +262,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - 
{"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp index 800846cfbd9..976bc3f253f 100644 --- a/examples/custom-stopping-criterion/custom-stopping-criterion.cpp +++ b/examples/custom-stopping-criterion/custom-stopping-criterion.cpp @@ -165,10 +165,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/distributed-solver/distributed-solver.cpp b/examples/distributed-solver/distributed-solver.cpp index 123f93775f5..3e8e8e4cee4 100644 --- a/examples/distributed-solver/distributed-solver.cpp +++ b/examples/distributed-solver/distributed-solver.cpp @@ -129,18 +129,18 @@ int main(int argc, char* argv[]) return gko::HipExecutor::create( device_id, gko::ReferenceExecutor::create(), true); }}, - {"dpcpp", [](MPI_Comm comm) { + {"sycl", [](MPI_Comm comm) { int device_id = 0; - if (gko::DpcppExecutor::get_num_devices("gpu")) { + if (gko::SyclExecutor::get_num_devices("gpu")) { device_id = gko::experimental::mpi::map_rank_to_device_id( - comm, gko::DpcppExecutor::get_num_devices("gpu")); - } else if (gko::DpcppExecutor::get_num_devices("cpu")) { + comm, gko::SyclExecutor::get_num_devices("gpu")); + } else if (gko::SyclExecutor::get_num_devices("cpu")) { device_id = gko::experimental::mpi::map_rank_to_device_id( - comm, gko::DpcppExecutor::get_num_devices("cpu")); + comm, gko::SyclExecutor::get_num_devices("cpu")); } else { throw 
std::runtime_error("No suitable DPC++ devices"); } - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( device_id, gko::ReferenceExecutor::create()); }}}; diff --git a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp index 33946b7de44..f009430d793 100644 --- a/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp +++ b/examples/ilu-preconditioned-solver/ilu-preconditioned-solver.cpp @@ -75,10 +75,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/inverse-iteration/inverse-iteration.cpp b/examples/inverse-iteration/inverse-iteration.cpp index 460370b7e00..0889aa0c6a4 100644 --- a/examples/inverse-iteration/inverse-iteration.cpp +++ b/examples/inverse-iteration/inverse-iteration.cpp @@ -79,10 +79,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp index 407a083e548..de00342f1f7 100644 --- a/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp +++ b/examples/ir-ilu-preconditioned-solver/ir-ilu-preconditioned-solver.cpp @@ -78,10 +78,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return 
gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/iterative-refinement/iterative-refinement.cpp b/examples/iterative-refinement/iterative-refinement.cpp index 14384eaab52..62f70c46849 100644 --- a/examples/iterative-refinement/iterative-refinement.cpp +++ b/examples/iterative-refinement/iterative-refinement.cpp @@ -75,10 +75,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp index cef918983e9..0d225e82f3d 100644 --- a/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp +++ b/examples/mixed-multigrid-preconditioned-solver/mixed-multigrid-preconditioned-solver.cpp @@ -78,9 +78,9 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp index 4241a74cdf2..79e4d7f4b49 100644 --- a/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp +++ b/examples/mixed-multigrid-solver/mixed-multigrid-solver.cpp @@ -76,9 +76,9 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, 
gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/mixed-precision-ir/mixed-precision-ir.cpp b/examples/mixed-precision-ir/mixed-precision-ir.cpp index 0882d755cdc..653cbc0ac81 100644 --- a/examples/mixed-precision-ir/mixed-precision-ir.cpp +++ b/examples/mixed-precision-ir/mixed-precision-ir.cpp @@ -83,9 +83,9 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/mixed-spmv/mixed-spmv.cpp b/examples/mixed-spmv/mixed-spmv.cpp index 6b327c1c708..239e6afc494 100644 --- a/examples/mixed-spmv/mixed-spmv.cpp +++ b/examples/mixed-spmv/mixed-spmv.cpp @@ -177,10 +177,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp index f82a603d662..3b5c6716528 100644 --- a/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp +++ b/examples/multigrid-preconditioned-solver-customized/multigrid-preconditioned-solver-customized.cpp @@ -71,9 +71,9 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - 
{"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp index b31b7906902..7cd376f50ad 100644 --- a/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp +++ b/examples/multigrid-preconditioned-solver/multigrid-preconditioned-solver.cpp @@ -69,9 +69,9 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create( + return gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp index 05ee0503a5f..312603aa68a 100644 --- a/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp +++ b/examples/nine-pt-stencil-solver/nine-pt-stencil-solver.cpp @@ -237,10 +237,10 @@ void solve_system(const std::string& executor_string, [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/papi-logging/papi-logging.cpp b/examples/papi-logging/papi-logging.cpp index 1ae2ae9ec08..d12d90d89aa 100644 --- a/examples/papi-logging/papi-logging.cpp +++ b/examples/papi-logging/papi-logging.cpp @@ -158,10 +158,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return 
gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/par-ilu-convergence/par-ilu-convergence.cpp b/examples/par-ilu-convergence/par-ilu-convergence.cpp index 93e32422a7e..8ff1c3464c9 100644 --- a/examples/par-ilu-convergence/par-ilu-convergence.cpp +++ b/examples/par-ilu-convergence/par-ilu-convergence.cpp @@ -54,8 +54,8 @@ const std::map()>> [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", [] { - return gko::DpcppExecutor::create(0, gko::OmpExecutor::create()); + {"sycl", [] { + return gko::SyclExecutor::create(0, gko::OmpExecutor::create()); }}}; @@ -106,7 +106,7 @@ int main(int argc, char* argv[]) // print usage message if (argc < 2 || executors.find(argv[1]) == executors.end()) { std::cerr << "Usage: executable" - << " [] " + << " [] " "[] " "[] []\n"; return -1; diff --git a/examples/performance-debugging/performance-debugging.cpp b/examples/performance-debugging/performance-debugging.cpp index 5f036728924..d780ec2acb2 100644 --- a/examples/performance-debugging/performance-debugging.cpp +++ b/examples/performance-debugging/performance-debugging.cpp @@ -378,10 +378,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/poisson-solver/poisson-solver.cpp b/examples/poisson-solver/poisson-solver.cpp index e16f0b26968..05bc34b22ba 100644 --- a/examples/poisson-solver/poisson-solver.cpp +++ b/examples/poisson-solver/poisson-solver.cpp @@ -151,10 +151,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); 
}}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/preconditioned-solver/preconditioned-solver.cpp b/examples/preconditioned-solver/preconditioned-solver.cpp index b64b588c4ef..6d4ba41c1a1 100644 --- a/examples/preconditioned-solver/preconditioned-solver.cpp +++ b/examples/preconditioned-solver/preconditioned-solver.cpp @@ -76,10 +76,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/preconditioner-export/preconditioner-export.cpp b/examples/preconditioner-export/preconditioner-export.cpp index 81aeece1cb1..a2a6d6339ef 100644 --- a/examples/preconditioner-export/preconditioner-export.cpp +++ b/examples/preconditioner-export/preconditioner-export.cpp @@ -55,8 +55,8 @@ const std::map()>> return gko::HipExecutor::create( 0, gko::ReferenceExecutor::create()); }}, - {"dpcpp", [] { - return gko::DpcppExecutor::create( + {"sycl", [] { + return gko::SyclExecutor::create( 0, gko::ReferenceExecutor::create()); }}}; @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) // print usage message if (argc < 2 || executors.find(argv[1]) == executors.end()) { std::cerr << "Usage: executable" - << " [] " + << " [] " "[]\n"; std::cerr << "Jacobi parameters: [] [] " diff --git a/examples/simple-solver-logging/simple-solver-logging.cpp b/examples/simple-solver-logging/simple-solver-logging.cpp index 02318dd7784..ca6cd0d6ba5 100644 --- a/examples/simple-solver-logging/simple-solver-logging.cpp +++ b/examples/simple-solver-logging/simple-solver-logging.cpp @@ -92,10 
+92,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/simple-solver/simple-solver.cpp b/examples/simple-solver/simple-solver.cpp index 81dc9ee6d74..94f299457a5 100644 --- a/examples/simple-solver/simple-solver.cpp +++ b/examples/simple-solver/simple-solver.cpp @@ -96,10 +96,10 @@ int main(int argc, char* argv[]) [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp index 63adfaa5571..a125c8d34e1 100644 --- a/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp +++ b/examples/three-pt-stencil-solver/three-pt-stencil-solver.cpp @@ -172,10 +172,10 @@ void solve_system(const std::string& executor_string, [] { return gko::HipExecutor::create(0, gko::OmpExecutor::create()); }}, - {"dpcpp", + {"sycl", [] { - return gko::DpcppExecutor::create(0, - gko::OmpExecutor::create()); + return gko::SyclExecutor::create(0, + gko::OmpExecutor::create()); }}, {"reference", [] { return gko::ReferenceExecutor::create(); }}}; diff --git a/hip/base/executor.hip.cpp b/hip/base/executor.hip.cpp index 8d175c0e424..af4f31febc1 100644 --- a/hip/base/executor.hip.cpp +++ b/hip/base/executor.hip.cpp @@ -173,7 +173,7 @@ void HipExecutor::raw_copy_to(const CudaExecutor* dest, size_type num_bytes, } -void HipExecutor::raw_copy_to(const DpcppExecutor* dest, size_type num_bytes, +void 
HipExecutor::raw_copy_to(const SyclExecutor* dest, size_type num_bytes, const void* src_ptr, void* dest_ptr) const { GKO_NOT_SUPPORTED(dest); diff --git a/hip/test/base/hip_executor.hip.cpp b/hip/test/base/hip_executor.hip.cpp index 42499384704..3feeeef10e1 100644 --- a/hip/test/base/hip_executor.hip.cpp +++ b/hip/test/base/hip_executor.hip.cpp @@ -76,7 +76,7 @@ class ExampleOperation : public gko::Operation { value = -3; } - void run(std::shared_ptr) const override + void run(std::shared_ptr) const override { value = -4; } diff --git a/include/ginkgo/core/base/executor.hpp b/include/ginkgo/core/base/executor.hpp index 01b15e2c5ef..7f91c08db0d 100644 --- a/include/ginkgo/core/base/executor.hpp +++ b/include/ginkgo/core/base/executor.hpp @@ -123,11 +123,14 @@ constexpr allocation_mode default_hip_alloc_mode = } // namespace gko +namespace gko { + + /** - * The enum class is for the dpcpp queue property. It's legal to use a binary + * The enum class is for the sycl queue property. It's legal to use a binary * or(|) operation to combine several properties. 
*/ -enum class dpcpp_queue_property { +enum class sycl_queue_property { /** * queue executes in order */ @@ -139,14 +142,21 @@ enum class dpcpp_queue_property { enable_profiling = 2 }; -GKO_ATTRIBUTES GKO_INLINE dpcpp_queue_property operator|(dpcpp_queue_property a, - dpcpp_queue_property b) +GKO_ATTRIBUTES GKO_INLINE sycl_queue_property operator|(sycl_queue_property a, + sycl_queue_property b) { - return static_cast(static_cast(a) | - static_cast(b)); + return static_cast(static_cast(a) | + static_cast(b)); } +} // namespace gko + + +using dpcpp_queue_property [[deprecated("using gko::sycl_queue_property")]] = + gko::sycl_queue_property; + + namespace gko { @@ -206,7 +216,7 @@ class ExecutorBase; * void run(const gko::HipExecutor *exec) const override * { os_ << "HIP(" << exec->get_device_id() << ")"; } * - * void run(const gko::DpcppExecutor *exec) const override + * void run(const gko::SyclExecutor *exec) const override * { os_ << "DPC++(" << exec->get_device_id() << ")"; } * * // This is optional, if not overloaded, defaults to OmpExecutor overload @@ -237,7 +247,7 @@ class ExecutorBase; * std::cout << *omp << std::endl * << *gko::CudaExecutor::create(0, omp) << std::endl * << *gko::HipExecutor::create(0, omp) << std::endl - * << *gko::DpcppExecutor::create(0, omp) << std::endl + * << *gko::SyclExecutor::create(0, omp) << std::endl * << *gko::ReferenceExecutor::create() << std::endl; * ``` * @@ -272,7 +282,7 @@ class ExecutorBase; * .get_device_id() * << ")"; }); * [&]() { os << "DPC++(" // DPC++ closure - * << static_cast(exec) + * << static_cast(exec) * .get_device_id() * << ")"; }); * return os; @@ -352,7 +362,7 @@ class RegisteredOperation : public Operation { op_(exec); } - void run(std::shared_ptr exec) const override + void run(std::shared_ptr exec) const override { op_(exec); } @@ -382,7 +392,7 @@ RegisteredOperation make_register_operation(const char* name, * kernel when the operation is executed. 
* * The kernels used to bind the operation are searched in `kernels::DEV_TYPE` - * namespace, where `DEV_TYPE` is replaced by `omp`, `cuda`, `hip`, `dpcpp` and + * namespace, where `DEV_TYPE` is replaced by `omp`, `cuda`, `hip`, `sycl` and * `reference`. * * @param _name operation name @@ -412,7 +422,7 @@ RegisteredOperation make_register_operation(const char* name, * } * namespace sycl { * void my_kernel(int x) { - * // dpcpp code + * // sycl code * } * } * namespace reference { @@ -429,7 +439,7 @@ RegisteredOperation make_register_operation(const char* name, * auto omp = OmpExecutor::create(); * auto cuda = CudaExecutor::create(0, omp); * auto hip = HipExecutor::create(0, omp); - * auto dpcpp = DpcppExecutor::create(0, omp); + * auto sycl = SyclExecutor::create(0, omp); * auto ref = ReferenceExecutor::create(); * * // create the operation @@ -438,67 +448,67 @@ RegisteredOperation make_register_operation(const char* name, * omp->run(op); // run omp kernel * cuda->run(op); // run cuda kernel * hip->run(op); // run hip kernel - * dpcpp->run(op); // run DPC++ kernel + * sycl->run(op); // run DPC++ kernel * ref->run(op); // run reference kernel * } * ``` * * @ingroup Executor */ -#define GKO_REGISTER_OPERATION(_name, _kernel) \ - template \ - auto make_##_name(Args&&... 
args) \ - { \ - return ::gko::detail::make_register_operation( \ - #_kernel, [&args...](auto exec) { \ - using exec_type = decltype(exec); \ - if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::reference::_kernel( \ - std::dynamic_pointer_cast< \ - const ::gko::ReferenceExecutor>(exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::omp::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::cuda::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::hip::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else if (std::is_same< \ - exec_type, \ - std::shared_ptr>:: \ - value) { \ - ::gko::kernels::dpcpp::_kernel( \ - std::dynamic_pointer_cast( \ - exec), \ - std::forward(args)...); \ - } else { \ - GKO_NOT_IMPLEMENTED; \ - } \ - }); \ - } \ - static_assert(true, \ - "This assert is used to counter the false positive extra " \ +#define GKO_REGISTER_OPERATION(_name, _kernel) \ + template \ + auto make_##_name(Args&&... 
args) \ + { \ + return ::gko::detail::make_register_operation( \ + #_kernel, [&args...](auto exec) { \ + using exec_type = decltype(exec); \ + if (std::is_same< \ + exec_type, \ + std::shared_ptr>:: \ + value) { \ + ::gko::kernels::reference::_kernel( \ + std::dynamic_pointer_cast< \ + const ::gko::ReferenceExecutor>(exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>:: \ + value) { \ + ::gko::kernels::omp::_kernel( \ + std::dynamic_pointer_cast( \ + exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>:: \ + value) { \ + ::gko::kernels::cuda::_kernel( \ + std::dynamic_pointer_cast( \ + exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>:: \ + value) { \ + ::gko::kernels::hip::_kernel( \ + std::dynamic_pointer_cast( \ + exec), \ + std::forward(args)...); \ + } else if (std::is_same< \ + exec_type, \ + std::shared_ptr>:: \ + value) { \ + ::gko::kernels::sycl::_kernel( \ + std::dynamic_pointer_cast( \ + exec), \ + std::forward(args)...); \ + } else { \ + GKO_NOT_IMPLEMENTED; \ + } \ + }); \ + } \ + static_assert(true, \ + "This assert is used to counter the false positive extra " \ "semi-colon warnings") @@ -566,7 +576,7 @@ RegisteredOperation make_register_operation(const char* name, * operations executed on the NVIDIA GPU accelerator; * + HipExecutor specifies that the data should be stored and the * operations executed on either an NVIDIA or AMD GPU accelerator; - * + DpcppExecutor specifies that the data should be stored and the + * + SyclExecutor specifies that the data should be stored and the * operations executed on an hardware supporting DPC++; * + ReferenceExecutor executes a non-optimized reference implementation, * which can be used to debug the library. 
@@ -670,21 +680,21 @@ class Executor : public log::EnableLogging { * @tparam ClosureOmp type of op_omp * @tparam ClosureCuda type of op_cuda * @tparam ClosureHip type of op_hip - * @tparam ClosureDpcpp type of op_dpcpp + * @tparam ClosureSycl type of op_sycl * * @param op_omp functor to run in case of a OmpExecutor or * ReferenceExecutor * @param op_cuda functor to run in case of a CudaExecutor * @param op_hip functor to run in case of a HipExecutor - * @param op_dpcpp functor to run in case of a DpcppExecutor + * @param op_sycl functor to run in case of a SyclExecutor */ template + typename ClosureSycl> void run(const ClosureOmp& op_omp, const ClosureCuda& op_cuda, - const ClosureHip& op_hip, const ClosureDpcpp& op_dpcpp) const + const ClosureHip& op_hip, const ClosureSycl& op_sycl) const { - LambdaOperation op( - op_omp, op_cuda, op_hip, op_dpcpp); + LambdaOperation op( + op_omp, op_cuda, op_hip, op_sycl); this->run(op); } @@ -906,7 +916,7 @@ class Executor : public log::EnableLogging { int device_id = -1; /** - * The type of the device, relevant only for the dpcpp executor. + * The type of the device, relevant only for the sycl executor. */ std::string device_type; @@ -920,7 +930,7 @@ class Executor : public log::EnableLogging { * * @note In CPU executors this is equivalent to the number of cores. * In CUDA and HIP executors this is the number of Streaming - * Multiprocessors. In DPCPP, this is the number of computing + * Multiprocessors. In SYCL, this is the number of computing * units. */ int num_computing_units = -1; @@ -932,7 +942,7 @@ class Executor : public log::EnableLogging { * per core. * In CUDA and HIP executors this is the number of warps * per SM. - * In DPCPP, this is currently number of hardware threads per eu. + * In SYCL, this is currently number of hardware threads per eu. * If the device does not support, it will be set as 1. 
* (TODO: check) */ @@ -943,8 +953,8 @@ class Executor : public log::EnableLogging { * * @note In CPU executors this is invalid. * In CUDA and HIP executors this is invalid. - * In DPCPP, this is the subgroup sizes for the device associated - * with the dpcpp executor. + * In SYCL, this is the subgroup sizes for the device associated + * with the sycl executor. */ std::vector subgroup_sizes{}; @@ -953,8 +963,8 @@ class Executor : public log::EnableLogging { * * @note In CPU executors this is invalid. * In CUDA and HIP executors this is the warp size. - * In DPCPP, this is the maximum subgroup size for the device - * associated with the dpcpp executor. + * In SYCL, this is the maximum subgroup size for the device + * associated with the sycl executor. */ int max_subgroup_size = -1; @@ -964,9 +974,9 @@ class Executor : public log::EnableLogging { * @note In CPU executors this is invalid. * In CUDA and HIP executors this is the maximum number of threads * in each dimension of a block (x, y, z). - * In DPCPP, this is the maximum number of workitems, in each + * In SYCL, this is the maximum number of workitems, in each * direction of the workgroup for the device associated with the - * dpcpp executor. + * sycl executor. */ std::vector max_workitem_sizes{}; @@ -976,7 +986,7 @@ class Executor : public log::EnableLogging { * @note In CPU executors this is invalid. * In CUDA and HIP executors this is the maximum number of threads * in block. - * In DPCPP, this is the maximum number of workitems that are + * In SYCL, this is the maximum number of workitems that are * permitted in a workgroup. */ int max_workgroup_size; @@ -1001,7 +1011,7 @@ class Executor : public log::EnableLogging { /** * The host processing units closest to the device. * - * @note Currently only relevant for CUDA, HIP and DPCPP executors. + * @note Currently only relevant for CUDA, HIP and SYCL executors. 
* [Definition from hwloc * documentation:](https://www.open-mpi.org/projects/hwloc/doc/v2.4.0/a00350.php) * "The smallest processing element that can be represented by a @@ -1123,16 +1133,16 @@ class Executor : public log::EnableLogging { * * The first object is called by the OmpExecutor, the second one by the * CudaExecutor, the third one by the HipExecutor and the last one by - * the DpcppExecutor. When run on the + * the SyclExecutor. When run on the * ReferenceExecutor, the implementation will launch the OpenMP version. * * @tparam ClosureOmp the type of the first functor * @tparam ClosureCuda the type of the second functor * @tparam ClosureHip the type of the third functor - * @tparam ClosureDpcpp the type of the fourth functor + * @tparam ClosureSycl the type of the fourth functor */ template + typename ClosureSycl> class LambdaOperation : public Operation { public: /** @@ -1142,15 +1152,15 @@ class Executor : public log::EnableLogging { * and ReferenceExecutor * @param op_cuda a functor object which will be called by CudaExecutor * @param op_hip a functor object which will be called by HipExecutor - * @param op_dpcpp a functor object which will be called by - * DpcppExecutor + * @param op_sycl a functor object which will be called by + * SyclExecutor */ LambdaOperation(const ClosureOmp& op_omp, const ClosureCuda& op_cuda, - const ClosureHip& op_hip, const ClosureDpcpp& op_dpcpp) + const ClosureHip& op_hip, const ClosureSycl& op_sycl) : op_omp_(op_omp), op_cuda_(op_cuda), op_hip_(op_hip), - op_dpcpp_(op_dpcpp) + op_sycl_(op_sycl) {} void run(std::shared_ptr) const override @@ -1173,16 +1183,16 @@ class Executor : public log::EnableLogging { op_hip_(); } - void run(std::shared_ptr) const override + void run(std::shared_ptr) const override { - op_dpcpp_(); + op_sycl_(); } private: ClosureOmp op_omp_; ClosureCuda op_cuda_; ClosureHip op_hip_; - ClosureDpcpp op_dpcpp_; + ClosureSycl op_sycl_; }; }; @@ -1423,7 +1433,7 @@ class OmpExecutor : public 
detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); - bool verify_memory_to(const DpcppExecutor* dest_exec) const override; + bool verify_memory_to(const SyclExecutor* dest_exec) const override; std::shared_ptr alloc_; }; @@ -1490,7 +1500,7 @@ class ReferenceExecutor : public OmpExecutor { GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(OmpExecutor, false); - GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(SyclExecutor, false); GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(CudaExecutor, false); @@ -1691,7 +1701,7 @@ class CudaExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); - GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(SyclExecutor, false); bool verify_memory_to(const HipExecutor* dest_exec) const override; @@ -1889,7 +1899,7 @@ class HipExecutor : public detail::ExecutorBase, GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(ReferenceExecutor, false); - GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(DpcppExecutor, false); + GKO_DEFAULT_OVERRIDE_VERIFY_MEMORY(SyclExecutor, false); bool verify_memory_to(const CudaExecutor* dest_exec) const override; @@ -1919,27 +1929,27 @@ using DefaultExecutor = HipExecutor; /** * This is the Executor subclass which represents a DPC++ enhanced device. * - * @ingroup exec_dpcpp + * @ingroup exec_sycl * @ingroup Executor */ -class DpcppExecutor : public detail::ExecutorBase, - public std::enable_shared_from_this { - friend class detail::ExecutorBase; +class SyclExecutor : public detail::ExecutorBase, + public std::enable_shared_from_this { + friend class detail::ExecutorBase; public: /** - * Creates a new DpcppExecutor. + * Creates a new SyclExecutor. 
* - * @param device_id the DPCPP device id of this device + * @param device_id the SYCL device id of this device * @param master an executor on the host that is used to invoke the device * kernels * @param device_type a string representing the type of device to consider * (accelerator, cpu, gpu or all). */ - static std::shared_ptr create( + static std::shared_ptr create( int device_id, std::shared_ptr master, std::string device_type = "all", - dpcpp_queue_property property = dpcpp_queue_property::in_order); + sycl_queue_property property = sycl_queue_property::in_order); std::shared_ptr get_master() noexcept override; @@ -1950,9 +1960,9 @@ class DpcppExecutor : public detail::ExecutorBase, scoped_device_id_guard get_scoped_device_id_guard() const override; /** - * Get the DPCPP device id of the device associated to this executor. + * Get the SYCL device id of the device associated to this executor. * - * @return the DPCPP device id of the device associated to this executor + * @return the SYCL device id of the device associated to this executor */ int get_device_id() const noexcept { @@ -2041,12 +2051,11 @@ class DpcppExecutor : public detail::ExecutorBase, protected: void set_device_property( - dpcpp_queue_property property = dpcpp_queue_property::in_order); + sycl_queue_property property = sycl_queue_property::in_order); - DpcppExecutor( - int device_id, std::shared_ptr master, - std::string device_type = "all", - dpcpp_queue_property property = dpcpp_queue_property::in_order) + SyclExecutor(int device_id, std::shared_ptr master, + std::string device_type = "all", + sycl_queue_property property = sycl_queue_property::in_order) : master_(master) { std::for_each(device_type.begin(), device_type.end(), @@ -2072,7 +2081,7 @@ class DpcppExecutor : public detail::ExecutorBase, bool verify_memory_to(const OmpExecutor* dest_exec) const override; - bool verify_memory_to(const DpcppExecutor* dest_exec) const override; + bool verify_memory_to(const SyclExecutor* dest_exec) 
const override; private: std::shared_ptr master_; @@ -2083,16 +2092,19 @@ class DpcppExecutor : public detail::ExecutorBase, }; +using DpcppExecutor [[deprecated("using SyclExecutor")]] = SyclExecutor; + + namespace kernels { namespace sycl { -using DefaultExecutor = DpcppExecutor; +using DefaultExecutor = SyclExecutor; } // namespace sycl } // namespace kernels namespace kernels { namespace dpcpp { -using DefaultExecutor [[deprecated("using sycl namespace")]] = DpcppExecutor; +using DefaultExecutor [[deprecated("using sycl namespace")]] = SyclExecutor; } // namespace dpcpp } // namespace kernels diff --git a/include/ginkgo/core/base/scoped_device_id_guard.hpp b/include/ginkgo/core/base/scoped_device_id_guard.hpp index 6b236a6a37e..be2ea307399 100644 --- a/include/ginkgo/core/base/scoped_device_id_guard.hpp +++ b/include/ginkgo/core/base/scoped_device_id_guard.hpp @@ -44,7 +44,7 @@ class OmpExecutor; class ReferenceExecutor; class CudaExecutor; class HipExecutor; -class DpcppExecutor; +class SyclExecutor; namespace detail { @@ -84,7 +84,7 @@ class generic_scoped_device_id_guard { * block, when run with multiple devices. Depending on the executor it will * record the current device id and set the device id to the one being passed * in. After the scope has been exited, the destructor sets the device_id back - * to the one before entering the scope. The OmpExecutor and DpcppExecutor don't + * to the one before entering the scope. The OmpExecutor and SyclExecutor don't * require setting an device id, so in those cases, the class is a no-op. * * The device id scope has to be constructed from a executor with concrete type @@ -144,14 +144,14 @@ class scoped_device_id_guard { scoped_device_id_guard(const HipExecutor* exec, int device_id); /** - * Create a scoped device id from an DpcppExecutor. + * Create a scoped device id from an SyclExecutor. * * The resulting object will be a noop. * * @param exec Not used. * @param device_id Not used. 
*/ - scoped_device_id_guard(const DpcppExecutor* exec, int device_id); + scoped_device_id_guard(const SyclExecutor* exec, int device_id); scoped_device_id_guard() = default; diff --git a/include/ginkgo/core/base/timer.hpp b/include/ginkgo/core/base/timer.hpp index f2732f4e6d9..fc16bab7fcf 100644 --- a/include/ginkgo/core/base/timer.hpp +++ b/include/ginkgo/core/base/timer.hpp @@ -65,7 +65,7 @@ class time_point { friend class CpuTimer; friend class CudaTimer; friend class HipTimer; - friend class DpcppTimer; + friend class SyclTimer; /** What kind of timer was used to generate the time point? */ enum class type { @@ -76,14 +76,14 @@ class time_point { /** hipEvent-based timer */ hip, /** sycl::event-based timer */ - dpcpp, + sycl, }; type type_; union data_union { CUevent_st* cuda_event; GKO_HIP_EVENT_STRUCT* hip_event; - sycl::event* dpcpp_event; + sycl::event* sycl_event; std::chrono::steady_clock::time_point chrono; data_union(); @@ -159,8 +159,8 @@ class Timer { * @param exec the executor to create a Timer for * * @return CpuTimer for ReferenceExecutor and OmpExecutor, CudaTimer for - * CudaExecutor, HipTimer for HipExecutor or DpcppTimer for - * DpcppExecutor. + * CudaExecutor, HipTimer for HipExecutor or SyclTimer for + * SyclExecutor. */ static std::unique_ptr create_for_executor( std::shared_ptr exec); @@ -226,8 +226,8 @@ class HipTimer : public Timer { }; -/** A timer using kernels for timing on a DpcppExecutor in profiling mode. */ -class DpcppTimer : public Timer { +/** A timer using kernels for timing on a SyclExecutor in profiling mode. 
*/ +class SyclTimer : public Timer { public: void record(time_point& time) override; @@ -236,16 +236,19 @@ class DpcppTimer : public Timer { std::chrono::nanoseconds difference_async(const time_point& start, const time_point& stop) override; - DpcppTimer(std::shared_ptr exec); + SyclTimer(std::shared_ptr exec); protected: void init_time_point(time_point& time) override; private: - std::shared_ptr exec_; + std::shared_ptr exec_; }; +using DpcppTimer [[deprecated]] = SyclTimer; + + } // namespace gko diff --git a/include/ginkgo/core/base/types.hpp b/include/ginkgo/core/base/types.hpp index 68b5da6e3eb..6182a6a8142 100644 --- a/include/ginkgo/core/base/types.hpp +++ b/include/ginkgo/core/base/types.hpp @@ -415,7 +415,7 @@ GKO_ATTRIBUTES constexpr bool operator!=(precision_reduction x, #define GKO_ENABLE_FOR_ALL_EXECUTORS(_enable_macro) \ _enable_macro(OmpExecutor, omp); \ _enable_macro(HipExecutor, hip); \ - _enable_macro(DpcppExecutor, dpcpp); \ + _enable_macro(SyclExecutor, sycl); \ _enable_macro(CudaExecutor, cuda) diff --git a/include/ginkgo/core/base/version.hpp b/include/ginkgo/core/base/version.hpp index caa0cbe0761..198982a8a13 100644 --- a/include/ginkgo/core/base/version.hpp +++ b/include/ginkgo/core/base/version.hpp @@ -154,7 +154,7 @@ inline std::ostream& operator<<(std::ostream& os, const version& ver) * earlier version may have this implemented or fixed in a later version). * * This structure provides versions of different parts of Ginkgo: the headers, - * the core and the kernel modules (reference, OpenMP, CUDA, HIP, DPCPP). + * the core and the kernel modules (reference, OpenMP, CUDA, HIP, SYCL). * To obtain an instance of version_info filled with information about the * current version of Ginkgo, call the version_info::get() static method. */ @@ -213,11 +213,19 @@ class version_info { version hip_version; /** - * Contains version information of the DPC++ module. + * Contains version information of the SYCL module. 
* - * This is the version of the static/shared library called "ginkgo_dpcpp". + * It's deprecated, please use sycl_version instead. */ - version dpcpp_version; + [[deprecated("using sycl_version")]] version dpcpp_version; + + + /** + * Contains version information of the SYCL module. + * + * This is the version of the static/shared library called "ginkgo_sycl". + */ + version sycl_version; private: static constexpr version get_header_version() noexcept @@ -236,7 +244,7 @@ class version_info { static version get_hip_version() noexcept; - static version get_dpcpp_version() noexcept; + static version get_sycl_version() noexcept; version_info() : header_version{get_header_version()}, @@ -245,7 +253,8 @@ class version_info { omp_version{get_omp_version()}, cuda_version{get_cuda_version()}, hip_version{get_hip_version()}, - dpcpp_version{get_dpcpp_version()} + dpcpp_version{get_sycl_version()}, + sycl_version{get_sycl_version()} {} }; diff --git a/include/ginkgo/core/matrix/csr.hpp b/include/ginkgo/core/matrix/csr.hpp index 611e5d33c64..30bb16429f9 100644 --- a/include/ginkgo/core/matrix/csr.hpp +++ b/include/ginkgo/core/matrix/csr.hpp @@ -401,13 +401,13 @@ class Csr : public EnableLinOp>, {} /** - * Creates a load_balance strategy with DPCPP executor. + * Creates a load_balance strategy with SYCL executor. * - * @param exec the DPCPP executor + * @param exec the SYCL executor * * @note TODO: porting - we hardcode the subgroup size is 32 */ - load_balance(std::shared_ptr exec) + load_balance(std::shared_ptr exec) : load_balance(exec->get_num_subgroups(), 32, false, "intel") {} @@ -589,13 +589,13 @@ class Csr : public EnableLinOp>, {} /** - * Creates an automatical strategy with Dpcpp executor. + * Creates an automatical strategy with Sycl executor. 
* - * @param exec the Dpcpp executor + * @param exec the Sycl executor * * @note TODO: porting - we hardcode the subgroup size is 32 */ - automatical(std::shared_ptr exec) + automatical(std::shared_ptr exec) : automatical(exec->get_num_subgroups(), 32, false, "intel") {} @@ -1153,14 +1153,14 @@ class Csr : public EnableLinOp>, { auto cuda_exec = std::dynamic_pointer_cast(exec); auto hip_exec = std::dynamic_pointer_cast(exec); - auto dpcpp_exec = std::dynamic_pointer_cast(exec); + auto sycl_exec = std::dynamic_pointer_cast(exec); std::shared_ptr new_strategy; if (cuda_exec) { new_strategy = std::make_shared(cuda_exec); } else if (hip_exec) { new_strategy = std::make_shared(hip_exec); - } else if (dpcpp_exec) { - new_strategy = std::make_shared(dpcpp_exec); + } else if (sycl_exec) { + new_strategy = std::make_shared(sycl_exec); } else { new_strategy = std::make_shared(); } @@ -1186,8 +1186,8 @@ class Csr : public EnableLinOp>, auto cuda_exec = std::dynamic_pointer_cast(rexec); auto hip_exec = std::dynamic_pointer_cast(rexec); - auto dpcpp_exec = - std::dynamic_pointer_cast(rexec); + auto sycl_exec = + std::dynamic_pointer_cast(rexec); auto lb = dynamic_cast(strat); if (cuda_exec) { if (lb) { @@ -1207,14 +1207,14 @@ class Csr : public EnableLinOp>, new_strat = std::make_shared( hip_exec); } - } else if (dpcpp_exec) { + } else if (sycl_exec) { if (lb) { new_strat = std::make_shared( - dpcpp_exec); + sycl_exec); } else { new_strat = std::make_shared( - dpcpp_exec); + sycl_exec); } } else { // Try to preserve this executor's configuration @@ -1224,8 +1224,8 @@ class Csr : public EnableLinOp>, auto this_hip_exec = std::dynamic_pointer_cast( this->get_executor()); - auto this_dpcpp_exec = - std::dynamic_pointer_cast( + auto this_sycl_exec = + std::dynamic_pointer_cast( this->get_executor()); if (this_cuda_exec) { if (lb) { @@ -1247,15 +1247,15 @@ class Csr : public EnableLinOp>, std::make_shared( this_hip_exec); } - } else if (this_dpcpp_exec) { + } else if 
(this_sycl_exec) { if (lb) { new_strat = std::make_shared( - this_dpcpp_exec); + this_sycl_exec); } else { new_strat = std::make_shared( - this_dpcpp_exec); + this_sycl_exec); } } else { // FIXME: this changes strategies. diff --git a/omp/CMakeLists.txt b/omp/CMakeLists.txt index c689ffc42f3..4e17a9517f1 100644 --- a/omp/CMakeLists.txt +++ b/omp/CMakeLists.txt @@ -79,7 +79,7 @@ target_compile_options(ginkgo_omp PRIVATE "${GINKGO_COMPILER_FLAGS}") target_link_libraries(ginkgo_omp PRIVATE ginkgo_cuda) # Need to link against ginkgo_hip for the `raw_copy_to(HipExecutor ...)` method target_link_libraries(ginkgo_omp PRIVATE ginkgo_hip) -# Need to link against ginkgo_dpcpp for the `raw_copy_to(DpcppExecutor ...)` method +# Need to link against ginkgo_dpcpp for the `raw_copy_to(SyclExecutor ...)` method target_link_libraries(ginkgo_omp PRIVATE ginkgo_dpcpp) target_link_libraries(ginkgo_omp PUBLIC ginkgo_device) diff --git a/test/base/executor.cpp b/test/base/executor.cpp index c51fcb8c1f1..cb190a4515f 100644 --- a/test/base/executor.cpp +++ b/test/base/executor.cpp @@ -79,9 +79,9 @@ class ExampleOperation : public gko::Operation { { value = hip::value; } - void run(std::shared_ptr) const override + void run(std::shared_ptr) const override { - value = dpcpp::value; + value = sycl::value; } void run(std::shared_ptr) const override { @@ -128,9 +128,9 @@ TEST_F(Executor, RunsCorrectLambdaOperation) auto omp_lambda = [&value]() { value = omp::value; }; auto cuda_lambda = [&value]() { value = cuda::value; }; auto hip_lambda = [&value]() { value = hip::value; }; - auto dpcpp_lambda = [&value]() { value = dpcpp::value; }; + auto sycl_lambda = [&value]() { value = sycl::value; }; - exec->run(omp_lambda, cuda_lambda, hip_lambda, dpcpp_lambda); + exec->run(omp_lambda, cuda_lambda, hip_lambda, sycl_lambda); ASSERT_EQ(EXEC_NAMESPACE::value, value); } diff --git a/test/base/timer.cpp b/test/base/timer.cpp index 39b06a873df..95295ce34ff 100644 --- a/test/base/timer.cpp +++ 
b/test/base/timer.cpp @@ -50,12 +50,12 @@ class Timer : public CommonTestFixture { Timer() { // require profiling capability - const auto property = dpcpp_queue_property::in_order | - dpcpp_queue_property::enable_profiling; - if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "gpu", property); - } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "cpu", property); + const auto property = gko::sycl_queue_property::in_order | + gko::sycl_queue_property::enable_profiling; + if (gko::SyclExecutor::get_num_devices("gpu") > 0) { + exec = gko::SyclExecutor::create(0, ref, "gpu", property); + } else if (gko::SyclExecutor::get_num_devices("cpu") > 0) { + exec = gko::SyclExecutor::create(0, ref, "cpu", property); } else { throw std::runtime_error{"No suitable DPC++ devices"}; } diff --git a/test/log/profiler_hook.cpp b/test/log/profiler_hook.cpp index 0d16d5d42ca..f0dce3e4955 100644 --- a/test/log/profiler_hook.cpp +++ b/test/log/profiler_hook.cpp @@ -48,12 +48,12 @@ class ProfilerHook : public CommonTestFixture { ProfilerHook() { // require profiling capability - const auto property = dpcpp_queue_property::in_order | - dpcpp_queue_property::enable_profiling; - if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "gpu", property); - } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "cpu", property); + const auto property = gko::sycl_queue_property::in_order | + gko::sycl_queue_property::enable_profiling; + if (gko::SyclExecutor::get_num_devices("gpu") > 0) { + exec = gko::SyclExecutor::create(0, ref, "gpu", property); + } else if (gko::SyclExecutor::get_num_devices("cpu") > 0) { + exec = gko::SyclExecutor::create(0, ref, "cpu", property); } else { throw std::runtime_error{"No suitable DPC++ devices"}; } diff --git a/test/utils/executor.hpp b/test/utils/executor.hpp index 
200f4652644..dc22b9affd0 100644 --- a/test/utils/executor.hpp +++ b/test/utils/executor.hpp @@ -128,12 +128,12 @@ inline void init_executor(std::shared_ptr ref, inline void init_executor(std::shared_ptr ref, - std::shared_ptr& exec) + std::shared_ptr& exec) { - if (gko::DpcppExecutor::get_num_devices("gpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "gpu"); - } else if (gko::DpcppExecutor::get_num_devices("cpu") > 0) { - exec = gko::DpcppExecutor::create(0, ref, "cpu"); + if (gko::SyclExecutor::get_num_devices("gpu") > 0) { + exec = gko::SyclExecutor::create(0, ref, "gpu"); + } else if (gko::SyclExecutor::get_num_devices("cpu") > 0) { + exec = gko::SyclExecutor::create(0, ref, "cpu"); } else { throw std::runtime_error{"No suitable DPC++ devices"}; } diff --git a/test/utils/mpi/executor.hpp b/test/utils/mpi/executor.hpp index d8c94e01804..896e0117b80 100644 --- a/test/utils/mpi/executor.hpp +++ b/test/utils/mpi/executor.hpp @@ -92,17 +92,17 @@ inline void init_executor(std::shared_ptr ref, inline void init_executor(std::shared_ptr ref, - std::shared_ptr& exec) + std::shared_ptr& exec) { - auto num_gpu_devices = gko::DpcppExecutor::get_num_devices("gpu"); - auto num_cpu_devices = gko::DpcppExecutor::get_num_devices("cpu"); + auto num_gpu_devices = gko::SyclExecutor::get_num_devices("gpu"); + auto num_cpu_devices = gko::SyclExecutor::get_num_devices("cpu"); if (num_gpu_devices > 0) { - exec = gko::DpcppExecutor::create( + exec = gko::SyclExecutor::create( gko::experimental::mpi::map_rank_to_device_id(MPI_COMM_WORLD, num_gpu_devices), ref, "gpu"); } else if (num_cpu_devices > 0) { - exec = gko::DpcppExecutor::create( + exec = gko::SyclExecutor::create( gko::experimental::mpi::map_rank_to_device_id(MPI_COMM_WORLD, num_cpu_devices), ref, "cpu");