diff --git a/onedal/datatypes/_data_conversion.py b/onedal/datatypes/_data_conversion.py index d1dedba81c..ab0d2887a3 100644 --- a/onedal/datatypes/_data_conversion.py +++ b/onedal/datatypes/_data_conversion.py @@ -21,15 +21,14 @@ from daal4py.sklearn._utils import make2d from onedal import _backend, _is_dpc_backend +from .._device_offload import dpctl_available, dpnp_available from ..utils import _is_csr -try: - import dpctl +if dpctl_available: import dpctl.tensor as dpt - dpctl_available = dpctl.__version__ >= "0.14" -except ImportError: - dpctl_available = False +if dpnp_available: + import dpnp def _apply_and_pass(func, *args): @@ -38,20 +37,39 @@ def _apply_and_pass(func, *args): return tuple(map(func, args)) -def from_table(*args): - return _apply_and_pass(_backend.from_table, *args) - - -def convert_one_to_table(arg): - if dpctl_available: - if isinstance(arg, dpt.usm_ndarray): - return _backend.dpctl_to_table(arg) +# TODO: +# add warnings if no dpc backend. +# TODO: +# sparse for sua data. +# TODO: +# update it for each of the dataframe formats. +# TODO: +# update func use with args and kwargs with _apply_and_pass. +def convert_one_from_table(table, sua_iface=None, xp=None): + # Currently only `__sycl_usm_array_interface__` protocol used to + # convert into dpnp/dpctl tensors. + if sua_iface: + return xp.asarray(table) + return _backend.from_table(table) + + +# TODO: +# add warnings if no dpc backend. +# TODO: +# sparse for sua data.
+def convert_one_to_table(arg, sua_iface=None): + if sua_iface and _is_dpc_backend: + return _backend.sua_iface_to_table(arg) if not _is_csr(arg): arg = make2d(arg) return _backend.to_table(arg) +def from_table(*args): + return _apply_and_pass(convert_one_from_table, *args) + + def to_table(*args): return _apply_and_pass(convert_one_to_table, *args) diff --git a/onedal/datatypes/data_conversion.cpp b/onedal/datatypes/data_conversion.cpp index 5e46810248..ad9832da8b 100644 --- a/onedal/datatypes/data_conversion.cpp +++ b/onedal/datatypes/data_conversion.cpp @@ -23,7 +23,7 @@ #include "oneapi/dal/table/detail/homogen_utils.hpp" #include "onedal/datatypes/data_conversion.hpp" -#include "onedal/datatypes/numpy_helpers.hpp" +#include "onedal/datatypes/utils/numpy_helpers.hpp" #include "onedal/version.hpp" #if ONEDAL_VERSION <= 20230100 diff --git a/onedal/datatypes/data_conversion_dpctl.cpp b/onedal/datatypes/data_conversion_dpctl.cpp deleted file mode 100644 index cbdf4725da..0000000000 --- a/onedal/datatypes/data_conversion_dpctl.cpp +++ /dev/null @@ -1,225 +0,0 @@ -/******************************************************************************* -* Copyright 2023 Intel Corporation -* -* Licensed under the Apache License, Version 2.0 (the "License"); -* you may not use this file except in compliance with the License. -* You may obtain a copy of the License at -* -* http://www.apache.org/licenses/LICENSE-2.0 -* -* Unless required by applicable law or agreed to in writing, software -* distributed under the License is distributed on an "AS IS" BASIS, -* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -* See the License for the specific language governing permissions and -* limitations under the License. 
-*******************************************************************************/ - -#ifdef ONEDAL_DPCTL_INTEGRATION -#define NO_IMPORT_ARRAY - -#include -#include -#include - -#include "oneapi/dal/table/homogen.hpp" -#include "oneapi/dal/table/detail/homogen_utils.hpp" - -#include "onedal/datatypes/data_conversion_dpctl.hpp" -#include "onedal/datatypes/numpy_helpers.hpp" - -#include "dpctl4pybind11.hpp" - -namespace oneapi::dal::python { - -void report_problem_from_dptensor(const char* clarification) { - constexpr const char* const base_message = "Unable to convert from dptensor"; - - std::string message{ base_message }; - message += std::string{ clarification }; - throw std::invalid_argument{ message }; -} - -std::int64_t get_and_check_dptensor_ndim(const dpctl::tensor::usm_ndarray& tensor) { - constexpr const char* const err_message = ": only 1D & 2D tensors are allowed"; - - const auto ndim = dal::detail::integral_cast(tensor.get_ndim()); - if ((ndim != 1) && (ndim != 2)) - report_problem_from_dptensor(err_message); - return ndim; -} - -auto get_dptensor_shape(const dpctl::tensor::usm_ndarray& tensor) { - const auto ndim = get_and_check_dptensor_ndim(tensor); - std::int64_t row_count, col_count; - if (ndim == 1l) { - row_count = dal::detail::integral_cast(tensor.get_shape(0)); - col_count = 1l; - } - else { - row_count = dal::detail::integral_cast(tensor.get_shape(0)); - col_count = dal::detail::integral_cast(tensor.get_shape(1)); - } - - return std::make_pair(row_count, col_count); -} - -auto get_dptensor_layout(const dpctl::tensor::usm_ndarray& tensor) { - const auto ndim = get_and_check_dptensor_ndim(tensor); - const bool is_c_cont = tensor.is_c_contiguous(); - const bool is_f_cont = tensor.is_f_contiguous(); - - if (ndim == 1l) { - //if (!is_c_cont || !is_f_cont) report_problem_from_dptensor( - // ": 1D array should be contiguous both as C-order and F-order"); - return dal::data_layout::row_major; - } - else { - //if (!is_c_cont || !is_f_cont) 
report_problem_from_dptensor( - // ": 2D array should be contiguous at least by one axis"); - return is_c_cont ? dal::data_layout::row_major : dal::data_layout::column_major; - } -} - -template -dal::table convert_to_homogen_impl(py::object obj, dpctl::tensor::usm_ndarray& tensor) { - const dpctl::tensor::usm_ndarray* const ptr = &tensor; - const auto deleter = [obj](const Type*) { - obj.dec_ref(); - }; - const auto [r_count, c_count] = get_dptensor_shape(tensor); - const auto layout = get_dptensor_layout(tensor); - const auto* data = tensor.get_data(); - const auto queue = tensor.get_queue(); - - auto res = dal::homogen_table(queue, - data, - r_count, - c_count, // - deleter, - std::vector{}, - layout); - - obj.inc_ref(); - - return res; -} - -dal::table convert_from_dptensor(py::object obj) { - auto tensor = pybind11::cast(obj); - - const auto type = tensor.get_typenum(); - - dal::table res{}; - -#define MAKE_HOMOGEN_TABLE(CType) \ - res = convert_to_homogen_impl(obj, tensor); - - SET_NPY_FEATURE(type, - MAKE_HOMOGEN_TABLE, // - report_problem_from_dptensor(": unknown data type")); - -#undef MAKE_HOMOGEN_TABLE - - return res; -} - -void report_problem_to_dptensor(const char* clarification) { - constexpr const char* const base_message = "Unable to convert to dptensor"; - - std::string message{ base_message }; - message += std::string{ clarification }; - throw std::runtime_error{ message }; -} - -// TODO: -// return type. 
-std::string get_npy_typestr(const dal::data_type dtype) { - switch (dtype) { - case dal::data_type::float32: { - return "(input); - const dal::data_type dtype = homogen_input.get_metadata().get_data_type(0); - const dal::data_layout data_layout = homogen_input.get_data_layout(); - - npy_intp row_count = dal::detail::integral_cast( - homogen_input.get_row_count()); - npy_intp column_count = dal::detail::integral_cast( - homogen_input.get_column_count()); - - // need "version", "data", "shape", "typestr", "syclobj" - py::tuple shape = py::make_tuple(row_count, column_count); - py::list data_entry(2); - - auto bytes_array = dal::detail::get_original_data(homogen_input); - if (!bytes_array.get_queue().has_value()) { - report_problem_to_dptensor(": table has no queue"); - } - auto queue = bytes_array.get_queue().value(); - - const bool is_mutable = bytes_array.has_mutable_data(); - - static_assert(sizeof(std::size_t) == sizeof(void*)); - data_entry[0] = is_mutable ? reinterpret_cast(bytes_array.get_mutable_data()) - : reinterpret_cast(bytes_array.get_data()); - data_entry[1] = is_mutable; - - py::dict iface; - iface["data"] = data_entry; - iface["shape"] = shape; - iface["strides"] = get_npy_strides(data_layout, row_count, column_count); - // dpctl supports only version 1. - iface["version"] = 1; - iface["typestr"] = get_npy_typestr(dtype); - iface["syclobj"] = py::cast(queue); - - return iface; -} - -// We are using `__sycl_usm_array_interface__` attribute for constructing -// dpctl tensor on python level. 
-void define_sycl_usm_array_property(py::class_& table_obj) { - table_obj.def_property_readonly("__sycl_usm_array_interface__", &construct_sua_iface); -} - -} // namespace oneapi::dal::python - -#endif // ONEDAL_DPCTL_INTEGRATION diff --git a/onedal/datatypes/data_conversion_sua_iface.cpp b/onedal/datatypes/data_conversion_sua_iface.cpp new file mode 100644 index 0000000000..dc3a718d62 --- /dev/null +++ b/onedal/datatypes/data_conversion_sua_iface.cpp @@ -0,0 +1,147 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#ifdef ONEDAL_DATA_PARALLEL +#define NO_IMPORT_ARRAY + +#include +#include +#include + +#include "oneapi/dal/common.hpp" +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/table/homogen.hpp" +#include "oneapi/dal/table/detail/homogen_utils.hpp" + +#include "onedal/common/policy_common.hpp" +#include "onedal/datatypes/data_conversion_sua_iface.hpp" +#include "onedal/datatypes/utils/dtype_conversions.hpp" +#include "onedal/datatypes/utils/dtype_dispatcher.hpp" +#include "onedal/datatypes/utils/sua_iface_helpers.hpp" + +namespace oneapi::dal::python { + +template +dal::table convert_to_homogen_impl(py::object obj) { + auto sua_iface_dict = get_sua_interface(obj); + + const auto deleter = [obj](const Type*) { + obj.dec_ref(); + }; + + const auto ndim = get_and_check_sua_iface_ndim(sua_iface_dict); + + const auto [r_count, c_count] = get_sua_iface_shape_by_values(sua_iface_dict, ndim); + + const auto layout = get_sua_iface_layout(sua_iface_dict, r_count, c_count); + + const auto* const ptr = reinterpret_cast(get_sua_ptr(sua_iface_dict)); + auto syclobj = sua_iface_dict["syclobj"].cast(); + const auto queue = get_queue_from_python(syclobj); + bool is_readonly = is_sua_readonly(sua_iface_dict); + + dal::table res{}; + + if (is_readonly) { + res = dal::homogen_table(queue, + ptr, + r_count, + c_count, + deleter, + std::vector{}, + layout); + } + else { + auto* const mut_ptr = const_cast(ptr); + res = dal::homogen_table(queue, + mut_ptr, + r_count, + c_count, + deleter, + std::vector{}, + layout); + } + obj.inc_ref(); + return res; +} + +dal::table convert_from_sua_iface(py::object obj) { + auto sua_iface_dict = get_sua_interface(obj); + const auto type = get_sua_dtype(sua_iface_dict); + + dal::table res{}; + +#define MAKE_HOMOGEN_TABLE(CType) res = convert_to_homogen_impl(obj); + + SET_DAL_TYPE_FROM_DAL_TYPE(type, + MAKE_HOMOGEN_TABLE, // + report_problem_for_sua_iface(": 
unknown data type")); + +#undef MAKE_HOMOGEN_TABLE + + return res; +} + +py::dict construct_sua_iface(const dal::table& input) { + const auto kind = input.get_kind(); + if (kind != dal::homogen_table::kind()) + report_problem_to_sua_iface(": only homogen tables are supported"); + + const auto& homogen_input = reinterpret_cast(input); + const dal::data_type dtype = homogen_input.get_metadata().get_data_type(0); + const dal::data_layout data_layout = homogen_input.get_data_layout(); + + npy_intp row_count = dal::detail::integral_cast(homogen_input.get_row_count()); + npy_intp column_count = dal::detail::integral_cast(homogen_input.get_column_count()); + + // need "version", "data", "shape", "typestr", "syclobj" + py::tuple shape = py::make_tuple(row_count, column_count); + py::list data_entry(2); + + auto bytes_array = dal::detail::get_original_data(homogen_input); + if (!bytes_array.get_queue().has_value()) { + report_problem_to_sua_iface(": table has no queue"); + } + auto queue = std::make_shared(bytes_array.get_queue().value()); + + const bool is_mutable = bytes_array.has_mutable_data(); + + static_assert(sizeof(std::size_t) == sizeof(void*)); + data_entry[0] = is_mutable ? reinterpret_cast(bytes_array.get_mutable_data()) + : reinterpret_cast(bytes_array.get_data()); + data_entry[1] = is_mutable; + + py::dict iface; + iface["data"] = data_entry; + iface["shape"] = shape; + iface["strides"] = get_npy_strides(data_layout, row_count, column_count); + // dpctl supports only version 1. + iface["version"] = 1; + iface["typestr"] = convert_dal_to_sua_type(dtype); + iface["syclobj"] = pack_queue(queue); + + return iface; +} + +// We are using `__sycl_usm_array_interface__` attribute for constructing +// dpctl tensor on python level. 
+void define_sycl_usm_array_property(py::class_& table_obj) { + table_obj.def_property_readonly("__sycl_usm_array_interface__", &construct_sua_iface); +} + +} // namespace oneapi::dal::python + +#endif // ONEDAL_DATA_PARALLEL diff --git a/onedal/datatypes/data_conversion_dpctl.hpp b/onedal/datatypes/data_conversion_sua_iface.hpp similarity index 95% rename from onedal/datatypes/data_conversion_dpctl.hpp rename to onedal/datatypes/data_conversion_sua_iface.hpp index b9fdb64b16..8400120893 100644 --- a/onedal/datatypes/data_conversion_dpctl.hpp +++ b/onedal/datatypes/data_conversion_sua_iface.hpp @@ -27,7 +27,7 @@ namespace oneapi::dal::python { namespace py = pybind11; -dal::table convert_from_dptensor(py::object obj); +dal::table convert_from_sua_iface(py::object obj); py::dict construct_sua_iface(const dal::table& input); void define_sycl_usm_array_property(py::class_& t); diff --git a/onedal/datatypes/table.cpp b/onedal/datatypes/table.cpp index c4a14a9d3f..9771306118 100644 --- a/onedal/datatypes/table.cpp +++ b/onedal/datatypes/table.cpp @@ -17,12 +17,12 @@ #include "oneapi/dal/table/common.hpp" #include "oneapi/dal/table/homogen.hpp" -#ifdef ONEDAL_DPCTL_INTEGRATION -#include "onedal/datatypes/data_conversion_dpctl.hpp" -#endif // ONEDAL_DPCTL_INTEGRATION +#ifdef ONEDAL_DATA_PARALLEL +#include "onedal/datatypes/data_conversion_sua_iface.hpp" +#endif // ONEDAL_DATA_PARALLEL #include "onedal/datatypes/data_conversion.hpp" -#include "onedal/datatypes/numpy_helpers.hpp" +#include "onedal/datatypes/utils/numpy_helpers.hpp" #include "onedal/common/pybind11_helpers.hpp" #include "onedal/version.hpp" @@ -73,9 +73,9 @@ ONEDAL_PY_INIT_MODULE(table) { return py::make_tuple(row_count, column_count); }); -#ifdef ONEDAL_DPCTL_INTEGRATION +#ifdef ONEDAL_DATA_PARALLEL define_sycl_usm_array_property(table_obj); -#endif // ONEDAL_DPCTL_INTEGRATION +#endif // ONEDAL_DATA_PARALLEL m.def("to_table", [](py::object obj) { auto* obj_ptr = obj.ptr(); @@ -87,11 +87,11 @@ 
ONEDAL_PY_INIT_MODULE(table) { return obj_ptr; }); -#ifdef ONEDAL_DPCTL_INTEGRATION - m.def("dpctl_to_table", [](py::object obj) { - return convert_from_dptensor(obj); +#ifdef ONEDAL_DATA_PARALLEL + m.def("sua_iface_to_table", [](py::object obj) { + return convert_from_sua_iface(obj); }); -#endif // ONEDAL_DPCTL_INTEGRATION +#endif // ONEDAL_DATA_PARALLEL } } // namespace oneapi::dal::python diff --git a/onedal/datatypes/table_metadata.cpp b/onedal/datatypes/table_metadata.cpp index 3b265664d3..2ddd32570c 100644 --- a/onedal/datatypes/table_metadata.cpp +++ b/onedal/datatypes/table_metadata.cpp @@ -16,7 +16,7 @@ #include "oneapi/dal/table/common.hpp" -#include "onedal/datatypes/numpy_helpers.hpp" +#include "onedal/datatypes/utils/numpy_helpers.hpp" #include "onedal/common/pybind11_helpers.hpp" #include "onedal/version.hpp" diff --git a/onedal/datatypes/tests/common.py b/onedal/datatypes/tests/common.py new file mode 100644 index 0000000000..59aaabfe83 --- /dev/null +++ b/onedal/datatypes/tests/common.py @@ -0,0 +1,83 @@ +# =============================================================================== +# Copyright 2024 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# =============================================================================== + +from onedal._device_offload import dpctl_available, dpnp_available + +if dpnp_available: + import dpnp + +if dpctl_available: + from dpctl.tensor import usm_ndarray + + +def _assert_tensor_attr(actual, desired, order): + is_usm_tensor = lambda x: isinstance(x, dpnp.ndarray) or isinstance(x, usm_ndarray) + assert is_usm_tensor(actual) + assert is_usm_tensor(desired) + # dpctl.tensor is the dpnp.ndarray's core tensor structure along + # with advanced device management. Convert dpnp to dpctl.tensor with zero copy. + get_tensor = lambda x: ( + x.get_array() if dpnp_available and isinstance(x, dpnp.ndarray) else x + ) + # Now DPCtl tensors + actual = get_tensor(actual) + desired = get_tensor(desired) + + assert actual.shape == desired.shape + assert actual.strides == desired.strides + assert actual.dtype == desired.dtype + if order == "F": + assert actual.flags.f_contiguous + assert desired.flags.f_contiguous + assert actual.flags.f_contiguous == desired.flags.f_contiguous + else: + assert actual.flags.c_contiguous + assert desired.flags.c_contiguous + assert actual.flags.c_contiguous == desired.flags.c_contiguous + assert actual.flags == desired.flags + assert actual.sycl_queue == desired.sycl_queue + # TODO: + # check better way to check usm ptrs. + assert actual.usm_data._pointer == desired.usm_data._pointer + + +# TODO: +# remove skip_syclobj and skip_data_1 params. +# def _assert_sua_iface_fields(actual, desired): +def _assert_sua_iface_fields(actual, desired, skip_syclobj=False, skip_data_1=False): + assert hasattr(actual, "__sycl_usm_array_interface__") + assert hasattr(desired, "__sycl_usm_array_interface__") + actual_sua_iface = actual.__sycl_usm_array_interface__ + desired_sua_iface = desired.__sycl_usm_array_interface__ + # TODO: + # do value checks by the dict keys in for.
+ assert actual_sua_iface["data"][0] == desired_sua_iface["data"][0] + # TODO: + # remove this condition/param. + if not skip_data_1: + assert actual_sua_iface["data"][1] == desired_sua_iface["data"][1] + assert actual_sua_iface["shape"] == desired_sua_iface["shape"] + if not actual_sua_iface["strides"] and not desired_sua_iface["strides"]: + # None to indicate a C-style contiguous 1D array. + # onedal4py constructs __sycl_usm_array_interface__["strides"] with + # real values. + assert actual_sua_iface["strides"] == desired_sua_iface["strides"] + assert actual_sua_iface["version"] == desired_sua_iface["version"] + assert actual_sua_iface["typestr"] == desired_sua_iface["typestr"] + if not skip_syclobj: + # TODO: + # comment and the conditions to check values. + assert actual_sua_iface["syclobj"]._get_capsule() == desired_sua_iface["syclobj"] diff --git a/onedal/datatypes/tests/test_data.py b/onedal/datatypes/tests/test_data.py index 17e182bc7a..45cc90e816 100644 --- a/onedal/datatypes/tests/test_data.py +++ b/onedal/datatypes/tests/test_data.py @@ -17,19 +17,79 @@ import numpy as np import pytest from numpy.testing import assert_allclose +from sklearn.datasets import make_blobs -from onedal import _backend +from onedal import _backend, _is_dpc_backend +from onedal._device_offload import dpctl_available, dpnp_available from onedal.datatypes import from_table, to_table + +# TODO: +# re-impl and use from_table, to_table instead. 
+from onedal.datatypes._data_conversion import convert_one_from_table, convert_one_to_table +from onedal.datatypes.tests.common import _assert_sua_iface_fields, _assert_tensor_attr from onedal.primitives import linear_kernel +from onedal.tests.utils._dataframes_support import ( + _convert_to_dataframe, + get_dataframes_and_queues, +) from onedal.tests.utils._device_selection import get_queues +from onedal.utils._array_api import _get_sycl_namespace -try: - import dpctl +if dpctl_available: import dpctl.tensor as dpt - dpctl_available = dpctl.__version__ >= "0.14" -except ImportError: - dpctl_available = False +if dpnp_available: + import dpnp + + +data_shapes = [ + pytest.param((1000, 100), id="(1000, 100)"), + pytest.param((2000, 50), id="(2000, 50)"), +] + +ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray} + + +if _is_dpc_backend: + from daal4py.sklearn._utils import get_dtype + from onedal.cluster.dbscan import BaseDBSCAN + from onedal.common._policy import _get_policy + + # TODO: + # use from_table, to_table. + from onedal.datatypes._data_conversion import ( + convert_one_from_table, + convert_one_to_table, + ) + + class DummyEstimatorWithTableConversions: + + def fit(self, X, y=None): + policy = _get_policy(X.sycl_queue, None) + bs_DBSCAN = BaseDBSCAN() + types = [np.float32, np.float64] + if get_dtype(X) not in types: + X = X.astype(np.float64) + dtype = get_dtype(X) + params = bs_DBSCAN._get_onedal_params(dtype) + X_table = convert_one_to_table(X, True) + # TODO: + # check other candidates for the dummy base OneDAL func. + # OneDAL backend func is needed to check result table checks. 
+ result = _backend.dbscan.clustering.compute( + policy, params, X_table, convert_one_to_table(None) + ) + result_responses_table = result.responses + sua_iface, xp, _ = _get_sycl_namespace(X) + result_responses_df = convert_one_from_table( + result_responses_table, sua_iface=sua_iface, xp=xp + ) + return X_table, result_responses_table, result_responses_df + +else: + + class DummyEstimatorWithTableConversions: + pass def _test_input_format_c_contiguous_numpy(queue, dtype): @@ -168,68 +228,96 @@ def test_conversion_to_table(dtype): # TODO: -# Currently `dpctl_to_table` is not used in onedal estimators. -# The test will be enabled after future data management update, that brings -# re-impl of conversions between onedal tables and usm ndarrays. -@pytest.mark.skip( - reason="Currently removed. Will be enabled after data management update" +# rename test suite. +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues("dpctl,dpnp", "cpu, gpu") ) -@pytest.mark.skipif(not dpctl_available, reason="requires dpctl>=0.14") -@pytest.mark.parametrize("queue", get_queues("cpu,gpu")) +@pytest.mark.parametrize("order", ["C", "F"]) @pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) -def test_input_format_c_contiguous_dpctl(queue, dtype): +def test_input_sua_iface_zero_copy(dataframe, queue, order, dtype): + # TODO: + # investigate in the same PR. + if dataframe in "dpnp": + pytest.skip("Some bug with Sycl.Queue for DPNP inputs.") + # TODO: + # add description to the test.
rng = np.random.RandomState(0) - x_default = np.array(5 * rng.random_sample((10, 59)), dtype=dtype) + X_default = np.array(5 * rng.random_sample((10, 59)), dtype=dtype) - x_numpy = np.asanyarray(x_default, dtype=dtype, order="C") - x_dpt = dpt.asarray(x_numpy, usm_type="device", sycl_queue=queue) - # assert not x_dpt.flags.fnc - assert isinstance(x_dpt, dpt.usm_ndarray) + X_np = np.asanyarray(X_default, dtype=dtype, order=order) - x_table = _backend.dpctl_to_table(x_dpt) - assert hasattr(x_table, "__sycl_usm_array_interface__") - x_dpt_from_table = dpt.asarray(x_table) + X_dp = _convert_to_dataframe(X_np, sycl_queue=queue, target_df=dataframe) - assert ( - x_dpt.__sycl_usm_array_interface__["data"][0] - == x_dpt_from_table.__sycl_usm_array_interface__["data"][0] + sua_iface, X_dp_namespace, _ = _get_sycl_namespace(X_dp) + + X_table = convert_one_to_table(X_dp, sua_iface=sua_iface) + # TODO: + # investigate in the same PR skip_syclobj WO. + _assert_sua_iface_fields(X_dp, X_table, skip_syclobj=True) + + X_dp_from_table = convert_one_from_table( + X_table, sua_iface=sua_iface, xp=X_dp_namespace ) - assert x_dpt.shape == x_dpt_from_table.shape - assert x_dpt.strides == x_dpt_from_table.strides - assert x_dpt.dtype == x_dpt_from_table.dtype - assert x_dpt.flags.c_contiguous - assert x_dpt_from_table.flags.c_contiguous + # TODO: + # investigate in the same PR skip_syclobj WO. + _assert_sua_iface_fields(X_table, X_dp_from_table, skip_syclobj=True) + _assert_tensor_attr(X_dp, X_dp_from_table, order) # TODO: -# Currently `dpctl_to_table` is not used in onedal estimators. -# The test will be enabled after future data management update, that brings -# re-impl of conversions between onedal tables and usm ndarrays. -@pytest.mark.skip( - reason="Currently removed. Will be enabled after data management update" +# rename test suite.
+@pytest.mark.skipif( + not _is_dpc_backend, + reason="__sycl_usm_array_interface__ support requires DPC backend.", ) -@pytest.mark.skipif(not dpctl_available, reason="requires dpctl>=0.14") -@pytest.mark.parametrize("queue", get_queues("cpu,gpu")) -@pytest.mark.parametrize("dtype", [np.float32, np.float64, np.int32, np.int64]) -def test_input_format_f_contiguous_dpctl(queue, dtype): - rng = np.random.RandomState(0) - x_default = np.array(5 * rng.random_sample((10, 59)), dtype=dtype) - - x_numpy = np.asanyarray(x_default, dtype=dtype, order="F") - x_dpt = dpt.asarray(x_numpy, usm_type="device", sycl_queue=queue) - # assert not x_dpt.flags.fnc - assert isinstance(x_dpt, dpt.usm_ndarray) - - x_table = _backend.dpctl_to_table(x_dpt) - assert hasattr(x_table, "__sycl_usm_array_interface__") - x_dpt_from_table = dpt.asarray(x_table) +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues("dpctl, dpnp", "cpu, gpu") +) +@pytest.mark.parametrize("order", ["F", "C"]) +@pytest.mark.parametrize("data_shape", data_shapes) +def test_table_conversions(dataframe, queue, order, data_shape): + # TODO: + # Description for the test. + if queue.sycl_device.is_cpu: + pytest.skip("OneDAL returns None sycl queue for CPU sycl queue inputs.") + # TODO: + # investigate in the same PR. 
+ if dataframe in "dpnp": + pytest.skip("Some bug with Sycl.Queue for DPNP inputs.") + + n_samples, n_features = data_shape + X, y = make_blobs( + n_samples=n_samples, centers=3, n_features=n_features, random_state=0 + ) + X = ORDER_DICT[order](X) + + X = _convert_to_dataframe(X, sycl_queue=queue, target_df=dataframe) + alg = DummyEstimatorWithTableConversions() + X_table, result_responses_table, result_responses_df = alg.fit(X) + + assert hasattr(X_table, "__sycl_usm_array_interface__") + assert hasattr(result_responses_table, "__sycl_usm_array_interface__") + assert hasattr(result_responses_df, "__sycl_usm_array_interface__") + assert hasattr(X, "__sycl_usm_array_interface__") + # TODO: + # investigate in the same PR skip_syclobj and skip_data_1 WO. + _assert_sua_iface_fields(X, X_table, skip_syclobj=True, skip_data_1=True) + # TODO: + # investigate in the same PR skip_syclobj and skip_data_1 WO. + _assert_sua_iface_fields( + result_responses_df, result_responses_table, skip_syclobj=True, skip_data_1=True + ) + assert X.sycl_queue == result_responses_df.sycl_queue + if order == "F": + assert X.flags.f_contiguous == result_responses_df.flags.f_contiguous + else: + assert X.flags.c_contiguous == result_responses_df.flags.c_contiguous + # 1D output expected to have the same c_contiguous and f_contiguous flag values. 
assert ( - x_dpt.__sycl_usm_array_interface__["data"][0] - == x_dpt_from_table.__sycl_usm_array_interface__["data"][0] + result_responses_df.flags.c_contiguous == result_responses_df.flags.f_contiguous ) - assert x_dpt.shape == x_dpt_from_table.shape - assert x_dpt.strides == x_dpt_from_table.strides - assert x_dpt.dtype == x_dpt_from_table.dtype - assert x_dpt.flags.f_contiguous - assert x_dpt_from_table.flags.f_contiguous + + +# TODO: +# def test_wrong_inputs_for_sua_iface_conversion diff --git a/onedal/datatypes/utils/common.cpp b/onedal/datatypes/utils/common.cpp new file mode 100644 index 0000000000..3891d17d6a --- /dev/null +++ b/onedal/datatypes/utils/common.cpp @@ -0,0 +1,41 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include + +namespace oneapi::dal::python { + +union endian_checker { + std::uint64_t blob; + std::uint8_t arr[8]; +}; + +bool is_little_endian_impl() { + constexpr std::uint64_t one = 0xfful; + constexpr endian_checker checker{ one }; + return static_cast(checker.arr[0]); +} + +bool is_little_endian() { + static const bool value = is_little_endian_impl(); + return value; +} + +bool is_big_endian() { + return !is_little_endian(); +} + +} // namespace oneapi::dal::python diff --git a/onedal/datatypes/utils/common.hpp b/onedal/datatypes/utils/common.hpp new file mode 100644 index 0000000000..f356dd1f0d --- /dev/null +++ b/onedal/datatypes/utils/common.hpp @@ -0,0 +1,39 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#pragma once + +#include + +namespace oneapi::dal::python { + +template +inline auto inverse_map(const std::unordered_map& input) + -> std::unordered_map { + const auto b_count = input.bucket_count(); + std::unordered_map output(b_count); + + for (const auto& [key, value] : input) { + output.emplace(value, key); + } + + return output; +} + +bool is_big_endian(); +bool is_little_endian(); + +} // namespace oneapi::dal::python diff --git a/onedal/datatypes/utils/dtype_conversions.cpp b/onedal/datatypes/utils/dtype_conversions.cpp new file mode 100644 index 0000000000..97af6e2de7 --- /dev/null +++ b/onedal/datatypes/utils/dtype_conversions.cpp @@ -0,0 +1,129 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. 
+*******************************************************************************/ + +#include + +#include + +#include "oneapi/dal/common.hpp" +#include "oneapi/dal/detail/common.hpp" + +#include "onedal/datatypes/utils/common.hpp" +#include "onedal/datatypes/utils/dtype_conversions.hpp" +#include "onedal/datatypes/utils/dtype_dispatcher.hpp" + +namespace oneapi::dal::python { + +using fwd_map_t = std::unordered_map; +using inv_map_t = std::unordered_map; + +inline void unknown_type() { + throw std::runtime_error("Unknown type"); +} + +template +constexpr inline char type_desc() { + if constexpr (std::is_integral_v) { + if (std::is_unsigned_v) { + return 'u'; + } + else { + return 'i'; + } + } + else { + if (std::is_floating_point_v) { + return 'f'; + } + else { + unknown_type(); + } + } +} + +template +constexpr inline char type_size() { + switch (sizeof(Type)) { + case 1ul: return '1'; + case 2ul: return '2'; + case 4ul: return '4'; + case 8ul: return '8'; + default: unknown_type(); + }; +} + +template +inline std::string describe(char e = '<') { + constexpr auto s = type_size(); + constexpr auto d = type_desc(); + return std::string{ { e, d, s } }; +} + +const char end = is_big_endian() ? 
'>' : '<'; + +template +inline auto make_fwd_map(const std::tuple* const = nullptr) { + fwd_map_t result(3ul * sizeof...(Types)); + + dal::detail::apply( + [&](auto type_tag) -> void { + using type_t = std::decay_t; + constexpr auto dal_v = detail::make_data_type(); + result.emplace(describe(end), dal_v); + result.emplace(describe('='), dal_v); + result.emplace(describe('|'), dal_v); + }, + Types{}...); + + return result; +} + +template +inline auto make_inv_map(const std::tuple* const = nullptr) { + inv_map_t result(sizeof...(Types)); + + dal::detail::apply( + [&](auto type_tag) -> void { + using type_t = std::decay_t; + constexpr auto dal_v = detail::make_data_type(); + result.emplace(dal_v, describe('|')); + }, + Types{}...); + + return result; +} + +static const fwd_map_t& get_fwd_map() { + constexpr const supported_types_t* types = nullptr; + static const fwd_map_t body = make_fwd_map(types); + return body; +} + +static const inv_map_t& get_inv_map() { + constexpr const supported_types_t* types = nullptr; + static const inv_map_t body = make_inv_map(types); + return body; +} + +dal::data_type convert_sua_to_dal_type(std::string dtype) { + return get_fwd_map().at(dtype); +} + +std::string convert_dal_to_sua_type(dal::data_type dtype) { + return get_inv_map().at(dtype); +} + +} // namespace oneapi::dal::python diff --git a/onedal/datatypes/utils/dtype_conversions.hpp b/onedal/datatypes/utils/dtype_conversions.hpp new file mode 100644 index 0000000000..ad7984127a --- /dev/null +++ b/onedal/datatypes/utils/dtype_conversions.hpp @@ -0,0 +1,53 @@ +/******************************************************************************* +* Copyright 2023 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include + +#include + +#include "oneapi/dal/common.hpp" + +namespace py = pybind11; + +#define SET_DAL_TYPE_FROM_DAL_TYPE(_T, _FUNCT, _EXCEPTION) \ + switch (_T) { \ + case dal::data_type::float32: { \ + _FUNCT(float); \ + break; \ + } \ + case dal::data_type::float64: { \ + _FUNCT(double); \ + break; \ + } \ + case dal::data_type::int32: { \ + _FUNCT(std::int32_t); \ + break; \ + } \ + case dal::data_type::int64: { \ + _FUNCT(std::int64_t); \ + break; \ + } \ + default: _EXCEPTION; \ + }; + +namespace oneapi::dal::python { + +dal::data_type convert_sua_to_dal_type(std::string dtype); +std::string convert_dal_to_sua_type(dal::data_type dtype); + +} // namespace oneapi::dal::python diff --git a/onedal/datatypes/utils/dtype_dispatcher.hpp b/onedal/datatypes/utils/dtype_dispatcher.hpp new file mode 100644 index 0000000000..672321510e --- /dev/null +++ b/onedal/datatypes/utils/dtype_dispatcher.hpp @@ -0,0 +1,101 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#pragma once + +#include +#include + +#include "onedal/common.hpp" +#include "oneapi/dal/common.hpp" +#include "oneapi/dal/detail/common.hpp" + +// TODO: Using includes should be the primary path +#if defined(ONEDAL_VERSION) && (20240400 < ONEDAL_VERSION) + +#include "oneapi/dal/detail/dtype_dispatcher.hpp" + +#else // Version check + +#include "oneapi/dal/detail/error_messages.hpp" + +namespace oneapi::dal::detail { + +template +inline constexpr auto dispatch_by_data_type(data_type dtype, Op&& op, OnUnknown&& on_unknown) { + switch (dtype) { + case data_type::int8: return op(std::int8_t{}); + case data_type::uint8: return op(std::uint8_t{}); + case data_type::int16: return op(std::int16_t{}); + case data_type::uint16: return op(std::uint16_t{}); + case data_type::int32: return op(std::int32_t{}); + case data_type::uint32: return op(std::uint32_t{}); + case data_type::int64: return op(std::int64_t{}); + case data_type::uint64: return op(std::uint64_t{}); + case data_type::float32: return op(float{}); + case data_type::float64: return op(double{}); + default: return on_unknown(dtype); + } +} + +template > +inline constexpr ResultType dispatch_by_data_type(data_type dtype, Op&& op) { + // Necessary to make the return type conformant with + // other dispatch branches + const auto on_unknown = [](data_type) -> ResultType { + using msg = oneapi::dal::detail::error_messages; + throw unimplemented{ msg::unsupported_conversion_types() }; + }; + + return dispatch_by_data_type(dtype, 
std::forward(op), on_unknown); +} + +} // namespace oneapi::dal::detail + +#endif // Version check + +// TODO: Using includes should be the primary path +#if defined(ONEDAL_VERSION) && (ONEDAL_VERSION < 20240000) + +namespace oneapi::dal::detail { + +template +constexpr inline void apply(Op&& op) { + ((void)op(Types{}), ...); +} + +template +constexpr inline void apply(Op&& op, Args&&... args) { + ((void)op(std::forward(args)), ...); +} + +} //namespace oneapi::dal::detail + +#endif // Version check + +namespace oneapi::dal::python { + +using supported_types_t = std::tuple; +} // namespace oneapi::dal::python diff --git a/onedal/datatypes/numpy_helpers.cpp b/onedal/datatypes/utils/numpy_helpers.cpp similarity index 97% rename from onedal/datatypes/numpy_helpers.cpp rename to onedal/datatypes/utils/numpy_helpers.cpp index 5786f06624..4fb774a6c6 100644 --- a/onedal/datatypes/numpy_helpers.cpp +++ b/onedal/datatypes/utils/numpy_helpers.cpp @@ -14,7 +14,7 @@ * limitations under the License. *******************************************************************************/ -#include "onedal/datatypes/numpy_helpers.hpp" +#include "onedal/datatypes/utils/numpy_helpers.hpp" namespace oneapi::dal::python { diff --git a/onedal/datatypes/numpy_helpers.hpp b/onedal/datatypes/utils/numpy_helpers.hpp similarity index 100% rename from onedal/datatypes/numpy_helpers.hpp rename to onedal/datatypes/utils/numpy_helpers.hpp diff --git a/onedal/datatypes/utils/sua_iface_helpers.cpp b/onedal/datatypes/utils/sua_iface_helpers.cpp new file mode 100644 index 0000000000..a3d5b0d4c5 --- /dev/null +++ b/onedal/datatypes/utils/sua_iface_helpers.cpp @@ -0,0 +1,190 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. 
+* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifdef ONEDAL_DATA_PARALLEL +#define NO_IMPORT_ARRAY + +#include +#include +#include + +#include "oneapi/dal/common.hpp" +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/table/homogen.hpp" +#include "oneapi/dal/table/detail/homogen_utils.hpp" + +#include "onedal/common/policy_common.hpp" +#include "onedal/datatypes/data_conversion_sua_iface.hpp" +#include "onedal/datatypes/utils/dtype_conversions.hpp" +#include "onedal/datatypes/utils/dtype_dispatcher.hpp" + +// TODO: +// add description for the sua_iface dict. 
+ +namespace oneapi::dal::python { + +dal::data_type get_sua_dtype(const py::dict& sua) { + auto dtype = sua["typestr"].cast(); + return convert_sua_to_dal_type(std::move(dtype)); +} + +py::dict get_sua_interface(const py::object& obj) { + constexpr const char name[] = "__sycl_usm_array_interface__"; + return obj.attr(name).cast(); +} + +py::tuple get_sua_data(const py::dict& sua) { + py::tuple result = sua["data"].cast(); + if (result.size() != py::ssize_t{ 2ul }) { + throw std::length_error("Size of \"data\" tuple should be 2"); + } + return result; +} + +std::uintptr_t get_sua_ptr(const py::dict& sua) { + const py::tuple data = get_sua_data(sua); + return data[0ul].cast(); +} + +bool is_sua_readonly(const py::dict& sua) { + const py::tuple data = get_sua_data(sua); + return data[1ul].cast(); +} + +py::tuple get_sua_shape(const py::dict& sua) { + py::tuple shape = sua["shape"].cast(); + if (shape.size() == py::ssize_t{ 0ul }) { + throw std::runtime_error("Wrong number of dimensions"); + } + return shape; +} + +void report_problem_for_sua_iface(const char* clarification) { + constexpr const char* const base_message = "Unable to convert from SUA interface."; + + std::string message{ base_message }; + message += std::string{ clarification }; + throw std::invalid_argument{ message }; +} + +std::int64_t get_and_check_sua_iface_ndim(const py::dict& sua_dict) { + constexpr const char* const err_message = ": only 1D & 2D tensors are allowed"; + py::tuple shape = get_sua_shape(sua_dict); + const py::ssize_t raw_ndim = shape.size(); + const auto ndim = detail::integral_cast(raw_ndim); + if ((ndim != 1l) && (ndim != 2l)) + report_problem_for_sua_iface(err_message); + return ndim; +} + +std::pair get_sua_iface_shape_by_values(const py::dict sua_dict, + const std::int64_t ndim) { + std::int64_t row_count, col_count; + auto shape = sua_dict["shape"].cast(); + if (ndim == 1l) { + row_count = shape[0l].cast(); + col_count = 1l; + } + else { + row_count = shape[0l].cast(); + 
col_count = shape[1l].cast(); + } + return std::make_pair(row_count, col_count); +} + +dal::data_layout get_sua_iface_layout(const py::dict& sua_dict, + const std::int64_t& r_count, + const std::int64_t& c_count) { + const auto raw_strides = sua_dict["strides"]; + if (raw_strides.is_none()) { + // None to indicate a C-style contiguous array. + return dal::data_layout::row_major; + } + auto strides_tuple = raw_strides.cast(); + + auto strides_len = py::len(strides_tuple); + + if (strides_len == 1l) { + return dal::data_layout::row_major; + } + else if (strides_len == 2l) { + auto r_strides = strides_tuple[0l].cast(); + auto c_strides = strides_tuple[1l].cast(); + using shape_t = std::decay_t; + using stride_t = std::decay_t; + constexpr auto one = static_cast(1); + static_assert(std::is_same_v); + if (r_strides == c_count && c_strides == one) { + return dal::data_layout::row_major; + } + else if (r_strides == one && c_strides == r_count) { + return dal::data_layout::column_major; + } + else { + throw std::runtime_error("Wrong strides"); + } + } + else { + throw std::runtime_error("Unsupporterd data shape.`"); + } +} + +void report_problem_to_sua_iface(const char* clarification) { + constexpr const char* const base_message = "Unable to convert to SUA interface."; + + std::string message{ base_message }; + message += std::string{ clarification }; + throw std::runtime_error{ message }; +} + +py::tuple get_npy_strides(const dal::data_layout& data_layout, + npy_intp row_count, + npy_intp column_count) { + if (data_layout == dal::data_layout::unknown) { + report_problem_to_sua_iface(": unknown data layout"); + } + py::tuple strides; + if (data_layout == dal::data_layout::row_major) { + strides = py::make_tuple(column_count, 1l); + } + else { + strides = py::make_tuple(1l, row_count); + } + return strides; +} + +py::capsule pack_queue(const std::shared_ptr& queue) { + static const char queue_capsule_name[] = "SyclQueueRef"; + if (queue.get() == nullptr) { + throw 
std::runtime_error("Empty queue"); + } + else { + void (*deleter)(void*) = [](void* const queue) -> void { + delete reinterpret_cast(queue); + }; + + sycl::queue* ptr = new sycl::queue{ *queue }; + void* const raw = reinterpret_cast(ptr); + + py::capsule capsule(raw, deleter); + capsule.set_name(queue_capsule_name); + return capsule; + } +} + +} // namespace oneapi::dal::python + +#endif // ONEDAL_DATA_PARALLEL diff --git a/onedal/datatypes/utils/sua_iface_helpers.hpp b/onedal/datatypes/utils/sua_iface_helpers.hpp new file mode 100644 index 0000000000..9ce084ac94 --- /dev/null +++ b/onedal/datatypes/utils/sua_iface_helpers.hpp @@ -0,0 +1,74 @@ +/******************************************************************************* +* Copyright 2024 Intel Corporation +* +* Licensed under the Apache License, Version 2.0 (the "License"); +* you may not use this file except in compliance with the License. +* You may obtain a copy of the License at +* +* http://www.apache.org/licenses/LICENSE-2.0 +* +* Unless required by applicable law or agreed to in writing, software +* distributed under the License is distributed on an "AS IS" BASIS, +* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +* See the License for the specific language governing permissions and +* limitations under the License. +*******************************************************************************/ + +#ifdef ONEDAL_DATA_PARALLEL +#define NO_IMPORT_ARRAY + +#include +#include +#include + +#include "oneapi/dal/common.hpp" +#include "oneapi/dal/detail/common.hpp" +#include "oneapi/dal/table/homogen.hpp" +#include "oneapi/dal/table/detail/homogen_utils.hpp" + +#include "onedal/common/policy_common.hpp" +#include "onedal/datatypes/data_conversion_sua_iface.hpp" +#include "onedal/datatypes/utils/dtype_conversions.hpp" +#include "onedal/datatypes/utils/dtype_dispatcher.hpp" + +// TODO: +// add description for the sua_iface dict. 
+ +namespace oneapi::dal::python { + +dal::data_type get_sua_dtype(const py::dict& sua); + +py::dict get_sua_interface(const py::object& obj); + +py::tuple get_sua_data(const py::dict& sua); + +std::uintptr_t get_sua_ptr(const py::dict& sua); + +bool is_sua_readonly(const py::dict& sua); + +py::tuple get_sua_shape(const py::dict& sua); + +// TODO: +// rename and update. +void report_problem_for_sua_iface(const char* clarification); + +std::int64_t get_and_check_sua_iface_ndim(const py::dict& sua_dict); + +std::pair get_sua_iface_shape_by_values(const py::dict sua_dict, + const std::int64_t ndim); + +dal::data_layout get_sua_iface_layout(const py::dict& sua_dict, + const std::int64_t& r_count, + const std::int64_t& c_count); + +void report_problem_to_sua_iface(const char* clarification); + +py::tuple get_npy_strides(const dal::data_layout& data_layout, + npy_intp row_count, + npy_intp column_count); + +py::capsule pack_queue(const std::shared_ptr& queue); + +} // namespace oneapi::dal::python + +#endif // ONEDAL_DATA_PARALLEL diff --git a/onedal/utils/_array_api.py b/onedal/utils/_array_api.py index 4accdd3ac0..9f8fa6c7c1 100644 --- a/onedal/utils/_array_api.py +++ b/onedal/utils/_array_api.py @@ -73,19 +73,19 @@ def _get_sycl_namespace(*arrays): """Get namespace of sycl arrays.""" # sycl support designed to work regardless of array_api_dispatch sklearn global value - sycl_type = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")} + sua_iface = {type(x): x for x in arrays if hasattr(x, "__sycl_usm_array_interface__")} - if len(sycl_type) > 1: - raise ValueError(f"Multiple SYCL types for array inputs: {sycl_type}") + if len(sua_iface) > 1: + raise ValueError(f"Multiple SYCL types for array inputs: {sua_iface}") - if sycl_type: - (X,) = sycl_type.values() + if sua_iface: + (X,) = sua_iface.values() if hasattr(X, "__array_namespace__"): - return sycl_type, X.__array_namespace__(), True + return sua_iface, X.__array_namespace__(), True elif 
dpnp_available and isinstance(X, dpnp.ndarray): - return sycl_type, dpnp, False + return sua_iface, dpnp, False else: - raise ValueError(f"SYCL type not recognized: {sycl_type}") + raise ValueError(f"SYCL type not recognized: {sua_iface}") - return sycl_type, None, False + return sua_iface, None, False diff --git a/sklearnex/tests/test_memory_usage.py b/sklearnex/tests/test_memory_usage.py index 012086507b..83b4f60981 100644 --- a/sklearnex/tests/test_memory_usage.py +++ b/sklearnex/tests/test_memory_usage.py @@ -23,7 +23,6 @@ from inspect import isclass import numpy as np -import pandas as pd import pytest from scipy.stats import pearsonr from sklearn.base import BaseEstimator, clone @@ -36,6 +35,7 @@ get_dataframes_and_queues, ) from onedal.tests.utils._device_selection import get_queues, is_dpctl_available +from onedal.utils._array_api import _get_sycl_namespace from sklearnex import config_context from sklearnex.tests.utils import PATCHED_FUNCTIONS, PATCHED_MODELS, SPECIAL_INSTANCES from sklearnex.utils._array_api import get_namespace @@ -124,6 +124,36 @@ def gen_functions(functions): ORDER_DICT = {"F": np.asfortranarray, "C": np.ascontiguousarray} +DUMMY_ESTIMATOR = {} +if _is_dpc_backend: + # TODO: + # use from_table, to_table. 
+ from onedal.datatypes._data_conversion import ( + convert_one_from_table, + convert_one_to_table, + ) + + class DummyEstimatorWithTableConversions(BaseEstimator): + + # __name__ = 'DummyEstimatorWithTableConversions' + + def fit(self, X, y=None): + sua_iface, _, _ = _get_sycl_namespace(X) + X_table = convert_one_to_table(X, sua_iface=sua_iface) + y_table = convert_one_to_table(y, sua_iface=sua_iface) + return self + + def predict(self, X): + sua_iface, xp, _ = _get_sycl_namespace(X) + X_table = convert_one_to_table(X, sua_iface=sua_iface) + returned_X = convert_one_from_table(X_table, sua_iface=sua_iface, xp=xp) + return returned_X + + DUMMY_ESTIMATOR["DummyEstimatorWithTableConversions"] = ( + DummyEstimatorWithTableConversions + ) + + def gen_clsf_data(n_samples, n_features): data, label = make_classification( n_classes=2, n_samples=n_samples, n_features=n_features, random_state=777 @@ -184,10 +214,15 @@ def split_train_inference(kf, x, y, estimator, queue=None): return mem_tracks +# TODO: +# def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None, get_data_func=None): +# add custom get_data_func. 
def _kfold_function_template(estimator, dataframe, data_shape, queue=None, func=None): tracemalloc.start() n_samples, n_features = data_shape + # get_data_func = get_data_func if get_data_func else gen_clsf_data + # X, y, data_memory_size = get_data_func(n_samples, n_features) X, y, data_memory_size = gen_clsf_data(n_samples, n_features) kf = KFold(n_splits=N_SPLITS) if func: @@ -289,3 +324,30 @@ def test_gpu_memory_leaks(estimator, queue, order, data_shape): with config_context(target_offload=queue): _kfold_function_template(GPU_ESTIMATORS[estimator], None, data_shape, queue, func) + + +@pytest.mark.skipif( + not _is_dpc_backend, + reason="__sycl_usm_array_interface__ support requires DPC backend.", +) +@pytest.mark.parametrize( + "dataframe,queue", get_dataframes_and_queues("dpctl, dpnp", "cpu, gpu") +) +@pytest.mark.parametrize("estimator", DUMMY_ESTIMATOR.keys()) +@pytest.mark.parametrize("order", ["F", "C"]) +@pytest.mark.parametrize("data_shape", data_shapes) +def test_table_conversions_memory_leaks(estimator, dataframe, queue, order, data_shape): + func = ORDER_DICT[order] + + if queue.sycl_device.is_gpu and ( + os.getenv("ZES_ENABLE_SYSMAN") is None or not is_dpctl_available("gpu") + ): + pytest.skip("SYCL device memory leak check requires the level zero sysman") + + _kfold_function_template( + DUMMY_ESTIMATOR[estimator], + dataframe, + data_shape, + queue, + func, + )