Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[fix] changes for GitHub actions PR CI for matching oneDAL Nightly-build #2076

Open
wants to merge 24 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/scripts/activate_components.bat
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ rem %3 - dpcpp activate flag
rem prepare vc
call "C:\Program Files\Microsoft Visual Studio\2022\Enterprise\VC\Auxiliary\Build\vcvarsall" x64
rem prepare icx only if no parameter is given.
if "%3"=="" call .\dpcpp\compiler\%1\env\vars.bat
if "%3"=="" call .\oneapi\compiler\%1\env\vars.bat
rem prepare tbb
call .\dpcpp\tbb\%2\env\vars.bat
call .\oneapi\tbb\%2\env\vars.bat
rem prepare oneDAL
call .\__release_win\daal\latest\env\vars.bat
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -219,8 +219,8 @@ jobs:
shell: cmd
run: |
call .\venv\Scripts\activate.bat
call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
call .\dpcpp\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\env\vars.bat
call .\oneapi\compiler\${{ env.DPCPP_VERSION }}\bin\sycl-ls.exe
bash .ci/scripts/describe_system.sh
- name: Build daal4py/sklearnex
shell: cmd
Expand Down
3 changes: 1 addition & 2 deletions deselected_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -382,8 +382,7 @@ deselected_tests:

# Deselections for 2025.0
- ensemble/tests/test_forest.py::test_importances[ExtraTreesRegressor-squared_error-float64]

- cluster/tests/test_k_means.py::test_kmeans_elkan_results[42-1e-100-sparse_array-normal]
- cluster/tests/test_k_means.py::test_kmeans_elkan_results

# --------------------------------------------------------
# No need to test daal4py patching
Expand Down
2 changes: 1 addition & 1 deletion sklearnex/cluster/k_means.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,8 +102,8 @@ def _onedal_fit_supported(self, method_name, X, y=None, sample_weight=None):
patching_status = PatchingConditionsChain(f"sklearn.cluster.{class_name}.fit")

sample_count = _num_samples(X)
self._algorithm = self.algorithm
supported_algs = ["auto", "full", "lloyd", "elkan"]

if self.algorithm == "elkan":
logging.getLogger("sklearnex").info(
"oneDAL does not support 'elkan', using 'lloyd' algorithm instead."
Expand Down
28 changes: 17 additions & 11 deletions sklearnex/cluster/tests/test_kmeans.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
get_queues,
)
from sklearnex import config_context
from sklearnex.tests.utils import _IS_INTEL


def generate_dense_dataset(n_samples, n_features, density, n_clusters):
Expand All @@ -45,11 +46,11 @@ def generate_dense_dataset(n_samples, n_features, density, n_clusters):


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
if not sklearn_check_version("1.1") and algorithm == "lloyd":
pytest.skip("lloyd requires sklearn>=1.1.")
from sklearnex.cluster import KMeans

X_dense = generate_dense_dataset(1000, 10, 0.5, 3)
Expand All @@ -70,7 +71,9 @@ def test_sklearnex_import_for_dense_data(dataframe, queue, algorithm, init):
reason="Sparse data requires oneDAL>=2024.7.0",
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
@pytest.mark.parametrize("init", ["k-means++", "random"])
def test_sklearnex_import_for_sparse_data(queue, algorithm, init):
from sklearnex.cluster import KMeans
Expand All @@ -86,11 +89,10 @@ def test_sklearnex_import_for_sparse_data(queue, algorithm, init):


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
def test_results_on_dense_gold_data(dataframe, queue, algorithm):
if not sklearn_check_version("1.1") and algorithm == "lloyd":
pytest.skip("lloyd requires sklearn>=1.1.")

from sklearnex.cluster import KMeans

X_train = np.array([[1, 2], [1, 4], [1, 0], [10, 2], [10, 4], [10, 0]])
Expand Down Expand Up @@ -121,15 +123,19 @@ def test_results_on_dense_gold_data(dataframe, queue, algorithm):
)
@pytest.mark.parametrize("queue", get_queues())
@pytest.mark.parametrize("init", ["k-means++", "random", "arraylike"])
@pytest.mark.parametrize("algorithm", ["lloyd", "elkan"])
@pytest.mark.parametrize(
"algorithm", ["lloyd" if sklearn_check_version("1.1") else "full", "elkan"]
)
@pytest.mark.parametrize(
"dims", [(1000, 10, 0.95, 3), (50000, 100, 0.75, 10), (10000, 10, 0.8, 5)]
)
def test_dense_vs_sparse(queue, init, algorithm, dims):
from sklearnex.cluster import KMeans

if init == "random":
pytest.skip("Random initialization in sparse K-means is buggy.")
if init == "random" or (not _IS_INTEL and init == "k-means++"):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of k-means++ in 2025.2 oneDAL")
pytest.skip(f"{init} initialization for sparse K-means is non-conformant.")

# For higher level of sparsity (smaller density) the test may fail
n_samples, n_features, density, n_clusters = dims
Expand Down
6 changes: 5 additions & 1 deletion sklearnex/linear_model/tests/test_incremental_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
get_dataframes_and_queues,
)
from sklearnex.linear_model import IncrementalLinearRegression
from sklearnex.tests.utils import _IS_INTEL


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
Expand Down Expand Up @@ -129,7 +130,10 @@ def test_sklearnex_partial_fit_multitarget_on_gold_data(
np_y_pred = _as_numpy(y_pred)

assert inclin.n_features_in_ == 2
tol = 7e-6 if dtype == np.float32 else 1e-7
tol = 1e-7
if dtype == np.float32:
tol = 7e-6 if _IS_INTEL else 2e-5

assert_allclose(inclin.coef_, [1.0, 2.0], atol=tol)
if fit_intercept:
assert_allclose(inclin.intercept_, 3.0, atol=tol)
Expand Down
5 changes: 4 additions & 1 deletion sklearnex/linear_model/tests/test_linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
_convert_to_dataframe,
get_dataframes_and_queues,
)
from sklearnex.tests.utils import _IS_INTEL


@pytest.mark.parametrize("dataframe,queue", get_dataframes_and_queues())
Expand Down Expand Up @@ -56,7 +57,9 @@ def test_sklearnex_import_linear(dataframe, queue, dtype, macro_block):
assert "sklearnex" in linreg.__module__
assert linreg.n_features_in_ == 2

tol = 1e-5 if _as_numpy(linreg.coef_).dtype == np.float32 else 1e-7
tol = 1e-7
if _as_numpy(linreg.coef_).dtype == np.float32:
tol = 1e-5 if _IS_INTEL else 2e-5
assert_allclose(_as_numpy(linreg.intercept_), 3.0, rtol=tol)
assert_allclose(_as_numpy(linreg.coef_), [1.0, 2.0], rtol=tol)

Expand Down
45 changes: 42 additions & 3 deletions sklearnex/tests/test_run_to_run_stability.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@
)
from sklearnex.svm import SVC
from sklearnex.tests.utils import (
_IS_INTEL,
PATCHED_MODELS,
SPECIAL_INSTANCES,
call_method,
Expand Down Expand Up @@ -154,6 +155,14 @@ def test_standard_estimator_stability(estimator, method, dataframe, queue):
pytest.skip(f"variation observed in {estimator}.score")
if estimator in ["IncrementalEmpiricalCovariance"] and method == "mahalanobis":
pytest.skip("allowed fallback to sklearn occurs")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
Expand Down Expand Up @@ -182,6 +191,14 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
pytest.skip(f"variation observed in KMeans.score")
if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

est = SPECIAL_INSTANCES[estimator]

Expand All @@ -200,11 +217,25 @@ def test_special_estimator_stability(estimator, method, dataframe, queue):
@pytest.mark.parametrize("dataframe, queue", get_dataframes_and_queues("numpy,array_api"))
@pytest.mark.parametrize("estimator, method", gen_models_info(SPARSE_INSTANCES))
def test_sparse_estimator_stability(estimator, method, dataframe, queue):
if "KMeans" in estimator and method == "score" and queue == None:
pytest.skip(f"variation observed in KMeans.score")

if "KMeans" in estimator and method in "score" and queue == None:
pytest.skip(f"variation observed in KMeans.{method}")
if (
not daal_check_version((2025, "P", 0))
and "KMeans()" in estimator
and queue == None
):
pytest.skip(f"variation observed in KMeans.{method} in 2024.7 oneDAL")
if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")

est = SPARSE_INSTANCES[estimator]

if method and not hasattr(est, method):
Expand All @@ -228,6 +259,14 @@ def test_other_estimator_stability(estimator, method, dataframe, queue):
pytest.skip(f"variation observed in KMeans.score")
if "NearestNeighbors" in estimator and "radius" in method:
pytest.skip(f"RadiusNeighbors estimator not implemented in sklearnex")
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
Comment on lines +262 to +269
Copy link
Contributor

@samir-nasibli samir-nasibli Oct 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I propose to factor out a common helper function for this check-and-skip logic, since these failures all have the same cause.
It would also reduce code duplication.

def _skip_neighbors(estimator, method):
    if (
        not _IS_INTEL
        and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
        and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
    ):
        if daal_check_version((2025, "P", 200)):
            pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
        pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
Suggested change
if (
not _IS_INTEL
and ("Neighbors" in estimator or "LocalOutlierFactor" in estimator)
and method in ["score", "predict", "kneighbors", "kneighbors_graph"]
):
if daal_check_version((2025, "P", 200)):
pytest.fail("Re-verify failure of algorithms in 2025.2 oneDAL")
pytest.skip(f"{estimator} shows instability on Non-Intel hardware")
_skip_neighbors(estimator, method)


est = STABILITY_INSTANCES[estimator]

Expand Down
3 changes: 3 additions & 0 deletions sklearnex/tests/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
SPECIAL_INSTANCES,
UNPATCHED_FUNCTIONS,
UNPATCHED_MODELS,
_get_processor_info,
call_method,
gen_dataset,
gen_models_info,
Expand All @@ -39,3 +40,5 @@
"gen_dataset",
"sklearn_clone_dict",
]

_IS_INTEL = "GenuineIntel" in _get_processor_info()
22 changes: 22 additions & 0 deletions sklearnex/tests/utils/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@
# limitations under the License.
# ==============================================================================

import platform
import subprocess
from functools import partial
from inspect import Parameter, getattr_static, isclass, signature

Expand Down Expand Up @@ -344,3 +346,23 @@ def gen_dataset(
np.uint32,
np.uint64,
]


def _get_processor_info():
proc = ""
if platform.system() == "Linux":
proc = (
subprocess.check_output(["/usr/bin/cat", "/proc/cpuinfo"])
.strip()
.decode("utf-8")
)
elif platform.system() == "Windows":
proc = platform.processor()
elif platform.system() == "Darwin":
proc = (
subprocess.check_output(["/usr/bin/sysctl", "-n", "machdep.cpu.brand_string"])
.strip()
.decode("utf-8")
)

return proc