[MNT] Prepare for Numpy 2.0 and Scikit-Learn 1.5 (#92)
* Fixing histogramdd tests

* Fixing median test

* Fixing LinearRegression tests

* Fixing histogram2d test

* Fixing Exponential mechanism test

* Fixing StandardScaler tests

* Removing numpy.core usage and other deprecations for numpy 2.0

* Fixing scikit-learn 1.5.0 bug

* Updating scikit-learn legacy tests

* Updating numpy legacy tests
naoise-h authored May 22, 2024
1 parent 2ec5865 commit 1fd735b
Showing 10 changed files with 37 additions and 36 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/libraries.yml
@@ -19,25 +19,25 @@ jobs:

matrix:
include:
- library: numpy
version: 1.23.5
python-version: '3.10'
- library: numpy
version: 1.24.4
python-version: '3.11'
- library: numpy
version: 1.25.2
python-version: '3.11'
- library: numpy
version: 1.26.4
python-version: '3.11'

- library: scikit-learn
version: 1.1.3
python-version: '3.10'
- library: scikit-learn
version: 1.2.2
python-version: '3.10'
- library: scikit-learn
version: 1.3.2
python-version: '3.11'
- library: scikit-learn
version: 1.4.2
python-version: '3.11'

- library: scipy
version: 1.9.3
2 changes: 1 addition & 1 deletion diffprivlib/models/naive_bayes.py
@@ -148,7 +148,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
classes = self.classes_

unique_y = np.unique(y)
unique_y_in_classes = np.in1d(unique_y, classes)
unique_y_in_classes = np.isin(unique_y, classes)

if not np.all(unique_y_in_classes):
raise ValueError(f"The target label(s) {unique_y[~unique_y_in_classes]} in y do not exist in the initial "
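
NumPy 2.0 deprecates np.in1d in favour of np.isin, which returns the same element-wise membership mask, so the swap above is behaviour-preserving. A minimal sketch of the equivalence (illustrative values, not taken from the library):

import numpy as np

unique_y = np.array([0, 1, 3])
classes = np.array([0, 1, 2])

# np.isin is the supported spelling under NumPy 2.0; the result is a boolean
# mask marking which entries of unique_y appear in classes.
mask = np.isin(unique_y, classes)
print(mask)  # [ True  True False]
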
2 changes: 1 addition & 1 deletion diffprivlib/models/pca.py
@@ -205,7 +205,7 @@ def __init__(self, n_components=None, *, epsilon=1.0, data_norm=None, centered=F
def n_features_(self):
return self.n_features_in_

def _fit_full(self, X, n_components):
def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
self.accountant.check(self.epsilon, 0)

random_state = check_random_state(self.random_state)
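
scikit-learn 1.5 passes the array-API arguments xp and is_array_api_compliant into PCA._fit_full, so an override with the old two-argument signature no longer matches the caller. A hypothetical sketch of the pattern (Base and Derived are illustrative stand-ins, not diffprivlib or scikit-learn classes):

class Base:
    def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
        print("fitting", n_components, "component(s)")

class Derived(Base):
    # Accepting the new parameters with defaults keeps the override callable by
    # both older callers (two arguments) and newer callers (four).
    def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
        # This override ignores xp / is_array_api_compliant and stays NumPy-only.
        return super()._fit_full(X, n_components)

Derived()._fit_full([[1.0, 2.0]], 1)
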
10 changes: 4 additions & 6 deletions diffprivlib/tools/utils.py
@@ -45,8 +45,6 @@
import warnings
from numbers import Integral
import numpy as np
from numpy.core import multiarray as mu
from numpy.core import umath as um

from diffprivlib.accountant import BudgetAccountant
from diffprivlib.mechanisms import LaplaceBoundedDomain, GeometricTruncated, LaplaceTruncated
@@ -586,12 +584,12 @@ def _std(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False,
ret = _var(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
random_state=random_state, accountant=accountant, nan=nan)

if isinstance(ret, mu.ndarray):
ret = um.sqrt(ret)
if isinstance(ret, np.ndarray):
ret = np.sqrt(ret)
elif hasattr(ret, 'dtype'):
ret = ret.dtype.type(um.sqrt(ret))
ret = ret.dtype.type(np.sqrt(ret))
else:
ret = um.sqrt(ret)
ret = np.sqrt(ret)

return ret

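
numpy.core was made private in NumPy 2.0 (renamed to numpy._core), so importing its multiarray and umath submodules is no longer supported; the public namespace exposes the same objects, making np.ndarray and np.sqrt drop-in replacements. A small sketch of the cleaned-up branch above (toy input):

import numpy as np

ret = np.array([1.0, 4.0, 9.0])

# np.ndarray replaces numpy.core.multiarray.ndarray and np.sqrt replaces
# numpy.core.umath.sqrt; the behaviour of the branch is unchanged.
if isinstance(ret, np.ndarray):
    ret = np.sqrt(ret)
elif hasattr(ret, "dtype"):
    ret = ret.dtype.type(np.sqrt(ret))
else:
    ret = np.sqrt(ret)

print(ret)  # [1. 2. 3.]
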
2 changes: 1 addition & 1 deletion tests/mechanisms/test_Exponential.py
@@ -133,7 +133,7 @@ def test_zero_measure(self):
measure = [1, 1, 0]
utility = [1, 1, 1]
runs = 10000
mech = self.mech(epsilon=1, utility=utility, measure=measure, sensitivity=1)
mech = self.mech(epsilon=1, utility=utility, measure=measure, sensitivity=1, random_state=0)
count = [0] * 3

for i in range(runs):
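
Giving the mechanism a fixed random_state pins its sampling, so the frequency counts this test accumulates are identical on every run instead of merely likely to pass. A brief sketch, assuming the same constructor arguments the test uses:

from diffprivlib.mechanisms import Exponential

# With a fixed random_state, the sequence of sampled outcomes is reproducible.
mech = Exponential(epsilon=1, utility=[1, 1, 1], measure=[1, 1, 0],
                   sensitivity=1, random_state=0)
print([mech.randomise() for _ in range(5)])  # same sequence on every run
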
13 changes: 6 additions & 7 deletions tests/models/test_LinearRegression.py
@@ -1,10 +1,8 @@
import numpy as np
from unittest import TestCase

import pytest

from diffprivlib.models.linear_regression import LinearRegression
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError, check_random_state


class TestLinearRegression(TestCase):
@@ -58,7 +56,6 @@ def test_large_data(self):

self.assertIsNotNone(clf.fit(X, y))

@pytest.mark.filterwarnings('ignore: numpy.ufunc size changed')
def test_different_results(self):
from sklearn import datasets
from sklearn import linear_model
@@ -87,17 +84,19 @@ def test_different_results(self):
self.assertFalse(np.all(predict1 == predict2))
self.assertFalse(np.all(predict3 == predict1) and np.all(predict3 == predict2))

@pytest.mark.filterwarnings('ignore: numpy.ufunc size changed')
def test_same_results(self):
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import linear_model

rng = check_random_state(42)

dataset = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2,
random_state=rng)

clf = LinearRegression(epsilon=float("inf"), bounds_X=([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5]),
bounds_y=(0, 2))
bounds_y=(0, 2), random_state=rng)
clf.fit(X_train, y_train)

predict1 = clf.predict(X_test)
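
check_random_state (from diffprivlib.utils) turns a seed into a reusable RNG object, and sharing one rng between train_test_split and the model is what makes the "same results" comparison deterministic. A brief sketch, assuming it behaves like scikit-learn's helper of the same name for plain integer seeds:

from diffprivlib.utils import check_random_state

# An int seed yields a seeded RNG; passing an existing RNG returns it unchanged,
# so the same stream of randomness can be threaded through several calls.
rng = check_random_state(42)
print(rng.randint(0, 10, size=3))  # same three numbers on every run
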
17 changes: 9 additions & 8 deletions tests/models/test_StandardScaler.py
@@ -4,7 +4,7 @@
import sklearn.preprocessing as sk_pp

from diffprivlib.models.standard_scaler import StandardScaler
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError, check_random_state


class TestStandardScaler(TestCase):
@@ -65,12 +65,13 @@ def test_inf_epsilon(self):
self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))

def test_different_results(self):
X = np.random.rand(10, 5)
rng = check_random_state(1)
X = rng.random((10, 5))

ss1 = StandardScaler(bounds=(0, 1))
ss1 = StandardScaler(bounds=(0, 1), random_state=rng)
ss1.fit(X)

ss2 = StandardScaler(bounds=(0, 1))
ss2 = StandardScaler(bounds=(0, 1), random_state=rng)
ss2.fit(X)

self.assertFalse(np.allclose(ss1.mean_, ss2.mean_), "Arrays %s and %s should be different" %
@@ -88,8 +89,8 @@ def test_functionality(self):
self.assertIsNotNone(ss.fit_transform(X))

def test_similar_results(self):
rng = np.random.RandomState(0)
X = rng.rand(100000, 5)
rng = check_random_state(0)
X = rng.random((100000, 5))

dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"), random_state=rng)
dp_ss.fit(X)
@@ -104,8 +105,8 @@
self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))

def test_random_state(self):
rng = np.random.RandomState(0)
X = rng.rand(100000, 5)
rng = check_random_state(0)
X = rng.random((100000, 5))

ss0 = StandardScaler(bounds=(0, 1), epsilon=1, random_state=0)
ss1 = StandardScaler(bounds=(0, 1), epsilon=1, random_state=1)
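
These tests also swap the legacy np.random.rand(...) call for rng.random(shape), which draws the same kind of uniform [0, 1) samples but takes an explicit shape tuple and works with the shared, seeded rng. A small sketch (shape shortened for illustration):

from diffprivlib.utils import check_random_state

rng = check_random_state(0)

# rng.random takes the shape as a single tuple argument; rng.rand(10, 5)
# would draw equivalent samples through the legacy interface.
X = rng.random((10, 5))
print(X.shape)  # (10, 5)
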
6 changes: 4 additions & 2 deletions tests/tools/test_histogram2d.py
@@ -3,7 +3,7 @@

from diffprivlib.accountant import BudgetAccountant
from diffprivlib.tools.histograms import histogram2d
from diffprivlib.utils import PrivacyLeakWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, BudgetError, check_random_state


class TestHistogram2d(TestCase):
@@ -60,9 +60,11 @@ def test_different_result(self):
self.assertTrue((hist != dp_hist).any())

def test_density(self):
rng = check_random_state(1)

x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 1, 5, 9])
dp_hist, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True)
dp_hist, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, random_state=rng)

# print(dp_hist.sum())

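
With density=True the noisy counts are rescaled to integrate to one over the bins, and a fixed random_state makes those noisy values reproducible between runs. A quick sketch reusing the call from the test above (the seed value is illustrative):

import numpy as np
from diffprivlib.tools.histograms import histogram2d

x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 1, 5, 9])

# Identical random_state -> identical noisy output, so assertions on the
# resulting density cannot flake between runs.
h1, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, random_state=0)
h2, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, random_state=0)
print(np.allclose(h1, h2))  # True
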
2 changes: 1 addition & 1 deletion tests/tools/test_histogramdd.py
@@ -35,7 +35,7 @@ def test_same_edges(self):
def test_different_result(self):
a = np.array([1, 2, 3, 4, 5])
hist, _ = np.histogramdd(a, bins=3, range=[(0, 10)])
dp_hist, _ = histogramdd(a, epsilon=0.1, bins=3, range=[(0, 10)])
dp_hist, _ = histogramdd(a, epsilon=0.1, bins=3, range=[(0, 10)], random_state=0)

# print("Non-private histogram: %s" % hist)
# print("Private histogram: %s" % dp_hist)
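
Seeding histogramdd likewise removes the small chance that the noisy histogram happens to equal the exact one and trips the "different result" assertion. A sketch mirroring the test (import path assumed to match the histogram2d test above):

import numpy as np
from diffprivlib.tools.histograms import histogramdd

a = np.array([1, 2, 3, 4, 5])
hist, _ = np.histogramdd(a, bins=3, range=[(0, 10)])

# epsilon=0.1 adds substantial noise; random_state pins which noise is drawn,
# so the comparison below is stable across runs.
dp_hist, _ = histogramdd(a, epsilon=0.1, bins=3, range=[(0, 10)], random_state=0)
print((hist != dp_hist).any())  # almost certainly True: noisy counts differ
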
7 changes: 4 additions & 3 deletions tests/tools/test_median.py
@@ -3,7 +3,7 @@
import numpy as np

from diffprivlib.tools.quantiles import median
from diffprivlib.utils import PrivacyLeakWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, BudgetError, check_random_state


class TestMedian(TestCase):
@@ -57,9 +57,10 @@ def test_output_type(self):
self.assertTrue(isinstance(res, float))

def test_simple(self):
a = np.random.random(1000)
rng = check_random_state(10)
a = rng.random(1000)

res = median(a, epsilon=5, bounds=(0, 1))
res = median(a, epsilon=5, bounds=(0, 1), random_state=rng)
self.assertAlmostEqual(res, 0.5, delta=0.05)

def test_normal(self):
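
For the median, a seeded rng fixes both the synthetic data and the noise added to the private estimate, so the 0.5 ± 0.05 check is stable across runs. A sketch mirroring the test above:

from diffprivlib.tools.quantiles import median
from diffprivlib.utils import check_random_state

rng = check_random_state(10)
a = rng.random(1000)  # uniform [0, 1) data, so the true median is near 0.5

# With a generous epsilon and fixed randomness, the private median should stay
# within the test's 0.05 tolerance of 0.5 on every run.
res = median(a, epsilon=5, bounds=(0, 1), random_state=rng)
print(res)
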
