[MNT] Prepare for Numpy 2.0 and Scikit-Learn 1.5 (#92)
* Fixing histogramdd tests

* Fixing median test

* Fixing LinearRegression tests

* Fixing histogram2d test

* Fixing Exponential mechanism test

* Fixing StandardScaler tests

* Removing numpy.core usage and other deprecations for numpy 2.0

* Fixing scikit-learn 1.5.0 bug

* Updating scikit-learn legacy tests

* Updating numpy legacy tests
naoise-h authored May 22, 2024
1 parent 2ec5865 commit 1fd735b
Showing 10 changed files with 37 additions and 36 deletions.
12 changes: 6 additions & 6 deletions .github/workflows/libraries.yml
@@ -19,25 +19,25 @@ jobs:

matrix:
include:
- library: numpy
version: 1.23.5
python-version: '3.10'
- library: numpy
version: 1.24.4
python-version: '3.11'
- library: numpy
version: 1.25.2
python-version: '3.11'
- library: numpy
version: 1.26.4
python-version: '3.11'

- library: scikit-learn
version: 1.1.3
python-version: '3.10'
- library: scikit-learn
version: 1.2.2
python-version: '3.10'
- library: scikit-learn
version: 1.3.2
python-version: '3.11'
- library: scikit-learn
version: 1.4.2
python-version: '3.11'

- library: scipy
version: 1.9.3
2 changes: 1 addition & 1 deletion diffprivlib/models/naive_bayes.py
@@ -148,7 +148,7 @@ def _partial_fit(self, X, y, classes=None, _refit=False, sample_weight=None):
classes = self.classes_

unique_y = np.unique(y)
unique_y_in_classes = np.in1d(unique_y, classes)
unique_y_in_classes = np.isin(unique_y, classes)

if not np.all(unique_y_in_classes):
raise ValueError(f"The target label(s) {unique_y[~unique_y_in_classes]} in y do not exist in the initial "
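
NumPy 2.0 deprecates np.in1d in favour of np.isin, which returns the same element-wise membership mask, so the swap above is behaviour-preserving. A minimal sketch of the equivalence (illustrative values, not taken from the library):

import numpy as np

unique_y = np.array([0, 1, 3])
classes = np.array([0, 1, 2])

# np.isin is the supported spelling under NumPy 2.0; the result is a boolean
# mask marking which entries of unique_y appear in classes.
mask = np.isin(unique_y, classes)
print(mask)  # [ True  True False]
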
2 changes: 1 addition & 1 deletion diffprivlib/models/pca.py
@@ -205,7 +205,7 @@ def __init__(self, n_components=None, *, epsilon=1.0, data_norm=None, centered=F
def n_features_(self):
return self.n_features_in_

def _fit_full(self, X, n_components):
def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
self.accountant.check(self.epsilon, 0)

random_state = check_random_state(self.random_state)
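
scikit-learn 1.5 passes the array-API arguments xp and is_array_api_compliant into PCA._fit_full, so an override with the old two-argument signature no longer matches the caller. A hypothetical sketch of the pattern (Base and Derived are illustrative stand-ins, not diffprivlib or scikit-learn classes):

class Base:
    def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
        print("fitting", n_components, "component(s)")

class Derived(Base):
    # Accepting the new parameters with defaults keeps the override callable by
    # both older callers (two arguments) and newer callers (four).
    def _fit_full(self, X, n_components, xp=None, is_array_api_compliant=False):
        # This override ignores xp / is_array_api_compliant and stays NumPy-only.
        return super()._fit_full(X, n_components)

Derived()._fit_full([[1.0, 2.0]], 1)
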
10 changes: 4 additions & 6 deletions diffprivlib/tools/utils.py
@@ -45,8 +45,6 @@
import warnings
from numbers import Integral
import numpy as np
from numpy.core import multiarray as mu
from numpy.core import umath as um

from diffprivlib.accountant import BudgetAccountant
from diffprivlib.mechanisms import LaplaceBoundedDomain, GeometricTruncated, LaplaceTruncated
@@ -586,12 +584,12 @@ def _std(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False,
ret = _var(array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
random_state=random_state, accountant=accountant, nan=nan)

if isinstance(ret, mu.ndarray):
ret = um.sqrt(ret)
if isinstance(ret, np.ndarray):
ret = np.sqrt(ret)
elif hasattr(ret, 'dtype'):
ret = ret.dtype.type(um.sqrt(ret))
ret = ret.dtype.type(np.sqrt(ret))
else:
ret = um.sqrt(ret)
ret = np.sqrt(ret)

return ret

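
numpy.core was made private in NumPy 2.0 (renamed to numpy._core), so importing its multiarray and umath submodules is no longer supported; the public namespace exposes the same objects, making np.ndarray and np.sqrt drop-in replacements. A small sketch of the cleaned-up branch above (toy input):

import numpy as np

ret = np.array([1.0, 4.0, 9.0])

# np.ndarray replaces numpy.core.multiarray.ndarray and np.sqrt replaces
# numpy.core.umath.sqrt; the behaviour of the branch is unchanged.
if isinstance(ret, np.ndarray):
    ret = np.sqrt(ret)
elif hasattr(ret, "dtype"):
    ret = ret.dtype.type(np.sqrt(ret))
else:
    ret = np.sqrt(ret)

print(ret)  # [1. 2. 3.]
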
2 changes: 1 addition & 1 deletion tests/mechanisms/test_Exponential.py
@@ -133,7 +133,7 @@ def test_zero_measure(self):
measure = [1, 1, 0]
utility = [1, 1, 1]
runs = 10000
mech = self.mech(epsilon=1, utility=utility, measure=measure, sensitivity=1)
mech = self.mech(epsilon=1, utility=utility, measure=measure, sensitivity=1, random_state=0)
count = [0] * 3

for i in range(runs):
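
Giving the mechanism a fixed random_state pins its sampling, so the frequency counts this test accumulates are identical on every run instead of merely likely to pass. A brief sketch, assuming the same constructor arguments the test uses:

from diffprivlib.mechanisms import Exponential

# With a fixed random_state, the sequence of sampled outcomes is reproducible.
mech = Exponential(epsilon=1, utility=[1, 1, 1], measure=[1, 1, 0],
                   sensitivity=1, random_state=0)
print([mech.randomise() for _ in range(5)])  # same sequence on every run
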
13 changes: 6 additions & 7 deletions tests/models/test_LinearRegression.py
@@ -1,10 +1,8 @@
import numpy as np
from unittest import TestCase

import pytest

from diffprivlib.models.linear_regression import LinearRegression
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError, check_random_state


class TestLinearRegression(TestCase):
@@ -58,7 +56,6 @@ def test_large_data(self):

self.assertIsNotNone(clf.fit(X, y))

@pytest.mark.filterwarnings('ignore: numpy.ufunc size changed')
def test_different_results(self):
from sklearn import datasets
from sklearn import linear_model
@@ -87,17 +84,19 @@ def test_different_results(self):
self.assertFalse(np.all(predict1 == predict2))
self.assertFalse(np.all(predict3 == predict1) and np.all(predict3 == predict2))

@pytest.mark.filterwarnings('ignore: numpy.ufunc size changed')
def test_same_results(self):
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import linear_model

rng = check_random_state(42)

dataset = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2)
X_train, X_test, y_train, y_test = train_test_split(dataset.data, dataset.target, test_size=0.2,
random_state=rng)

clf = LinearRegression(epsilon=float("inf"), bounds_X=([4.3, 2.0, 1.0, 0.1], [7.9, 4.4, 6.9, 2.5]),
bounds_y=(0, 2))
bounds_y=(0, 2), random_state=rng)
clf.fit(X_train, y_train)

predict1 = clf.predict(X_test)
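
check_random_state (from diffprivlib.utils) turns a seed into a reusable RNG object, and sharing one rng between train_test_split and the model is what makes the "same results" comparison deterministic. A brief sketch, assuming it behaves like scikit-learn's helper of the same name for plain integer seeds:

from diffprivlib.utils import check_random_state

# An int seed yields a seeded RNG; passing an existing RNG returns it unchanged,
# so the same stream of randomness can be threaded through several calls.
rng = check_random_state(42)
print(rng.randint(0, 10, size=3))  # same three numbers on every run
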
17 changes: 9 additions & 8 deletions tests/models/test_StandardScaler.py
@@ -4,7 +4,7 @@
import sklearn.preprocessing as sk_pp

from diffprivlib.models.standard_scaler import StandardScaler
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, DiffprivlibCompatibilityWarning, BudgetError, check_random_state


class TestStandardScaler(TestCase):
@@ -65,12 +65,13 @@ def test_inf_epsilon(self):
self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))

def test_different_results(self):
X = np.random.rand(10, 5)
rng = check_random_state(1)
X = rng.random((10, 5))

ss1 = StandardScaler(bounds=(0, 1))
ss1 = StandardScaler(bounds=(0, 1), random_state=rng)
ss1.fit(X)

ss2 = StandardScaler(bounds=(0, 1))
ss2 = StandardScaler(bounds=(0, 1), random_state=rng)
ss2.fit(X)

self.assertFalse(np.allclose(ss1.mean_, ss2.mean_), "Arrays %s and %s should be different" %
@@ -88,8 +89,8 @@ def test_functionality(self):
self.assertIsNotNone(ss.fit_transform(X))

def test_similar_results(self):
rng = np.random.RandomState(0)
X = rng.rand(100000, 5)
rng = check_random_state(0)
X = rng.random((100000, 5))

dp_ss = StandardScaler(bounds=(0, 1), epsilon=float("inf"), random_state=rng)
dp_ss.fit(X)
@@ -104,8 +105,8 @@
self.assertTrue(np.all(dp_ss.n_samples_seen_ == sk_ss.n_samples_seen_))

def test_random_state(self):
rng = np.random.RandomState(0)
X = rng.rand(100000, 5)
rng = check_random_state(0)
X = rng.random((100000, 5))

ss0 = StandardScaler(bounds=(0, 1), epsilon=1, random_state=0)
ss1 = StandardScaler(bounds=(0, 1), epsilon=1, random_state=1)
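
These tests also swap the legacy np.random.rand(...) call for rng.random(shape), which draws the same kind of uniform [0, 1) samples but takes an explicit shape tuple and works with the shared, seeded rng. A small sketch (shape shortened for illustration):

from diffprivlib.utils import check_random_state

rng = check_random_state(0)

# rng.random takes the shape as a single tuple argument; rng.rand(10, 5)
# would draw equivalent samples through the legacy interface.
X = rng.random((10, 5))
print(X.shape)  # (10, 5)
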
6 changes: 4 additions & 2 deletions tests/tools/test_histogram2d.py
@@ -3,7 +3,7 @@

from diffprivlib.accountant import BudgetAccountant
from diffprivlib.tools.histograms import histogram2d
from diffprivlib.utils import PrivacyLeakWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, BudgetError, check_random_state


class TestHistogram2d(TestCase):
@@ -60,9 +60,11 @@ def test_different_result(self):
self.assertTrue((hist != dp_hist).any())

def test_density(self):
rng = check_random_state(1)

x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 1, 5, 9])
dp_hist, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True)
dp_hist, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, random_state=rng)

# print(dp_hist.sum())

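
With density=True the noisy counts are rescaled to integrate to one over the bins, and a fixed random_state makes those noisy values reproducible between runs. A quick sketch reusing the call from the test above (the seed value is illustrative):

import numpy as np
from diffprivlib.tools.histograms import histogram2d

x = np.array([1, 2, 3, 4, 5])
y = np.array([5, 7, 1, 5, 9])

# Identical random_state -> identical noisy output, so assertions on the
# resulting density cannot flake between runs.
h1, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, random_state=0)
h2, _, _ = histogram2d(x, y, epsilon=1, bins=3, range=[(0, 10), (0, 10)], density=True, random_state=0)
print(np.allclose(h1, h2))  # True
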
2 changes: 1 addition & 1 deletion tests/tools/test_histogramdd.py
@@ -35,7 +35,7 @@ def test_same_edges(self):
def test_different_result(self):
a = np.array([1, 2, 3, 4, 5])
hist, _ = np.histogramdd(a, bins=3, range=[(0, 10)])
dp_hist, _ = histogramdd(a, epsilon=0.1, bins=3, range=[(0, 10)])
dp_hist, _ = histogramdd(a, epsilon=0.1, bins=3, range=[(0, 10)], random_state=0)

# print("Non-private histogram: %s" % hist)
# print("Private histogram: %s" % dp_hist)
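
Seeding histogramdd likewise removes the small chance that the noisy histogram happens to equal the exact one and trips the "different result" assertion. A sketch mirroring the test (import path assumed to match the histogram2d test above):

import numpy as np
from diffprivlib.tools.histograms import histogramdd

a = np.array([1, 2, 3, 4, 5])
hist, _ = np.histogramdd(a, bins=3, range=[(0, 10)])

# epsilon=0.1 adds substantial noise; random_state pins which noise is drawn,
# so the comparison below is stable across runs.
dp_hist, _ = histogramdd(a, epsilon=0.1, bins=3, range=[(0, 10)], random_state=0)
print((hist != dp_hist).any())  # almost certainly True: noisy counts differ
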
7 changes: 4 additions & 3 deletions tests/tools/test_median.py
@@ -3,7 +3,7 @@
import numpy as np

from diffprivlib.tools.quantiles import median
from diffprivlib.utils import PrivacyLeakWarning, BudgetError
from diffprivlib.utils import PrivacyLeakWarning, BudgetError, check_random_state


class TestMedian(TestCase):
@@ -57,9 +57,10 @@ def test_output_type(self):
self.assertTrue(isinstance(res, float))

def test_simple(self):
a = np.random.random(1000)
rng = check_random_state(10)
a = rng.random(1000)

res = median(a, epsilon=5, bounds=(0, 1))
res = median(a, epsilon=5, bounds=(0, 1), random_state=rng)
self.assertAlmostEqual(res, 0.5, delta=0.05)

def test_normal(self):
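
For the median, a seeded rng fixes both the synthetic data and the noise added to the private estimate, so the 0.5 ± 0.05 check is stable across runs. A sketch mirroring the test above:

from diffprivlib.tools.quantiles import median
from diffprivlib.utils import check_random_state

rng = check_random_state(10)
a = rng.random(1000)  # uniform [0, 1) data, so the true median is near 0.5

# With a generous epsilon and fixed randomness, the private median should stay
# within the test's 0.05 tolerance of 0.5 on every run.
res = median(a, epsilon=5, bounds=(0, 1), random_state=rng)
print(res)
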
