Skip to content

Commit

Permalink
[BUG] Fix bounds calculation in nanmean, etc. (#90)
Browse files Browse the repository at this point in the history
* Fixing bounds retrieval for nan arrays

* Adding tests for new bounds retrieval

* Update README.md
  • Loading branch information
naoise-h authored Aug 31, 2023
1 parent dff17d0 commit 0e6cea9
Show file tree
Hide file tree
Showing 7 changed files with 44 additions and 24 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Diffprivlib v0.6

[![Python versions](https://img.shields.io/pypi/pyversions/diffprivlib.svg)](https://pypi.org/project/diffprivlib/)
- [![Downloads](https://pepy.tech/badge/diffprivlib)](https://pepy.tech/project/diffprivlib)
+ [![Downloads](https://static.pepy.tech/badge/diffprivlib)](https://pepy.tech/project/diffprivlib)
[![PyPi version](https://img.shields.io/pypi/v/diffprivlib.svg)](https://pypi.org/project/diffprivlib/)
[![PyPi status](https://img.shields.io/pypi/status/diffprivlib.svg)](https://pypi.org/project/diffprivlib/)
[![General tests](https://github.com/IBM/differential-privacy-library/actions/workflows/general.yml/badge.svg)](https://github.com/IBM/differential-privacy-library/actions/workflows/general.yml)
Expand Down
6 changes: 3 additions & 3 deletions diffprivlib/tools/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,7 +276,7 @@ def _mean(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False
warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
"result in additional privacy leakage. To ensure differential privacy and no additional "
"privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
- bounds = (np.min(array), np.max(array))
+ bounds = (np.nanmin(array), np.nanmax(array))

if axis is not None or keepdims:
return _wrap_axis(_mean, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
Expand Down Expand Up @@ -432,7 +432,7 @@ def _var(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False,
warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
"result in additional privacy leakage. To ensure differential privacy and no additional "
"privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
- bounds = (np.min(array), np.max(array))
+ bounds = (np.nanmin(array), np.nanmax(array))

if axis is not None or keepdims:
return _wrap_axis(_var, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
Expand Down Expand Up @@ -720,7 +720,7 @@ def _sum(array, epsilon=1.0, bounds=None, axis=None, dtype=None, keepdims=False,
warnings.warn("Bounds have not been specified and will be calculated on the data provided. This will "
"result in additional privacy leakage. To ensure differential privacy and no additional "
"privacy leakage, specify bounds for each dimension.", PrivacyLeakWarning)
- bounds = (np.min(array), np.max(array))
+ bounds = (np.nanmin(array), np.nanmax(array))

if axis is not None or keepdims:
return _wrap_axis(_sum, array, epsilon=epsilon, bounds=bounds, axis=axis, dtype=dtype, keepdims=keepdims,
Expand Down
14 changes: 9 additions & 5 deletions tests/tools/test_nanmean.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ def test_no_epsilon(self):
self.assertIsNotNone(nanmean(a, bounds=(0, 1)))

def test_no_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
res = nanmean(a, epsilon=1)
self.assertIsNotNone(res)

def test_bad_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertRaises(ValueError):
nanmean(a, epsilon=1, bounds=(0, -1))

def test_missing_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
res = nanmean(a, epsilon=1, bounds=None)
self.assertIsNotNone(res)
Expand All @@ -54,8 +54,8 @@ def test_large_epsilon_axis(self):
self.assertAlmostEqual(res[i], res_dp[i], delta=0.01)

def test_array_like(self):
- self.assertIsNotNone(nanmean([1, 2, 3], bounds=(1, 3)))
- self.assertIsNotNone(nanmean((1, 2, 3), bounds=(1, 3)))
+ self.assertIsNotNone(nanmean([1, 2, 3, np.nan], bounds=(1, 3)))
+ self.assertIsNotNone(nanmean((1, 2, 3, np.nan), bounds=(1, 3)))

def test_clipped_output(self):
a = np.random.random((10,))
Expand All @@ -70,6 +70,10 @@ def test_nan(self):
res = nanmean(a, bounds=(0, 1))
self.assertFalse(np.isnan(res))

+ with self.assertWarns(PrivacyLeakWarning):
+ res = nanmean(a)
+ self.assertFalse(np.isnan(res))

def test_accountant(self):
from diffprivlib.accountant import BudgetAccountant
acc = BudgetAccountant(1.5, 0)
Expand Down
14 changes: 9 additions & 5 deletions tests/tools/test_nanstd.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ def test_no_epsilon(self):
self.assertIsNotNone(nanstd(a, bounds=(0, 1)))

def test_no_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
nanstd(a, epsilon=1)

def test_bad_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertRaises(ValueError):
nanstd(a, epsilon=1, bounds=(0, -1))

def test_missing_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
res = nanstd(a, 1, None)
self.assertIsNotNone(res)
Expand All @@ -53,8 +53,8 @@ def test_large_epsilon_axis(self):
self.assertAlmostEqual(res[i], res_dp[i], delta=0.01)

def test_array_like(self):
- self.assertIsNotNone(nanstd([1, 2, 3], bounds=(1, 3)))
- self.assertIsNotNone(nanstd((1, 2, 3), bounds=(1, 3)))
+ self.assertIsNotNone(nanstd([1, 2, 3, np.nan], bounds=(1, 3)))
+ self.assertIsNotNone(nanstd((1, 2, 3, np.nan), bounds=(1, 3)))

def test_clipped_output(self):
a = np.random.random((10,))
Expand All @@ -70,6 +70,10 @@ def test_nan(self):
res = nanstd(a, bounds=(0, 1))
self.assertFalse(np.isnan(res))

+ with self.assertWarns(PrivacyLeakWarning):
+ res = nanstd(a)
+ self.assertFalse(np.isnan(res))

def test_accountant(self):
from diffprivlib.accountant import BudgetAccountant
acc = BudgetAccountant(1.5, 0)
Expand Down
14 changes: 9 additions & 5 deletions tests/tools/test_nansum.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,18 +22,18 @@ def test_no_epsilon(self):
self.assertIsNotNone(nansum(a, bounds=(1, 3)))

def test_no_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
res = nansum(a, epsilon=1)
self.assertIsNotNone(res)

def test_mis_ordered_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertRaises(ValueError):
nansum(a, epsilon=1, bounds=(1, 0))

def test_missing_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
res = nansum(a, epsilon=1, bounds=None)
self.assertIsNotNone(res)
Expand All @@ -53,8 +53,8 @@ def test_large_epsilon(self):
self.assertAlmostEqual(res, res_dp, delta=0.01 * res)

def test_array_like(self):
- self.assertIsNotNone(nansum([1, 2, 3], bounds=(1, 3)))
- self.assertIsNotNone(nansum((1, 2, 3), bounds=(1, 3)))
+ self.assertIsNotNone(nansum([1, 2, 3, np.nan], bounds=(1, 3)))
+ self.assertIsNotNone(nansum((1, 2, 3, np.nan), bounds=(1, 3)))

def test_axis(self):
a = np.random.random((1000, 5))
Expand Down Expand Up @@ -83,6 +83,10 @@ def test_nan(self):
res = nansum(a, bounds=(0, 1))
self.assertFalse(np.isnan(res))

+ with self.assertWarns(PrivacyLeakWarning):
+ res = nansum(a)
+ self.assertFalse(np.isnan(res))

a = np.array([np.nan] * 10)
res = nansum(a, epsilon=float("inf"), bounds=(0, 1))
self.assertEqual(0, res)
Expand Down
14 changes: 9 additions & 5 deletions tests/tools/test_nanvar.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,17 +22,17 @@ def test_no_epsilon(self):
self.assertIsNotNone(nanvar(a, bounds=(0, 1)))

def test_no_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
nanvar(a, epsilon=1)

def test_bad_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertRaises(ValueError):
nanvar(a, epsilon=1, bounds=(0, -1))

def test_missing_bounds(self):
- a = np.array([1, 2, 3])
+ a = np.array([1, 2, 3, np.nan])
with self.assertWarns(PrivacyLeakWarning):
res = nanvar(a, 1, None)
self.assertIsNotNone(res)
Expand All @@ -53,8 +53,8 @@ def test_large_epsilon_axis(self):
self.assertAlmostEqual(res[i], res_dp[i], delta=0.01)

def test_array_like(self):
- self.assertIsNotNone(nanvar([1, 2, 3], bounds=(1, 3)))
- self.assertIsNotNone(nanvar((1, 2, 3), bounds=(1, 3)))
+ self.assertIsNotNone(nanvar([1, 2, 3, np.nan], bounds=(1, 3)))
+ self.assertIsNotNone(nanvar((1, 2, 3, np.nan), bounds=(1, 3)))

def test_clipped_output(self):
a = np.random.random((10,))
Expand All @@ -70,6 +70,10 @@ def test_nan(self):
res = nanvar(a, bounds=(0, 1))
self.assertFalse(np.isnan(res))

+ with self.assertWarns(PrivacyLeakWarning):
+ res = nanvar(a)
+ self.assertFalse(np.isnan(res))

def test_accountant(self):
from diffprivlib.accountant import BudgetAccountant
acc = BudgetAccountant(1.5, 0)
Expand Down
4 changes: 4 additions & 0 deletions tests/tools/test_var.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,10 @@ def test_nan(self):
res = var(a, bounds=(0, 1))
self.assertTrue(np.isnan(res))

+ with self.assertWarns(PrivacyLeakWarning):
+ res = var(a)
+ self.assertTrue(np.isnan(res))

def test_accountant(self):
from diffprivlib.accountant import BudgetAccountant
acc = BudgetAccountant(1.5, 0)
Expand Down

0 comments on commit 0e6cea9

Please sign in to comment.