Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

unumpy.average: init #265

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ Unreleased
Fixes:

- fix `readthedocs` configuration so that the build passes (#254)
- Add `unumpy.covariance_matrix`: A vectorized variant of the pure Python function `covariance_matrix` (#265)
- Add `unumpy.average` to calculate uncertainties aware average (#264)

3.2.2 2024-July-08
-----------------------
Expand Down
23 changes: 23 additions & 0 deletions doc/numpy_guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -144,6 +144,29 @@ functions is available in the documentation for :mod:`uncertainties.umath`.
.. index::
pair: testing and operations (in arrays); NaN

Uncertainties aware average
---------------------------

If you have measured a certain value multiple times, with a different
uncertainty every measurement. Averaging over the results in a manner aware of
the different uncertainties, is not trivial. The function ``unumpy.average()``
does that:

>>> measurements = numpy.array([2.1, 2.0, 2.05, 2.08, 2.02])
>>> stds = numpy.array([0.05, 0.03, 0.04, 0.06, 0.05])
>>> arr = unumpy.uarray(measurements, stds)
>>> unumpy.average(arr)
2.03606+/-0.019

Note how that function gives a value different from numpy's ``mean`` function:

>>> numpy.mean(arr)
2.050+/-0.019

If you have an array with correlated values, the covariances will be considered
as well. You can also specify an ``axes`` argument, to specify a certain axis
or a tuple of axes upon which to average the result.

NaN testing and NaN-aware operations
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

Expand Down
69 changes: 69 additions & 0 deletions tests/test_unumpy.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import pytest

try:
import numpy
except ImportError:
Expand Down Expand Up @@ -300,3 +302,70 @@ def test_array_comparisons():
# For matrices, 1D arrays are converted to 2D arrays:
mat = unumpy.umatrix([1, 2], [1, 4])
assert numpy.all((mat == [mat[0, 0], 4]) == [True, False])


class TestAverage:
arr1d = unumpy.uarray(
[2.1, 2.0, 2.05, 2.08, 2.02],
[0.05, 0.03, 0.04, 0.06, 0.05],
)

def __init__(self):
sigma2d = 0.3
means2d = np.linspace(4, 20, num=50).reshape(5, 10)
self.arr2d = unumpy.uarray(
np.random.normal(loc=means, scale=sigma2d),
np.random.uniform(low=0, high=sigma2d),
)
meansNd = np.random.rand(4, 7, 5, 2, 10, 14) + 10
self.arrNd = unumpy.uarray(
meansNd, np.random.uniform(low=0, high=0.2, size=meansNd.shape)
)

def test_average_type_check():
with pytest.raises(ValueError):
unumpy.average(numpy.array(["bla"]))

def test_average_example():
"""Tests the example from the docs."""
avg = unumpy.average(self.arr1d)
assert np.isclose(avg.n, 2.0360612043435338)
assert np.isclose(avg.s, 0.018851526708200846)

@pytest.mark.parametrize("invalid_axis", [1, 2, 3])
def test_average1d_invalid_axes(invalid_axis):
with pytest.raises(ValueError):
unumpy.average(self.arr1d, axes=invalid_axis)

@pytest.mark.parametrize("invalid_axis", [2, 3])
def test_average2d_invalid_axes(invalid_axis):
with pytest.raises(ValueError):
unumpy.average(self.arr1d, axes=invalid_axis)

@pytest.mark.parametrize(
"expected_shape, axis_argument",
[
((), None),
# According to the linspace reshape in __init__
((5,), 1),
((5,), (1,)),
((10,), 0),
((10,), (0,)),
],
)
def test_average2d_shape(expected_shape, axis_argument):
assert unumpy.average(self.arr2d, axes=axis_argument).shape == expected_shape

@pytest.mark.parametrize(
"expected_shape, axis_argument",
[
((), None),
# According to random.rand() argument in __init__
((4, 5, 10), (1, 3, 5)),
((10,), (0, 1, 2, 3, 4, 6)),
((14,), (0, 1, 2, 3, 4, 5)),
((7, 2), (0, 2, 4, 5, 6)),
],
)
def test_averageNd_shape(expected_shape, axis_argument):
assert unumpy.average(self.arrNd, axes=axis_argument).shape == expected_shape
82 changes: 82 additions & 0 deletions uncertainties/unumpy/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,11 +29,81 @@
"umatrix",
# Utilities:
"nominal_values",
"covariance_matrix",
"std_devs",
"average",
# Classes:
"matrix",
]


def _average(arr):
"""
The real implementation of average, for 1D arrays.
"""
assert arr.ndim == 1
cov_matrix = covariance_matrix(arr)
weights = numpy.diagonal(cov_matrix) ** -1
weights_sum = weights.sum()
return uarray(
(nominal_values(arr) * weights).sum() / weights_sum,
numpy.sqrt(
numpy.einsum(
"i,ij,j",
weights,
cov_matrix,
weights,
)
) / weights_sum,
)

def average(arr, axes=None):
"""
Return a weighted averaged along with a weighted mean over a certain axis
or a axes. The formulas implemented by this are:

$$ \\mu = \frac{\\sum_i (x_i/\\sigma_i^2)}{\\sum_i \\sigma_i^{-2}}$$

$$\\sigma_\\mu = \\frac{\\sqrt{\\sum_{i,j} \\sigma_i^{-2} \\sigma_j^{-2} \\cdot Cov(x_i, x_j)}}{\\sum_i \\sigma_i^{-2}}$$

Where of course $Cov(x_i, x_i) = \\sigma_i^2$.

By default, (when ``axes=None``), it flattens the given array first and
then applies the above equations. When axes is a list or tuple, it applies
the same formula over each axis in a loop, and hence correlations between
values in different axes are not taken into account.
"""
# NOTE regarding the above implementation disclaimer: Ideally we could have
# taken the (2N)-D shaped covariance_matrix(arr) and worked with that, but
# it is not very clear how to do that truely correctly.
arr = np.asanyarray(arr)
if axes is None:
axes = [0]
arr = arr.flatten()
# The following sanity checks on axes similar to what Numpy 2.x's
# lib.array_utils.normalize_axis_tuple do.
elif not isinstance(axes, [tuple, list]):
axes = [operator.index(axes)]
else:
axes = tuple(axes)
if len(set(axes)) != len(axes):
raise ValueError("repeated axis found in axes argument")
# This one is not checked by np.lib.array_utils.normalize_axis_tuple
if max(axes) >= arr.ndim:
raise ValueError(
f"Cannot average over an inexistent axis {max(axes)} >= arr.ndim = "
f"{arr.ndim}"
)
if not isinstance(arr.flat[0], core.Variable):
raise ValueError(
"unumpy.average is meant to operate upon numpy arrays of ufloats, "
"not pure numpy arrays"
)
for axis in sorted(axes, reverse=True):
arr = numpy.apply_over_axis(_average, axis, arr)
Comment on lines +102 to +103
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Hmm on a 2nd thought, perhaps this means that e.g for A with shape=(3,4,5,7), and axes=(1,3), if there are correlations between A[:,0,:,0] and A[:,1,:,1] are not taken into account? Because once axis=1 is averaged, the covariance_matrix call for the 1d slices of axis 1 won't take into account the correlations to the values already averaged on axis 3...

I think we can live with that, but perhaps warn the users about it in the function's doc, or elsewhere. Unless of course someone here will think of a better way to implement this.

return arr


###############################################################################
# Utilities:

Expand Down Expand Up @@ -70,6 +140,18 @@
),
)

def covariance_matrix(a):
"""
Return an array of covariances of given array's numbers with uncertainties,
given shape ``(d0,d1,d2,...,dN)``, the output shape is
``(d0,d1,d2,...,dN,d0,d1,d2,...,dN)``.
"""
arr = numpy.array(a)
return numpy.reshape(
uncert_core.covariance_matrix(arr.ravel()),
arr.shape*2,
)


def unumpy_to_numpy_matrix(arr):
"""
Expand Down