From 31141770abb6323195830f41260f75fa124a9468 Mon Sep 17 00:00:00 2001 From: Doron Behar Date: Mon, 21 Oct 2024 11:49:23 +0300 Subject: [PATCH] unumpy.average: init Ref: https://github.com/lmfit/uncertainties/issues/38 --- CHANGES.rst | 1 + doc/numpy_guide.rst | 23 ++++++++++++ tests/test_unumpy.py | 69 ++++++++++++++++++++++++++++++++++++ uncertainties/unumpy/core.py | 69 ++++++++++++++++++++++++++++++++++++ 4 files changed, 162 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 59b51bf8..f93f4411 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -8,6 +8,7 @@ Fixes: - fix `readthedocs` configuration so that the build passes (#254) - Add `unumpy.covariance_matrix`: A vectorized variant of the pure Python function `covariance_matrix` (#265) +- Add `unumpy.average` to calculate uncertainties aware average (#264) 3.2.2 2024-July-08 ----------------------- diff --git a/doc/numpy_guide.rst b/doc/numpy_guide.rst index f350765f..bac88f52 100644 --- a/doc/numpy_guide.rst +++ b/doc/numpy_guide.rst @@ -144,6 +144,29 @@ functions is available in the documentation for :mod:`uncertainties.umath`. .. index:: pair: testing and operations (in arrays); NaN +Uncertainties aware average +--------------------------- + +If you have measured a certain value multiple times, with a different +uncertainty every measurement. Averaging over the results in a manner aware of +the different uncertainties, is not trivial. The function ``unumpy.average()`` +does that: + +>>> measurements = numpy.array([2.1, 2.0, 2.05, 2.08, 2.02]) +>>> stds = numpy.array([0.05, 0.03, 0.04, 0.06, 0.05]) +>>> arr = unumpy.uarray(measurements, stds) +>>> unumpy.average(arr) +2.03606+/-0.019 + +Note how that function gives a value different from numpy's ``mean`` function: + +>>> numpy.mean(arr) +2.050+/-0.019 + +If you have an array with correlated values, the covariances will be considered +as well. You can also specify an ``axes`` argument, to specify a certain axis +or a tuple of axes upon which to average the result. + NaN testing and NaN-aware operations ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ diff --git a/tests/test_unumpy.py b/tests/test_unumpy.py index 783fd336..f1d1d703 100644 --- a/tests/test_unumpy.py +++ b/tests/test_unumpy.py @@ -1,3 +1,5 @@ +import pytest + try: import numpy except ImportError: @@ -300,3 +302,70 @@ def test_array_comparisons(): # For matrices, 1D arrays are converted to 2D arrays: mat = unumpy.umatrix([1, 2], [1, 4]) assert numpy.all((mat == [mat[0, 0], 4]) == [True, False]) + + +class TestAverage: + arr1d = unumpy.uarray( + [2.1, 2.0, 2.05, 2.08, 2.02], + [0.05, 0.03, 0.04, 0.06, 0.05], + ) + + def __init__(self): + sigma2d = 0.3 + means2d = np.linspace(4, 20, num=50).reshape(5, 10) + self.arr2d = unumpy.uarray( + np.random.normal(loc=means, scale=sigma2d), + np.random.uniform(low=0, high=sigma2d), + ) + meansNd = np.random.rand(4, 7, 5, 2, 10, 14) + 10 + self.arrNd = unumpy.uarray( + meansNd, np.random.uniform(low=0, high=0.2, size=meansNd.shape) + ) + + def test_average_type_check(): + with pytest.raises(ValueError): + unumpy.average(numpy.array(["bla"])) + + def test_average_example(): + """Tests the example from the docs.""" + avg = unumpy.average(self.arr1d) + assert np.isclose(avg.n, 2.0360612043435338) + assert np.isclose(avg.s, 0.018851526708200846) + + @pytest.mark.parametrize("invalid_axis", [1, 2, 3]) + def test_average1d_invalid_axes(invalid_axis): + with pytest.raises(ValueError): + unumpy.average(self.arr1d, axes=invalid_axis) + + @pytest.mark.parametrize("invalid_axis", [2, 3]) + def test_average2d_invalid_axes(invalid_axis): + with pytest.raises(ValueError): + unumpy.average(self.arr1d, axes=invalid_axis) + + @pytest.mark.parametrize( + "expected_shape, axis_argument", + [ + ((), None), + # According to the linspace reshape in __init__ + ((5,), 1), + ((5,), (1,)), + ((10,), 0), + ((10,), (0,)), + ], + ) + def test_average2d_shape(expected_shape, axis_argument): + assert unumpy.average(self.arr2d, axes=axis_argument).shape == expected_shape + + @pytest.mark.parametrize( + "expected_shape, axis_argument", + [ + ((), None), + # According to random.rand() argument in __init__ + ((4, 5, 10), (1, 3, 5)), + ((10,), (0, 1, 2, 3, 4, 6)), + ((14,), (0, 1, 2, 3, 4, 5)), + ((7, 2), (0, 2, 4, 5, 6)), + ], + ) + def test_averageNd_shape(expected_shape, axis_argument): + assert unumpy.average(self.arrNd, axes=axis_argument).shape == expected_shape diff --git a/uncertainties/unumpy/core.py b/uncertainties/unumpy/core.py index 8e28ccf2..40e4f668 100644 --- a/uncertainties/unumpy/core.py +++ b/uncertainties/unumpy/core.py @@ -31,10 +31,79 @@ "nominal_values", "covariance_matrix", "std_devs", + "average", # Classes: "matrix", ] + +def _average(arr): + """ + The real implementation of average, for 1D arrays. + """ + assert arr.ndim == 1 + cov_matrix = covariance_matrix(arr) + weights = numpy.diagonal(cov_matrix) ** -1 + weights_sum = weights.sum() + return uarray( + (nominal_values(arr) * weights).sum() / weights_sum, + numpy.sqrt( + numpy.einsum( + "i,ij,j", + weights, + cov_matrix, + weights, + ) + ) / weights_sum, + ) + +def average(arr, axes=None): + """ + Return a weighted averaged along with a weighted mean over a certain axis + or a axes. The formulas implemented by this are: + + $$ \\mu = \frac{\\sum_i (x_i/\\sigma_i^2)}{\\sum_i \\sigma_i^{-2}}$$ + + $$\\sigma_\\mu = \\frac{\\sqrt{\\sum_{i,j} \\sigma_i^{-2} \\sigma_j^{-2} \\cdot Cov(x_i, x_j)}}{\\sum_i \\sigma_i^{-2}}$$ + + Where of course $Cov(x_i, x_i) = \\sigma_i^2$. + + By default, (when ``axes=None``), it flattens the given array first and + then applies the above equations. When axes is a list or tuple, it applies + the same formula over each axis in a loop, and hence correlations between + values in different axes are not taken into account. + """ + # NOTE regarding the above implementation disclaimer: Ideally we could have + # taken the (2N)-D shaped covariance_matrix(arr) and worked with that, but + # it is not very clear how to do that truely correctly. + arr = np.asanyarray(arr) + if axes is None: + axes = [0] + arr = arr.flatten() + # The following sanity checks on axes similar to what Numpy 2.x's + # lib.array_utils.normalize_axis_tuple do. + elif not isinstance(axes, [tuple, list]): + axes = [operator.index(axes)] + else: + axes = tuple(axes) + if len(set(axes)) != len(axes): + raise ValueError("repeated axis found in axes argument") + # This one is not checked by np.lib.array_utils.normalize_axis_tuple + if max(axes) >= arr.ndim: + raise ValueError( + f"Cannot average over an inexistent axis {max(axes)} >= arr.ndim = " + f"{arr.ndim}" + ) + if not isinstance(arr.flat[0], core.Variable): + raise ValueError( + "unumpy.average is meant to operate upon numpy arrays of ufloats, " + "not pure numpy arrays" + ) + for axis in sorted(axes, reverse=True): + arr = numpy.apply_over_axis(_average, axis, arr) + return arr + + ############################################################################### # Utilities: