Skip to content

Commit

Permalink
Merge pull request #49 from scipp/event-broadcast
Browse files Browse the repository at this point in the history
Event broadcast with uncertainty upper-bounds
  • Loading branch information
SimonHeybrock authored Jul 8, 2024
2 parents 9a6e771 + ed0a6e6 commit 5f38ebf
Show file tree
Hide file tree
Showing 2 changed files with 162 additions and 24 deletions.
75 changes: 51 additions & 24 deletions src/ess/reduce/uncertainty.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@
"""

from enum import Enum
from typing import Dict, TypeVar, Union, overload
from typing import TypeVar, overload

import numpy as np
import scipp as sc
from scipp.core.concepts import irreducible_mask

T = TypeVar("T", bound=Union[sc.Variable, sc.DataArray])
T = TypeVar("T", bound=sc.Variable | sc.DataArray)


UncertaintyBroadcastMode = Enum(
Expand Down Expand Up @@ -51,14 +51,15 @@ def broadcast_with_upper_bound_variances(


def broadcast_with_upper_bound_variances(
data: Union[sc.Variable, sc.DataArray], prototype: sc.DataArray | sc.Variable
) -> Union[sc.Variable, sc.DataArray]:
data: sc.Variable | sc.DataArray, prototype: sc.DataArray | sc.Variable
) -> sc.Variable | sc.DataArray:
"""
Compute an upper bound for the variances of the broadcasted data.
The variances of the broadcasted data are computed by scaling the variances of the
input data by the volume of the new subspace. The volume of the new subspace is
computed as the product of the sizes of the new dimensions.
computed as the product of the sizes of the new dimensions. In the case of an
event-data prototype the events are counted.
Parameters
----------
Expand All @@ -73,8 +74,21 @@ def broadcast_with_upper_bound_variances(
:
The data with the variances scaled by the volume of the new subspace.
"""
if _no_variance_broadcast(data, prototype.sizes):
if _no_variance_broadcast(data, prototype):
return data
for dim in prototype.dims:
coord1 = None if isinstance(data, sc.Variable) else data.coords.get(dim)
coord2 = (
None if isinstance(prototype, sc.Variable) else prototype.coords.get(dim)
)
if coord1 is None or coord2 is None:
if dim in data.dims:
if data.sizes[dim] != prototype.sizes[dim]:
raise ValueError("Mismatching binning not supported in broadcast.")
continue
elif sc.identical(coord1, coord2):
continue
raise ValueError("Mismatching binning not supported in broadcast.")
sizes = prototype.sizes
mask = sc.scalar(False)
if isinstance(prototype, sc.DataArray):
Expand All @@ -83,17 +97,28 @@ def broadcast_with_upper_bound_variances(
if dim in irred.dims:
irred = irred.all(dim)
mask = irred
size = (~mask).sum().value
for dim, dim_size in sizes.items():
if dim not in data.dims and dim not in mask.dims:
size *= dim_size
data = data.copy()
data.variances *= size
sizes = {**sizes, **data.sizes}
data = data.broadcast(sizes=sizes).copy()
if mask is not None:
if prototype.bins is None:
size = (~mask).sum().to(dtype='int64', copy=False)
for dim, dim_size in sizes.items():
if dim not in data.dims and dim not in mask.dims:
size *= sc.index(dim_size)
else:
size = prototype.bins.size().sum(set(prototype.dims) - set(data.dims))
scale = size.broadcast(sizes=sizes).to(dtype='float64')
if not sc.identical(mask, sc.scalar(False)):
# The masked values are not counted in the variance, so we set them to infinity.
data.variances[mask.broadcast(sizes=sizes).values] = np.inf
scale.values[mask.broadcast(sizes=sizes).values] = np.inf
data = data.broadcast(sizes=sizes).copy()
data.variances *= scale.values
if prototype.bins is not None:
# Note that we are not using event masks in the upper-bound computation. Less
# than optimal, but simpler.
if isinstance(data, sc.Variable):
data = sc.bins_like(prototype, data)
else:
data.data = sc.bins_like(prototype, data.data)
return data


Expand All @@ -112,8 +137,8 @@ def drop_variances_if_broadcast(


def drop_variances_if_broadcast(
data: Union[sc.Variable, sc.DataArray], prototype: sc.DataArray | sc.Variable
) -> Union[sc.Variable, sc.DataArray]:
data: sc.Variable | sc.DataArray, prototype: sc.DataArray | sc.Variable
) -> sc.Variable | sc.DataArray:
"""
Drop variances if the data is broadcasted.
Expand All @@ -129,21 +154,23 @@ def drop_variances_if_broadcast(
:
The data without variances if the data is broadcasted.
"""
if _no_variance_broadcast(data, prototype.sizes):
if _no_variance_broadcast(data, prototype):
return data
return sc.values(data)


def _no_variance_broadcast(
data: Union[sc.Variable, sc.DataArray], sizes: Dict[str, int]
data: sc.Variable | sc.DataArray, prototype: sc.Variable | sc.DataArray
) -> bool:
return (data.variances is None) or all(
data.sizes.get(dim) == size for dim, size in sizes.items()
)

if data.bins is not None:
raise ValueError("Cannot broadcast binned data.")
if data.variances is None:
return True
if prototype.bins is not None:
return False
sizes = prototype.sizes
return all(data.sizes.get(dim) == size for dim, size in sizes.items())

# TODO: For now, we only have broadcasters for dense data. Event-data broadcasters will
# be added at a later stage, as we currently only have one which is valid for SANS.

broadcasters = {
UncertaintyBroadcastMode.drop: drop_variances_if_broadcast,
Expand Down
111 changes: 111 additions & 0 deletions tests/uncertainty_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -163,3 +163,114 @@ def test_broadcast_into_nonorthogonal_2d_mask_reducible_mask_counts_masked():
expected.variances *= 2
expected['y', 1].variances = [np.inf, np.inf]
assert_identical(xy, expected)


def test_upper_bound_broadcast_raises_if_input_is_binned():
x = sc.linspace('x', 0.0, 1.0, 10).bin(x=1).squeeze()
x.value.variances = x.value.values
y = sc.linspace('y', 0.0, 1.0, 10)
with pytest.raises(ValueError, match="Cannot broadcast binned data."):
unc.broadcast_with_upper_bound_variances(x, prototype=y)


def test_upper_bound_event_broadcast_raises_if_binning_mismatching():
prototype = sc.linspace('x', 0.0, 1.0, 10).bin(x=3).squeeze()
data = sc.DataArray(
sc.ones(dims=['x'], shape=[2], with_variances=True),
coords={'x': sc.linspace('x', 0.0, 1.0, 3)},
)
with pytest.raises(
ValueError, match="Mismatching binning not supported in broadcast."
):
unc.broadcast_with_upper_bound_variances(data, prototype=prototype)


def test_upper_bound_event_broadcast_counts_events():
content = sc.ones(dims=['event'], shape=[10])
# sizes = [0,1,2,4,3]
begin = sc.array(dims=['x'], values=[0, 0, 1, 3, 7], unit=None)
prototype = sc.bins(data=content, dim='event', begin=begin)
# We are not supporting the case of prototype missing dims of the data.
prototype = sc.concat([prototype, prototype], 'y')
y = sc.array(dims=['y'], values=[1.0, 2.0], variances=[1.0, 2.0])
upper_bound_broadcast = unc.broadcast_with_upper_bound_variances(
y, prototype=prototype
)

expected = prototype.copy().bins.constituents
expected['data'].values[:10] = 1.0
expected['data'].values[10:] = 2.0
# There are 5 bins along x, but 10 events, so variance scale factor is 10.
expected['data'].variances = expected['data'].values * 10
expected = sc.bins(**expected)

assert_identical(upper_bound_broadcast, expected)
# The point of broadcast_with_upper_bound_variances is that we can afterwards
# perform the following operation with getting a variance broadcast error.
# Did it work?
_ = prototype * upper_bound_broadcast


def test_upper_bound_event_broadcast_event_count_excludes_masked():
content = sc.ones(dims=['event'], shape=[10])
# sizes = [0,1,2,4,3]
begin = sc.array(dims=['x'], values=[0, 0, 1, 3, 7], unit=None)
prototype = sc.bins(data=content, dim='event', begin=begin)
# We are not supporting the case of prototype missing dims of the data.
prototype = sc.DataArray(sc.concat([prototype, prototype], 'y'))
prototype.masks['x'] = sc.array(
dims=['x'], values=[False, True, False, False, True]
)
y = sc.array(dims=['y'], values=[1.0, 2.0], variances=[1.0, 2.0])
upper_bound_broadcast = unc.broadcast_with_upper_bound_variances(
y, prototype=prototype
)

expected = prototype.copy().bins.constituents
expected['data'].values[:10] = 1.0
expected['data'].values[10:] = 2.0
# There are 5 bins along x, but 10 events (4 masked), so variance scale factor is 6.
expected['data'].variances = expected['data'].values * 6
expected['data']['event', 0:1].variances = [np.inf]
expected['data']['event', 7:10].variances = [np.inf, np.inf, np.inf]
expected['data']['event', 10:11].variances = [np.inf]
expected['data']['event', 17:20].variances = [np.inf, np.inf, np.inf]
expected = sc.bins(**expected)

assert_identical(upper_bound_broadcast, expected)
# The point of broadcast_with_upper_bound_variances is that we can afterwards
# perform the following operation with getting a variance broadcast error.
# Did it work?
_ = prototype * upper_bound_broadcast
assert sc.all(sc.isinf(sc.variances(upper_bound_broadcast['x', 1])))
assert sc.all(sc.isinf(sc.variances(upper_bound_broadcast['x', 4])))


def test_upper_bound_event_broadcast_only_bin_broadcast():
content = sc.ones(dims=['event'], shape=[10])
begin = sc.array(dims=['x'], values=[0, 3, 6], unit=None)
prototype = sc.DataArray(sc.bins(data=content, dim='event', begin=begin))
prototype.masks['x'] = sc.array(dims=['x'], values=[False, True, False])
x = sc.array(dims=['x'], values=[1.0, 2.0, 3.0], variances=[1.0, 2.0, 3.0])
# Data has same dims as prototype, broadcast is only into bins
assert x.sizes == prototype.sizes
upper_bound_broadcast = unc.broadcast_with_upper_bound_variances(
x, prototype=prototype
)

expected = prototype.copy().bins.constituents
expected['data'].values[:3] = 1.0
expected['data'].values[3:6] = 2.0
expected['data'].values[6:] = 3.0
expected['data'].variances = expected['data'].values
expected['data']['event', 0:3].variances = [3.0, 3.0, 3.0]
# The x=1 bin is masked, but it is not being broadcasted, so no inf.
expected['data']['event', 3:6].variances = [6.0, 6.0, 6.0]
expected['data']['event', 6:].variances = [12.0, 12.0, 12.0, 12.0]
expected = sc.bins(**expected)

assert_identical(upper_bound_broadcast, expected)
# The point of broadcast_with_upper_bound_variances is that we can afterwards
# perform the following operation with getting a variance broadcast error.
# Did it work?
_ = prototype * upper_bound_broadcast

0 comments on commit 5f38ebf

Please sign in to comment.