From 479dc32b74d81e84eb419d6ca8eb1d5526e3a75e Mon Sep 17 00:00:00 2001 From: Alex Date: Wed, 20 Nov 2024 15:40:06 +1100 Subject: [PATCH] First draft of top level masking functions for bitmask and enumerated masks --- odc/geo/_xr_interop.py | 14 +++++ odc/geo/masking.py | 121 +++++++++++++++++++++++++++++++++++++++++ tests/test_masking.py | 52 ++++++++++++++++++ 3 files changed, 187 insertions(+) create mode 100644 odc/geo/masking.py create mode 100644 tests/test_masking.py diff --git a/odc/geo/_xr_interop.py b/odc/geo/_xr_interop.py index 2ca3cb0f..bbbb32aa 100644 --- a/odc/geo/_xr_interop.py +++ b/odc/geo/_xr_interop.py @@ -48,6 +48,12 @@ resolve_fill_value, resolve_nodata, ) +from .masking import ( + bits_to_bool, + enum_to_bool, + scale_and_offset, + scale_and_offset_dataset, +) from .overlap import compute_output_geobox from .roi import roi_is_empty from .types import Nodata, Resolution, SomeNodata, SomeResolution, SomeShape, xy_ @@ -1053,6 +1059,12 @@ def nodata(self, value: Nodata): colorize = _wrap_op(colorize) + scale_and_offset = _wrap_op(scale_and_offset) + + bits_to_bool = _wrap_op(bits_to_bool) + + enum_to_bool = _wrap_op(enum_to_bool) + if have.rasterio: write_cog = _wrap_op(write_cog) to_cog = _wrap_op(to_cog) @@ -1093,6 +1105,8 @@ def to_rgba( ) -> xarray.DataArray: return to_rgba(self._xx, bands=bands, vmin=vmin, vmax=vmax) + scale_and_offset = _wrap_op(scale_and_offset_dataset) + ODCExtensionDs.to_rgba.__doc__ = to_rgba.__doc__ diff --git a/odc/geo/masking.py b/odc/geo/masking.py new file mode 100644 index 00000000..820c6983 --- /dev/null +++ b/odc/geo/masking.py @@ -0,0 +1,121 @@ +# This file is part of the Open Data Cube, see https://opendatacube.org for more information +# +# Copyright (c) 2015-2020 ODC Contributors +# SPDX-License-Identifier: Apache-2.0 +""" +Functions around supporting cloud masking. 
from typing import TYPE_CHECKING, Iterable, Optional

if TYPE_CHECKING:
    # Needed for the (string) annotations only: the functions below use
    # nothing from xarray beyond the objects that are passed in.
    from xarray import DataArray, Dataset


def bits_to_bool(
    xx: "DataArray",
    bits: Optional[Iterable[int]] = None,
    bitflags: Optional[int] = None,
    invert: bool = False,
) -> "DataArray":
    """
    Convert integer array into boolean array using bitmasks.

    An element is ``True`` when at least one of the selected bits is set in it.
    Exactly one of ``bits`` and ``bitflags`` must be supplied.

    :param xx: DataArray with integer values
    :param bits: Bit positions to combine into a bitflag mask (e.g. [0, 1, 2] -> 0b111)
    :param bitflags: Integer value with bits set that will be used to extract the
        boolean mask (e.g. 0b00011000)
    :param invert: Invert the mask
    :return: DataArray with boolean values
    :raises ValueError: if neither, or both, of ``bits`` and ``bitflags`` are given
    """
    # Explicit exceptions rather than ``assert``: asserts vanish under ``python -O``.
    if bits is None and bitflags is None:
        raise ValueError("Either bits or bitflags must be provided")
    if bits is not None and bitflags is not None:
        raise ValueError("Only one of bits or bitflags can be provided")

    if bits is not None:
        bitflags = 0
        for bit in bits:
            bitflags |= 1 << bit

    mask = (xx & bitflags) != 0
    return ~mask if invert else mask


def enum_to_bool(xx: "DataArray", values: list, invert: bool = False) -> "DataArray":
    """
    Convert array into boolean array by matching against a list of values.

    :param xx: DataArray with integer values
    :param values: Values that map to ``True`` (before any inversion)
    :param invert: Invert the mask, so that the listed values map to ``False``
    :return: DataArray with boolean values
    """
    mask = xx.isin(values)
    return ~mask if invert else mask


def scale_and_offset(
    xx: "DataArray",
    scale: Optional[float] = None,
    offset: Optional[float] = None,
    ignore_missing: bool = False,
) -> "DataArray":
    """
    Apply scale and offset to the DataArray, i.e. compute ``xx * scale + offset``.

    Leave scale and/or offset as ``None`` to use the values from the DataArray's
    attrs (``scales`` and ``offsets``). If only one of the two can be resolved,
    the other falls back to its neutral value (scale 1.0, offset 0.0).

    :param xx: DataArray with integer values
    :param scale: Scale factor
    :param offset: Offset
    :param ignore_missing: When neither scale nor offset can be resolved, return
        ``xx`` unchanged instead of raising
    :return: DataArray with scaled and offset values
    :raises ValueError: if neither scale nor offset is available and
        ``ignore_missing`` is false
    """
    # The original note says the "scales" / "offsets" attr names are the ones used by GDAL.
    if scale is None:
        scale = xx.attrs.get("scales")
    if offset is None:
        offset = xx.attrs.get("offsets")

    if scale is None and offset is None:
        if ignore_missing:
            return xx
        raise ValueError(
            "Scale and offset not provided and not found in attrs.scales and attrs.offsets"
        )

    # One of the two was found: default the other so it has no effect.
    if scale is None:
        scale = 1.0
    if offset is None:
        offset = 0.0

    return xx * scale + offset


def scale_and_offset_dataset(
    xx: "Dataset", scale: Optional[float] = None, offset: Optional[float] = None
) -> "Dataset":
    """
    Apply scale and offset to every data variable of the Dataset.

    Leave scale and offset as ``None`` to use the values from each DataArray's
    attrs. Variables for which neither value can be resolved are passed through
    unchanged. Explicitly supplied values are applied to every variable.

    :param xx: Dataset with integer values
    :param scale: Scale factor
    :param offset: Offset
    :return: New Dataset with scaled and offset values; the input is not modified
    """
    # Re-bind variables on a shallow copy so the caller's Dataset keeps its
    # original (unscaled) variables.
    out = xx.copy()
    for name in xx.data_vars:
        out[name] = scale_and_offset(xx[name], scale, offset, ignore_missing=True)

    return out
import pytest
from xarray import DataArray

from odc.geo.masking import bits_to_bool, enum_to_bool, scale_and_offset


def _grid(rows, **kwargs):
    """Build a 2-D ``(y, x)`` DataArray from nested lists."""
    return DataArray(rows, dims=("y", "x"), **kwargs)


# Top left is cloud, top right is cloud shadow
# Bottom left is both cloud and cloud shadow, bottom right is neither
xx_bits = _grid([[0b00010000, 0b00001000], [0b00011000, 0b00000000]])

# 2x2 integer DataArray with some values set to
# 3 (shadow) and 9 (high confidence cloud).
xx_values = _grid([[3, 9], [3, 0]])


def test_bits_to_bool():
    """Bit positions and an equivalent bitflag integer select the same pixels."""
    flagged = _grid([[True, True], [True, False]])

    # Selection by bit position
    assert bits_to_bool(xx_bits, bits=[4, 3], bitflags=None).equals(flagged)

    # Selection by pre-built bitflags
    assert bits_to_bool(xx_bits, bits=None, bitflags=0b00011000).equals(flagged)

    # Inverted selection
    inverted = bits_to_bool(xx_bits, bits=[4, 3], bitflags=None, invert=True)
    assert inverted.equals(_grid([[False, False], [False, True]]))

    not_cloud = bits_to_bool(xx_bits, bits=None, bitflags=0b00010000, invert=True)
    assert not_cloud.equals(_grid([[False, True], [False, True]]))


def test_enum_to_bool():
    """Listed values map to True, or to False when inverted."""
    matched = enum_to_bool(xx_values, values=[3, 9])
    assert matched.equals(_grid([[True, True], [True, False]]))

    unmatched = enum_to_bool(xx_values, values=[3, 9], invert=True)
    assert unmatched.equals(_grid([[False, False], [False, True]]))


def test_scale_and_offset():
    """Explicit scale/offset are applied as ``xx * scale + offset``."""
    unchanged = scale_and_offset(xx_values, scale=1.0, offset=0.0)
    assert unchanged.equals(_grid([[3, 9], [3, 0]]))

    # Nothing to apply and ignore_missing set: data passes through
    skipped = scale_and_offset(xx_values, scale=None, offset=None, ignore_missing=True)
    assert skipped.equals(_grid([[3, 9], [3, 0]]))

    scaled = scale_and_offset(xx_values, scale=2.0, offset=1.0)
    assert scaled.equals(_grid([[7, 19], [7, 1]]))

    # Only one value supplied: the other falls back to its neutral value
    scale_only = scale_and_offset(xx_values, scale=2.0, offset=None)
    assert scale_only.equals(_grid([[6, 18], [6, 0]]))


def test_scale_and_offset_from_attrs():
    """Missing arguments are looked up in the ``scales`` / ``offsets`` attrs."""
    tagged = _grid([[3, 9], [3, 0]], attrs={"scales": 2.0, "offsets": 1.0})

    scaled = scale_and_offset(tagged, scale=None, offset=None)
    assert scaled.equals(_grid([[7, 19], [7, 1]]))


def test_scale_and_offset_missing():
    """Without any scale/offset information the call fails unless told to ignore it."""
    with pytest.raises(ValueError):
        scale_and_offset(xx_values, scale=None, offset=None)