diff --git a/openeo_processes_dask/process_implementations/__init__.py b/openeo_processes_dask/process_implementations/__init__.py index 64c4014d..5dd1333d 100644 --- a/openeo_processes_dask/process_implementations/__init__.py +++ b/openeo_processes_dask/process_implementations/__init__.py @@ -8,6 +8,7 @@ from .inspect import * from .logic import * from .math import * +from .text import * try: from .ml import * @@ -16,12 +17,10 @@ "Did not load machine learning processes due to missing dependencies: Install them like this: `pip install openeo-processes-dask[implementations, ml]`" ) -# try: -# from .experimental import * -# except ImportError as e: -# logger.warning( -# "Did not experimental processes due to missing dependencies: Install them like this: `pip install openeo-processes-dask[implementations, experimental]`" -# ) +try: + from .experimental import * +except ImportError as e: + logger.warning("Did not load experimental processes.") import rioxarray as rio # Required for the .rio accessor on xarrays. diff --git a/openeo_processes_dask/process_implementations/arrays.py b/openeo_processes_dask/process_implementations/arrays.py index ebbaeaf8..9e89acae 100644 --- a/openeo_processes_dask/process_implementations/arrays.py +++ b/openeo_processes_dask/process_implementations/arrays.py @@ -1,6 +1,7 @@ +import copy import itertools import logging -from typing import Any, Optional +from typing import Any, Callable, Optional, Union import dask.array as da import numpy as np @@ -10,6 +11,7 @@ from openeo_pg_parser_networkx.pg_schema import DateTime from xarray.core.duck_array_ops import isnull, notnull +from openeo_processes_dask.process_implementations.comparison import is_valid from openeo_processes_dask.process_implementations.cubes.utils import _is_dask_array from openeo_processes_dask.process_implementations.exceptions import ( ArrayElementNotAvailable, @@ -30,11 +32,14 @@ "array_contains", "array_find", "array_labels", + "array_apply", + "array_interpolate_linear", "first", "last", "order", "rearrange", "sort", + "count", ] @@ -161,10 +166,8 @@ def array_contains(data: ArrayLike, value: Any, axis=None) -> bool: value_is_valid = True if len(np.shape(data)) != 1 and axis is None: return False - if not value_is_valid: + if not value_is_valid or pd.isnull(value): return False - if pd.isnull(value): - return np.isnan(data).any(axis=axis) else: return np.isin(data, value).any(axis=axis) @@ -184,8 +187,14 @@ def array_find( idxs = (data == value).argmax(axis=axis) mask = ~np.array((data == value).any(axis=axis)) - if np.isnan(value): + if not isinstance(value, str) and np.isnan(value): mask = True + if reverse: + if axis is None: + size = data.size + else: + size = data.shape[axis] + idxs = size - 1 - idxs logger.warning( "array_find: numpy has no sentinel value for missing data in integer arrays, therefore np.masked_array is used to return the indices of found elements. Further operations might fail if not defined for masked arrays." @@ -210,6 +219,35 @@ def array_labels(data: ArrayLike) -> ArrayLike: return np.arange(len(data)) +def array_apply( + data: ArrayLike, process: Callable, context: Optional[Any] = None +) -> ArrayLike: + if not context: + context = {} + positional_parameters = {"x": 0} + named_parameters = {"x": data, "context": context} + if callable(process): + process_to_apply = np.vectorize(process) + return process_to_apply( + data, + positional_parameters=positional_parameters, + named_parameters=named_parameters, + ) + + +def array_interpolate_linear(data: ArrayLike): + if isinstance(data, list): + data = np.array(data) + x = np.arange(len(data)) + valid = np.isfinite(data) + if len(x[valid]) < 2: + return data + data[~valid] = np.interp( + x[~valid], x[valid], data[valid], left=np.nan, right=np.nan + ) + return data + + def first( data: ArrayLike, ignore_nodata: Optional[bool] = True, @@ -337,3 +375,23 @@ def sort( return data_sorted_flip elif nodata == True: # default sort behaviour, np.nan values are put last return data_sorted + + +def count( + data: ArrayLike, + condition: Optional[Union[Callable, bool]] = None, + context: Any = None, + axis=None, + keepdims=False, +): + if condition is None: + valid = is_valid(data) + return np.nansum(valid, axis=axis, keepdims=keepdims) + if condition is True: + return np.nansum(np.ones_like(data), axis=axis, keepdims=keepdims) + if callable(condition): + if not context: + context = {} + context.pop("x", None) + count = condition(x=data, **context) + return np.nansum(count, axis=axis, keepdims=keepdims) diff --git a/openeo_processes_dask/process_implementations/cubes/_filter.py b/openeo_processes_dask/process_implementations/cubes/_filter.py index 7eb50f4c..6fa1f566 100644 --- a/openeo_processes_dask/process_implementations/cubes/_filter.py +++ b/openeo_processes_dask/process_implementations/cubes/_filter.py @@ -1,7 +1,7 @@ import json import logging import warnings -from typing import Callable +from typing import Any, Callable, Optional import dask.array as da import geopandas as gpd @@ -21,6 +21,7 @@ BandFilterParameterMissing, DimensionMissing, DimensionNotAvailable, + TemporalExtentEmpty, TooManyDimensions, ) @@ -69,15 +70,33 @@ def filter_temporal( # https://github.com/numpy/numpy/issues/23904 with warnings.catch_warnings(): warnings.filterwarnings("ignore", category=DeprecationWarning) - start_time = extent[0] - if start_time is not None: + if isinstance(extent, TemporalInterval): + start_time = extent.start + end_time = extent.end + else: + start_time = extent[0] + end_time = extent[1] + + if isinstance(start_time, str): + start_time = np.datetime64(start_time) + elif start_time is not None: start_time = start_time.to_numpy() - end_time = extent[1] - if end_time is not None: - end_time = extent[1].to_numpy() - np.timedelta64(1, "ms") + + if isinstance(end_time, str): + end_time = np.datetime64(end_time) + elif end_time is not None: + end_time = end_time.to_numpy() + # The second element is the end of the temporal interval. # The specified instance in time is excluded from the interval. # See https://processes.openeo.org/#filter_temporal + if end_time is not None: + end_time -= np.timedelta64(1, "ms") + + if start_time is not None and end_time is not None and end_time < start_time: + raise TemporalExtentEmpty( + "The temporal extent is empty. The second instant in time must always be greater/later than the first instant in time." + ) data = data.where(~np.isnat(data[applicable_temporal_dimension]), drop=True) filtered = data.loc[ @@ -87,16 +106,27 @@ def filter_temporal( return filtered -def filter_labels(data: RasterCube, condition: Callable, dimension: str) -> RasterCube: +def filter_labels( + data: RasterCube, condition: Callable, dimension: str, context: Optional[Any] = None +) -> RasterCube: if dimension not in data.dims: raise DimensionNotAvailable( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) - labels = data[dimension].values - label_mask = condition(x=labels) - label = labels[label_mask] - data = data.sel(**{dimension: label}) + labels = np.array(data[dimension].values) + if not context: + context = {} + positional_parameters = {"x": 0} + named_parameters = {"x": labels, "context": context} + filter_condition = np.vectorize(condition) + filtered_labels = filter_condition( + labels, + positional_parameters=positional_parameters, + named_parameters=named_parameters, + ) + label = np.argwhere(filtered_labels) + data = data.isel(**{dimension: label[0]}) return data diff --git a/openeo_processes_dask/process_implementations/cubes/general.py b/openeo_processes_dask/process_implementations/cubes/general.py index 66aac394..91ca5d60 100644 --- a/openeo_processes_dask/process_implementations/cubes/general.py +++ b/openeo_processes_dask/process_implementations/cubes/general.py @@ -1,4 +1,5 @@ -from typing import Optional +import copy +from typing import Optional, Union import numpy as np import xarray as xr @@ -11,7 +12,15 @@ DimensionNotAvailable, ) -__all__ = ["create_raster_cube", "drop_dimension", "dimension_labels", "add_dimension"] +__all__ = [ + "create_data_cube", + "drop_dimension", + "dimension_labels", + "add_dimension", + "rename_dimension", + "rename_labels", + "trim_cube", +] def drop_dimension(data: RasterCube, name: str) -> RasterCube: @@ -26,10 +35,27 @@ def drop_dimension(data: RasterCube, name: str) -> RasterCube: return data.drop_vars(name).squeeze(name) -def create_raster_cube() -> RasterCube: +def create_data_cube() -> RasterCube: return xr.DataArray() +def trim_cube(data) -> RasterCube: + for dim in data.dims: + if ( + dim in data.openeo.temporal_dims + or dim in data.openeo.band_dims + or dim in data.openeo.other_dims + ): + values = data[dim].values + other_dims = [d for d in data.dims if d != dim] + available_data = values[(np.isnan(data)).all(dim=other_dims) == 0] + if len(available_data) == 0: + raise ValueError(f"Data contains NaN values only. ") + data = data.sel({dim: available_data}) + + return data + + def dimension_labels(data: RasterCube, dimension: str) -> ArrayLike: if dimension not in data.dims: raise DimensionNotAvailable( @@ -72,3 +98,111 @@ def add_dimension( # Register dimension in the openeo accessor data_e.openeo.add_dim_type(name=name, type=type) return data_e + + +def rename_dimension( + data: RasterCube, + source: str, + target: str, +): + """ + Parameters + ---------- + data : xr.DataArray + A data cube. + source : str + The current name of the dimension. + Fails with a DimensionNotAvailable exception if the specified dimension does not exist. + labels : number, str + A new Name for the dimension. + Fails with a DimensionExists exception if a dimension with the specified name exists. + Returns + ------- + xr.DataArray : + A data cube with the same dimensions, + but the name of one of the dimensions changes. + The old name can not be referred to any longer. + The dimension properties (name, type, labels, reference system and resolution) + remain unchanged. + """ + if source not in data.dims: + raise DimensionNotAvailable( + f"Provided dimension ({source}) not found in data.dims: {data.dims}" + ) + if target in data.dims: + raise Exception( + f"DimensionExists - A dimension with the specified name already exists. The existing dimensions are: {data.dims}" + ) + # Register dimension in the openeo accessor + if source in data.openeo.spatial_dims: + dim_type = "spatial" + elif source in data.openeo.temporal_dims: + dim_type = "temporal" + elif source in data.openeo.band_dims: + dim_type = "bands" + else: + dim_type = "other" + data = data.rename({source: target}) + data.openeo.add_dim_type(name=target, type=dim_type) + return data + + +def rename_labels( + data: RasterCube, + dimension: str, + target: list[Union[str, float]], + source: Optional[list[Union[str, float]]] = [], +): + data_rename = copy.deepcopy(data) + if dimension not in data_rename.dims: + raise DimensionNotAvailable( + f"Provided dimension ({dimension}) not found in data.dims: {data_rename.dims}" + ) + if source: + if len(source) != len(target): + raise Exception( + f"LabelMismatch - The number of labels in the parameters `source` and `target` don't match." + ) + + source_labels = data_rename[dimension].values + if isinstance(source_labels, np.ndarray): + source_labels = source_labels.tolist() + if isinstance(target, np.ndarray): + target = target.tolist() + + target_values = [] + + for label in source_labels: + if label in target: + raise Exception(f"LabelExists - A label with the specified name exists.") + if source: + if label in source: + target_values.append(target[source.index(label)]) + else: + target_values.append(label) + + if not source: + if len(source_labels) == len(target): + data_rename[dimension] = target + elif len(target) < len(source_labels): + if 0 in source_labels: + target_values = target + source_labels[len(target) :] + data_rename[dimension] = target_values + else: + raise Exception( + f"LabelsNotEnumerated - The dimension labels are not enumerated." + ) + else: + raise Exception( + f"LabelMismatch - The number of labels in the parameters `source` and `target` don't match." + ) + + else: + for label in source: + if label not in source_labels: + raise Exception( + f"LabelNotAvailable - A label with the specified name does not exist." + ) + data_rename[dimension] = target_values + + return data_rename diff --git a/openeo_processes_dask/process_implementations/dates.py b/openeo_processes_dask/process_implementations/dates.py new file mode 100644 index 00000000..59dd76f5 --- /dev/null +++ b/openeo_processes_dask/process_implementations/dates.py @@ -0,0 +1,126 @@ +from typing import Optional + +import numpy as np + +__all__ = [ + "date_between", + "date_difference", + "date_shift", +] + + +def datetime_from_str(date: str): + daytime = np.datetime64(date) + return daytime + + +def date_between( + x: str, min: str, max: str, exclude_max: bool = False +) -> Optional[bool]: + x = datetime_from_str(x) + min = datetime_from_str(min) + max = datetime_from_str(max) + if exclude_max: + return bool((x >= min) and (x < max)) + else: + return bool((x >= min) and (x <= max)) + + +def date_difference(date1: str, date2: str, unit: Optional[str] = "second") -> float: + date1 = datetime_from_str(date1) + date2 = datetime_from_str(date2) + units = { + "millisecond": 1, + "second": 1000, + "minute": 1000 * 60, + "hour": 1000 * 60 * 60, + "day": 1000 * 60 * 60 * 24, + "week": 1000 * 60 * 60 * 24 * 7, + "month": "M", + "year": "Y", + } + if unit in units: + unit = units[unit] + if unit in ["M", "Y"]: + return float( + ( + date2.astype(f"datetime64[{unit}]") + - date1.astype(f"datetime64[{unit}]") + ).astype(float) + ) + else: + # we do this, so the examples are fulfilled: + # date_difference(date1 = "2020-01-01T00:00:00.0Z", date2 = "2020-01-01T00:00:15.5Z") -> 15.5 + return ( + float( + ( + date2.astype(f"datetime64[ms]") - date1.astype(f"datetime64[ms]") + ).astype(float) + ) + / unit + ) + + +def date_shift(date: str, value: int, unit: str) -> str: + if date.endswith("Z"): + end = "Z" + elif "+" in date: + end = "+" + date.split("+")[-1] + date = date.split("+")[0] + else: + end = "" + units = { + "millisecond": "ms", + "second": "s", + "minute": "m", + "hour": "h", + "day": "D", + "week": "W", + "month": "M", + "year": "Y", + } + if unit in units: + unit = units[unit] + if unit in ["M", "Y"]: + if len(date) > 7: + date_M = np.datetime64(date, "M") + day = ( + int( + (np.datetime64(date, "D") - date_M.astype("datetime64[D]")).astype( + int + ) + ) + + 1 + ) + if " " in date: + time = "T" + date.split(" ")[-1] + elif "T" in date: + time = "T" + date.split("T")[-1] + else: + time = "" + new_date = str(date_M + np.timedelta64(value, unit)) + if day in [29, 30, 31]: + for i in range(3): + try: + new_daytime = f"{new_date}-{day-i}" + new_daytime_numpy = np.datetime64(new_daytime) + result = f"{new_daytime}{time}" + return result + except: + pass + elif int(day) < 10: + new_daytime = f"{new_date}-0{day}{time}" + else: + new_daytime = f"{new_date}-{day}T{time}" + new_daytime_numpy = np.datetime64(new_daytime) + return new_daytime + + date = datetime_from_str(date) + return str(date_M + np.timedelta64(value, unit)) + + date = datetime_from_str(date) + if unit in ["ms"]: + result = str((date + np.timedelta64(value, unit)).astype(f"datetime64[{unit}]")) + else: + result = str((date + np.timedelta64(value, unit)).astype(date.dtype)) + return result + end diff --git a/openeo_processes_dask/process_implementations/experimental/__init__.py b/openeo_processes_dask/process_implementations/experimental/__init__.py index e69de29b..425c9b93 100644 --- a/openeo_processes_dask/process_implementations/experimental/__init__.py +++ b/openeo_processes_dask/process_implementations/experimental/__init__.py @@ -0,0 +1 @@ +from .ddmc import * diff --git a/openeo_processes_dask/process_implementations/experimental/ddmc.py b/openeo_processes_dask/process_implementations/experimental/ddmc.py new file mode 100644 index 00000000..65eb912b --- /dev/null +++ b/openeo_processes_dask/process_implementations/experimental/ddmc.py @@ -0,0 +1,86 @@ +from openeo_processes_dask.process_implementations.arrays import array_element +from openeo_processes_dask.process_implementations.cubes.general import add_dimension +from openeo_processes_dask.process_implementations.cubes.merge import merge_cubes +from openeo_processes_dask.process_implementations.cubes.reduce import reduce_dimension +from openeo_processes_dask.process_implementations.data_model import RasterCube + +__all__ = ["ddmc"] + + +def ddmc( + data: RasterCube, + nir08="nir08", + nir09="nir09", + cirrus="cirrus", + swir16="swir16", + swir22="swir22", + gain=2.5, + target_band=None, +): + dimension = data.openeo.band_dims[0] + if target_band is None: + target_band = dimension + + # Mid-Level Clouds + def MIDCL(data): + # B08 = array_element(data, label=nir08, axis = axis) + + B08 = data.sel(**{dimension: nir08}) + + # B09 = array_element(data, label=nir09, axis = axis) + + B09 = data.sel(**{dimension: nir09}) + + MIDCL = B08 - B09 + + MIDCL_result = MIDCL * gain + + return MIDCL_result + + # Deep moist convection + def DC(data): + # B10 = array_element(data, label=cirrus, axis = axis) + # B12 = array_element(data, label=swir22, axis = axis) + + B10 = data.sel(**{dimension: cirrus}) + B12 = data.sel(**{dimension: swir22}) + + DC = B10 - B12 + + DC_result = DC * gain + + return DC_result + + # low-level cloudiness + def LOWCL(data): + # B10 = array_element(data, label=cirrus, axis = axis) + # B11 = array_element(data, label=swir16, axis = axis) + B10 = data.sel(**{dimension: cirrus}) + B11 = data.sel(**{dimension: swir16}) + + LOWCL = B11 - B10 + + LOWCL_result = LOWCL * gain + + return LOWCL_result + + # midcl = reduce_dimension(data, reducer=MIDCL, dimension=dimension) + midcl = MIDCL(data) + midcl = add_dimension(midcl, name=target_band, label="midcl", type=dimension) + + # dc = reduce_dimension(data, reducer=DC, dimension=dimension) + dc = DC(data) + # dc = add_dimension(dc, target_band, "dc") + dc = add_dimension(dc, target_band, label="dc", type=dimension) + + # lowcl = reduce_dimension(data, reducer=LOWCL, dimension=dimension) + lowcl = LOWCL(data) + lowcl = add_dimension(lowcl, target_band, label="lowcl", type=dimension) + + # ddmc = merge_cubes(merge_cubes(midcl, dc), lowcl) + ddmc1 = merge_cubes(midcl, lowcl) + ddmc1.openeo.add_dim_type(name=target_band, type=dimension) + ddmc = merge_cubes(dc, ddmc1, overlap_resolver=target_band) + + # return a datacube + return ddmc diff --git a/openeo_processes_dask/process_implementations/math.py b/openeo_processes_dask/process_implementations/math.py index 55e2d7c2..e8d8b31b 100644 --- a/openeo_processes_dask/process_implementations/math.py +++ b/openeo_processes_dask/process_implementations/math.py @@ -1,3 +1,5 @@ +import logging + import dask import dask.array as da import numpy as np @@ -62,6 +64,8 @@ "normalized_difference", ] +logger = logging.getLogger(__name__) + def e(): return np.e @@ -290,10 +294,23 @@ def quantiles( "The process `quantiles` only allows that either the `probabilities` or the `q` parameter is set." ) - if isinstance(probabilities, list): + if isinstance(probabilities, int): + probabilities = np.arange(1.0 / probabilities, 1, 1.0 / probabilities) + + elif ( + isinstance(probabilities, list) + and len(probabilities) == 1 + and isinstance(probabilities[0], int) + ): + probabilities = np.arange(1.0 / probabilities[0], 1, 1.0 / probabilities[0]) + + elif isinstance(probabilities, list): probabilities = np.array(probabilities) if q is not None: + logger.warning( + "This parameter has been **deprecated**. Please use the parameter `probabilities` instead." + ) probabilities = np.arange(1.0 / q, 1, 1.0 / q) if data.size == 0: diff --git a/openeo_processes_dask/process_implementations/text.py b/openeo_processes_dask/process_implementations/text.py new file mode 100644 index 00000000..18be3bfd --- /dev/null +++ b/openeo_processes_dask/process_implementations/text.py @@ -0,0 +1,57 @@ +from typing import Any, Optional + +__all__ = [ + "text_begins", + "text_contains", + "text_concat", + "text_ends", +] + + +def text_begins(data: str, pattern: str, case_sensitive: Optional[bool] = True) -> str: + if data: + if case_sensitive: + return data.startswith(pattern) + else: + return data.lower().startswith(pattern.lower()) + else: + return None + + +def text_contains( + data: str, pattern: str, case_sensitive: Optional[bool] = True +) -> str: + if data: + if case_sensitive: + return pattern in data + else: + return pattern.lower() in data.lower() + else: + return None + + +def text_ends(data: str, pattern: str, case_sensitive: Optional[bool] = True) -> str: + if data: + if case_sensitive: + return data.endswith(pattern) + else: + return data.lower().endswith(pattern.lower()) + else: + return None + + +def text_concat(data: list[Any], separator: Any) -> str: + string = "" + for elem in data: + if isinstance(elem, bool) or elem is None: + string += str(elem).lower() + else: + string += str(elem) + if isinstance(separator, bool) or separator is None: + string += str(separator).lower() + else: + string += str(separator) + if separator == "": + return string + else: + return string[: -len(str(separator))] diff --git a/openeo_processes_dask/specs/openeo-processes b/openeo_processes_dask/specs/openeo-processes index 8b7bb946..f4ba0b91 160000 --- a/openeo_processes_dask/specs/openeo-processes +++ b/openeo_processes_dask/specs/openeo-processes @@ -1 +1 @@ -Subproject commit 8b7bb94691eef89b3587406a1276ad26350e47ac +Subproject commit f4ba0b912ab85ca27773b8e2c9c3f5c4fda983dc diff --git a/pyproject.toml b/pyproject.toml index d01974d0..6d96cf6c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "openeo-processes-dask" -version = "2024.7.0" +version = "2024.10.1" description = "Python implementations of many OpenEO processes, dask-friendly by default." authors = ["Lukas Weidenholzer ", "Sean Hoyal ", "Valentina Hutter "] maintainers = ["EODC Staff "] @@ -27,12 +27,12 @@ python = ">=3.10,<3.12" geopandas = { version = ">=0.11.1,<1", optional = true } pandas = { version = ">=2.0.0", optional = true } xarray = { version = ">=2022.11.0,<=2024.3.0", optional = true } -dask = {extras = ["array", "dataframe"], version = ">=2023.4.0", optional = true} +dask = {extras = ["array", "dataframe", "distributed"], version = ">=2023.4.0", optional = true} rasterio = { version = "^1.3.4", optional = true } dask-geopandas = { version = ">=0.2.0,<1", optional = true } xgboost = { version = ">=1.5.1", optional = true } rioxarray = { version = ">=0.12.0,<1", optional = true } -openeo-pg-parser-networkx = { version = ">=2023.5.1", optional = true } +openeo-pg-parser-networkx = { version = ">=2024.7", optional = true } odc-geo = { version = ">=0.4.1,<1", optional = true } stac_validator = { version = ">=3.3.1", optional = true } odc-stac = { version = ">=0.3.9", optional = true } diff --git a/tests/test_arrays.py b/tests/test_arrays.py index 3d760214..631d9d01 100644 --- a/tests/test_arrays.py +++ b/tests/test_arrays.py @@ -14,6 +14,7 @@ ArrayElementNotAvailable, TooManyDimensions, ) +from openeo_processes_dask.process_implementations.math import add from tests.general_checks import general_output_checks from tests.mockdata import create_fake_rastercube @@ -179,7 +180,7 @@ def test_array_append(data, value, expected): ([1, 2, 3], 2, True), (["A", "B", "C"], "b", False), ([1, 2, 3], "2", False), - ([1, 2, np.nan], np.nan, True), + ([1, 2, np.nan], np.nan, False), ([[2, 1], [3, 4]], [1, 2], False), ([[2, 1], [3, 4]], 2, False), ([1, 2, 3], np.int64(2), True), @@ -221,14 +222,15 @@ def test_array_contains_object_dtype(): [ ([1, 0, 3, 2], 3, 2, None, False), ([1, 0, 3, 2, np.nan, 3], np.nan, 999999, None, False), - ([1, 0, 3, 2], 3, 2, None, False), + ([1, 0, 3, 0, 2], 0, 1, None, False), ([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [999999, 1, 0, 999999], 0, False), ([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [2, 1], 1, False), - ([1, 0, 3, 2], 3, 1, None, True), + ([1, 0, 3, 2], 3, 2, None, True), ([1, 0, 3, 2, np.nan, 3], np.nan, 999999, None, True), - ([1, 0, 3, 2], 3, 1, None, True), - ([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [999999, 0, 1, 999999], 0, True), - ([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [1, 2], 1, True), + ([1, 0, 3, 0, 2], 0, 3, None, True), + ([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [999999, 1, 0, 999999], 0, True), + ([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [2, 1], 1, True), + (["A", "B", "C"], "b", 99999, None, False), ], ) def test_array_find(data, value, expected, axis, reverse): @@ -251,6 +253,44 @@ def test_array_labels(): array_labels(np.array([[1, 0, 3, 2], [5, 0, 6, 4]])) +def test_array_apply(process_registry): + _process = partial( + process_registry["add"].implementation, + y=1, + x=ParameterReference(from_parameter="x"), + ) + + output_cube = array_apply(data=np.array([1, 2, 3, 4, 5, 6]), process=_process) + assert (output_cube == [2, 3, 4, 5, 6, 7]).all() + + +@pytest.mark.parametrize( + "data, expected", + [ + ([np.nan, 1, np.nan, 6, np.nan, -8], [np.nan, 1, 3.5, 6, -1, -8]), + ([np.nan, 1, np.nan, np.nan], [np.nan, 1, np.nan, np.nan]), + ], +) +def test_array_interpolate_linear(data, expected): + assert np.array_equal( + array_interpolate_linear(data), + expected, + equal_nan=True, + ) + data_np = np.array(data) + assert np.array_equal( + array_interpolate_linear(data_np), + expected, + equal_nan=True, + ) + data_da = da.from_array(data_np) + assert np.array_equal( + array_interpolate_linear(data_da), + expected, + equal_nan=True, + ) + + def test_first(): assert first(np.array([1, 0, 3, 2])) == 1 assert pd.isnull(first(np.array([np.nan, 2, 3]), ignore_nodata=False)) @@ -459,3 +499,83 @@ def test_reduce_dimension( ) assert output_cube[0, 0, 0].data.compute().item() is True assert not output_cube[slice(1, None), :, :].data.compute().any() + + +@pytest.mark.parametrize("size", [(3, 3, 2, 4)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_count(temporal_interval, bounding_box, random_raster_data, process_registry): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B03", "B04", "B08"], + backend="dask", + ) + + _process = partial( + process_registry["count"].implementation, + data=ParameterReference(from_parameter="data"), + ) + output_cube = reduce_dimension(data=input_cube, reducer=_process, dimension="bands") + general_output_checks( + input_cube=input_cube, + output_cube=output_cube, + verify_attrs=False, + verify_crs=True, + ) + assert output_cube.dims == ("x", "y", "t") + xr.testing.assert_equal(output_cube, xr.zeros_like(output_cube) + 4) + + _process = partial( + process_registry["count"].implementation, + data=ParameterReference(from_parameter="data"), + condition=True, + ) + output_cube = reduce_dimension(data=input_cube, reducer=_process, dimension="bands") + general_output_checks( + input_cube=input_cube, + output_cube=output_cube, + verify_attrs=False, + verify_crs=True, + ) + assert output_cube.dims == ("x", "y", "t") + xr.testing.assert_equal(output_cube, xr.zeros_like(output_cube) + 4) + + _process = partial( + process_registry["count"].implementation, + data=ParameterReference(from_parameter="data"), + condition=process_registry["gt"].implementation, + ) + output_cube = reduce_dimension( + data=input_cube, + reducer=_process, + dimension="bands", + context={"y": -100}, + ) + general_output_checks( + input_cube=input_cube, + output_cube=output_cube, + verify_attrs=False, + verify_crs=True, + ) + assert output_cube.dims == ("x", "y", "t") + xr.testing.assert_equal(output_cube, xr.zeros_like(output_cube) + 4) + + _process = partial( + process_registry["count"].implementation, + data=ParameterReference(from_parameter="data"), + condition=process_registry["is_infinite"].implementation, + ) + output_cube = reduce_dimension( + data=input_cube, + reducer=_process, + dimension="bands", + ) + general_output_checks( + input_cube=input_cube, + output_cube=output_cube, + verify_attrs=False, + verify_crs=True, + ) + assert output_cube.dims == ("x", "y", "t") + xr.testing.assert_equal(output_cube, xr.zeros_like(output_cube)) diff --git a/tests/test_dates.py b/tests/test_dates.py new file mode 100644 index 00000000..41ec0e1f --- /dev/null +++ b/tests/test_dates.py @@ -0,0 +1,50 @@ +from openeo_processes_dask.process_implementations.dates import ( + date_between, + date_difference, + date_shift, +) + + +def test_date_between(): + assert not date_between(x="2020-01-01", min="2021-01-01", max="2022-01-01") + + +def test_date_difference(): + assert ( + date_difference(date1="2020-01-01T00:00:00.0Z", date2="2020-01-01T00:00:15.5Z") + == 15.5 + ) + assert ( + date_difference(date1="2020-01-01T00:00:00Z", date2="2020-01-01T01:00:00+01:00") + == 0 + ) + assert date_difference(date1="2020-01-02", date2="2020-01-01") == -86400 + assert date_difference(date1="2020-01-02", date2="2020-01-01", unit="day") == -1 + + +def test_date_shift(): + month_shift = date_shift(date="2020-02-01T17:22:45Z", value=6, unit="month") + assert month_shift == "2020-08-01T17:22:45Z" + + day_shift = date_shift(date="2021-03-31T00:00:00+02:00", value=-7, unit="day") + assert day_shift == "2021-03-24T00:00:00+02:00" + + year_shift = date_shift(date="2020-02-29T17:22:45Z", value=1, unit="year") + assert year_shift == "2021-02-28T17:22:45Z" + + month_shift = date_shift(date="2020-01-31", value=1, unit="month") + assert month_shift == "2020-02-29" + + second_shift = date_shift(date="2016-12-31T23:59:59Z", value=1, unit="second") + assert second_shift == "2017-01-01T00:00:00Z" + + millisecond_shift = date_shift( + date="2018-12-31T17:22:45Z", value=1150, unit="millisecond" + ) + assert millisecond_shift == "2018-12-31T17:22:46.150Z" + + hour_shift = date_shift(date="2018-01-01", value=25, unit="hour") + assert hour_shift == "2018-01-02" + + hour_shift = date_shift(date="2018-01-01", value=-1, unit="hour") + assert hour_shift == "2017-12-31" diff --git a/tests/test_ddmc.py b/tests/test_ddmc.py new file mode 100644 index 00000000..13e5787f --- /dev/null +++ b/tests/test_ddmc.py @@ -0,0 +1,75 @@ +from functools import partial + +import numpy as np +import pytest +import xarray as xr +from openeo_pg_parser_networkx.pg_schema import ( + BoundingBox, + ParameterReference, + TemporalInterval, +) + +from openeo_processes_dask.process_implementations.cubes.load import load_stac +from openeo_processes_dask.process_implementations.cubes.reduce import ( + reduce_dimension, + reduce_spatial, +) +from openeo_processes_dask.process_implementations.exceptions import ( + ArrayElementNotAvailable, +) +from openeo_processes_dask.process_implementations.experimental.ddmc import ddmc +from tests.general_checks import general_output_checks +from tests.mockdata import create_fake_rastercube + + +@pytest.mark.parametrize("size", [(30, 30, 20, 5)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_ddmc_instance_dims( + temporal_interval: TemporalInterval, bounding_box: BoundingBox, random_raster_data +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["nir08", "nir09", "cirrus", "swir16", "swir22"], + backend="dask", + ) + + data = ddmc(input_cube) + + assert isinstance(data, xr.DataArray) + assert set(input_cube.dims) == set(data.dims) + + +@pytest.mark.parametrize("size", [(30, 30, 20, 5)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_ddmc_target_band( + temporal_interval: TemporalInterval, bounding_box: BoundingBox, random_raster_data +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["nir08", "nir09", "cirrus", "swir16", "swir22"], + backend="dask", + ) + + data_band = ddmc(data=input_cube, target_band="ddmc") + assert "ddmc" in data_band.dims + + +@pytest.mark.parametrize("size", [(30, 30, 20, 5)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_ddmc_input_cube_exception( + temporal_interval: TemporalInterval, bounding_box: BoundingBox, random_raster_data +): + input_cube_exception = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["b04", "nir09", "cirrus", "swir16", "swir22"], + backend="dask", + ) + + with pytest.raises(KeyError): + data = ddmc(input_cube_exception) diff --git a/tests/test_dimensions.py b/tests/test_dimensions.py index d17742f1..37ae373d 100644 --- a/tests/test_dimensions.py +++ b/tests/test_dimensions.py @@ -4,6 +4,9 @@ from openeo_processes_dask.process_implementations.cubes.general import ( add_dimension, drop_dimension, + rename_dimension, + rename_labels, + trim_cube, ) from openeo_processes_dask.process_implementations.exceptions import ( DimensionLabelCountMismatch, @@ -66,3 +69,78 @@ def test_drop_dimension(temporal_interval, bounding_box, random_raster_data): DIMS_TO_KEEP = tuple(filter(lambda y: y != DIM_TO_DROP, input_cube.dims)) assert DIM_TO_DROP not in output_cube.dims assert DIMS_TO_KEEP == output_cube.dims + + +@pytest.mark.parametrize("size", [(30, 30, 1, 2)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_rename_dimension(temporal_interval, bounding_box, random_raster_data): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B04"], + backend="dask", + ) + output_cube = rename_dimension(input_cube, source="bands", target="spectral") + + assert "bands" not in output_cube.dims + assert "spectral" in output_cube.dims + assert "spectral" in output_cube.openeo.band_dims + assert "spectral" not in output_cube.openeo.spatial_dims + + with pytest.raises(DimensionNotAvailable): + rename_dimension(input_cube, source="notthere", target="there") + + with pytest.raises(Exception): + rename_dimension(input_cube, source="y", target="x") + + +@pytest.mark.parametrize("size", [(30, 30, 1, 5)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_rename_labels(temporal_interval, bounding_box, random_raster_data): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B03", "B04", "B05", "B08"], + backend="dask", + ) + output_cube = rename_labels( + input_cube, dimension="bands", target=["blue", "green", "red", "rededge", "nir"] + ) + + assert "red" in output_cube["bands"] + + with pytest.raises(DimensionNotAvailable): + rename_labels(input_cube, dimension="band", target=["blue"]) + + with pytest.raises(Exception): + rename_labels( + input_cube, dimension="bands", target=["B02", "B03", "B04", "B05", "B08"] + ) + + with pytest.raises(Exception): + rename_labels( + input_cube, + dimension="bands", + target=["B02", "B03", "B04", "B05", "B08", "B11", "B12"], + ) + + +@pytest.mark.parametrize("size", [(30, 30, 20, 4)]) +@pytest.mark.parametrize("dtype", [np.float32]) +def test_trim_cube(temporal_interval, bounding_box, random_raster_data): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B03", "B04", "B08"], + backend="dask", + ) + input_cube[:, :, :, 2] = np.zeros((30, 30, 20)) * np.nan + output_cube = trim_cube(input_cube) + assert output_cube.shape == (30, 30, 20, 3) + + all_nan = input_cube * np.nan + with pytest.raises(ValueError): + output_cube = trim_cube(all_nan) diff --git a/tests/test_filter.py b/tests/test_filter.py index cf7f840f..9b29906d 100644 --- a/tests/test_filter.py +++ b/tests/test_filter.py @@ -1,4 +1,5 @@ import copy +import datetime from functools import partial import numpy as np @@ -7,15 +8,11 @@ import xarray as xr from openeo_pg_parser_networkx.pg_schema import ParameterReference, TemporalInterval -from openeo_processes_dask.process_implementations.cubes._filter import ( - filter_bands, - filter_bbox, - filter_spatial, - filter_temporal, -) +from openeo_processes_dask.process_implementations.cubes._filter import * from openeo_processes_dask.process_implementations.cubes.reduce import reduce_dimension from openeo_processes_dask.process_implementations.exceptions import ( DimensionNotAvailable, + TemporalExtentEmpty, ) from tests.general_checks import general_output_checks from tests.mockdata import create_fake_rastercube @@ -54,6 +51,12 @@ def test_filter_temporal(temporal_interval, bounding_box, random_raster_data): data=input_cube, extent=temporal_interval_part, dimension="immissing" ) + with pytest.raises(TemporalExtentEmpty): + filter_temporal( + data=input_cube, + extent=["2018-05-31T23:59:59", "2018-05-15T00:00:00"], + ) + temporal_interval_open = TemporalInterval.parse_obj([None, "2018-05-03T00:00:00"]) output_cube = filter_temporal(data=input_cube, extent=temporal_interval_open) @@ -68,6 +71,28 @@ def test_filter_temporal(temporal_interval, bounding_box, random_raster_data): filter_temporal(invalid_input_cube, temporal_interval) +@pytest.mark.parametrize("size", [(30, 30, 30, 3)]) +@pytest.mark.parametrize("dtype", [np.uint8]) +def test_filter_labels( + temporal_interval, bounding_box, random_raster_data, process_registry +): + input_cube = create_fake_rastercube( + data=random_raster_data, + spatial_extent=bounding_box, + temporal_extent=temporal_interval, + bands=["B02", "B03", "B04"], + backend="dask", + ) + _process = partial( + process_registry["eq"].implementation, + y="B04", + x=ParameterReference(from_parameter="x"), + ) + + output_cube = filter_labels(data=input_cube, condition=_process, dimension="bands") + assert len(output_cube["bands"]) == 1 + + @pytest.mark.parametrize("size", [(1, 1, 1, 2)]) @pytest.mark.parametrize("dtype", [np.uint8]) def test_filter_bands(temporal_interval, bounding_box, random_raster_data): diff --git a/tests/test_math.py b/tests/test_math.py index 9eea0d1b..93f1037f 100644 --- a/tests/test_math.py +++ b/tests/test_math.py @@ -13,10 +13,10 @@ def test_quantiles(): ) quantiles_1 = [_round(quantile, p=2) for quantile in quantiles_1] assert quantiles_1 == [2.07, 2.14, 2.28, 2.7, 3.4, 4.5] - quantiles_2 = quantiles(data=np.array([2, 4, 4, 4, 5, 5, 7, 9]), q=4) + quantiles_2 = quantiles(data=np.array([2, 4, 4, 4, 5, 5, 7, 9]), probabilities=4) quantiles_2 = [_round(quantile, p=2) for quantile in quantiles_2] assert quantiles_2 == [4, 4.5, 5.5] - quantiles_3 = quantiles(data=np.array([-1, -0.5, np.nan, 1]), q=2) + quantiles_3 = quantiles(data=np.array([-1, -0.5, np.nan, 1]), probabilities=[2]) quantiles_3 = [_round(quantile, p=2) for quantile in quantiles_3] assert quantiles_3 == [-0.5] quantiles_4 = quantiles( diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 00000000..7130faf4 --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,75 @@ +import pytest + +from openeo_processes_dask.process_implementations.text import * + + +@pytest.mark.parametrize( + "string,expected,pattern,case_sensitive", + [ + ("Lorem ipsum dolor sit amet", False, "amet", True), + ("Lorem ipsum dolor sit amet", True, "Lorem", True), + ("Lorem ipsum dolor sit amet", False, "lorem", True), + ("Lorem ipsum dolor sit amet", True, "lorem", False), + ("Ä", True, "ä", False), + (None, "nan", "null", True), + ], +) +def test_text_begins(string, expected, pattern, case_sensitive): + result = text_begins(string, pattern, case_sensitive) + if isinstance(expected, str) and "nan" == expected: + assert result is None + else: + assert result == expected + + +@pytest.mark.parametrize( + "string,expected,pattern,case_sensitive", + [ + ("Lorem ipsum dolor sit amet", True, "amet", True), + ("Lorem ipsum dolor sit amet", False, "Lorem", True), + ("Lorem ipsum dolor sit amet", False, "AMET", True), + ("Lorem ipsum dolor sit amet", True, "AMET", False), + ("Ä", True, "ä", False), + (None, "nan", "null", True), + ], +) +def test_text_ends(string, expected, pattern, case_sensitive): + result = text_ends(string, pattern, case_sensitive) + if isinstance(expected, str) and "nan" == expected: + assert result is None + else: + assert result == expected + + +@pytest.mark.parametrize( + "string,expected,pattern,case_sensitive", + [ + ("Lorem ipsum dolor sit amet", False, "openEO", True), + ("Lorem ipsum dolor sit amet", True, "ipsum dolor", True), + ("Lorem ipsum dolor sit amet", False, "Ipsum Dolor", True), + ("Lorem ipsum dolor sit amet", True, "SIT", False), + ("ÄÖÜ", True, "ö", False), + (None, "nan", "null", True), + ], +) +def test_text_contains(string, expected, pattern, case_sensitive): + result = text_contains(string, pattern, case_sensitive) + if isinstance(expected, str) and "nan" == expected: + assert result is None + else: + assert result == expected + + +@pytest.mark.parametrize( + "data,expected,separator", + [ + (["Hello", "World"], "Hello World", " "), + ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], "1234567890", ""), + ([None, True, False, 1, -1.5, "ß"], "none\ntrue\nfalse\n1\n-1.5\nß", "\n"), + ([2, 0], "210", 1), + ([], "", ""), + ], +) +def test_text_contains(data, expected, separator): + result = text_concat(data, separator) + assert result == expected