Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Eoepca test suite #297

Merged
merged 6 commits into from
Nov 25, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 33 additions & 6 deletions openeo_processes_dask/process_implementations/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
]


def get_labels(data, dimension="labels", axis=0):
def get_labels(data, dimension="labels", axis=0, dim_labels=None):
if isinstance(data, xr.DataArray):
dimension = data.dims[0] if len(data.dims) == 1 else dimension
if axis:
Expand All @@ -61,6 +61,8 @@ def get_labels(data, dimension="labels", axis=0):
labels = []
if isinstance(data, list):
data = np.asarray(data)
if not isinstance(dim_labels, type(None)):
labels = dim_labels
return labels, data


Expand All @@ -82,9 +84,7 @@ def array_element(
raise ArrayElementParameterConflict(
"The process `array_element` only allows that either the `index` or the `labels` parameter is set."
)

if isinstance(data, xr.DataArray):
dim_labels, data = get_labels(data, axis=axis)
dim_labels, data = get_labels(data, axis=axis, dim_labels=dim_labels)

if label is not None:
if len(dim_labels) == 0:
Expand Down Expand Up @@ -189,7 +189,7 @@ def modify(data):
return modified


def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike:
def array_concat(array1: ArrayLike, array2: ArrayLike, axis=None) -> ArrayLike:
labels1, array1 = get_labels(array1)
labels2, array2 = get_labels(array2)

Expand All @@ -198,7 +198,21 @@ def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike:
"At least one label exists in both arrays and the conflict must be resolved before."
)

concat = np.concatenate([array1, array2])
if (len(array1.shape) - len(array2.shape)) == 1:
if axis is None:
s1 = np.array(list(array1.shape))
s2 = list(array2.shape)
s2.append(0)
s2 = np.array(s2)

axis = np.argmax(s1 != s2)

array2 = np.expand_dims(array2, axis=axis)

if axis:
concat = np.concatenate([array1, array2], axis=axis)
else:
concat = np.concatenate([array1, array2])

# e.g. concatenating int32 and str arrays results in the result being cast to a Unicode dtype of a certain length (e.g. <U22).
# There isn't really anything better to do as numpy does not support heterogeneous arrays.
Expand All @@ -219,7 +233,20 @@ def array_append(
value: Any,
label: Optional[Any] = None,
dim_labels=None,
axis=None,
) -> ArrayLike:
if axis:
if isinstance(value, list) and len(value) == 1:
value = value[0]
if (isinstance(value, np.ndarray) or isinstance(value, da.core.Array)) and len(
value.flatten()
) == 1:
value = value.flatten()[0]

value = np.take(np.ones_like(data), indices=0, axis=axis) * value
concat = array_concat(data, value, axis=axis)
return concat

if dim_labels:
data = array_create_labeled(data=data, labels=dim_labels)
if label is not None:
Expand Down
39 changes: 31 additions & 8 deletions openeo_processes_dask/process_implementations/cubes/apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,22 +77,45 @@ def apply_dimension(
exclude_dims={dimension},
)

reordered_result = result.transpose(*data.dims, ...).rename(
{dimension: target_dimension}
)
reordered_result = result.transpose(*data.dims, ...)

if len(reordered_result[target_dimension]) == 1:
reordered_result[target_dimension] = ["0"]
if dimension in reordered_result.dims:
result_len = len(reordered_result[dimension])
else:
result_len = 1

# Case 1: target_dimension is not defined/ is source dimension
if dimension == target_dimension:
# dimension labels preserved
# if the number of source dimension's values is equal to the number of computed values
if len(reordered_data[dimension]) == result_len:
reordered_result[dimension] == reordered_data[dimension].values
else:
reordered_result[dimension] = np.arange(result_len)
elif target_dimension in reordered_result.dims:
# source dimension is not target dimension
# target dimension exists with a single label only
if len(reordered_result[target_dimension]) == 1:
reordered_result = reordered_result.drop_vars(target_dimension).squeeze(
target_dimension
)
reordered_result = reordered_result.rename({dimension: target_dimension})
reordered_result[dimension] = np.arange(result_len)
else:
raise Exception(
f"Cannot rename dimension {dimension} to {target_dimension} as {target_dimension} already exists in dataset and contains more than one label: {reordered_result[target_dimension]}. See process definition. "
)
else:
# source dimension is not the target dimension and the latter does not exist
reordered_result = reordered_result.rename({dimension: target_dimension})
reordered_result[target_dimension] = np.arange(result_len)

if data.rio.crs is not None:
try:
reordered_result.rio.write_crs(data.rio.crs, inplace=True)
except ValueError:
pass

if is_new_dim_added:
reordered_result.openeo.add_dim_type(name=target_dimension, type="other")

return reordered_result


Expand Down
57 changes: 1 addition & 56 deletions tests/test_apply.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ def test_apply(temporal_interval, bounding_box, random_raster_data, process_regi

@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_case_1(
def test_apply_dimension_add(
temporal_interval, bounding_box, random_raster_data, process_registry
):
input_cube = create_fake_rastercube(
Expand Down Expand Up @@ -78,61 +78,6 @@ def test_apply_dimension_case_1(
)


# Checks apply_dimension when an explicit target_dimension is passed: the
# registered "mean" process is applied along "x" and the result is compared
# against a cube built directly with xarray operations.
@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_target_dimension(
temporal_interval, bounding_box, random_raster_data, process_registry
):
input_cube = create_fake_rastercube(
data=random_raster_data,
spatial_extent=bounding_box,
temporal_extent=temporal_interval,
bands=["B02", "B03", "B04", "B08"],
backend="dask",
)

# Bind the "mean" implementation so its `data` argument is a parameter
# reference rather than a concrete array.
_process = partial(
process_registry["mean"].implementation,
data=ParameterReference(from_parameter="data"),
)

# target_dimension is given explicitly as "target" (it is NOT null here):
# the expected cube is the mean over "x" with a "target" dimension added.
output_cube_reduced = apply_dimension(
data=input_cube, process=_process, dimension="x", target_dimension="target"
)

expected_output = (input_cube.mean(dim="x")).expand_dims("target")

general_output_checks(
input_cube=input_cube,
output_cube=output_cube_reduced,
verify_attrs=True,
verify_crs=False,
expected_results=expected_output,
)

# target_dimension is given explicitly as "y" (presumably a dimension that
# already exists on the input cube; verify against create_fake_rastercube).
# The expected cube is built from the mean over "x" by adding "target",
# dropping the "y" variable and renaming "target" to "y".
output_cube_reduced = apply_dimension(
data=input_cube, process=_process, dimension="x", target_dimension="y"
)
expected_output = (
input_cube.mean(dim="x")
.expand_dims("target")
.drop_vars("y")
.rename({"target": "y"})
)

general_output_checks(
input_cube=input_cube,
output_cube=output_cube_reduced,
verify_attrs=True,
verify_crs=False,
expected_results=expected_output,
)

# The resulting "y" dimension must be listed among the cube's "other" dims.
assert "y" in output_cube_reduced.openeo.other_dims


@pytest.mark.parametrize("size", [(6, 5, 4, 4)])
@pytest.mark.parametrize("dtype", [np.float32])
def test_apply_dimension_ordering_processes(
Expand Down
Loading