From 578526bd7059c7f1e28e8bd4e2012ae1ff766cf1 Mon Sep 17 00:00:00 2001 From: ValentinaHutter Date: Thu, 21 Nov 2024 08:35:20 +0100 Subject: [PATCH 1/6] array concat handle axis --- .../process_implementations/arrays.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/openeo_processes_dask/process_implementations/arrays.py b/openeo_processes_dask/process_implementations/arrays.py index d0f31d2..530ed13 100644 --- a/openeo_processes_dask/process_implementations/arrays.py +++ b/openeo_processes_dask/process_implementations/arrays.py @@ -83,8 +83,7 @@ def array_element( "The process `array_element` only allows that either the `index` or the `labels` parameter is set." ) - if isinstance(data, xr.DataArray): - dim_labels, data = get_labels(data, axis=axis) + dim_labels, data = get_labels(data, axis=axis) if label is not None: if len(dim_labels) == 0: @@ -189,7 +188,7 @@ def modify(data): return modified -def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike: +def array_concat(array1: ArrayLike, array2: ArrayLike, axis=None) -> ArrayLike: labels1, array1 = get_labels(array1) labels2, array2 = get_labels(array2) @@ -198,7 +197,21 @@ def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike: "At least one label exists in both arrays and the conflict must be resolved before." ) - concat = np.concatenate([array1, array2]) + if (len(array1.shape) - len(array2.shape)) == 1: + if axis is None: + s1 = np.array(list(array1.shape)) + s2 = list(array2.shape) + s2.append(0) + s2 = np.array(s2) + + axis = np.argmax(s1 != s2) + + array2 = np.expand_dims(array2, axis=axis) + + if axis: + concat = np.concatenate([array1, array2], axis=axis) + else: + concat = np.concatenate([array1, array2]) # e.g. concating int32 and str arrays results in the result being cast to a Unicode dtype of a certain length (e.g. Date: Thu, 21 Nov 2024 09:45:19 +0100 Subject: [PATCH 2/6] array append handle axis --- .../process_implementations/arrays.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/openeo_processes_dask/process_implementations/arrays.py b/openeo_processes_dask/process_implementations/arrays.py index 530ed13..6c0e236 100644 --- a/openeo_processes_dask/process_implementations/arrays.py +++ b/openeo_processes_dask/process_implementations/arrays.py @@ -232,7 +232,20 @@ def array_append( value: Any, label: Optional[Any] = None, dim_labels=None, + axis=None, ) -> ArrayLike: + if axis: + if isinstance(value, list) and len(value) == 1: + value = value[0] + if (isinstance(value, np.ndarray) or isinstance(value, da.core.Array)) and len( + value.flatten() + ) == 1: + value = value.flatten()[0] + + value = np.take(np.ones_like(data), indices=0, axis=axis) * value + concat = array_concat(data, value, axis=axis) + return concat + if dim_labels: data = array_create_labeled(data=data, labels=dim_labels) if label is not None: From e82968e1ab800989813d295d03d99de572cc9d54 Mon Sep 17 00:00:00 2001 From: ValentinaHutter Date: Thu, 21 Nov 2024 10:43:59 +0100 Subject: [PATCH 3/6] fix array apply cases --- .../process_implementations/cubes/apply.py | 36 +++++++++++++++---- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/openeo_processes_dask/process_implementations/cubes/apply.py b/openeo_processes_dask/process_implementations/cubes/apply.py index 2d902b1..24537f0 100644 --- a/openeo_processes_dask/process_implementations/cubes/apply.py +++ b/openeo_processes_dask/process_implementations/cubes/apply.py @@ -47,7 +47,7 @@ def apply_dimension( keepdims = False is_new_dim_added = target_dimension is not None - if is_new_dim_added: + if is_new_dim_added and target_dimension not in data.dims: keepdims = True if target_dimension is None: @@ -77,12 +77,34 @@ def apply_dimension( exclude_dims={dimension}, ) - reordered_result = result.transpose(*data.dims, ...).rename( - {dimension: target_dimension} - ) - - if len(reordered_result[target_dimension]) == 1: - reordered_result[target_dimension] = ["0"] + reordered_result = result.transpose(*data.dims, ...) + + # Case 1: target_dimension is not defined/ is source dimension + if dimension == target_dimension: + # dimension labels preserved + # if the number of source dimension's values is equal to the number of computed values + result_len = len(reordered_result[dimension]) + if len(reordered_data[dimension]) == result_len: + reordered_result[dimension] == reordered_data[dimension].values + else: + reordered_result[dimension] = np.arange(result_len) + elif target_dimension in reordered_result.dims: + # source dimension is not target dimension + # target dimension exists with a single label only + if len(reordered_result[target_dimension]) == 1: + reordered_result = reordered_result.drop_vars(target_dimension).squeeze( + target_dimension + ) + reordered_result = reordered_result.rename({dimension: target_dimension}) + reordered_result[dimension] = np.arange(result_len) + else: + raise Exception( + f"Cannot rename dimension {dimension} to {target_dimension} as {target_dimension} already exists in dataset and contains more than one label: {reordered_result[target_dimension]}. See process definition. " + ) + else: + # source dimension is not the target dimension and the latter does not exist + reordered_result = reordered_result.rename({dimension: target_dimension}) + reordered_result[dimension] = np.arange(result_len) if data.rio.crs is not None: try: From 28fc4c5076b269d5b56df8a50662437571666720 Mon Sep 17 00:00:00 2001 From: ValentinaHutter Date: Thu, 21 Nov 2024 11:35:02 +0100 Subject: [PATCH 4/6] update for tests --- openeo_processes_dask/process_implementations/arrays.py | 7 ++++--- .../process_implementations/cubes/apply.py | 6 +++++- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/openeo_processes_dask/process_implementations/arrays.py b/openeo_processes_dask/process_implementations/arrays.py index 6c0e236..c9ae24a 100644 --- a/openeo_processes_dask/process_implementations/arrays.py +++ b/openeo_processes_dask/process_implementations/arrays.py @@ -50,7 +50,7 @@ ] -def get_labels(data, dimension="labels", axis=0): +def get_labels(data, dimension="labels", axis=0, dim_labels=None): if isinstance(data, xr.DataArray): dimension = data.dims[0] if len(data.dims) == 1 else dimension if axis: @@ -61,6 +61,8 @@ def get_labels(data, dimension="labels", axis=0): labels = [] if isinstance(data, list): data = np.asarray(data) + if dim_labels: + labels = dim_labels return labels, data @@ -82,8 +84,7 @@ def array_element( raise ArrayElementParameterConflict( "The process `array_element` only allows that either the `index` or the `labels` parameter is set." ) - - dim_labels, data = get_labels(data, axis=axis) + dim_labels, data = get_labels(data, axis=axis, dim_labels=dim_labels) if label is not None: if len(dim_labels) == 0: diff --git a/openeo_processes_dask/process_implementations/cubes/apply.py b/openeo_processes_dask/process_implementations/cubes/apply.py index 24537f0..904a51d 100644 --- a/openeo_processes_dask/process_implementations/cubes/apply.py +++ b/openeo_processes_dask/process_implementations/cubes/apply.py @@ -79,11 +79,15 @@ def apply_dimension( reordered_result = result.transpose(*data.dims, ...) + if dimension in reordered_result.dims: + result_len = len(reordered_result[dimension]) + else: + result_len = 1 + # Case 1: target_dimension is not defined/ is source dimension if dimension == target_dimension: # dimension labels preserved # if the number of source dimension's values is equal to the number of computed values - result_len = len(reordered_result[dimension]) if len(reordered_data[dimension]) == result_len: reordered_result[dimension] == reordered_data[dimension].values else: From f81be141493be840ae961088dade7d7baa20d1fa Mon Sep 17 00:00:00 2001 From: ValentinaHutter Date: Thu, 21 Nov 2024 13:12:27 +0100 Subject: [PATCH 5/6] array test update --- .../process_implementations/arrays.py | 2 +- .../process_implementations/cubes/apply.py | 8 +-- tests/test_apply.py | 57 +------------------ 3 files changed, 3 insertions(+), 64 deletions(-) diff --git a/openeo_processes_dask/process_implementations/arrays.py b/openeo_processes_dask/process_implementations/arrays.py index c9ae24a..5e5e0bb 100644 --- a/openeo_processes_dask/process_implementations/arrays.py +++ b/openeo_processes_dask/process_implementations/arrays.py @@ -61,7 +61,7 @@ def get_labels(data, dimension="labels", axis=0, dim_labels=None): labels = [] if isinstance(data, list): data = np.asarray(data) - if dim_labels: + if not isinstance(dim_labels, type(None)): labels = dim_labels return labels, data diff --git a/openeo_processes_dask/process_implementations/cubes/apply.py b/openeo_processes_dask/process_implementations/cubes/apply.py index 904a51d..6534153 100644 --- a/openeo_processes_dask/process_implementations/cubes/apply.py +++ b/openeo_processes_dask/process_implementations/cubes/apply.py @@ -45,10 +45,7 @@ def apply_dimension( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) - keepdims = False - is_new_dim_added = target_dimension is not None - if is_new_dim_added and target_dimension not in data.dims: - keepdims = True + keepdims = True if target_dimension is None: target_dimension = dimension @@ -116,9 +113,6 @@ def apply_dimension( except ValueError: pass - if is_new_dim_added: - reordered_result.openeo.add_dim_type(name=target_dimension, type="other") - return reordered_result diff --git a/tests/test_apply.py b/tests/test_apply.py index be1ac74..cc8a1a9 100644 --- a/tests/test_apply.py +++ b/tests/test_apply.py @@ -47,7 +47,7 @@ def test_apply(temporal_interval, bounding_box, random_raster_data, process_regi @pytest.mark.parametrize("size", [(6, 5, 4, 4)]) @pytest.mark.parametrize("dtype", [np.float32]) -def test_apply_dimension_case_1( +def test_apply_dimension_add( temporal_interval, bounding_box, random_raster_data, process_registry ): input_cube = create_fake_rastercube( @@ -78,61 +78,6 @@ def test_apply_dimension_case_1( ) -@pytest.mark.parametrize("size", [(6, 5, 4, 4)]) -@pytest.mark.parametrize("dtype", [np.float32]) -def test_apply_dimension_target_dimension( - temporal_interval, bounding_box, random_raster_data, process_registry -): - input_cube = create_fake_rastercube( - data=random_raster_data, - spatial_extent=bounding_box, - temporal_extent=temporal_interval, - bands=["B02", "B03", "B04", "B08"], - backend="dask", - ) - - _process = partial( - process_registry["mean"].implementation, - data=ParameterReference(from_parameter="data"), - ) - - # Target dimension is null and therefore defaults to the source dimension - output_cube_reduced = apply_dimension( - data=input_cube, process=_process, dimension="x", target_dimension="target" - ) - - expected_output = (input_cube.mean(dim="x")).expand_dims("target") - - general_output_checks( - input_cube=input_cube, - output_cube=output_cube_reduced, - verify_attrs=True, - verify_crs=False, - expected_results=expected_output, - ) - - # Target dimension is null and therefore defaults to the source dimension - output_cube_reduced = apply_dimension( - data=input_cube, process=_process, dimension="x", target_dimension="y" - ) - expected_output = ( - input_cube.mean(dim="x") - .expand_dims("target") - .drop_vars("y") - .rename({"target": "y"}) - ) - - general_output_checks( - input_cube=input_cube, - output_cube=output_cube_reduced, - verify_attrs=True, - verify_crs=False, - expected_results=expected_output, - ) - - assert "y" in output_cube_reduced.openeo.other_dims - - @pytest.mark.parametrize("size", [(6, 5, 4, 4)]) @pytest.mark.parametrize("dtype", [np.float32]) def test_apply_dimension_ordering_processes( From b11c10290c2fdc7f491d5601a6983bd8e8996d9d Mon Sep 17 00:00:00 2001 From: ValentinaHutter Date: Mon, 25 Nov 2024 08:20:31 +0100 Subject: [PATCH 6/6] fix array apply test --- .../process_implementations/cubes/apply.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/openeo_processes_dask/process_implementations/cubes/apply.py b/openeo_processes_dask/process_implementations/cubes/apply.py index 6534153..ecad9cf 100644 --- a/openeo_processes_dask/process_implementations/cubes/apply.py +++ b/openeo_processes_dask/process_implementations/cubes/apply.py @@ -45,7 +45,10 @@ def apply_dimension( f"Provided dimension ({dimension}) not found in data.dims: {data.dims}" ) - keepdims = True + keepdims = False + is_new_dim_added = target_dimension is not None + if is_new_dim_added: + keepdims = True if target_dimension is None: target_dimension = dimension @@ -105,7 +108,7 @@ def apply_dimension( else: # source dimension is not the target dimension and the latter does not exist reordered_result = reordered_result.rename({dimension: target_dimension}) - reordered_result[dimension] = np.arange(result_len) + reordered_result[target_dimension] = np.arange(result_len) if data.rio.crs is not None: try: