diff --git a/openeo_processes_dask/process_implementations/arrays.py b/openeo_processes_dask/process_implementations/arrays.py index d0f31d2..5e5e0bb 100644 --- a/openeo_processes_dask/process_implementations/arrays.py +++ b/openeo_processes_dask/process_implementations/arrays.py @@ -50,7 +50,7 @@ ] -def get_labels(data, dimension="labels", axis=0): +def get_labels(data, dimension="labels", axis=0, dim_labels=None): if isinstance(data, xr.DataArray): dimension = data.dims[0] if len(data.dims) == 1 else dimension if axis: @@ -61,6 +61,8 @@ def get_labels(data, dimension="labels", axis=0): labels = [] if isinstance(data, list): data = np.asarray(data) + if not isinstance(dim_labels, type(None)): + labels = dim_labels return labels, data @@ -82,9 +84,7 @@ def array_element( raise ArrayElementParameterConflict( "The process `array_element` only allows that either the `index` or the `labels` parameter is set." ) - - if isinstance(data, xr.DataArray): - dim_labels, data = get_labels(data, axis=axis) + dim_labels, data = get_labels(data, axis=axis, dim_labels=dim_labels) if label is not None: if len(dim_labels) == 0: @@ -189,7 +189,7 @@ def modify(data): return modified -def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike: +def array_concat(array1: ArrayLike, array2: ArrayLike, axis=None) -> ArrayLike: labels1, array1 = get_labels(array1) labels2, array2 = get_labels(array2) @@ -198,7 +198,21 @@ def array_concat(array1: ArrayLike, array2: ArrayLike) -> ArrayLike: "At least one label exists in both arrays and the conflict must be resolved before." ) - concat = np.concatenate([array1, array2]) + if (len(array1.shape) - len(array2.shape)) == 1: + if axis is None: + s1 = np.array(list(array1.shape)) + s2 = list(array2.shape) + s2.append(0) + s2 = np.array(s2) + + axis = np.argmax(s1 != s2) + + array2 = np.expand_dims(array2, axis=axis) + + if axis: + concat = np.concatenate([array1, array2], axis=axis) + else: + concat = np.concatenate([array1, array2]) # e.g. 
concating int32 and str arrays results in the result being cast to a Unicode dtype of a certain length (e.g. ArrayLike: + if axis: + if isinstance(value, list) and len(value) == 1: + value = value[0] + if (isinstance(value, np.ndarray) or isinstance(value, da.core.Array)) and len( + value.flatten() + ) == 1: + value = value.flatten()[0] + + value = np.take(np.ones_like(data), indices=0, axis=axis) * value + concat = array_concat(data, value, axis=axis) + return concat + if dim_labels: data = array_create_labeled(data=data, labels=dim_labels) if label is not None: diff --git a/openeo_processes_dask/process_implementations/cubes/apply.py b/openeo_processes_dask/process_implementations/cubes/apply.py index 2d902b1..ecad9cf 100644 --- a/openeo_processes_dask/process_implementations/cubes/apply.py +++ b/openeo_processes_dask/process_implementations/cubes/apply.py @@ -77,12 +77,38 @@ def apply_dimension( exclude_dims={dimension}, ) - reordered_result = result.transpose(*data.dims, ...).rename( - {dimension: target_dimension} - ) + reordered_result = result.transpose(*data.dims, ...) 
- if len(reordered_result[target_dimension]) == 1: - reordered_result[target_dimension] = ["0"] + if dimension in reordered_result.dims: + result_len = len(reordered_result[dimension]) + else: + result_len = 1 + + # Case 1: target_dimension is not defined/ is source dimension + if dimension == target_dimension: + # dimension labels preserved + # if the number of source dimension's values is equal to the number of computed values + if len(reordered_data[dimension]) == result_len: + reordered_result[dimension] = reordered_data[dimension].values + else: + reordered_result[dimension] = np.arange(result_len) + elif target_dimension in reordered_result.dims: + # source dimension is not target dimension + # target dimension exists with a single label only + if len(reordered_result[target_dimension]) == 1: + reordered_result = reordered_result.drop_vars(target_dimension).squeeze( + target_dimension + ) + reordered_result = reordered_result.rename({dimension: target_dimension}) + reordered_result[target_dimension] = np.arange(result_len) + else: + raise Exception( + f"Cannot rename dimension {dimension} to {target_dimension} as {target_dimension} already exists in dataset and contains more than one label: {reordered_result[target_dimension]}. See process definition. 
" + ) + else: + # source dimension is not the target dimension and the latter does not exist + reordered_result = reordered_result.rename({dimension: target_dimension}) + reordered_result[target_dimension] = np.arange(result_len) if data.rio.crs is not None: try: @@ -90,9 +116,6 @@ def apply_dimension( except ValueError: pass - if is_new_dim_added: - reordered_result.openeo.add_dim_type(name=target_dimension, type="other") - return reordered_result diff --git a/tests/test_apply.py b/tests/test_apply.py index be1ac74..cc8a1a9 100644 --- a/tests/test_apply.py +++ b/tests/test_apply.py @@ -47,7 +47,7 @@ def test_apply(temporal_interval, bounding_box, random_raster_data, process_regi @pytest.mark.parametrize("size", [(6, 5, 4, 4)]) @pytest.mark.parametrize("dtype", [np.float32]) -def test_apply_dimension_case_1( +def test_apply_dimension_add( temporal_interval, bounding_box, random_raster_data, process_registry ): input_cube = create_fake_rastercube( @@ -78,61 +78,6 @@ def test_apply_dimension_case_1( ) -@pytest.mark.parametrize("size", [(6, 5, 4, 4)]) -@pytest.mark.parametrize("dtype", [np.float32]) -def test_apply_dimension_target_dimension( - temporal_interval, bounding_box, random_raster_data, process_registry -): - input_cube = create_fake_rastercube( - data=random_raster_data, - spatial_extent=bounding_box, - temporal_extent=temporal_interval, - bands=["B02", "B03", "B04", "B08"], - backend="dask", - ) - - _process = partial( - process_registry["mean"].implementation, - data=ParameterReference(from_parameter="data"), - ) - - # Target dimension is null and therefore defaults to the source dimension - output_cube_reduced = apply_dimension( - data=input_cube, process=_process, dimension="x", target_dimension="target" - ) - - expected_output = (input_cube.mean(dim="x")).expand_dims("target") - - general_output_checks( - input_cube=input_cube, - output_cube=output_cube_reduced, - verify_attrs=True, - verify_crs=False, - expected_results=expected_output, - ) - - # 
Target dimension is null and therefore defaults to the source dimension - output_cube_reduced = apply_dimension( - data=input_cube, process=_process, dimension="x", target_dimension="y" - ) - expected_output = ( - input_cube.mean(dim="x") - .expand_dims("target") - .drop_vars("y") - .rename({"target": "y"}) - ) - - general_output_checks( - input_cube=input_cube, - output_cube=output_cube_reduced, - verify_attrs=True, - verify_crs=False, - expected_results=expected_output, - ) - - assert "y" in output_cube_reduced.openeo.other_dims - - @pytest.mark.parametrize("size", [(6, 5, 4, 4)]) @pytest.mark.parametrize("dtype", [np.float32]) def test_apply_dimension_ordering_processes(