Skip to content

Commit

Permalink
update array processes (#280)
Browse files Browse the repository at this point in the history
* update array_find

* update array contains

* add array_interpolate

* add array_interpolate

* add array_interpolate

* update tests!
  • Loading branch information
ValentinaHutter authored Sep 19, 2024
1 parent 712cf10 commit de071a6
Show file tree
Hide file tree
Showing 2 changed files with 56 additions and 10 deletions.
26 changes: 22 additions & 4 deletions openeo_processes_dask/process_implementations/arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
"array_find",
"array_labels",
"array_apply",
"array_interpolate_linear",
"first",
"last",
"order",
Expand Down Expand Up @@ -165,10 +166,8 @@ def array_contains(data: ArrayLike, value: Any, axis=None) -> bool:
value_is_valid = True
if len(np.shape(data)) != 1 and axis is None:
return False
if not value_is_valid:
if not value_is_valid or pd.isnull(value):
return False
if pd.isnull(value):
return np.isnan(data).any(axis=axis)
else:
return np.isin(data, value).any(axis=axis)

Expand All @@ -188,8 +187,14 @@ def array_find(
idxs = (data == value).argmax(axis=axis)

mask = ~np.array((data == value).any(axis=axis))
if np.isnan(value):
if not isinstance(value, str) and np.isnan(value):
mask = True
if reverse:
if axis is None:
size = data.size
else:
size = data.shape[axis]
idxs = size - 1 - idxs

logger.warning(
"array_find: numpy has no sentinel value for missing data in integer arrays, therefore np.masked_array is used to return the indices of found elements. Further operations might fail if not defined for masked arrays."
Expand Down Expand Up @@ -230,6 +235,19 @@ def array_apply(
)


def array_interpolate_linear(data: ArrayLike):
    """Linearly interpolate the non-finite entries of a one-dimensional array.

    Every entry for which ``np.isfinite`` is false (NaN as well as +/-inf) is
    replaced by the linear interpolation between its nearest finite
    neighbours, using the element index as the x-coordinate. Non-finite
    entries before the first or after the last finite value have no neighbour
    on one side and are set to NaN.

    The input is never modified: when something has to be filled a new array
    is returned. When there is nothing to do (fewer than two finite values,
    or no non-finite value at all) the data is returned as is; a list input
    is returned converted to a numpy array in that case.

    Args:
        data: One-dimensional list or array of numbers.

    Returns:
        An array with the same length and dtype as ``data`` (after list
        conversion) in which interior gaps are filled.
    """
    if isinstance(data, list):
        data = np.array(data)
    x = np.arange(len(data))
    valid = np.isfinite(data)
    known_x = x[valid]
    # Interpolation needs two supporting points; with no gaps at all the data
    # is returned untouched so that integer inputs never take a float detour.
    if len(known_x) < 2 or len(known_x) == len(x):
        return data
    # Evaluate the interpolant at every index and keep the original values at
    # the valid positions. Selecting with np.where instead of assigning into
    # `data[~valid]` leaves the caller's array untouched and does not depend
    # on boolean-mask item assignment being supported by the array type.
    filled = np.interp(x, known_x, data[valid], left=np.nan, right=np.nan)
    return np.where(valid, data, filled).astype(data.dtype)


def first(
data: ArrayLike,
ignore_nodata: Optional[bool] = True,
Expand Down
40 changes: 34 additions & 6 deletions tests/test_arrays.py
Original file line number Diff line number Diff line change
Expand Up @@ -180,7 +180,7 @@ def test_array_append(data, value, expected):
([1, 2, 3], 2, True),
(["A", "B", "C"], "b", False),
([1, 2, 3], "2", False),
([1, 2, np.nan], np.nan, True),
([1, 2, np.nan], np.nan, False),
([[2, 1], [3, 4]], [1, 2], False),
([[2, 1], [3, 4]], 2, False),
([1, 2, 3], np.int64(2), True),
Expand Down Expand Up @@ -222,14 +222,15 @@ def test_array_contains_object_dtype():
[
([1, 0, 3, 2], 3, 2, None, False),
([1, 0, 3, 2, np.nan, 3], np.nan, 999999, None, False),
([1, 0, 3, 2], 3, 2, None, False),
([1, 0, 3, 0, 2], 0, 1, None, False),
([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [999999, 1, 0, 999999], 0, False),
([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [2, 1], 1, False),
([1, 0, 3, 2], 3, 1, None, True),
([1, 0, 3, 2], 3, 2, None, True),
([1, 0, 3, 2, np.nan, 3], np.nan, 999999, None, True),
([1, 0, 3, 2], 3, 1, None, True),
([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [999999, 0, 1, 999999], 0, True),
([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [1, 2], 1, True),
([1, 0, 3, 0, 2], 0, 3, None, True),
([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [999999, 1, 0, 999999], 0, True),
([[1, 0, 3, 2], [5, 3, 6, 8]], 3, [2, 1], 1, True),
(["A", "B", "C"], "b", 99999, None, False),
],
)
def test_array_find(data, value, expected, axis, reverse):
Expand Down Expand Up @@ -263,6 +264,33 @@ def test_array_apply(process_registry):
assert (output_cube == [2, 3, 4, 5, 6, 7]).all()


@pytest.mark.parametrize(
    "data, expected",
    [
        # Interior gaps are filled; the leading NaN is expected to stay NaN.
        ([np.nan, 1, np.nan, 6, np.nan, -8], [np.nan, 1, 3.5, 6, -1, -8]),
        # A single finite sample: the expected output equals the input.
        ([np.nan, 1, np.nan, np.nan], [np.nan, 1, np.nan, np.nan]),
    ],
)
def test_array_interpolate_linear(data, expected):
    """Check array_interpolate_linear for list, numpy and dask inputs."""

    def matches(result):
        # NaN positions must line up as well, hence equal_nan=True.
        return np.array_equal(result, expected, equal_nan=True)

    from_list = array_interpolate_linear(data)
    assert matches(from_list)

    numpy_input = np.array(data)
    from_numpy = array_interpolate_linear(numpy_input)
    assert matches(from_numpy)

    # NOTE(review): the dask array wraps the very same numpy_input object that
    # was passed to array_interpolate_linear just above. If that call modifies
    # its argument in place, this case runs on already-filled data - confirm
    # whether that is intended.
    dask_input = da.from_array(numpy_input)
    from_dask = array_interpolate_linear(dask_input)
    assert matches(from_dask)


def test_first():
assert first(np.array([1, 0, 3, 2])) == 1
assert pd.isnull(first(np.array([np.nan, 2, 3]), ignore_nodata=False))
Expand Down

0 comments on commit de071a6

Please sign in to comment.