Skip to content

Commit

Permalink
add text processes (#279)
Browse files Browse the repository at this point in the history
* add text processes

* fix test

* pre commit
  • Loading branch information
ValentinaHutter authored Sep 19, 2024
1 parent de071a6 commit 9522ef7
Show file tree
Hide file tree
Showing 3 changed files with 126 additions and 0 deletions.
1 change: 1 addition & 0 deletions openeo_processes_dask/process_implementations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .inspect import *
from .logic import *
from .math import *
from .text import *

try:
from .ml import *
Expand Down
50 changes: 50 additions & 0 deletions openeo_processes_dask/process_implementations/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
from typing import Any, Optional


def text_begins(
    data: Optional[str], pattern: str, case_sensitive: Optional[bool] = True
) -> Optional[bool]:
    """Check whether ``data`` starts with ``pattern``.

    Args:
        data: Text to inspect. ``None`` is treated as "no data".
        pattern: Prefix to look for.
        case_sensitive: When truthy (the default) the comparison is exact;
            otherwise both strings are lower-cased before comparing.

    Returns:
        ``True``/``False`` for the prefix check, or ``None`` when ``data`` is
        ``None``.
    """
    # Test for None explicitly: an empty string is valid input and must yield
    # a boolean (e.g. "" begins with ""), not be mistaken for missing data.
    if data is None:
        return None
    if case_sensitive:
        return data.startswith(pattern)
    return data.lower().startswith(pattern.lower())


def text_contains(
    data: Optional[str], pattern: str, case_sensitive: Optional[bool] = True
) -> Optional[bool]:
    """Check whether ``pattern`` occurs anywhere inside ``data``.

    Args:
        data: Text to inspect. ``None`` is treated as "no data".
        pattern: Substring to look for.
        case_sensitive: When truthy (the default) the comparison is exact;
            otherwise both strings are lower-cased before comparing.

    Returns:
        ``True``/``False`` for the containment check, or ``None`` when
        ``data`` is ``None``.
    """
    # Test for None explicitly: an empty string is valid input and must yield
    # a boolean (e.g. "" contains ""), not be mistaken for missing data.
    if data is None:
        return None
    if case_sensitive:
        return pattern in data
    return pattern.lower() in data.lower()


def text_ends(
    data: Optional[str], pattern: str, case_sensitive: Optional[bool] = True
) -> Optional[bool]:
    """Check whether ``data`` ends with ``pattern``.

    Args:
        data: Text to inspect. ``None`` is treated as "no data".
        pattern: Suffix to look for.
        case_sensitive: When truthy (the default) the comparison is exact;
            otherwise both strings are lower-cased before comparing.

    Returns:
        ``True``/``False`` for the suffix check, or ``None`` when ``data`` is
        ``None``.
    """
    # Test for None explicitly: an empty string is valid input and must yield
    # a boolean (e.g. "" ends with ""), not be mistaken for missing data.
    if data is None:
        return None
    if case_sensitive:
        return data.endswith(pattern)
    return data.lower().endswith(pattern.lower())


def _as_text(value: Any) -> str:
    """Return the text form of one element used by ``text_concat``."""
    text = str(value)
    # Booleans and None are rendered in lower case ("true", "false", "none");
    # every other value keeps Python's plain ``str()`` form.
    # NOTE(review): JSON-oriented callers may expect "null" rather than "none"
    # for None - confirm against the process specification before changing.
    if isinstance(value, bool) or value is None:
        return text.lower()
    return text


def text_concat(data: list[Any], separator: Any = "") -> str:
    """Join the text representations of ``data`` into a single string.

    Args:
        data: Elements to concatenate; each one is converted to text
            (booleans and ``None`` in lower case).
        separator: Value placed between consecutive elements. It is converted
            to text the same way as the elements. Defaults to an empty string,
            i.e. the elements are concatenated directly.

    Returns:
        The concatenated string; an empty string for empty ``data``.
    """
    # str.join puts the separator only *between* elements, so no trailing
    # separator has to be sliced off afterwards.
    return _as_text(separator).join(_as_text(elem) for elem in data)
75 changes: 75 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import pytest

from openeo_processes_dask.process_implementations.text import *


@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", False, "amet", True),
        ("Lorem ipsum dolor sit amet", True, "Lorem", True),
        ("Lorem ipsum dolor sit amet", False, "lorem", True),
        ("Lorem ipsum dolor sit amet", True, "lorem", False),
        ("Ä", True, "ä", False),
        (None, "nan", "null", True),
    ],
)
def test_text_begins(string, expected, pattern, case_sensitive):
    """Check text_begins against each parametrized case.

    The string "nan" in the ``expected`` column is a marker meaning the call
    must return ``None``.
    """
    outcome = text_begins(string, pattern, case_sensitive)
    if expected == "nan":
        assert outcome is None
        return
    assert outcome == expected


@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", True, "amet", True),
        ("Lorem ipsum dolor sit amet", False, "Lorem", True),
        ("Lorem ipsum dolor sit amet", False, "AMET", True),
        ("Lorem ipsum dolor sit amet", True, "AMET", False),
        ("Ä", True, "ä", False),
        (None, "nan", "null", True),
    ],
)
def test_text_ends(string, expected, pattern, case_sensitive):
    """Check text_ends against each parametrized case.

    The string "nan" in the ``expected`` column is a marker meaning the call
    must return ``None``.
    """
    outcome = text_ends(string, pattern, case_sensitive)
    if expected == "nan":
        assert outcome is None
        return
    assert outcome == expected


@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", False, "openEO", True),
        ("Lorem ipsum dolor sit amet", True, "ipsum dolor", True),
        ("Lorem ipsum dolor sit amet", False, "Ipsum Dolor", True),
        ("Lorem ipsum dolor sit amet", True, "SIT", False),
        ("ÄÖÜ", True, "ö", False),
        (None, "nan", "null", True),
    ],
)
def test_text_contains(string, expected, pattern, case_sensitive):
    """Check text_contains against each parametrized case.

    The string "nan" in the ``expected`` column is a marker meaning the call
    must return ``None``.
    """
    outcome = text_contains(string, pattern, case_sensitive)
    if expected == "nan":
        assert outcome is None
        return
    assert outcome == expected


@pytest.mark.parametrize(
    "data,expected,separator",
    [
        (["Hello", "World"], "Hello World", " "),
        ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], "1234567890", ""),
        ([None, True, False, 1, -1.5, "ß"], "none\ntrue\nfalse\n1\n-1.5\nß", "\n"),
        ([2, 0], "210", 1),
        ([], "", ""),
    ],
)
def test_text_concat(data, expected, separator):
    """Check text_concat against each parametrized case.

    This test was previously also named ``test_text_contains``. A second
    definition with that name rebinds it at module level, so pytest never
    collected the real text_contains test. The name now matches the function
    under test.
    """
    result = text_concat(data, separator)
    assert result == expected

0 comments on commit 9522ef7

Please sign in to comment.