Skip to content

Commit

Permalink
add text processes
Browse files Browse the repository at this point in the history
  • Loading branch information
ValentinaHutter committed Sep 16, 2024
1 parent 712cf10 commit 00f34bc
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 0 deletions.
1 change: 1 addition & 0 deletions openeo_processes_dask/process_implementations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from .inspect import *
from .logic import *
from .math import *
from .text import *

try:
from .ml import *
Expand Down
42 changes: 42 additions & 0 deletions openeo_processes_dask/process_implementations/text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
from typing import Any, Optional

def text_begins(data: str, pattern: str, case_sensitive: Optional[bool] = True) -> Optional[bool]:
    """Check whether ``data`` starts with ``pattern``.

    Args:
        data: Text to inspect; ``None`` stands for a missing value.
        pattern: Prefix to look for.
        case_sensitive: When truthy the comparison is exact; otherwise both
            strings are lower-cased before they are compared.

    Returns:
        ``None`` if ``data`` is ``None``, otherwise ``True`` or ``False``.
    """
    # Only a missing value yields None. An empty string is still valid text
    # (every string, including "", starts with ""), so it must not be
    # short-circuited by a truthiness check.
    if data is None:
        return None
    if case_sensitive:
        return data.startswith(pattern)
    return data.lower().startswith(pattern.lower())

def text_contains(data: str, pattern: str, case_sensitive: Optional[bool] = True) -> Optional[bool]:
    """Check whether ``pattern`` occurs anywhere inside ``data``.

    Args:
        data: Text to inspect; ``None`` stands for a missing value.
        pattern: Substring to look for.
        case_sensitive: When truthy the comparison is exact; otherwise both
            strings are lower-cased before they are compared.

    Returns:
        ``None`` if ``data`` is ``None``, otherwise ``True`` or ``False``.
    """
    # Only a missing value yields None. An empty string is still valid text
    # (it contains the empty pattern), so it must not be short-circuited by
    # a truthiness check.
    if data is None:
        return None
    if case_sensitive:
        return pattern in data
    return pattern.lower() in data.lower()

def text_ends(data: str, pattern: str, case_sensitive: Optional[bool] = True) -> Optional[bool]:
    """Check whether ``data`` ends with ``pattern``.

    Args:
        data: Text to inspect; ``None`` stands for a missing value.
        pattern: Suffix to look for.
        case_sensitive: When truthy the comparison is exact; otherwise both
            strings are lower-cased before they are compared.

    Returns:
        ``None`` if ``data`` is ``None``, otherwise ``True`` or ``False``.
    """
    # Only a missing value yields None. An empty string is still valid text
    # (every string, including "", ends with ""), so it must not be
    # short-circuited by a truthiness check.
    if data is None:
        return None
    if case_sensitive:
        return data.endswith(pattern)
    return data.lower().endswith(pattern.lower())

def text_concat(data: list[Any], separator: Any = "") -> str:
    """Join the elements of ``data`` into one string.

    Every element, and the separator itself, is converted with ``str()``;
    booleans and ``None`` are additionally lower-cased (``True`` -> "true",
    ``None`` -> "none").

    Args:
        data: Values to concatenate, in order.
        separator: Value placed between neighbouring elements. Defaults to
            the empty string.

    Returns:
        The concatenated text; an empty string when ``data`` is empty.
    """

    def _as_text(value: Any) -> str:
        # bool and None are rendered in lower case; everything else keeps
        # its plain str() form.
        # NOTE(review): None becomes "none" here, as before -- confirm
        # whether the process specification expects "null" instead.
        if value is None or isinstance(value, bool):
            return str(value).lower()
        return str(value)

    # join() puts the separator strictly between elements, so the result no
    # longer carries a trailing separator.
    return _as_text(separator).join(_as_text(elem) for elem in data)
75 changes: 75 additions & 0 deletions tests/test_text.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
import pytest

from openeo_processes_dask.process_implementations.text import *


@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", False, "amet", True),
        ("Lorem ipsum dolor sit amet", True, "Lorem", True),
        ("Lorem ipsum dolor sit amet", False, "lorem", True),
        ("Lorem ipsum dolor sit amet", True, "lorem", False),
        ("Ä", True, "ä", False),
        (None, "nan", "null", True),
    ],
)
def test_text_begins(string, expected, pattern, case_sensitive):
    """Run text_begins on each parameter row and compare with ``expected``."""
    outcome = text_begins(string, pattern, case_sensitive)
    if expected == "nan":
        # "nan" is the marker the parameter table uses for "result is None".
        assert outcome is None
        return
    assert outcome == expected


@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", True, "amet", True),
        ("Lorem ipsum dolor sit amet", False, "Lorem", True),
        ("Lorem ipsum dolor sit amet", False, "AMET", True),
        ("Lorem ipsum dolor sit amet", True, "AMET", False),
        ("Ä", True, "ä", False),
        (None, "nan", "null", True),
    ],
)
def test_text_ends(string, expected, pattern, case_sensitive):
    """Run text_ends on each parameter row and compare with ``expected``."""
    outcome = text_ends(string, pattern, case_sensitive)
    if expected == "nan":
        # "nan" is the marker the parameter table uses for "result is None".
        assert outcome is None
        return
    assert outcome == expected


@pytest.mark.parametrize(
    "string,expected,pattern,case_sensitive",
    [
        ("Lorem ipsum dolor sit amet", False, "openEO", True),
        ("Lorem ipsum dolor sit amet", True, "ipsum dolor", True),
        ("Lorem ipsum dolor sit amet", False, "Ipsum Dolor", True),
        ("Lorem ipsum dolor sit amet", True, "SIT", False),
        ("ÄÖÜ", True, "ö", False),
        (None, "nan", "null", True),
    ],
)
def test_text_contains(string, expected, pattern, case_sensitive):
    """Run text_contains on each parameter row and compare with ``expected``."""
    outcome = text_contains(string, pattern, case_sensitive)
    if expected == "nan":
        # "nan" is the marker the parameter table uses for "result is None".
        assert outcome is None
        return
    assert outcome == expected


@pytest.mark.parametrize(
    "data,expected,separator",
    [
        (["Hello", "World"], "Hello World", " "),
        ([1, 2, 3, 4, 5, 6, 7, 8, 9, 0], "1234567890", ""),
        ([None, True, False, 1, -1.5, "ß"], "none\ntrue\nfalse\n1\n-1.5\nß", "\n"),
        ([2, 0], "210", 1),
        ([], "", ""),
    ],
)
def test_text_concat(data, expected, separator):
    """Run text_concat on each parameter row and compare with ``expected``.

    Named after the function it exercises: reusing the name
    ``test_text_contains`` would rebind that module-level name and hide the
    earlier test of the same name from collection.
    """
    result = text_concat(data, separator)
    assert result == expected

0 comments on commit 00f34bc

Please sign in to comment.