diff --git a/assets/processes b/assets/processes
index 5e3ce04..0e979e4 160000
--- a/assets/processes
+++ b/assets/processes
@@ -1 +1 @@
-Subproject commit 5e3ce04dbe77e7073beac388f6d629076e65897f
+Subproject commit 0e979e4307b9aad1855eb3a87f7f53a3247057a9
diff --git a/pyproject.toml b/pyproject.toml
index 4bdc767..69ee2aa 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -15,6 +15,7 @@ dependencies = [
     "xarray>=2023.11.0",
     "numpy>=1.26.2",
     "deepdiff>=6.7.1",
+    "python-dateutil>=2.8.2",
 ]
 classifiers = [
     "Programming Language :: Python :: 3",
@@ -32,3 +33,6 @@ Dask = [
 testpaths = [
     "src/openeo_test_suite/tests",
 ]
+filterwarnings = [
+    "ignore:(pkg_resources|jsonschema.RefResolver):DeprecationWarning",
+]
diff --git a/src/openeo_test_suite/lib/process_runner/base.py b/src/openeo_test_suite/lib/process_runner/base.py
index d87afcc..8af4d8f 100644
--- a/src/openeo_test_suite/lib/process_runner/base.py
+++ b/src/openeo_test_suite/lib/process_runner/base.py
@@ -48,9 +48,18 @@ def encode_datacube(self, data: Dict) -> Any:
         """
         raise Exception("datacubes not implemented yet")
 
-    def decode_data(self, data: Any) -> Any:
+    def encode_data(self, data: Any) -> Any:
         """
-        Converts data from the internal backend representation to the process test/JSON5 representation
+        Converts data from the process test/JSON5 representation to the internal backend representation,
+        excluding datacubes and labeled arrays.
+        For example: JSON data types to numpy arrays.
+        openEO process tests specification -> backend
+        """
+        return data
+
+    def decode_data(self, data: Any, expected: Any) -> Any:
+        """
+        Converts data from the internal backend representation to the process test/JSON5 representation.
         For example: numpy values to JSON data types,
         labeled-array or datacube to JSON object representation.
         backend -> openEO process tests specification
@@ -63,3 +72,9 @@ def is_json_only(self) -> bool:
         If True, the runner will skip all tests that contain non JSON values such as infinity and NaN.
         """
         return False
+
+    def get_nodata_value(self) -> Any:
+        """
+        Returns the nodata value of the backend.
+ """ + return None diff --git a/src/openeo_test_suite/lib/process_runner/dask.py b/src/openeo_test_suite/lib/process_runner/dask.py index e9ce3dd..4410d8e 100644 --- a/src/openeo_test_suite/lib/process_runner/dask.py +++ b/src/openeo_test_suite/lib/process_runner/dask.py @@ -1,6 +1,7 @@ import importlib import inspect +import dask from openeo_pg_parser_networkx import OpenEOProcessGraph, ProcessRegistry from openeo_pg_parser_networkx.process_registry import Process from openeo_processes_dask.process_implementations.core import process @@ -24,6 +25,11 @@ def create_process_registry(): ) ] + # not sure why this is needed + from openeo_processes_dask.process_implementations.math import e + + processes_from_module.append(e) + specs_module = importlib.import_module("openeo_processes_dask.specs") specs = { func.__name__: getattr(specs_module, func.__name__) @@ -61,7 +67,14 @@ def encode_process_graph( def encode_datacube(self, data): return datacube_to_xarray(data) - def decode_data(self, data): - data = numpy_to_native(data) + def decode_data(self, data, expected): + if isinstance(data, dask.array.core.Array): + data = data.compute() + + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) + return data + + def get_nodata_value(self): + return float("nan") diff --git a/src/openeo_test_suite/lib/process_runner/util.py b/src/openeo_test_suite/lib/process_runner/util.py index 19e9305..cfe7b4d 100644 --- a/src/openeo_test_suite/lib/process_runner/util.py +++ b/src/openeo_test_suite/lib/process_runner/util.py @@ -2,15 +2,21 @@ import numpy as np import xarray as xr +from dateutil.parser import parse -def numpy_to_native(data): +def numpy_to_native(data, expected): # Converting numpy dtypes to native python types if isinstance(data, np.ndarray) or isinstance(data, np.generic): - if data.size == 1: - return data.item() - elif data.size > 1: + if isinstance(expected, list): return data.tolist() + else: + if data.size == 0: + return None + if data.size == 1: + return data.item() + elif data.size > 1: + return data.tolist() return data @@ -18,7 +24,8 @@ def numpy_to_native(data): def datacube_to_xarray(cube): coords = [] crs = None - for dim in cube["dimensions"]: + for name in cube["order"]: + dim = cube["dimensions"][name] if dim["type"] == "temporal": # date replace for older Python versions that don't support ISO parsing (only available since 3.11) values = [ @@ -31,7 +38,7 @@ def datacube_to_xarray(cube): else: values = dim["values"] - coords.append((dim["name"], values)) + coords.append((name, values)) da = xr.DataArray(cube["data"], coords=coords) if crs is not None: @@ -45,14 +52,16 @@ def xarray_to_datacube(data): if not isinstance(data, xr.DataArray): return data - dims = [] + order = list(data.dims) + + dims = {} for c in data.coords: type = "bands" values = [] axis = None - if isinstance(data.coords[c].values[0], np.datetime64): + if np.issubdtype(data.coords[c].dtype, np.datetime64): type = "temporal" - values = [iso_datetime(date) for date in data.coords[c].values] + values = [datetime_to_isostr(date) for date in data.coords[c].values] else: values = data.coords[c].values.tolist() if c == "x": # todo: non-standardized @@ -62,14 +71,20 @@ def xarray_to_datacube(data): type = "spatial" axis = "y" - dim = {"name": c, "type": type, "values": values} + dim = {"type": type, "values": values} if axis is not None: dim["axis"] = axis if "crs" in data.attrs: dim["reference_system"] = data.attrs["crs"] # todo: non-standardized - dims.append(dim) - cube = {"type": "datacube", 
"dimensions": dims, "data": data.values.tolist()} + dims[c] = dim + + cube = { + "type": "datacube", + "order": order, + "dimensions": dims, + "data": data.values.tolist(), + } if "nodata" in data.attrs: cube["nodata"] = data.attrs["nodata"] # todo: non-standardized @@ -77,7 +92,11 @@ def xarray_to_datacube(data): return cube -def iso_datetime(dt): +def isostr_to_datetime(dt): + return parse(dt) + + +def datetime_to_isostr(dt): # Convert numpy.datetime64 to timestamp (in seconds) timestamp = dt.astype("datetime64[s]").astype(int) # Create a datetime object from the timestamp diff --git a/src/openeo_test_suite/lib/process_runner/vito.py b/src/openeo_test_suite/lib/process_runner/vito.py index c759380..7e3da83 100644 --- a/src/openeo_test_suite/lib/process_runner/vito.py +++ b/src/openeo_test_suite/lib/process_runner/vito.py @@ -22,7 +22,10 @@ def execute(self, id, arguments): def encode_datacube(self, data): return datacube_to_xarray(data) - def decode_data(self, data): - data = numpy_to_native(data) + def decode_data(self, data, expected): + data = numpy_to_native(data, expected) data = xarray_to_datacube(data) return data + + def get_nodata_value(self): + return float("nan") diff --git a/src/openeo_test_suite/tests/conftest.py b/src/openeo_test_suite/tests/conftest.py index 7f86a49..f629a6c 100644 --- a/src/openeo_test_suite/tests/conftest.py +++ b/src/openeo_test_suite/tests/conftest.py @@ -1,5 +1,7 @@ +import argparse import logging import os +from distutils.util import strtobool import openeo import pytest @@ -14,6 +16,13 @@ def pytest_addoption(parser): default=None, help="The openEO backend URL to connect to.", ) + parser.addoption( + "--experimental", + type=bool, + action=argparse.BooleanOptionalAction, + default=False, + help="Run tests for experimental functionality or not. By default the tests will be skipped.", + ) parser.addoption( "--process-levels", action="store", @@ -55,6 +64,62 @@ def backend_url(request) -> str: return url +@pytest.fixture(scope="session") +def skip_experimental(request) -> str: + """ + Fixture to determine whether experimental functionality should be tested or not. + """ + # TODO: also support getting it from a config file? + if request.config.getoption("--experimental"): + skip = False + elif "OPENEO_EXPERIMENTAL" in os.environ: + skip = bool(strtobool(os.environ["OPENEO_EXPERIMENTAL"])) + else: + skip = True + + _log.info(f"Skip experimental functionality {skip!r}") + + return skip + + +@pytest.fixture(scope="session") +def process_levels(request): + """ + Fixture to get the desired openEO profiles levels. + """ + levels_str = "" + # TODO: also support getting it from a config file? + if request.config.getoption("--process-levels"): + levels_str = request.config.getoption("--process-levels") + elif "OPENEO_PROCESS_LEVELS" in os.environ: + levels_str = os.environ["OPENEO_PROCESS_LEVELS"] + + if isinstance(levels_str, str) and len(levels_str) > 0: + _log.info(f"Testing process levels {levels_str!r}") + return list(map(lambda l: l.strip(), levels_str.split(","))) + else: + return [] + + +@pytest.fixture(scope="session") +def processes(request): + """ + Fixture to get the desired profiles to test against. + """ + processes_str = "" + # TODO: also support getting it from a config file? 
+ if request.config.getoption("--processes"): + processes_str = request.config.getoption("--processes") + elif "OPENEO_PROCESSES" in os.environ: + processes_str = os.environ["OPENEO_PROCESSES"] + + if isinstance(processes_str, str) and len(processes_str) > 0: + _log.info(f"Testing processes {processes_str!r}") + return list(map(lambda p: p.strip(), processes_str.split(","))) + else: + return [] + + @pytest.fixture def auto_authenticate() -> bool: """ diff --git a/src/openeo_test_suite/tests/processes/README.md b/src/openeo_test_suite/tests/processes/README.md index f20fb12..f1f191e 100644 --- a/src/openeo_test_suite/tests/processes/README.md +++ b/src/openeo_test_suite/tests/processes/README.md @@ -9,13 +9,16 @@ ## Individual Process Testing -Examples: +### Examples - `pytest --openeo-backend-url=https://openeo.cloud --processes=min,max` - `pytest --runner=vito --process-levels=L1,L2,L2A` - `pytest --runner=dask` +- `pytest src/openeo_test_suite/tests/processes/processing/test_example.py --runner=dask` -Parameters: +### Parameters + +Specify `src/openeo_test_suite/tests/processes/processing/test_example.py` to only run individual process tests. - `--runner`: The execution engine. One of: - `vito` (needs being installed) diff --git a/src/openeo_test_suite/tests/processes/processing/conftest.py b/src/openeo_test_suite/tests/processes/processing/conftest.py index 68cbf84..3b81dba 100644 --- a/src/openeo_test_suite/tests/processes/processing/conftest.py +++ b/src/openeo_test_suite/tests/processes/processing/conftest.py @@ -27,44 +27,6 @@ def runner(request) -> str: return runner -@pytest.fixture(scope="session") -def process_levels(request): - """ - Fixture to get the desired openEO profiles levels. - """ - levels_str = "" - # TODO: also support getting it from a config file? - if request.config.getoption("--process-levels"): - levels_str = request.config.getoption("--process-levels") - elif "OPENEO_PROCESS_LEVELS" in os.environ: - levels_str = os.environ["OPENEO_PROCESS_LEVELS"] - - if isinstance(levels_str, str) and len(levels_str) > 0: - _log.info(f"Testing process levels {levels_str!r}") - return list(map(lambda l: l.strip(), levels_str.split(","))) - else: - return [] - - -@pytest.fixture(scope="session") -def processes(request): - """ - Fixture to get the desired profiles to test against. - """ - processes_str = "" - # TODO: also support getting it from a config file? 
-    if request.config.getoption("--processes"):
-        processes_str = request.config.getoption("--processes")
-    elif "OPENEO_PROCESSES" in os.environ:
-        processes_str = os.environ["OPENEO_PROCESSES"]
-
-    if isinstance(processes_str, str) and len(processes_str) > 0:
-        _log.info(f"Testing processes {processes_str!r}")
-        return list(map(lambda p: p.strip(), processes_str.split(",")))
-    else:
-        return []
-
-
 @pytest.fixture
 def connection(
     backend_url: str, runner: str, auto_authenticate: bool, capfd
diff --git a/src/openeo_test_suite/tests/processes/processing/test_example.py b/src/openeo_test_suite/tests/processes/processing/test_example.py
index 8b224f3..d788bdd 100644
--- a/src/openeo_test_suite/tests/processes/processing/test_example.py
+++ b/src/openeo_test_suite/tests/processes/processing/test_example.py
@@ -7,10 +7,22 @@
 import xarray as xr
 from deepdiff import DeepDiff
 
+from openeo_test_suite.lib.process_runner.util import isostr_to_datetime
+
 # glob path to the test files
 examples_path = "assets/processes/tests/*.json5"
 
 
+def get_prop(prop, data, test, default=None):
+    if prop in test:
+        level = test[prop]
+    elif prop in data:
+        level = data[prop]
+    else:
+        level = default
+    return level
+
+
 def get_examples():
     examples = []
     package_root_folder = Path(__file__).parents[5]
@@ -21,23 +33,30 @@ def get_examples():
             with file.open() as f:
                 data = json5.load(f)
                 for test in data["tests"]:
-                    if "level" in test:
-                        level = test["level"]
-                    elif "level" in data:
-                        level = data["level"]
-                    else:
-                        level = "L4"
-
-                    examples.append([id, test, file, level])
+                    level = get_prop("level", data, test, "L4")
+                    experimental = get_prop("experimental", data, test, False)
+                    examples.append([id, test, file, level, experimental])
         except Exception as e:
             warnings.warn("Failed to load {} due to {}".format(file, e))
 
     return examples
 
 
-@pytest.mark.parametrize("id,example,file,level", get_examples())
-def test_process(connection, process_levels, processes, id, example, file, level):
-    if len(process_levels) > 0 and level not in process_levels:
+@pytest.mark.parametrize("id,example,file,level, experimental", get_examples())
+def test_process(
+    connection,
+    skip_experimental,
+    process_levels,
+    processes,
+    id,
+    example,
+    file,
+    level,
+    experimental,
+):
+    if skip_experimental and experimental:
+        pytest.skip("Skipping experimental process {}".format(id))
+    elif len(process_levels) > 0 and level not in process_levels:
         pytest.skip(
             "Skipping process {} because {} is not in the specified levels: {}".format(
                 id, level, ", ".join(process_levels)
@@ -45,8 +64,8 @@
         )
     elif len(processes) > 0 and id not in processes:
         pytest.skip(
-            "Skipping process {} because it is not in the specified processes: {}".format(
-                id, ", ".join(processes)
+            "Skipping process {} because it is not in the specified processes".format(
+                id
             )
         )
 
@@ -62,11 +81,7 @@
         try:
            connection.describe_process(pid)
         except:
-            pytest.skip(
-                "Test requires additional process {} which is not available".format(
-                    pid
-                )
-            )
+            pytest.skip("Test requires missing process {}".format(pid))
 
     # prepare the arguments from test JSON encoding to internal backend representations
     # or skip if not supported by the test runner
@@ -75,8 +90,6 @@
     except Exception as e:
         pytest.skip(str(e))
 
-    # todo: handle experimental processes (warning instead of error?)
-    experimental = example["experimental"] if "experimental" in example else False
     throws = bool(example["throws"]) if "throws" in example else False
     returns = "returns" in example
 
@@ -91,63 +104,110 @@
         if isinstance(result, Exception):
             check_exception(example, result)
         else:
-            check_return_value(example, result, connection)
+            check_return_value(example, result, connection, file)
     elif throws:
         check_exception(example, result)
     elif returns:
-        check_return_value(example, result, connection)
+        check_return_value(example, result, connection, file)
     else:
-        pytest.skip("Test doesn't provide an expected result")
+        pytest.skip(
+            "Test for process {} doesn't provide an expected result for arguments: {}".format(
+                id, example["arguments"]
+            )
+        )
 
 
 def prepare_arguments(arguments, process_id, connection, file):
     for name in arguments:
-        arg = arguments[name]
-
-        # handle external references to files
-        if isinstance(arg, dict) and "$ref" in arg:
-            arg = load_ref(arg["$ref"], file)
-
-        # handle custom types of data
-        if isinstance(arg, dict):
-            if "type" in arg:
-                # labeled arrays
-                if arg["type"] == "labeled-array":
-                    arg = connection.encode_labeled_array(arg)
-                # datacubes
-                elif arg["type"] == "datacube":
-                    if "data" in arg:
-                        arg["data"] = load_datacube(arg)
-                    arg = connection.encode_datacube(arg)
-            elif "process_graph" in arg:
-                arg = connection.encode_process_graph(arg, process_id, name)
-
-        if connection.is_json_only():
-            check_non_json_values(arg)
-
-        arguments[name] = arg
+        arguments[name] = prepare_argument(
+            arguments[name], process_id, name, connection, file
+        )
 
     return arguments
 
 
-def load_datacube(cube):
-    if isinstance(cube["data"], str):
-        path = posixpath.join(cube["path"], cube["data"])
-        if path.endswith(".nc"):
-            return xr.open_dataarray(path)
+def prepare_argument(arg, process_id, name, connection, file):
+    # handle external references to files
+    if isinstance(arg, dict) and "$ref" in arg:
+        arg = load_ref(arg["$ref"], file)
+
+    # handle custom types of data
+    if isinstance(arg, dict):
+        if "type" in arg:
+            # labeled arrays
+            if arg["type"] == "labeled-array":
+                arg = connection.encode_labeled_array(arg)
+            # datacubes
+            elif arg["type"] == "datacube":
+                arg = connection.encode_datacube(arg)
+            # nodata-values
+            elif arg["type"] == "nodata":
+                arg = connection.get_nodata_value()
+        elif "process_graph" in arg:
+            arg = connection.encode_process_graph(arg, process_id, name)
         else:
-            raise Exception("Datacubes from non-netCDF files not implemented yet")
-    else:
-        return cube["data"]
+            for key in arg:
+                arg[key] = prepare_argument(
+                    arg[key], process_id, name, connection, file
+                )
+
+    elif isinstance(arg, list):
+        for i in range(len(arg)):
+            arg[i] = prepare_argument(arg[i], process_id, name, connection, file)
+
+    arg = connection.encode_data(arg)
+
+    if connection.is_json_only():
+        check_non_json_values(arg)
+
+    return arg
+
+
+def prepare_results(connection, file, example, result=None):
+    # go through the example and result recursively and convert datetimes to iso strings
+    # could be used for more conversions in the future...
+
+    if isinstance(example, dict):
+        # handle external references to files
+        if isinstance(example, dict) and "$ref" in example:
+            example = load_ref(example["$ref"], file)
+
+        if "type" in example:
+            if example["type"] == "datetime":
+                example = isostr_to_datetime(example["value"])
+                try:
+                    result = isostr_to_datetime(result)
+                except:
+                    pass
+            elif example["type"] == "nodata":
+                example = connection.get_nodata_value()
+        else:
+            for key in example:
+                if key not in result:
+                    (example[key], _) = prepare_results(connection, file, example[key])
+                else:
+                    (example[key], result[key]) = prepare_results(
+                        connection, file, example[key], result[key]
+                    )
+
+    elif isinstance(example, list):
+        for i in range(len(example)):
+            if i >= len(result):
+                (example[i], _) = prepare_results(connection, file, example[i])
+            else:
+                (example[i], result[i]) = prepare_results(
+                    connection, file, example[i], result[i]
+                )
+
+    return (example, result)
 
 
 def load_ref(ref, file):
-    if ref.endswith(".json") or ref.endswith(".json5"):
+    if ref.endswith(".json") or ref.endswith(".json5") or ref.endswith(".geojson"):
         try:
             path = posixpath.join(file.parent, ref)
             with open(path) as f:
                 data = json5.load(f)
-                data["path"] = path
             return data
         except Exception as e:
             raise Exception("Failed to load external reference {}: {}".format(ref, e))
@@ -170,7 +230,9 @@ def check_non_json_values(value):
 
 
 def check_exception(example, result):
-    assert isinstance(result, Exception)
+    assert isinstance(result, Exception), "Expected an exception, but got {}".format(
+        result
+    )
     if isinstance(example["throws"], str):
         if result.__class__.__name__ != example["throws"]:
             warnings.warn(
@@ -182,33 +244,71 @@
         # assert result.__class__.__name__ == example["throws"]
 
 
-def check_return_value(example, result, connection):
-    assert not isinstance(result, Exception)
+def check_return_value(example, result, connection, file):
+    assert not isinstance(result, Exception), "Unexpected exception: {}".format(
+        str(result)
+    )
 
     # handle custom types of data
-    result = connection.decode_data(result)
+    result = connection.decode_data(result, example["returns"])
+
+    # decode special types (currently mostly datetimes and nodata)
+    (example["returns"], result) = prepare_results(
+        connection, file, example["returns"], result
+    )
+
+    delta = example["delta"] if "delta" in example else 0.0000000001
 
     if isinstance(example["returns"], dict):
-        assert isinstance(result, dict)
-        assert {} == DeepDiff(
+        assert isinstance(result, dict), "Expected a dict but got {}".format(
+            type(result)
+        )
+        exclude_regex_paths = []
+        exclude_paths = []
+        ignore_order_func = None
+        if "type" in example["returns"] and example["returns"]["type"] == "datacube":
+            # todo: non-standardized
+            exclude_regex_paths.append(
+                r"root\['dimensions'\]\[[^\]]+\]\['reference_system'\]"
+            )
+            # todo: non-standardized
+            exclude_paths.append("root['nodata']")
+            # ignore data if operation is not changing data
+            if example["returns"]["data"] is None:
+                exclude_paths.append("root['data']")
+
+        diff = DeepDiff(
+            example["returns"],
+            result,
+            math_epsilon=delta,
+            ignore_numeric_type_changes=True,
+            ignore_nan_inequality=True,
+            exclude_paths=exclude_paths,
+            exclude_regex_paths=exclude_regex_paths,
+            ignore_order_func=ignore_order_func,
+        )
+        assert {} == diff, "Differences: {}".format(str(diff))
+    elif isinstance(example["returns"], list):
+        assert isinstance(result, list), "Expected a list but got {}".format(
+            type(result)
+        )
+        diff = DeepDiff(
             example["returns"],
             result,
-            significant_digits=10,  # todo
+            math_epsilon=delta,
             ignore_numeric_type_changes=True,
-            exclude_paths=["root['nodata']"],  # todo: non-standardized
-            exclude_regex_paths=[
-                r"root\['dimensions'\]\[\d+\]\['reference_system'\]"  # todo: non-standardized
-            ],
+            ignore_nan_inequality=True,
         )
+        assert {} == diff, "Differences: {}".format(str(diff))
     elif isinstance(example["returns"], float) and math.isnan(example["returns"]):
-        assert math.isnan(result)
+        assert math.isnan(result), "Got {} instead of NaN".format(result)
     elif isinstance(example["returns"], float) or isinstance(example["returns"], int):
         msg = "Expected a numerical result but got {} of type {}".format(
             result, type(result)
         )
         assert isinstance(result, float) or isinstance(result, int), msg
+        assert not math.isnan(result), "Got unexpected NaN as result"
 
         # handle numerical data with a delta
-        delta = example["delta"] if "delta" in example else 0.0000000001
         assert result == pytest.approx(example["returns"], delta)
     else:
         msg = "Expected {} but got {}".format(example["returns"], result)