diff --git a/.gitignore b/.gitignore index 4f4d1b8..cf08a46 100644 --- a/.gitignore +++ b/.gitignore @@ -169,3 +169,7 @@ poetry.toml # LSP config files pyrightconfig.json + +# vscode +.vscode/ +pytest.ini diff --git a/README.md b/README.md index 20be729..e8091dc 100644 --- a/README.md +++ b/README.md @@ -40,7 +40,23 @@ focussing on a specific API aspect to test or verify ``` - **WP3 Validation of process metadata** (lead implementation partner: EODC) - Main location: [`src/openeo_test_suite/tests/processes/metadata`](./src/openeo_test_suite/tests/processes/metadata) - - TODO: [Open-EO/openeo-test-suite#19](https://github.com/Open-EO/openeo-test-suite/issues/19) + - Defines tests to validate openEO process metadata against specs + defined in the [openeo-processes](https://github.com/Open-EO/openeo-processes) project + - Functional tests concern actual values and behavior of processes (like parameters and return values), + failures in these tests should be looked into and fixed. + - Non-functional tests concern descriptions and other metadata of processes that have no impact on the actual behavior of the process, + failures in these tests should be taken as warnings, but don't necessarily need to be fixed. These can be skipped by adding + '-m "not optional"' to the pytest command. + - Usage example for running these tests against a desired openEO backend URL: + ```bash + pytest src/openeo_test_suite/tests/processes/metadata \ + --html=reports/process-metadata.html \ + --openeo-backend-url=openeo.example \ + --tb=no \ + -vv + ``` + It is recommended to run these tests with the `--tb=no` option to avoid excessive output of no substance. + and the `-vv` option to get more detailed output. - **WP4 General openEO API compliance validation** (lead implementation partner: EODC) - TODO: [Open-EO/openeo-test-suite#20](https://github.com/Open-EO/openeo-test-suite/issues/20) - **WP5 Individual process testing** (lead implementation partner: M. Mohr) @@ -352,7 +368,6 @@ Some general guidelines: extend existing tests or add new tests at `src/openeo_test_suite/tests/collections`. - Validation of process metadata: add new tests to `src/openeo_test_suite/tests/processes/metadata`. - - TODO: [Open-EO/openeo-test-suite#19](https://github.com/Open-EO/openeo-test-suite/issues/19) - General openEO API compliance validation: - TODO: [Open-EO/openeo-test-suite#20](https://github.com/Open-EO/openeo-test-suite/issues/20) - Individual process testing: diff --git a/src/openeo_test_suite/lib/internal-tests/test_process_registry.py b/src/openeo_test_suite/lib/internal-tests/test_process_registry.py index aeaa4ef..a7012b6 100644 --- a/src/openeo_test_suite/lib/internal-tests/test_process_registry.py +++ b/src/openeo_test_suite/lib/internal-tests/test_process_registry.py @@ -19,7 +19,7 @@ def test_get_process_match(self, process_registry): assert process_data.process_id == "add" assert process_data.level == "L1" assert process_data.experimental is False - assert process_data.path.name == "add.json5" + assert process_data.path.name == "add.json" assert len(process_data.tests) def test_get_process_no_match(self, process_registry): @@ -37,7 +37,7 @@ def test_get_all_processes_add(self, process_registry): assert add.level == "L1" assert add.experimental is False - assert add.path.name == "add.json5" + assert add.path.name == "add.json" assert len(add.tests) add00 = {"arguments": {"x": 0, "y": 0}, "returns": 0} @@ -50,7 +50,7 @@ def test_get_all_processes_divide(self, process_registry): assert divide.level == "L1" assert divide.experimental is False - assert divide.path.name == "divide.json5" + assert divide.path.name == "divide.json" assert len(divide.tests) divide0 = { diff --git a/src/openeo_test_suite/lib/process_registry.py b/src/openeo_test_suite/lib/process_registry.py index eea2ca4..5625006 100644 --- a/src/openeo_test_suite/lib/process_registry.py +++ b/src/openeo_test_suite/lib/process_registry.py @@ -1,3 +1,5 @@ +import itertools +import json import logging import os from dataclasses import dataclass @@ -16,6 +18,7 @@ class ProcessData: """Process data, including profile level and list of tests""" process_id: str + spec: dict level: str tests: List[dict] # TODO: also make dataclass for each test? experimental: bool @@ -32,12 +35,10 @@ def __init__(self, root: Optional[Path] = None): """ :param root: Root directory of the tests folder in openeo-processes project """ - self._root = Path( - root - # TODO: eliminate need for this env var? - or os.environ.get("OPENEO_TEST_SUITE_PROCESSES_TEST_ROOT") - or self._guess_root() - ) + + self._root = Path(root or self._guess_root()) + self._root_json5 = Path(self._root.joinpath("tests")) + # Lazy load cache self._processes: Union[None, List[ProcessData]] = None @@ -45,9 +46,9 @@ def _guess_root(self): # TODO: avoid need for guessing and properly include assets in (installed) package project_root = Path(openeo_test_suite.__file__).parents[2] candidates = [ - project_root / "assets/processes/tests", - Path("./assets/processes/tests"), - Path("./openeo-test-suite/assets/processes/tests"), + project_root / "assets/processes", + Path("./assets/processes"), + Path("./openeo-test-suite/assets/processes"), ] for candidate in candidates: if candidate.exists() and candidate.is_dir(): @@ -62,18 +63,33 @@ def _load(self) -> Iterator[ProcessData]: if not self._root.is_dir(): raise ValueError(f"Invalid process test root directory: {self._root}") _log.info(f"Loading process definitions from {self._root}") - for path in self._root.glob("*.json5"): + + process_paths = itertools.chain( + self._root.glob("*.json"), self._root.glob("proposals/*.json") + ) + for path in process_paths: + test_metadata_path = self._root_json5 / f"{path.stem}.json5" try: with path.open() as f: - data = json5.load(f) + data = json.load(f) if data["id"] != path.stem: raise ValueError( f"Process id mismatch between id {data['id']!r} and filename {path.name!r}" ) + # Metadata is stored in sibling json file + if test_metadata_path.exists(): + with test_metadata_path.open() as f: + metadata = json5.load(f) + else: + metadata = {} + yield ProcessData( process_id=data["id"], - level=data.get("level"), - tests=data.get("tests", []), + spec=data, + level=metadata.get( + "level", "L4" + ), # default to L4 is intended for processes without a specific level + tests=metadata.get("tests", []), experimental=data.get("experimental", False), path=path, ) diff --git a/src/openeo_test_suite/tests/processes/metadata/test_process_metadata.py b/src/openeo_test_suite/tests/processes/metadata/test_process_metadata.py new file mode 100644 index 0000000..fe03d9d --- /dev/null +++ b/src/openeo_test_suite/tests/processes/metadata/test_process_metadata.py @@ -0,0 +1,151 @@ +from typing import List +import pytest +from openeo_test_suite.lib.process_registry import ProcessData +from openeo_test_suite.lib.process_selection import get_selected_processes +import logging + + +@pytest.fixture(scope="module") +def api_processes(connection) -> List[dict]: + return connection.list_processes() + + +def _get_test_id(val): + if isinstance(val, ProcessData): + return val.process_id + + +@pytest.mark.parametrize( + "expected_process", + [p for p in get_selected_processes()], + ids=_get_test_id, +) +def test_process_metadata_functional(api_processes, expected_process, skipper): + """ + Tests if the metadata of processes are correct, first tests if the process exists, + then tests if the parameters of processes are correct and finally tests if the return type of processes is correct. + + Any process that has no metadata is skipped. + + These are the functional parts of the process metadata e.g.: the parameters and return type. + """ + + skipper.skip_if_unsupported_process(expected_process.process_id) + + actual_process = [ + process + for process in api_processes + if process["id"] == expected_process.process_id + ][0] + # Tests if the parameters of processes are correct + + expected_parameters = expected_process.spec.get("parameters", []) + actual_parameters = actual_process["parameters"] + + if len(expected_parameters) > len(actual_parameters): + logging.warn( + f"Process {expected_process.process_id} has {len(expected_parameters)} expected parameters, but only {len(actual_parameters)} actual parameters" + ) + for expected_parameter in expected_parameters[len(actual_parameters) :]: + assert ( + "default" in expected_parameter + ), f"Missing Parameter {expected_parameter} has no default value. Cannot remove required parameters." + + if len(expected_parameters) < len(actual_parameters): + logging.warn( + f"Process {expected_process.process_id} has {len(expected_parameters)} expected parameters, but {len(actual_parameters)} actual parameters" + ) + for actual_parameter in actual_parameters[len(expected_parameters) :]: + assert ( + "default" in actual_parameter + ), f"Additional Parameter {actual_parameter} has no default value." + + # Check if the expected parameters are in the actual parameters + # throw a warning if there are added parameters with default values + + for expected_parameter, actual_parameter in zip( + expected_parameters, actual_parameters + ): + # Tests if parameter names are equivalent + assert expected_parameter["name"] == actual_parameter["name"] + # Tests if optionality of parameters is equivalent + assert expected_parameter.get("optional", False) == actual_parameter.get( + "optional", False + ) + # Tests if the type of parameters is equivalent + assert expected_parameter["schema"] == actual_parameter["schema"] + + # Tests if the return type of processes is correct + expected_return_type = expected_process.spec.get("returns", {}) + + actual_return_type = actual_process["returns"] + assert expected_return_type["schema"] == actual_return_type["schema"] + + # Tests the deprecated and experimental flags (Should be true if expected process is true as well, but not the other way around) + if expected_process.spec.get("experimental", False): + assert actual_process.get("experimental", False) + + if expected_process.spec.get("deprecated", False): + assert actual_process.get("deprecated", False) + + +@pytest.mark.optional +@pytest.mark.parametrize( + "expected_process", + [p for p in get_selected_processes()], + ids=_get_test_id, +) +def test_process_metadata_non_functional(api_processes, expected_process, skipper): + """ + Tests if the non-functional metadata of processes are correct (descriptions and categories), first tests if the process exists, + then tests if the categories of processes are correct. + """ + + skipper.skip_if_unsupported_process(expected_process.process_id) + + actual_process = [ + process + for process in api_processes + if process["id"] == expected_process.process_id + ][0] + + # Tests if the categories of processes is equivalent + assert expected_process.spec.get("categories", []) == actual_process["categories"] + + # Tests if the description of processes is equivalent + assert expected_process.spec.get("description", "") == actual_process["description"] + + # Tests if the summary of processes is equivalent + assert expected_process.spec.get("summary", "") == actual_process["summary"] + + # Tests if the description of parameters is equivalent + expected_parameters = expected_process.spec.get("parameters", []) + actual_parameters = actual_process["parameters"] + + assert len(expected_parameters) == len(actual_parameters) + + for expected_parameter, actual_parameter in zip( + expected_parameters, actual_parameters + ): + assert expected_parameter.get("description", "") == actual_parameter.get( + "description", "" + ) + + # Tests if the description of returns is equivalent + expected_return_type = expected_process.spec.get("returns", {}) + actual_return_type = actual_process["returns"] + assert expected_return_type.get("description", "") == actual_return_type.get( + "description", "" + ) + + # Tests if the links of processes are equivalent + expected_links = expected_process.spec.get("links", []) + actual_links = actual_process.get("links", []) + + expected_links.sort(key=lambda x: x.get("href", "")) + actual_links.sort(key=lambda x: x.get("href", "")) + + for expected_link, actual_link in zip(expected_links, actual_links): + assert expected_link.get("href", "") == actual_link.get("href", "") + assert expected_link.get("rel", "") == actual_link.get("rel", "") + assert expected_link.get("title", "") == actual_link.get("title", "")