From 35bd4eb4b90e0ebd0dcc92a6eec5995590a1490f Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Fri, 14 Jul 2023 17:23:33 +0200 Subject: [PATCH 1/2] Issue #401 add tests for current behavior of `VectorCube.download` --- tests/rest/datacube/test_vectorcube.py | 147 +++++++++++++++++++++++++ 1 file changed, 147 insertions(+) diff --git a/tests/rest/datacube/test_vectorcube.py b/tests/rest/datacube/test_vectorcube.py index f4eca0044..0858b832a 100644 --- a/tests/rest/datacube/test_vectorcube.py +++ b/tests/rest/datacube/test_vectorcube.py @@ -1,3 +1,55 @@ +from typing import List + +import pytest + +from openeo.internal.graph_building import PGNode +from openeo.rest.vectorcube import VectorCube + + +@pytest.fixture +def vector_cube(con100) -> VectorCube: + pgnode = PGNode(process_id="create_vector_cube") + return VectorCube(graph=pgnode, connection=con100) + + +class DownloadSpy: + """ + Test helper to track download requests and optionally override next response to return. + """ + + __slots__ = ["requests", "next_response"] + + def __init__(self): + self.requests: List[dict] = [] + self.next_response: bytes = b"Spy data" + + @property + def only_request(self) -> dict: + """Get process graph of only request done""" + assert len(self.requests) == 1 + return self.requests[-1] + + @property + def last_request(self) -> dict: + """Get last process graph""" + assert len(self.requests) > 0 + return self.requests[-1] + + +@pytest.fixture +def download_spy(requests_mock, con100) -> DownloadSpy: + """Test fixture to spy on (and mock) `POST /result` (download) requests.""" + spy = DownloadSpy() + + def post_result(request, context): + pg = request.json()["process"]["process_graph"] + spy.requests.append(pg) + return spy.next_response + + requests_mock.post(con100.build_url("/result"), content=post_result) + yield spy + + def test_raster_to_vector(con100): img = con100.load_collection("S2") vector_cube = img.raster_to_vector() @@ -27,3 +79,98 @@ def 
test_raster_to_vector(con100): 'process_id': 'run_udf', 'result': True} } + + +@pytest.mark.parametrize( + ["filename", "expected_format"], + [ + ("result.json", "GeoJSON"), # TODO #401 possible to detect "GeoJSON from ".json" extension? + ("result.geojson", "GeoJSON"), + ("result.nc", "GeoJSON"), # TODO #401 autodetect format from filename + ], +) +def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path, filename, expected_format): + vector_cube.download(tmp_path / filename) + + assert download_spy.only_request == { + "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, + "saveresult1": { + "process_id": "save_result", + "arguments": { + "data": {"from_node": "createvectorcube1"}, + "format": expected_format, + "options": {}, + }, + "result": True, + }, + } + assert (tmp_path / filename).read_bytes() == b"Spy data" + + +@pytest.mark.parametrize( + ["filename", "format", "expected_format"], + [ + ("result.json", "JSON", "JSON"), + ("result.geojson", "GeoJSON", "GeoJSON"), + ("result.nc", "netCDF", "netCDF"), + # TODO #401 more formats to autodetect? 
+ ("result.nc", "NETcDf", "NETcDf"), # TODO #401 normalize format + ("result.nc", "inV6l1d!!!", "inV6l1d!!!"), # TODO #401 this should fail + ("result.json", None, None), # TODO #401 autodetect format from filename + ("result.nc", None, None), # TODO #401 autodetect format from filename + ], +) +def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_path, filename, format, expected_format): + vector_cube.download(tmp_path / filename, format=format) + + assert download_spy.only_request == { + "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, + "saveresult1": { + "process_id": "save_result", + "arguments": { + "data": {"from_node": "createvectorcube1"}, + "format": expected_format, + "options": {}, + }, + "result": True, + }, + } + assert (tmp_path / filename).read_bytes() == b"Spy data" + + +def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_path): + vector_cube.download(tmp_path / "result.json", format="GeoJSON", options={"precision": 7}) + + assert download_spy.only_request == { + "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, + "saveresult1": { + "process_id": "save_result", + "arguments": { + "data": {"from_node": "createvectorcube1"}, + "format": "GeoJSON", + "options": {"precision": 7}, + }, + "result": True, + }, + } + assert (tmp_path / "result.json").read_bytes() == b"Spy data" + + +def test_save_result_and_download(vector_cube, download_spy, tmp_path): + """e.g. 
https://github.com/Open-EO/openeo-geopyspark-driver/issues/477""" + vector_cube = vector_cube.save_result(format="JSON") + vector_cube.download(tmp_path / "result.json") + # TODO #401 there should only be one save_result node + assert download_spy.only_request == { + "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, + "saveresult1": { + "process_id": "save_result", + "arguments": {"data": {"from_node": "createvectorcube1"}, "format": "JSON", "options": {}}, + }, + "saveresult2": { + "process_id": "save_result", + "arguments": {"data": {"from_node": "saveresult1"}, "format": "GeoJSON", "options": {}}, + "result": True, + }, + } + assert (tmp_path / "result.json").read_bytes() == b"Spy data" From 8cbaf2585ccf02526dfddc11141e7e8ebda27a32 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Fri, 14 Jul 2023 17:56:27 +0200 Subject: [PATCH 2/2] Issue #401 avoid double `save_result` nodes with `VectorCube.download` --- CHANGELOG.md | 3 ++ openeo/rest/datacube.py | 2 +- openeo/rest/vectorcube.py | 49 ++++++++++++++++++++++---- tests/rest/datacube/test_vectorcube.py | 49 ++++++++++++++++---------- 4 files changed, 76 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 649fae197..b9df3154a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Avoid double `save_result` nodes when combining `VectorCube.save_result()` and `.download()`. 
+ ([#401](https://github.com/Open-EO/openeo-python-client/issues/401), [#448](https://github.com/Open-EO/openeo-python-client/issues/448)) + ## [0.20.0] - 2023-06-30 diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index 412aee984..1c471e68d 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -1904,7 +1904,7 @@ def _ensure_save_result( :param options: (optional) desired `save_result` file format parameters :return: """ - # TODO: move to generic data cube parent class (not only for raster cubes, but also vector cubes) + # TODO #401 Unify with VectorCube._ensure_save_result and move to generic data cube parent class (not only for raster cubes, but also vector cubes) result_node = self.result_node() if result_node.process_id == "save_result": # There is already a `save_result` node: diff --git a/openeo/rest/vectorcube.py b/openeo/rest/vectorcube.py index a9db44110..3715b7a31 100644 --- a/openeo/rest/vectorcube.py +++ b/openeo/rest/vectorcube.py @@ -89,23 +89,57 @@ def run_udf( ) @openeo_process - def save_result(self, format: str = "GeoJson", options: dict = None): + def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None): + # TODO #401: guard against duplicate save_result nodes? return self.process( process_id="save_result", arguments={ "data": self, - "format": format, - "options": options or {} - } + "format": format or "GeoJSON", + "options": options or {}, + }, ) + def _ensure_save_result( + self, + format: Optional[str] = None, + options: Optional[dict] = None, + ) -> "VectorCube": + """ + Make sure there is a (final) `save_result` node in the process graph. + If there is already one: check if it is consistent with the given format/options (if any) + and add a new one otherwise. 
+ + :param format: (optional) desired `save_result` file format + :param options: (optional) desired `save_result` file format parameters + :return: + """ + # TODO #401 Unify with DataCube._ensure_save_result and move to generic data cube parent class + result_node = self.result_node() + if result_node.process_id == "save_result": + # There is already a `save_result` node: + # check if it is consistent with given format/options (if any) + args = result_node.arguments + if format is not None and format.lower() != args["format"].lower(): + raise ValueError(f"Existing `save_result` node with different format {args['format']!r} != {format!r}") + if options is not None and options != args["options"]: + raise ValueError( + f"Existing `save_result` node with different options {args['options']!r} != {options!r}" + ) + cube = self + else: + # No `save_result` node yet: automatically add it. + cube = self.save_result(format=format or "GeoJSON", options=options) + return cube + def execute(self) -> dict: """Executes the process graph of the imagery.""" return self._connection.execute(self.flat_graph()) - def download(self, outputfile: str, format: str = "GeoJSON", options: dict = None): - # TODO: only add save_result, when not already present (see DataCube.download) - cube = self.save_result(format=format, options=options) + def download(self, outputfile: Union[str, pathlib.Path], format: Optional[str] = None, options: dict = None): + # TODO #401 guess format from outputfile? 
+ # TODO #401 make outputfile optional (See DataCube.download) + cube = self._ensure_save_result(format=format, options=options) return self._connection.download(cube.flat_graph(), outputfile) def execute_batch( @@ -160,6 +194,7 @@ def create_job( cube = self if out_format: # add `save_result` node + # TODO #401: avoid duplicate save_result cube = cube.save_result(format=out_format, options=format_options) return self._connection.create_job( process_graph=cube.flat_graph(), diff --git a/tests/rest/datacube/test_vectorcube.py b/tests/rest/datacube/test_vectorcube.py index 0858b832a..7f7f4d944 100644 --- a/tests/rest/datacube/test_vectorcube.py +++ b/tests/rest/datacube/test_vectorcube.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import List import pytest @@ -89,8 +90,12 @@ def test_raster_to_vector(con100): ("result.nc", "GeoJSON"), # TODO #401 autodetect format from filename ], ) -def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path, filename, expected_format): - vector_cube.download(tmp_path / filename) +@pytest.mark.parametrize("path_class", [str, Path]) +def test_download_auto_save_result_only_file( + vector_cube, download_spy, tmp_path, filename, expected_format, path_class +): + output_path = tmp_path / filename + vector_cube.download(path_class(output_path)) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, @@ -104,7 +109,7 @@ def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path "result": True, }, } - assert (tmp_path / filename).read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data" @pytest.mark.parametrize( @@ -116,12 +121,13 @@ def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path # TODO #401 more formats to autodetect? 
("result.nc", "NETcDf", "NETcDf"), # TODO #401 normalize format ("result.nc", "inV6l1d!!!", "inV6l1d!!!"), # TODO #401 this should fail - ("result.json", None, None), # TODO #401 autodetect format from filename - ("result.nc", None, None), # TODO #401 autodetect format from filename + ("result.json", None, "GeoJSON"), # TODO #401 autodetect format from filename? + ("result.nc", None, "GeoJSON"), # TODO #401 autodetect format from filename ], ) def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_path, filename, format, expected_format): - vector_cube.download(tmp_path / filename, format=format) + output_path = tmp_path / filename + vector_cube.download(output_path, format=format) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, @@ -135,11 +141,12 @@ def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_pa "result": True, }, } - assert (tmp_path / filename).read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data" def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_path): - vector_cube.download(tmp_path / "result.json", format="GeoJSON", options={"precision": 7}) + output_path = tmp_path / "result.json" + vector_cube.download(output_path, format="GeoJSON", options={"precision": 7}) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, @@ -153,24 +160,28 @@ def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_p "result": True, }, } - assert (tmp_path / "result.json").read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data" -def test_save_result_and_download(vector_cube, download_spy, tmp_path): +@pytest.mark.parametrize( + ["format", "expected_format"], + [ + (None, "GeoJSON"), + ("JSON", "JSON"), + ("netCDF", "netCDF"), + ], +) +def test_save_result_and_download(vector_cube, download_spy, tmp_path, 
format, expected_format): """e.g. https://github.com/Open-EO/openeo-geopyspark-driver/issues/477""" - vector_cube = vector_cube.save_result(format="JSON") - vector_cube.download(tmp_path / "result.json") - # TODO #401 there should only be one save_result node + vector_cube = vector_cube.save_result(format=format) + output_path = tmp_path / "result.json" + vector_cube.download(output_path) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, "saveresult1": { "process_id": "save_result", - "arguments": {"data": {"from_node": "createvectorcube1"}, "format": "JSON", "options": {}}, - }, - "saveresult2": { - "process_id": "save_result", - "arguments": {"data": {"from_node": "saveresult1"}, "format": "GeoJSON", "options": {}}, + "arguments": {"data": {"from_node": "createvectorcube1"}, "format": expected_format, "options": {}}, "result": True, }, } - assert (tmp_path / "result.json").read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data"