diff --git a/CHANGELOG.md b/CHANGELOG.md index 649fae197..b9df3154a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Avoid double `save_result` nodes when combining `VectorCube.save_result()` and `.download()`. + ([#401](https://github.com/Open-EO/openeo-python-client/issues/401), [#448](https://github.com/Open-EO/openeo-python-client/issues/448)) + ## [0.20.0] - 2023-06-30 diff --git a/openeo/rest/datacube.py b/openeo/rest/datacube.py index 412aee984..1c471e68d 100644 --- a/openeo/rest/datacube.py +++ b/openeo/rest/datacube.py @@ -1904,7 +1904,7 @@ def _ensure_save_result( :param options: (optional) desired `save_result` file format parameters :return: """ - # TODO: move to generic data cube parent class (not only for raster cubes, but also vector cubes) + # TODO #401 Unify with VectorCube._ensure_save_result and move to generic data cube parent class (not only for raster cubes, but also vector cubes) result_node = self.result_node() if result_node.process_id == "save_result": # There is already a `save_result` node: diff --git a/openeo/rest/vectorcube.py b/openeo/rest/vectorcube.py index a9db44110..3715b7a31 100644 --- a/openeo/rest/vectorcube.py +++ b/openeo/rest/vectorcube.py @@ -89,23 +89,57 @@ def run_udf( ) @openeo_process - def save_result(self, format: str = "GeoJson", options: dict = None): + def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None): + # TODO #401: guard against duplicate save_result nodes? return self.process( process_id="save_result", arguments={ "data": self, - "format": format, - "options": options or {} - } + "format": format or "GeoJSON", + "options": options or {}, + }, ) + def _ensure_save_result( + self, + format: Optional[str] = None, + options: Optional[dict] = None, + ) -> "VectorCube": + """ + Make sure there is a (final) `save_result` node in the process graph. + If there is already one: check if it is consistent with the given format/options (if any) + and add a new one otherwise. + + :param format: (optional) desired `save_result` file format + :param options: (optional) desired `save_result` file format parameters + :return: + """ + # TODO #401 Unify with DataCube._ensure_save_result and move to generic data cube parent class + result_node = self.result_node() + if result_node.process_id == "save_result": + # There is already a `save_result` node: + # check if it is consistent with given format/options (if any) + args = result_node.arguments + if format is not None and format.lower() != args["format"].lower(): + raise ValueError(f"Existing `save_result` node with different format {args['format']!r} != {format!r}") + if options is not None and options != args["options"]: + raise ValueError( + f"Existing `save_result` node with different options {args['options']!r} != {options!r}" + ) + cube = self + else: + # No `save_result` node yet: automatically add it. + cube = self.save_result(format=format or "GeoJSON", options=options) + return cube + def execute(self) -> dict: """Executes the process graph of the imagery.""" return self._connection.execute(self.flat_graph()) - def download(self, outputfile: str, format: str = "GeoJSON", options: dict = None): - # TODO: only add save_result, when not already present (see DataCube.download) - cube = self.save_result(format=format, options=options) + def download(self, outputfile: Union[str, pathlib.Path], format: Optional[str] = None, options: dict = None): + # TODO #401 guess format from outputfile? + # TODO #401 make outputfile optional (See DataCube.download) + cube = self._ensure_save_result(format=format, options=options) return self._connection.download(cube.flat_graph(), outputfile) def execute_batch( @@ -160,6 +194,7 @@ def create_job( cube = self if out_format: # add `save_result` node + # TODO #401: avoid duplicate save_result cube = cube.save_result(format=out_format, options=format_options) return self._connection.create_job( process_graph=cube.flat_graph(), diff --git a/tests/rest/datacube/test_vectorcube.py b/tests/rest/datacube/test_vectorcube.py index 0858b832a..7f7f4d944 100644 --- a/tests/rest/datacube/test_vectorcube.py +++ b/tests/rest/datacube/test_vectorcube.py @@ -1,3 +1,4 @@ +from pathlib import Path from typing import List import pytest @@ -89,8 +90,12 @@ def test_raster_to_vector(con100): ("result.nc", "GeoJSON"), # TODO #401 autodetect format from filename ], ) -def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path, filename, expected_format): - vector_cube.download(tmp_path / filename) +@pytest.mark.parametrize("path_class", [str, Path]) +def test_download_auto_save_result_only_file( + vector_cube, download_spy, tmp_path, filename, expected_format, path_class +): + output_path = tmp_path / filename + vector_cube.download(path_class(output_path)) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, @@ -104,7 +109,7 @@ def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path "result": True, }, } - assert (tmp_path / filename).read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data" @pytest.mark.parametrize( @@ -116,12 +121,13 @@ def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path # TODO #401 more formats to autodetect? ("result.nc", "NETcDf", "NETcDf"), # TODO #401 normalize format ("result.nc", "inV6l1d!!!", "inV6l1d!!!"), # TODO #401 this should fail - ("result.json", None, None), # TODO #401 autodetect format from filename - ("result.nc", None, None), # TODO #401 autodetect format from filename + ("result.json", None, "GeoJSON"), # TODO #401 autodetect format from filename? + ("result.nc", None, "GeoJSON"), # TODO #401 autodetect format from filename ], ) def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_path, filename, format, expected_format): - vector_cube.download(tmp_path / filename, format=format) + output_path = tmp_path / filename + vector_cube.download(output_path, format=format) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, @@ -135,11 +141,12 @@ def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_pa "result": True, }, } - assert (tmp_path / filename).read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data" def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_path): - vector_cube.download(tmp_path / "result.json", format="GeoJSON", options={"precision": 7}) + output_path = tmp_path / "result.json" + vector_cube.download(output_path, format="GeoJSON", options={"precision": 7}) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, @@ -153,24 +160,28 @@ def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_p "result": True, }, } - assert (tmp_path / "result.json").read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data" -def test_save_result_and_download(vector_cube, download_spy, tmp_path): +@pytest.mark.parametrize( + ["format", "expected_format"], + [ + (None, "GeoJSON"), + ("JSON", "JSON"), + ("netCDF", "netCDF"), + ], +) +def test_save_result_and_download(vector_cube, download_spy, tmp_path, format, expected_format): """e.g. https://github.com/Open-EO/openeo-geopyspark-driver/issues/477""" - vector_cube = vector_cube.save_result(format="JSON") - vector_cube.download(tmp_path / "result.json") - # TODO #401 there should only be one save_result node + vector_cube = vector_cube.save_result(format=format) + output_path = tmp_path / "result.json" + vector_cube.download(output_path) assert download_spy.only_request == { "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}}, "saveresult1": { "process_id": "save_result", - "arguments": {"data": {"from_node": "createvectorcube1"}, "format": "JSON", "options": {}}, - }, - "saveresult2": { - "process_id": "save_result", - "arguments": {"data": {"from_node": "saveresult1"}, "format": "GeoJSON", "options": {}}, + "arguments": {"data": {"from_node": "createvectorcube1"}, "format": expected_format, "options": {}}, "result": True, }, } - assert (tmp_path / "result.json").read_bytes() == b"Spy data" + assert output_path.read_bytes() == b"Spy data"