Skip to content

Commit

Permalink
Issue #401 avoid double save_result nodes with VectorCube.download
Browse files Browse the repository at this point in the history
  • Loading branch information
soxofaan committed Jul 14, 2023
1 parent 35bd4eb commit 8cbaf25
Show file tree
Hide file tree
Showing 4 changed files with 76 additions and 27 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Avoid double `save_result` nodes when combining `VectorCube.save_result()` and `.download()`.
([#401](https://github.com/Open-EO/openeo-python-client/issues/401), [#448](https://github.com/Open-EO/openeo-python-client/issues/448))


## [0.20.0] - 2023-06-30

Expand Down
2 changes: 1 addition & 1 deletion openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1904,7 +1904,7 @@ def _ensure_save_result(
:param options: (optional) desired `save_result` file format parameters
:return:
"""
# TODO: move to generic data cube parent class (not only for raster cubes, but also vector cubes)
# TODO #401 Unify with VectorCube._ensure_save_result and move to generic data cube parent class (not only for raster cubes, but also vector cubes)
result_node = self.result_node()
if result_node.process_id == "save_result":
# There is already a `save_result` node:
Expand Down
49 changes: 42 additions & 7 deletions openeo/rest/vectorcube.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,57 @@ def run_udf(
)

@openeo_process
def save_result(self, format: str = "GeoJson", options: dict = None):
def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None):
# TODO #401: guard against duplicate save_result nodes?
return self.process(
process_id="save_result",
arguments={
"data": self,
"format": format,
"options": options or {}
}
"format": format or "GeoJSON",
"options": options or {},
},
)

def _ensure_save_result(
    self,
    format: Optional[str] = None,
    options: Optional[dict] = None,
) -> "VectorCube":
    """
    Make sure there is a (final) `save_result` node in the process graph.

    If there is already a `save_result` result node: check that it is consistent
    with the given format/options (if any) and raise a :py:class:`ValueError`
    on a mismatch. If there is none yet: append a new `save_result` node
    (format defaulting to "GeoJSON").

    :param format: (optional) desired `save_result` file format
    :param options: (optional) desired `save_result` file format parameters
    :return: a :py:class:`VectorCube` whose result node is a `save_result` node
        (``self`` unchanged, or a new cube with the node appended)
    """
    # TODO #401 Unify with DataCube._ensure_save_result and move to generic data cube parent class
    result_node = self.result_node()
    if result_node.process_id == "save_result":
        # There is already a `save_result` node:
        # check if it is consistent with given format/options (if any)
        args = result_node.arguments
        # Format comparison is case-insensitive (e.g. "GeoJSON" vs "geojson"),
        # but options are compared with strict dict equality.
        if format is not None and format.lower() != args["format"].lower():
            raise ValueError(f"Existing `save_result` node with different format {args['format']!r} != {format!r}")
        if options is not None and options != args["options"]:
            raise ValueError(
                f"Existing `save_result` node with different options {args['options']!r} != {options!r}"
            )
        cube = self
    else:
        # No `save_result` node yet: automatically add it.
        cube = self.save_result(format=format or "GeoJSON", options=options)
    return cube

def execute(self) -> dict:
    """Execute the process graph synchronously and return the parsed result."""
    process_graph = self.flat_graph()
    return self._connection.execute(process_graph)

def download(self, outputfile: str, format: str = "GeoJSON", options: dict = None):
# TODO: only add save_result, when not already present (see DataCube.download)
cube = self.save_result(format=format, options=options)
def download(self, outputfile: Union[str, pathlib.Path], format: Optional[str] = None, options: dict = None):
# TODO #401 guess format from outputfile?
# TODO #401 make outputfile optional (See DataCube.download)
cube = self._ensure_save_result(format=format, options=options)
return self._connection.download(cube.flat_graph(), outputfile)

def execute_batch(
Expand Down Expand Up @@ -160,6 +194,7 @@ def create_job(
cube = self
if out_format:
# add `save_result` node
# TODO #401: avoid duplicate save_result
cube = cube.save_result(format=out_format, options=format_options)
return self._connection.create_job(
process_graph=cube.flat_graph(),
Expand Down
49 changes: 30 additions & 19 deletions tests/rest/datacube/test_vectorcube.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
from pathlib import Path
from typing import List

import pytest
Expand Down Expand Up @@ -89,8 +90,12 @@ def test_raster_to_vector(con100):
("result.nc", "GeoJSON"), # TODO #401 autodetect format from filename
],
)
def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path, filename, expected_format):
vector_cube.download(tmp_path / filename)
@pytest.mark.parametrize("path_class", [str, Path])
def test_download_auto_save_result_only_file(
vector_cube, download_spy, tmp_path, filename, expected_format, path_class
):
output_path = tmp_path / filename
vector_cube.download(path_class(output_path))

assert download_spy.only_request == {
"createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
Expand All @@ -104,7 +109,7 @@ def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path
"result": True,
},
}
assert (tmp_path / filename).read_bytes() == b"Spy data"
assert output_path.read_bytes() == b"Spy data"


@pytest.mark.parametrize(
Expand All @@ -116,12 +121,13 @@ def test_download_auto_save_result_only_file(vector_cube, download_spy, tmp_path
# TODO #401 more formats to autodetect?
("result.nc", "NETcDf", "NETcDf"), # TODO #401 normalize format
("result.nc", "inV6l1d!!!", "inV6l1d!!!"), # TODO #401 this should fail
("result.json", None, None), # TODO #401 autodetect format from filename
("result.nc", None, None), # TODO #401 autodetect format from filename
("result.json", None, "GeoJSON"), # TODO #401 autodetect format from filename?
("result.nc", None, "GeoJSON"), # TODO #401 autodetect format from filename
],
)
def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_path, filename, format, expected_format):
vector_cube.download(tmp_path / filename, format=format)
output_path = tmp_path / filename
vector_cube.download(output_path, format=format)

assert download_spy.only_request == {
"createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
Expand All @@ -135,11 +141,12 @@ def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_pa
"result": True,
},
}
assert (tmp_path / filename).read_bytes() == b"Spy data"
assert output_path.read_bytes() == b"Spy data"


def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_path):
vector_cube.download(tmp_path / "result.json", format="GeoJSON", options={"precision": 7})
output_path = tmp_path / "result.json"
vector_cube.download(output_path, format="GeoJSON", options={"precision": 7})

assert download_spy.only_request == {
"createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
Expand All @@ -153,24 +160,28 @@ def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_p
"result": True,
},
}
assert (tmp_path / "result.json").read_bytes() == b"Spy data"
assert output_path.read_bytes() == b"Spy data"


def test_save_result_and_download(vector_cube, download_spy, tmp_path):
@pytest.mark.parametrize(
["format", "expected_format"],
[
(None, "GeoJSON"),
("JSON", "JSON"),
("netCDF", "netCDF"),
],
)
def test_save_result_and_download(vector_cube, download_spy, tmp_path, format, expected_format):
"""e.g. https://github.com/Open-EO/openeo-geopyspark-driver/issues/477"""
vector_cube = vector_cube.save_result(format="JSON")
vector_cube.download(tmp_path / "result.json")
# TODO #401 there should only be one save_result node
vector_cube = vector_cube.save_result(format=format)
output_path = tmp_path / "result.json"
vector_cube.download(output_path)
assert download_spy.only_request == {
"createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
"saveresult1": {
"process_id": "save_result",
"arguments": {"data": {"from_node": "createvectorcube1"}, "format": "JSON", "options": {}},
},
"saveresult2": {
"process_id": "save_result",
"arguments": {"data": {"from_node": "saveresult1"}, "format": "GeoJSON", "options": {}},
"arguments": {"data": {"from_node": "createvectorcube1"}, "format": expected_format, "options": {}},
"result": True,
},
}
assert (tmp_path / "result.json").read_bytes() == b"Spy data"
assert output_path.read_bytes() == b"Spy data"

0 comments on commit 8cbaf25

Please sign in to comment.