Skip to content

Commit

Permalink
Merge branch 'Open-EO:master' into add_load_stac
Browse files Browse the repository at this point in the history
  • Loading branch information
clausmichele authored Jul 17, 2023
2 parents ae7f8d9 + 8cbaf25 commit 4b5ee22
Show file tree
Hide file tree
Showing 4 changed files with 204 additions and 8 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Fixed

- Avoid double `save_result` nodes when combining `VectorCube.save_result()` and `.download()`.
([#401](https://github.com/Open-EO/openeo-python-client/issues/401), [#448](https://github.com/Open-EO/openeo-python-client/issues/448))


## [0.20.0] - 2023-06-30

Expand Down
2 changes: 1 addition & 1 deletion openeo/rest/datacube.py
Original file line number Diff line number Diff line change
Expand Up @@ -1904,7 +1904,7 @@ def _ensure_save_result(
:param options: (optional) desired `save_result` file format parameters
:return:
"""
# TODO: move to generic data cube parent class (not only for raster cubes, but also vector cubes)
# TODO #401 Unify with VectorCube._ensure_save_result and move to generic data cube parent class (not only for raster cubes, but also vector cubes)
result_node = self.result_node()
if result_node.process_id == "save_result":
# There is already a `save_result` node:
Expand Down
49 changes: 42 additions & 7 deletions openeo/rest/vectorcube.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,23 +89,57 @@ def run_udf(
)

@openeo_process
def save_result(self, format: Union[str, None] = "GeoJSON", options: dict = None):
    """
    Add a `save_result` node to the process graph of this vector cube.

    :param format: output file format; `None` falls back to "GeoJSON"
    :param options: (optional) file format parameters; `None` falls back to `{}`
    :return: new vector cube with the `save_result` node appended
    """
    # NOTE: this span in the rendered diff contained both the pre-change and
    # post-change lines; this is the reconstructed post-change implementation.
    # TODO #401: guard against duplicate save_result nodes?
    return self.process(
        process_id="save_result",
        arguments={
            "data": self,
            "format": format or "GeoJSON",
            "options": options or {},
        },
    )

def _ensure_save_result(
    self,
    format: Optional[str] = None,
    options: Optional[dict] = None,
) -> "VectorCube":
    """
    Make sure there is a (final) `save_result` node in the process graph.

    If one is already present: verify it is consistent with the given
    format/options (if any); otherwise append a new `save_result` node.

    :param format: (optional) desired `save_result` file format
    :param options: (optional) desired `save_result` file format parameters
    :return: vector cube guaranteed to end in a `save_result` node
    """
    # TODO #401 Unify with DataCube._ensure_save_result and move to generic data cube parent class
    node = self.result_node()
    if node.process_id != "save_result":
        # No `save_result` node yet: automatically append one (GeoJSON by default).
        return self.save_result(format=format or "GeoJSON", options=options)
    # A `save_result` node already exists:
    # check consistency with the requested format/options (when given).
    existing = node.arguments
    if format is not None and existing["format"].lower() != format.lower():
        raise ValueError(f"Existing `save_result` node with different format {existing['format']!r} != {format!r}")
    if options is not None and options != existing["options"]:
        raise ValueError(
            f"Existing `save_result` node with different options {existing['options']!r} != {options!r}"
        )
    return self

def execute(self) -> dict:
    """Execute the process graph of this vector cube synchronously and return the result."""
    process_graph = self.flat_graph()
    return self._connection.execute(process_graph)

def download(self, outputfile: Union[str, pathlib.Path], format: Optional[str] = None, options: dict = None):
    """
    Execute the process graph synchronously and download the result to the given file.

    Reuses an existing final `save_result` node when present (and consistent with
    the given format/options); only adds one when necessary.

    :param outputfile: path of the file to download the result to
    :param format: (optional) desired `save_result` file format
        (when no `save_result` node is present yet, `None` falls back to "GeoJSON")
    :param options: (optional) desired `save_result` file format parameters
    """
    # NOTE: this span in the rendered diff contained both the pre-change and
    # post-change `def` lines; this is the reconstructed post-change implementation.
    # TODO #401 guess format from outputfile?
    # TODO #401 make outputfile optional (See DataCube.download)
    cube = self._ensure_save_result(format=format, options=options)
    return self._connection.download(cube.flat_graph(), outputfile)

def execute_batch(
Expand Down Expand Up @@ -160,6 +194,7 @@ def create_job(
cube = self
if out_format:
# add `save_result` node
# TODO #401: avoid duplicate save_result
cube = cube.save_result(format=out_format, options=format_options)
return self._connection.create_job(
process_graph=cube.flat_graph(),
Expand Down
158 changes: 158 additions & 0 deletions tests/rest/datacube/test_vectorcube.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,56 @@
from pathlib import Path
from typing import List

import pytest

from openeo.internal.graph_building import PGNode
from openeo.rest.vectorcube import VectorCube


@pytest.fixture
def vector_cube(con100) -> VectorCube:
    """Fixture: minimal vector cube whose graph is a single `create_vector_cube` node."""
    return VectorCube(graph=PGNode(process_id="create_vector_cube"), connection=con100)


class DownloadSpy:
    """
    Test helper to track download requests and optionally override next response to return.
    """

    __slots__ = ["requests", "next_response"]

    def __init__(self):
        # Process graphs of observed download requests, in order of arrival.
        self.requests: List[dict] = []
        # Payload that the mocked endpoint will return on the next request.
        self.next_response: bytes = b"Spy data"

    @property
    def only_request(self) -> dict:
        """Get process graph of the single request done (fails when not exactly one)."""
        assert len(self.requests) == 1
        return self.requests[-1]

    @property
    def last_request(self) -> dict:
        """Get process graph of the most recent request (fails when none yet)."""
        assert self.requests
        return self.requests[-1]


@pytest.fixture
def download_spy(requests_mock, con100) -> DownloadSpy:
    """Test fixture to spy on (and mock) `POST /result` (download) requests."""
    spy = DownloadSpy()

    def handle_post_result(request, context):
        # Record the process graph of this download request and reply with the canned payload.
        process_graph = request.json()["process"]["process_graph"]
        spy.requests.append(process_graph)
        return spy.next_response

    requests_mock.post(con100.build_url("/result"), content=handle_post_result)
    yield spy


def test_raster_to_vector(con100):
img = con100.load_collection("S2")
vector_cube = img.raster_to_vector()
Expand Down Expand Up @@ -27,3 +80,108 @@ def test_raster_to_vector(con100):
'process_id': 'run_udf',
'result': True}
}


@pytest.mark.parametrize(
    ["filename", "expected_format"],
    [
        ("result.json", "GeoJSON"),  # TODO #401 possible to detect "GeoJSON from ".json" extension?
        ("result.geojson", "GeoJSON"),
        ("result.nc", "GeoJSON"),  # TODO #401 autodetect format from filename
    ],
)
@pytest.mark.parametrize("path_class", [str, Path])
def test_download_auto_save_result_only_file(
    vector_cube, download_spy, tmp_path, filename, expected_format, path_class
):
    """Downloading without explicit format should auto-append a `save_result` node (GeoJSON default)."""
    path = tmp_path / filename
    vector_cube.download(path_class(path))

    expected_graph = {
        "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
        "saveresult1": {
            "process_id": "save_result",
            "arguments": {
                "data": {"from_node": "createvectorcube1"},
                "format": expected_format,
                "options": {},
            },
            "result": True,
        },
    }
    assert download_spy.only_request == expected_graph
    assert path.read_bytes() == b"Spy data"


@pytest.mark.parametrize(
    ["filename", "format", "expected_format"],
    [
        ("result.json", "JSON", "JSON"),
        ("result.geojson", "GeoJSON", "GeoJSON"),
        ("result.nc", "netCDF", "netCDF"),
        # TODO #401 more formats to autodetect?
        ("result.nc", "NETcDf", "NETcDf"),  # TODO #401 normalize format
        ("result.nc", "inV6l1d!!!", "inV6l1d!!!"),  # TODO #401 this should fail
        ("result.json", None, "GeoJSON"),  # TODO #401 autodetect format from filename?
        ("result.nc", None, "GeoJSON"),  # TODO #401 autodetect format from filename
    ],
)
def test_download_auto_save_result_with_format(vector_cube, download_spy, tmp_path, filename, format, expected_format):
    """An explicit `format` argument should land unchanged in the auto-added `save_result` node."""
    path = tmp_path / filename
    vector_cube.download(path, format=format)

    expected_graph = {
        "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
        "saveresult1": {
            "process_id": "save_result",
            "arguments": {
                "data": {"from_node": "createvectorcube1"},
                "format": expected_format,
                "options": {},
            },
            "result": True,
        },
    }
    assert download_spy.only_request == expected_graph
    assert path.read_bytes() == b"Spy data"


def test_download_auto_save_result_with_options(vector_cube, download_spy, tmp_path):
    """Explicit `options` should be forwarded into the auto-added `save_result` node."""
    path = tmp_path / "result.json"
    vector_cube.download(path, format="GeoJSON", options={"precision": 7})

    expected_graph = {
        "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
        "saveresult1": {
            "process_id": "save_result",
            "arguments": {
                "data": {"from_node": "createvectorcube1"},
                "format": "GeoJSON",
                "options": {"precision": 7},
            },
            "result": True,
        },
    }
    assert download_spy.only_request == expected_graph
    assert path.read_bytes() == b"Spy data"


@pytest.mark.parametrize(
    ["format", "expected_format"],
    [
        (None, "GeoJSON"),
        ("JSON", "JSON"),
        ("netCDF", "netCDF"),
    ],
)
def test_save_result_and_download(vector_cube, download_spy, tmp_path, format, expected_format):
    """e.g. https://github.com/Open-EO/openeo-geopyspark-driver/issues/477"""
    # Explicit save_result followed by download must not create a second save_result node.
    cube = vector_cube.save_result(format=format)
    path = tmp_path / "result.json"
    cube.download(path)

    expected_graph = {
        "createvectorcube1": {"process_id": "create_vector_cube", "arguments": {}},
        "saveresult1": {
            "process_id": "save_result",
            "arguments": {"data": {"from_node": "createvectorcube1"}, "format": expected_format, "options": {}},
            "result": True,
        },
    }
    assert download_spy.only_request == expected_graph
    assert path.read_bytes() == b"Spy data"

0 comments on commit 4b5ee22

Please sign in to comment.