From ab2ad969255adbab3f5509c82976555857509ed9 Mon Sep 17 00:00:00 2001 From: Stefaan Lippens Date: Mon, 7 Aug 2023 13:52:50 +0200 Subject: [PATCH] Issue #114/#211 initial support for `load_url` (vector cubes) --- openeo_driver/ProcessGraphDeserializer.py | 74 ++++++++++-------- openeo_driver/_version.py | 2 +- openeo_driver/datacube.py | 6 ++ openeo_driver/processes.py | 22 ++++++ openeo_driver/testing.py | 11 ++- tests/test_processes.py | 31 +++++++- tests/test_views_execute.py | 94 +++++++++++++++++++++++ 7 files changed, 204 insertions(+), 36 deletions(-) diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py index 6c978cdb..0b34f46a 100644 --- a/openeo_driver/ProcessGraphDeserializer.py +++ b/openeo_driver/ProcessGraphDeserializer.py @@ -6,29 +6,29 @@ import datetime import logging import math +import re import tempfile import time import warnings from pathlib import Path -from typing import Dict, Callable, List, Union, Tuple, Any, Iterable +from typing import Any, Callable, Dict, Iterable, List, Tuple, Union -import pandas as pd import geopandas as gpd import numpy as np +import openeo.udf import openeo_processes +import pandas as pd import pyproj import requests -from dateutil.relativedelta import relativedelta -from requests.structures import CaseInsensitiveDict import shapely.geometry -from shapely.geometry import shape, GeometryCollection, shape, mapping, MultiPolygon import shapely.ops - -import openeo.udf +from dateutil.relativedelta import relativedelta from openeo.capabilities import ComparableVersion -from openeo.internal.process_graph_visitor import ProcessGraphVisitor, ProcessGraphVisitException +from openeo.internal.process_graph_visitor import ProcessGraphVisitException, ProcessGraphVisitor from openeo.metadata import CollectionMetadata, MetadataException -from openeo.util import load_json, rfc3339, deep_get, str_truncate +from openeo.util import deep_get, load_json, rfc3339, str_truncate +from 
shapely.geometry import GeometryCollection, MultiPolygon, mapping, shape + from openeo_driver import dry_run from openeo_driver.backend import ( UserDefinedProcessMetadata, @@ -53,13 +53,9 @@ to_save_result, AggregatePolygonSpatialResult, MlModelResult from openeo_driver.specs import SPECS_ROOT, read_spec from openeo_driver.util.date_math import month_shift -from openeo_driver.util.geometry import ( - geojson_to_geometry, - geojson_to_multipolygon, - spatial_extent_union, -) +from openeo_driver.util.geometry import geojson_to_geometry, geojson_to_multipolygon, spatial_extent_union from openeo_driver.util.utm import auto_utm_epsg_for_geometry -from openeo_driver.utils import smart_bool, EvalEnv +from openeo_driver.utils import EvalEnv, smart_bool _log = logging.getLogger(__name__) @@ -1540,26 +1536,24 @@ def read_vector(args: Dict, env: EvalEnv) -> DelayedVector: @process_registry_100.add_function(spec=read_spec("openeo-processes/1.x/proposals/load_uploaded_files.json")) -def load_uploaded_files(args: dict, env: EvalEnv) -> Union[DriverVectorCube,DriverDataCube]: +def load_uploaded_files(args: ProcessArgs, env: EvalEnv) -> Union[DriverVectorCube, DriverDataCube]: # TODO #114 EP-3981 process name is still under discussion https://github.com/Open-EO/openeo-processes/issues/322 - paths = extract_arg(args, 'paths', process_id="load_uploaded_files") - format = extract_arg(args, 'format', process_id="load_uploaded_files") - options = args.get("options", {}) - - input_formats = CaseInsensitiveDict(env.backend_implementation.file_formats()["input"]) - if format not in input_formats: - raise FileTypeInvalidException(type=format, types=", ".join(input_formats.keys())) + paths = args.get_required("paths", expected_type=list) + format = args.get_required( + "format", + expected_type=str, + validator=ProcessArgs.validator_file_format(formats=env.backend_implementation.file_formats()["input"]), + ) + options = args.get_optional("options", default={}) - if format.lower() in 
{"geojson", "esri shapefile", "gpkg", "parquet"}: + if DriverVectorCube.from_fiona_supports(format): return DriverVectorCube.from_fiona(paths, driver=format, options=options) elif format.lower() in {"GTiff"}: - if(len(paths)!=1): - raise FeatureUnsupportedException(f"load_uploaded_files only supports a single raster of format {format!r}, you provided {paths}") - kwargs = dict( - glob_pattern=paths[0], - format=format, - options=options - ) + if len(paths) != 1: + raise FeatureUnsupportedException( + f"load_uploaded_files only supports a single raster of format {format!r}, you provided {paths}" + ) + kwargs = dict(glob_pattern=paths[0], format=format, options=options) dry_run_tracer: DryRunDataTracer = env.get(ENV_DRY_RUN_TRACER) if dry_run_tracer: return dry_run_tracer.load_disk_data(**kwargs) @@ -1604,6 +1598,24 @@ def load_geojson(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube: return vector_cube +@process_registry_100.add_function(spec=read_spec("openeo-processes/2.x/proposals/load_url.json")) +def load_url(args: ProcessArgs, env: EvalEnv) -> DriverVectorCube: + # TODO: Follow up possible `load_url` changes https://github.com/Open-EO/openeo-processes/issues/450 ? + url = args.get_required("url", expected_type=str, validator=re.compile("^https?://").match) + format = args.get_required( + "format", + expected_type=str, + validator=ProcessArgs.validator_file_format(formats=env.backend_implementation.file_formats()["input"]), + ) + options = args.get_optional("options", default={}) + + if DriverVectorCube.from_fiona_supports(format): + # TODO: for GeoJSON (and related) support `properties` option like load_geojson? 
https://github.com/Open-EO/openeo-processes/issues/450 + return DriverVectorCube.from_fiona(paths=[url], driver=format, options=options) + else: + raise FeatureUnsupportedException(f"Loading format {format!r} is not supported") + + @non_standard_process( ProcessSpec("get_geometries", description="Reads vector data from a file or a URL or get geometries from a FeatureCollection") .param('filename', description="filename or http url of a vector file", schema={"type": "string"}, required=False) diff --git a/openeo_driver/_version.py b/openeo_driver/_version.py index 1db47040..95169f2b 100644 --- a/openeo_driver/_version.py +++ b/openeo_driver/_version.py @@ -1 +1 @@ -__version__ = "0.61.0a1" +__version__ = "0.61.1a1" diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py index dfc48804..f90af6fa 100644 --- a/openeo_driver/datacube.py +++ b/openeo_driver/datacube.py @@ -331,6 +331,12 @@ def from_geodataframe( ) return cls(geometries=data, cube=cube) + @classmethod + def from_fiona_supports(cls, format: str) -> bool: + """Does `from_fiona` support the given format?""" + # TODO: also cover input format options? + return format.lower() in {"geojson", "esri shapefile", "gpkg", "parquet"} + @classmethod def from_fiona( cls, diff --git a/openeo_driver/processes.py b/openeo_driver/processes.py index e77d8211..094d4773 100644 --- a/openeo_driver/processes.py +++ b/openeo_driver/processes.py @@ -6,6 +6,8 @@ from typing import Any, Callable, Collection, Dict, List, Optional, Tuple, Union from openeo_driver.errors import ( + FileTypeInvalidException, + OpenEOApiException, ProcessParameterInvalidException, ProcessParameterRequiredException, ProcessUnsupportedException, @@ -325,6 +327,9 @@ def _check_value( try: valid = validator(value) reason = "Failed validation." 
+ except OpenEOApiException: + # Preserve original OpenEOApiException + raise except Exception as e: valid = False reason = str(e) @@ -441,6 +446,23 @@ def validator(value): return validator + @staticmethod + def validator_file_format(formats: Union[List[str], Dict[str, dict]]): + """ + Build validator for input/output format (case-insensitive check) + + :param formats: list of valid formats, or dictionary with formats as keys + """ + formats = list(formats) + options = set(f.lower() for f in formats) + + def validator(value: str): + if value.lower() not in options: + raise FileTypeInvalidException(type=value, types=", ".join(formats)) + return True + + return validator + @staticmethod def validator_geojson_dict( allowed_types: Optional[Collection[str]] = None, diff --git a/openeo_driver/testing.py b/openeo_driver/testing.py index 24d18d66..0e8d2d82 100644 --- a/openeo_driver/testing.py +++ b/openeo_driver/testing.py @@ -614,10 +614,19 @@ def test_my_function(caplog, monkeypatch) @contextlib.contextmanager def ephemeral_fileserver(path: Union[Path, str], host: str = "localhost", port: int = 0) -> str: """ - Context manager to run a short-lived (static) file HTTP server, serving some local test data. + Context manager to run a short-lived (static) file HTTP server, serving files from a given local test data folder. + This is an alternative to traditional mocking of HTTP requests (e.g. with requests_mock) for situations where that doesn't work (requests are done in a subprocess or at the level of a C-extension/library). + Usage example: + + >>> # create temp file with `tmp_path` fixture + >>> (tmp_path / "hello.txt").write_text("Hello world") + >>> with ephemeral_fileserver(tmp_path) as fileserver_root: + ... res = subprocess.check_output(["curl", f"{fileserver_root}/hello.txt"]) + >>> assert res.strip() == b"Hello world" + :param path: root path of the local files to serve :return: root URL of the ephemeral file server (e.g. 
"http://localhost:21342") """ diff --git a/tests/test_processes.py b/tests/test_processes.py index 9ed730af..548242e4 100644 --- a/tests/test_processes.py +++ b/tests/test_processes.py @@ -4,11 +4,12 @@ from openeo_driver.datacube import DriverDataCube from openeo_driver.errors import ( - ProcessUnsupportedException, - ProcessParameterRequiredException, + FileTypeInvalidException, ProcessParameterInvalidException, + ProcessParameterRequiredException, + ProcessUnsupportedException, ) -from openeo_driver.processes import ProcessSpec, ProcessRegistry, ProcessRegistryException, ProcessArgs +from openeo_driver.processes import ProcessArgs, ProcessRegistry, ProcessRegistryException, ProcessSpec def test_process_spec_basic_040(): @@ -635,3 +636,27 @@ def test_validator_geojson_dict(self): ), ): _ = args.get_required("geometry", validator=validator) + + @pytest.mark.parametrize( + ["formats"], + [ + (["GeoJSON", "CSV"],), + ({"GeoJSON": {}, "CSV": {}},), + ], + ) + def test_validator_file_format(self, formats): + args = ProcessArgs( + {"format1": "GeoJSON", "format2": "geojson", "format3": "TooExotic"}, + process_id="wibble", + ) + + validator = ProcessArgs.validator_file_format(formats=formats) + + assert args.get_required("format1", validator=validator) == "GeoJSON" + assert args.get_required("format2", validator=validator) == "geojson" + + with pytest.raises( + FileTypeInvalidException, + match=re.escape("File format TooExotic not allowed. 
Allowed file formats: GeoJSON, CSV"), + ): + _ = args.get_required("format3", validator=validator) diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py index d1596a1c..ca081617 100644 --- a/tests/test_views_execute.py +++ b/tests/test_views_execute.py @@ -1965,11 +1965,105 @@ def test_to_vector_cube(self, api100, geojson, expected): ], ) def test_load_geojson(self, api100, geojson, expected): + # TODO: cover `properties` parameter res = api100.check_result( {"vc": {"process_id": "load_geojson", "arguments": {"data": geojson}, "result": True}} ) assert res.json == DictSubSet({"type": "FeatureCollection", "features": expected}) + @pytest.mark.parametrize( + ["geometry", "expected"], + [ + ( + {"type": "Point", "coordinates": (1, 2)}, + [ + { + "type": "Feature", + "geometry": {"type": "Point", "coordinates": [1, 2]}, + "properties": {}, + }, + ], + ), + ( + {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, + [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, + "properties": {}, + }, + ], + ), + ( + {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + [ + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {}, + }, + ], + ), + ( + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + "properties": {"id": "12_3"}, + }, + [ + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {"id": "12_3"}, + }, + ], + ), + ( + { + "type": "FeatureCollection", + "features": [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[(1, 1), (3, 1), (2, 3), (1, 1)]]}, + "properties": {"id": 1}, + }, + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[(1, 1), (3, 1), (2, 3), (1, 1)]]]}, + 
"properties": {"id": 2}, + }, + ], + }, + [ + { + "type": "Feature", + "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, + "properties": {"id": 1}, + }, + { + "type": "Feature", + "geometry": {"type": "MultiPolygon", "coordinates": [[[[1, 1], [3, 1], [2, 3], [1, 1]]]]}, + "properties": {"id": 2}, + }, + ], + ), + ], + ) + def test_load_url_geojson(self, api100, geometry, expected, tmp_path): + (tmp_path / "geometry.json").write_text(json.dumps(geometry)) + with ephemeral_fileserver(tmp_path) as fileserver_root: + url = f"{fileserver_root}/geometry.json" + res = api100.check_result( + { + "load": { + "process_id": "load_url", + "arguments": {"url": url, "format": "GeoJSON"}, + "result": True, + } + } + ) + assert res.json == DictSubSet({"type": "FeatureCollection", "features": expected}) + def test_no_nested_JSONResult(api): api.set_auth_bearer_token()