diff --git a/CHANGELOG.txt b/CHANGELOG.txt index b84caba..9959792 100644 --- a/CHANGELOG.txt +++ b/CHANGELOG.txt @@ -8,6 +8,8 @@ - Enforce required `type` key for `Collection` and `Catalog` models - Add queryables link relation type (#123, @constantinius) - Fix STAC API Query Extension operator names from ne->neq, le->lte, and ge->gte (#120, @philvarner) +- Improve **datetime** parsing/validation by using Pydantic native types and remove the `ciso8601` requirement (#131, @eseglem) +- Move datetime validation into the `StacCommonMetadata` model definition (#131, @eseglem) 3.0.0 (2024-01-25) ------------------ diff --git a/pyproject.toml b/pyproject.toml index b975144..35a2b91 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,12 +20,7 @@ keywords=["stac", "pydantic", "validation"] authors=[{ name = "Arturo Engineering", email = "engineering@arturo.ai"}] license= { text = "MIT" } requires-python=">=3.8" -dependencies = [ - "click>=8.1.7", - "pydantic>=2.4.1", - "geojson-pydantic>=1.0.0", - "ciso8601~=2.3", -] +dependencies = ["click>=8.1.7", "pydantic>=2.4.1", "geojson-pydantic>=1.0.0"] dynamic = ["version", "readme"] [project.scripts] @@ -37,7 +32,6 @@ repository ="https://github.com/stac-utils/stac-pydantic.git" [project.optional-dependencies] dev = [ - "arrow>=1.2.3", "pytest>=7.4.2", "pytest-cov>=4.1.0", "pytest-icdiff>=0.8", diff --git a/stac_pydantic/api/search.py b/stac_pydantic/api/search.py index a8ff277..62d1e73 100644 --- a/stac_pydantic/api/search.py +++ b/stac_pydantic/api/search.py @@ -1,9 +1,8 @@ from datetime import datetime as dt from typing import Any, Dict, List, Optional, Tuple, Union, cast -from ciso8601 import parse_rfc3339 -from geojson_pydantic.geometries import GeometryCollection # type: ignore from geojson_pydantic.geometries import ( + GeometryCollection, LineString, MultiLineString, MultiPoint, @@ -11,12 +10,12 @@ Point, Polygon, ) -from pydantic import BaseModel, Field, field_validator, model_validator +from pydantic import BaseModel, Field, 
TypeAdapter, field_validator, model_validator from stac_pydantic.api.extensions.fields import FieldsExtension from stac_pydantic.api.extensions.query import Operator from stac_pydantic.api.extensions.sort import SortExtension -from stac_pydantic.shared import BBox +from stac_pydantic.shared import BBox, UtcDatetime Intersection = Union[ Point, @@ -28,6 +27,8 @@ GeometryCollection, ] +SearchDatetime = TypeAdapter(Optional[UtcDatetime]) + class Search(BaseModel): """ @@ -43,23 +44,18 @@ class Search(BaseModel): datetime: Optional[str] = None limit: int = 10 + # Private properties to store the parsed datetime values. Not part of the model schema. + _start_date: Optional[dt] = None + _end_date: Optional[dt] = None + + # Properties to return the private values @property def start_date(self) -> Optional[dt]: - values = (self.datetime or "").split("/") - if len(values) == 1: - return None - if values[0] == ".." or values[0] == "": - return None - return parse_rfc3339(values[0]) + return self._start_date @property def end_date(self) -> Optional[dt]: - values = (self.datetime or "").split("/") - if len(values) == 1: - return parse_rfc3339(values[0]) - if values[1] == ".." or values[1] == "": - return None - return parse_rfc3339(values[1]) + return self._end_date # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information. @model_validator(mode="before") @@ -102,32 +98,43 @@ def validate_bbox(cls, v: BBox) -> BBox: @field_validator("datetime") @classmethod - def validate_datetime(cls, v: str) -> str: - if "/" in v: - values = v.split("/") - else: - # Single date is interpreted as end date - values = ["..", v] - - dates: List[dt] = [] - for value in values: - if value == ".." or value == "": - continue - - dates.append(parse_rfc3339(value)) + def validate_datetime(cls, value: str) -> str: + # Split on "/" and replace no value or ".." with None + values = [v if v and v != ".." 
else None for v in value.split("/")] + # If there are more than 2 dates, it's invalid if len(values) > 2: raise ValueError( - "Invalid datetime range, must match format (begin_date, end_date)" + "Invalid datetime range. Too many values. Must match format: {begin_date}/{end_date}" ) - if not {"..", ""}.intersection(set(values)): - if dates[0] > dates[1]: - raise ValueError( - "Invalid datetime range, must match format (begin_date, end_date)" - ) + # If there is only one date, insert a None for the start date + if len(values) == 1: + values.insert(0, None) + + # Cast because pylance gets confused by the type adapter and annotated type + dates = cast( + List[Optional[dt]], + [ + # Use the type adapter to validate the datetime strings, strict is necessary + # due to pydantic issues #8736 and #8762 + SearchDatetime.validate_strings(v, strict=True) if v else None + for v in values + ], + ) + + # If there is a start and end date, check that the start date is before the end date + if dates[0] and dates[1] and dates[0] > dates[1]: + raise ValueError( + "Invalid datetime range. Begin date after end date. 
" + "Must match format: {begin_date}/{end_date}" + ) - return v + # Store the parsed dates + cls._start_date = dates[0] + cls._end_date = dates[1] + # Return the original string value + return value @property def spatial_filter(self) -> Optional[Intersection]: diff --git a/stac_pydantic/item.py b/stac_pydantic/item.py index 452b51a..4b2d54a 100644 --- a/stac_pydantic/item.py +++ b/stac_pydantic/item.py @@ -1,24 +1,15 @@ -from datetime import datetime as dt -from typing import Any, Dict, List, Optional, Union +from typing import Any, Dict, List, Optional -from ciso8601 import parse_rfc3339 from geojson_pydantic import Feature -from pydantic import ( - AnyUrl, - ConfigDict, - Field, - field_serializer, - model_serializer, - model_validator, -) +from pydantic import AnyUrl, ConfigDict, Field, model_serializer, model_validator from stac_pydantic.links import Links from stac_pydantic.shared import ( - DATETIME_RFC339, SEMVER_REGEX, Asset, StacBaseModel, StacCommonMetadata, + UtcDatetime, ) from stac_pydantic.version import STAC_VERSION @@ -28,39 +19,12 @@ class ItemProperties(StacCommonMetadata): https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#properties-object """ - datetime: Union[dt, str] = Field(..., alias="datetime") + # Override the datetime field to make it required (the value may still be null) + datetime: Optional[UtcDatetime] # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information. 
model_config = ConfigDict(extra="allow") - @model_validator(mode="before") - @classmethod - def validate_datetime(cls, data: Dict[str, Any]) -> Dict[str, Any]: - datetime = data.get("datetime") - start_datetime = data.get("start_datetime") - end_datetime = data.get("end_datetime") - - if not datetime or datetime == "null": - if not start_datetime and not end_datetime: - raise ValueError( - "start_datetime and end_datetime must be specified when datetime is null" - ) - - if isinstance(datetime, str): - data["datetime"] = parse_rfc3339(datetime) - - if isinstance(start_datetime, str): - data["start_datetime"] = parse_rfc3339(start_datetime) - - if isinstance(end_datetime, str): - data["end_datetime"] = parse_rfc3339(end_datetime) - - return data - - @field_serializer("datetime") - def serialize_datetime(self, v: dt, _info: Any) -> str: - return v.strftime(DATETIME_RFC339) - class Item(Feature, StacBaseModel): """ diff --git a/stac_pydantic/shared.py b/stac_pydantic/shared.py index 412761a..126379b 100644 --- a/stac_pydantic/shared.py +++ b/stac_pydantic/shared.py @@ -1,9 +1,17 @@ -from datetime import datetime +from datetime import timezone from enum import Enum, auto from typing import Any, Dict, List, Optional, Tuple, Union from warnings import warn -from pydantic import BaseModel, ConfigDict, Field +from pydantic import ( + AfterValidator, + AwareDatetime, + BaseModel, + ConfigDict, + Field, + model_validator, +) +from typing_extensions import Annotated, Self from stac_pydantic.utils import AutoValueEnum @@ -15,9 +23,14 @@ SEMVER_REGEX = r"^(0|[1-9]\d*)\.(0|[1-9]\d*)\.(0|[1-9]\d*)(?:-((?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\.(?:0|[1-9]\d*|\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\+([0-9a-zA-Z-]+(?:\.[0-9a-zA-Z-]+)*))?$" -# https://tools.ietf.org/html/rfc3339#section-5.6 -# Unused, but leaving it here since it's used by dependencies -DATETIME_RFC339 = "%Y-%m-%dT%H:%M:%SZ" +# Allows for some additional flexibility in the input datetime format. 
As long as +# the input value has timezone information, it will be converted to UTC timezone. +UtcDatetime = Annotated[ + # Input value must be in a format which has timezone information + AwareDatetime, + # Convert the input value to UTC timezone + AfterValidator(lambda d: d.astimezone(timezone.utc)), +] class MimeTypes(str, Enum): @@ -106,7 +119,7 @@ class Provider(StacBaseModel): https://github.com/radiantearth/stac-spec/blob/v1.0.0/collection-spec/collection-spec.md#provider-object """ - name: str = Field(..., alias="name", min_length=1) + name: str = Field(..., min_length=1) description: Optional[str] = None roles: Optional[List[str]] = None url: Optional[str] = None @@ -114,21 +127,48 @@ class Provider(StacBaseModel): class StacCommonMetadata(StacBaseModel): """ - https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/common-metadata.md#date-and-time-range + https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/common-metadata.md """ - title: Optional[str] = Field(None, alias="title") - description: Optional[str] = Field(None, alias="description") - start_datetime: Optional[datetime] = Field(None, alias="start_datetime") - end_datetime: Optional[datetime] = Field(None, alias="end_datetime") - created: Optional[datetime] = Field(None, alias="created") - updated: Optional[datetime] = Field(None, alias="updated") - platform: Optional[str] = Field(None, alias="platform") - instruments: Optional[List[str]] = Field(None, alias="instruments") - constellation: Optional[str] = Field(None, alias="constellation") - mission: Optional[str] = Field(None, alias="mission") - providers: Optional[List[Provider]] = Field(None, alias="providers") - gsd: Optional[float] = Field(None, alias="gsd", gt=0) + # Basic + title: Optional[str] = None + description: Optional[str] = None + # Date and Time + datetime: Optional[UtcDatetime] = None + created: Optional[UtcDatetime] = None + updated: Optional[UtcDatetime] = None + # Date and Time Range + start_datetime: 
Optional[UtcDatetime] = None + end_datetime: Optional[UtcDatetime] = None + # Provider + providers: Optional[List[Provider]] = None + # Instrument + platform: Optional[str] = None + instruments: Optional[List[str]] = None + constellation: Optional[str] = None + mission: Optional[str] = None + gsd: Optional[float] = Field(None, gt=0) + + @model_validator(mode="after") + def validate_datetime_or_start_end(self) -> Self: + # When datetime is null, start_datetime and end_datetime must be specified + if not self.datetime and (not self.start_datetime or not self.end_datetime): + raise ValueError( + "start_datetime and end_datetime must be specified when datetime is null" + ) + + return self + + @model_validator(mode="after") + def validate_start_end(self) -> Self: + # Using one of start_datetime or end_datetime requires the use of the other + if (self.start_datetime and not self.end_datetime) or ( + not self.start_datetime and self.end_datetime + ): + raise ValueError( + "use of start_datetime or end_datetime requires the use of the other" + ) + return self class Asset(StacCommonMetadata): @@ -136,11 +176,19 @@ class Asset(StacCommonMetadata): https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#asset-object """ - href: str = Field(..., alias="href", min_length=1) + href: str = Field(..., min_length=1) type: Optional[str] = None title: Optional[str] = None description: Optional[str] = None roles: Optional[List[str]] = None + model_config = ConfigDict( populate_by_name=True, use_enum_values=True, extra="allow" ) + + @model_validator(mode="after") + def validate_datetime_or_start_end(self) -> Self: + # Overriding the parent method to avoid requiring datetime or start/end_datetime + # Additional fields MAY be added on the Asset object, but are not required. 
+ # https://github.com/radiantearth/stac-spec/blob/v1.0.0/item-spec/item-spec.md#additional-fields-for-assets + return self diff --git a/tests/api/extensions/test_fields.py b/tests/api/extensions/test_fields.py index cbc2e74..664057c 100644 --- a/tests/api/extensions/test_fields.py +++ b/tests/api/extensions/test_fields.py @@ -1,4 +1,4 @@ -from datetime import datetime +from datetime import datetime, timezone from shapely.geometry import Polygon @@ -15,7 +15,7 @@ def test_fields_filter_item(): item = Item( id="test-fields-filter", geometry=Polygon.from_bounds(0, 0, 0, 0), - properties={"datetime": datetime.utcnow(), "foo": "foo", "bar": "bar"}, + properties={"datetime": datetime.now(timezone.utc), "foo": "foo", "bar": "bar"}, assets={}, links=[ {"href": "http://link", "rel": "self"}, diff --git a/tests/api/test_search.py b/tests/api/test_search.py index 7bb9a1b..15fd19c 100644 --- a/tests/api/test_search.py +++ b/tests/api/test_search.py @@ -1,4 +1,3 @@ -import time from datetime import datetime, timedelta, timezone import pytest @@ -6,7 +5,6 @@ from shapely.geometry import Polygon, shape from stac_pydantic.api.search import Search -from stac_pydantic.shared import DATETIME_RFC339 def test_search(): @@ -57,8 +55,8 @@ def test_invalid_spatial_search(): def test_temporal_search_single_tailed(): # Test single tailed - utcnow = datetime.utcnow().replace(microsecond=0, tzinfo=timezone.utc) - utcnow_str = utcnow.strftime(DATETIME_RFC339) + utcnow = datetime.now(timezone.utc) + utcnow_str = utcnow.isoformat() search = Search(collections=["collection1"], datetime=utcnow_str) assert search.start_date is None assert search.end_date == utcnow @@ -66,8 +64,8 @@ def test_temporal_search_single_tailed(): def test_temporal_search_two_tailed(): # Test two tailed - utcnow = datetime.utcnow().replace(microsecond=0, tzinfo=timezone.utc) - utcnow_str = utcnow.strftime(DATETIME_RFC339) + utcnow = datetime.now(timezone.utc) + utcnow_str = utcnow.isoformat() search = 
Search(collections=["collection1"], datetime=f"{utcnow_str}/{utcnow_str}") assert search.start_date == search.end_date == utcnow @@ -87,29 +85,33 @@ def test_temporal_search_open(): assert search.end_date is None -def test_invalid_temporal_search(): - # Not RFC339 - utcnow = datetime.utcnow().strftime("%Y-%m-%d") +def test_invalid_temporal_search_date(): + # Just a date, no time + utcnow = datetime.now(timezone.utc).strftime("%Y-%m-%d") with pytest.raises(ValidationError): Search(collections=["collection1"], datetime=utcnow) - t1 = datetime.utcnow() + +def test_invalid_temporal_search_too_many(): + # Too many dates + t1 = datetime.now(timezone.utc) t2 = t1 + timedelta(seconds=100) t3 = t2 + timedelta(seconds=100) with pytest.raises(ValidationError): Search( collections=["collection1"], - datetime=f"{t1.strftime(DATETIME_RFC339)}/{t2.strftime(DATETIME_RFC339)}/{t3.strftime(DATETIME_RFC339)}", + datetime=f"{t1.isoformat()}/{t2.isoformat()}/{t3.isoformat()}", ) + +def test_invalid_temporal_search_date_wrong_order(): # End date is before start date - start = datetime.utcnow() - time.sleep(2) - end = datetime.utcnow() + start = datetime.now(timezone.utc) + end = start - timedelta(seconds=100) with pytest.raises(ValidationError): Search( collections=["collection1"], - datetime=f"{end.strftime(DATETIME_RFC339)}/{start.strftime(DATETIME_RFC339)}", + datetime=f"{start.isoformat()}/{end.isoformat()}", ) diff --git a/tests/conftest.py b/tests/conftest.py index 9b3ca20..9152816 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,15 +1,15 @@ import json -import operator import os from copy import deepcopy from typing import List, Optional, Type -import arrow import dictdiffer import pytest import requests from click.testing import CliRunner -from pydantic import BaseModel +from pydantic import BaseModel, TypeAdapter + +from stac_pydantic.shared import UtcDatetime def request(url: str, path: Optional[List[str]] = None): @@ -30,6 +30,10 @@ def request(url: str, path: 
Optional[List[str]] = None): return json.loads(full_file) +# Use a TypeAdapter to parse any datetime strings in a consistent manner +UtcDatetimeAdapter = TypeAdapter(UtcDatetime) + + def dict_match(d1: dict, d2: dict): test = dictdiffer.diff(d1, d2) for diff in test: @@ -39,16 +43,17 @@ def dict_match(d1: dict, d2: dict): # same for bbox elif "bbox" in diff[1]: assert list(diff[2][0]) == list(diff[2][1]) - # test data is pretty variable with how it represents datetime, RFC3339 is quite flexible - # but stac-pydantic only supports a single datetime format, so just validate to the day. + # RFC3339 is quite flexible and the test data uses various options to represent datetimes. + # The datetime string stac-pydantic outputs may not be identical to the input. So we need + # to compare the values as datetime objects. elif "datetime" in diff[1]: - dates = [] - for date in diff[2]: - if isinstance(date, str): - date = arrow.get(date) - dates.append(date) - dates.sort(reverse=True) - assert operator.sub(*dates).days == 0 + dates = [ + UtcDatetimeAdapter.validate_strings(date) + if isinstance(date, str) + else date + for date in diff[2] + ] + assert dates[0] == dates[1] # any other differences are errors elif "stac_extensions" in diff[1]: url1, url2 = map(str, diff[2]) diff --git a/tests/test_models.py b/tests/test_models.py index c64637e..6472bee 100644 --- a/tests/test_models.py +++ b/tests/test_models.py @@ -8,7 +8,7 @@ from stac_pydantic import Collection, Item, ItemProperties from stac_pydantic.extensions import validate_extensions from stac_pydantic.links import Link, Links -from stac_pydantic.shared import MimeTypes +from stac_pydantic.shared import MimeTypes, StacCommonMetadata from .conftest import dict_match, request @@ -160,6 +160,57 @@ def test_geo_interface() -> None: Item(**test_item) +@pytest.mark.parametrize( + "args", + [ + {"datetime": "2024-01-01T00:00:00Z"}, + { + "datetime": None, + "start_datetime": "2024-01-01T00:00:00Z", + "end_datetime": 
"2024-01-02T00:00:00Z", + }, + { + "datetime": "2024-01-01T00:00:00Z", + "start_datetime": "2024-01-01T00:00:00Z", + "end_datetime": "2024-01-02T00:00:00Z", + }, + ], +) +def test_stac_common_dates(args) -> None: + StacCommonMetadata(**args) + + +@pytest.mark.parametrize( + "args", + [ + {"datetime": None}, + {"datetime": None, "start_datetime": "2024-01-01T00:00:00Z"}, + {"datetime": None, "end_datetime": "2024-01-01T00:00:00Z"}, + ], +) +def test_stac_common_no_dates(args) -> None: + with pytest.raises( + ValueError, + match="start_datetime and end_datetime must be specified when datetime is null", + ): + StacCommonMetadata(**args) + + +@pytest.mark.parametrize( + "args", + [ + {"datetime": "2024-01-01T00:00:00Z", "start_datetime": "2024-01-01T00:00:00Z"}, + {"datetime": "2024-01-01T00:00:00Z", "end_datetime": "2024-01-01T00:00:00Z"}, + ], +) +def test_stac_common_start_and_end(args) -> None: + with pytest.raises( + ValueError, + match="use of start_datetime or end_datetime requires the use of the other", + ): + StacCommonMetadata(**args) + + def test_declared_model() -> None: class TestProperties(ItemProperties): foo: str