From 342c1973157903b03eb8887954abf049a559b858 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Wed, 20 Sep 2023 16:45:05 -0400 Subject: [PATCH] Add odc-stac as an option in addition to stackstac (#26) --- .github/workflows/tests.yml | 5 ++-- README.md | 11 +++++---- environment.yaml | 3 ++- tests/test_core.py | 22 ++++++++++++++++- xpystac/core.py | 47 +++++++++++++++++++++++++++++++------ 5 files changed, 71 insertions(+), 17 deletions(-) diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index edfbdc7..0d5c78c 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -35,12 +35,11 @@ jobs: uses: actions/checkout@v3.3.0 - name: setup micromamba - uses: mamba-org/provision-with-micromamba@main + uses: mamba-org/setup-micromamba@main with: environment-file: ${{ matrix.environment-file }} micromamba-version: "latest" - extra-specs: python=${{ matrix.python-version }} - channel-priority: "flexible" + create-args: python=${{ matrix.python-version }} - name: install xpystac run: pip install . diff --git a/README.md b/README.md index 2ae85c3..dbb94c1 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,13 @@ import xarray as xr catalog = pystac_client.Client.open( - "https://earth-search.aws.element84.com/v0" + "https://earth-search.aws.element84.com/v1", ) search = catalog.search( intersects=dict(type="Point", coordinates=[-105.78, 35.79]), - collections=['sentinel-s2-l2a-cogs'], - datetime="2020-04-01/2020-05-01", + collections=['sentinel-2-l2a'], + datetime="2022-04-01/2022-05-01", ) xr.open_dataset(search, engine="stac") @@ -70,8 +70,9 @@ pip install git+https://github.com/stac-utils/xpystac ## How it works -When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that open call to the correct library. -Depending on the ``type`` of ``object`` that might be [stackstac](https://github.com/gjoseph92/stackstac) +When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that `open` call to the correct library. +Depending on the ``type`` of ``object`` that might be a stacking library (either +[odc-stac](https://github.com/opendatacube/odc-stac) or [stackstac](https://github.com/gjoseph92/stackstac)) or back to ``xarray.open_dataset`` itself but with the engine and other options pulled from the pystac object. ## Prior Art diff --git a/environment.yaml b/environment.yaml index 2e84b2f..c32d731 100644 --- a/environment.yaml +++ b/environment.yaml @@ -1,4 +1,4 @@ -name: xpystac-dev +name: xpystac-broken channels: - conda-forge - nodefaults @@ -11,6 +11,7 @@ dependencies: - adlfs - aiohttp - fsspec + - odc-stac - planetary-computer - pystac-client - requests diff --git a/tests/test_core.py b/tests/test_core.py index 803ca2d..688c04b 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -1,4 +1,6 @@ +import dask.array import pytest +import xarray as xr from xpystac.core import to_xarray @@ -9,10 +11,28 @@ def test_to_xarray_with_cog_asset(simple_cog): def test_to_xarray_with_pystac_client_search(simple_search): - ds = to_xarray(simple_search, assets=["blue", "green", "red"]) + ds = to_xarray(simple_search) assert ds +def test_to_xarray_returns_dask_backed_object(simple_search): + ds = to_xarray(simple_search) + assert isinstance(ds.blue.data, dask.array.Array) + assert ds.blue.data.npartitions > 1 + + +def test_to_xarray_with_pystac_client_search_passes_kwargs_through(simple_search): + ds = to_xarray(simple_search, bands=["red", "green", "blue"], chunks={}) + assert list(ds.data_vars) == ["red", "green", "blue"] + assert ds.blue.data.npartitions == 1 + + +@pytest.mark.parametrize("stacking_library", ["odc.stac", "stackstac"]) +def test_to_xarray_with_different_stacking_library(simple_search, stacking_library): + ds = to_xarray(simple_search, stacking_library=stacking_library) + assert isinstance(ds, xr.Dataset) + + def test_to_xarray_with_drop_variables_raises(simple_search): with pytest.raises(KeyError, match="not implemented for pystac items"): to_xarray(simple_search, drop_variables=["blue"]) diff --git a/xpystac/core.py b/xpystac/core.py index a9d90f8..aacdf5c 100644 --- a/xpystac/core.py +++ b/xpystac/core.py @@ -1,5 +1,5 @@ import functools -from typing import List, Mapping, Union +from typing import List, Literal, Mapping, Union import pystac import xarray @@ -8,11 +8,20 @@ @functools.singledispatch -def to_xarray(obj, **kwargs) -> xarray.Dataset: - """Given a pystac object return an xarray dataset""" +def to_xarray( + obj, + stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None, + **kwargs, +) -> xarray.Dataset: + """Given a pystac object return an xarray dataset + + When stacking multiple items, an optional ``stacking_library`` argument + is accepted. It defaults to ``odc.stac`` if available and otherwise ``stackstac``. + Control the behavior by setting ``stacking_library`` + """ if _is_item_search(obj): item_collection = obj.item_collection() - return to_xarray(item_collection, **kwargs) + return to_xarray(item_collection, stacking_library=stacking_library, **kwargs) raise TypeError @@ -21,16 +30,40 @@ def to_xarray(obj, **kwargs) -> xarray.Dataset: def _( obj: Union[pystac.Item, pystac.ItemCollection], drop_variables: Union[str, List[str], None] = None, + stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None, **kwargs, ) -> xarray.Dataset: - stackstac = _import_optional_dependency("stackstac") if drop_variables is not None: raise KeyError("``drop_variables`` not implemented for pystac items") - return stackstac.stack(obj, **kwargs).to_dataset(dim="band", promote_attrs=True) + + if stacking_library is None: + try: + _import_optional_dependency("odc.stac") + stacking_library = "odc.stac" + except ImportError: + _import_optional_dependency("stackstac") + stacking_library = "stackstac" + elif stacking_library not in ["odc.stac", "stackstac"]: + raise ValueError(f"{stacking_library=} is not a valid option") + + if stacking_library == "odc.stac": + odc_stac = _import_optional_dependency("odc.stac") + if isinstance(obj, pystac.Item): + items = [obj] + else: + items = [i for i in obj] + return odc_stac.load(items, **{"chunks": {"x": 1024, "y": 1024}, **kwargs}) + elif stacking_library == "stackstac": + stackstac = _import_optional_dependency("stackstac") + return stackstac.stack(obj, **kwargs).to_dataset(dim="band", promote_attrs=True) @to_xarray.register -def _(obj: pystac.Asset, **kwargs) -> xarray.Dataset: +def _( + obj: pystac.Asset, + stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None, + **kwargs, +) -> xarray.Dataset: default_kwargs: Mapping = {"chunks": {}} open_kwargs = obj.extra_fields.get("xarray:open_kwargs", {})