Skip to content

Commit

Permalink
Add odc-stac as an option in addition to stackstac (#26)
Browse files Browse the repository at this point in the history
  • Loading branch information
jsignell authored Sep 20, 2023
1 parent a183525 commit 342c197
Show file tree
Hide file tree
Showing 5 changed files with 71 additions and 17 deletions.
5 changes: 2 additions & 3 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,11 @@ jobs:
uses: actions/[email protected]

- name: setup micromamba
uses: mamba-org/provision-with-micromamba@main
uses: mamba-org/setup-micromamba@main
with:
environment-file: ${{ matrix.environment-file }}
micromamba-version: "latest"
extra-specs: python=${{ matrix.python-version }}
channel-priority: "flexible"
create-args: python=${{ matrix.python-version }}

- name: install xpystac
run: pip install .
Expand Down
11 changes: 6 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,13 @@ import xarray as xr


catalog = pystac_client.Client.open(
"https://earth-search.aws.element84.com/v0"
"https://earth-search.aws.element84.com/v1",
)

search = catalog.search(
intersects=dict(type="Point", coordinates=[-105.78, 35.79]),
collections=['sentinel-s2-l2a-cogs'],
datetime="2020-04-01/2020-05-01",
collections=['sentinel-2-l2a'],
datetime="2022-04-01/2022-05-01",
)

xr.open_dataset(search, engine="stac")
Expand Down Expand Up @@ -70,8 +70,9 @@ pip install git+https://github.com/stac-utils/xpystac

## How it works

When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that open call to the correct library.
Depending on the ``type`` of ``object`` that might be [stackstac](https://github.com/gjoseph92/stackstac)
When you call ``xarray.open_dataset(object, engine="stac")`` this library maps that ``open`` call to the correct library.
Depending on the ``type`` of ``object``, that might be a stacking library (either
[odc-stac](https://github.com/opendatacube/odc-stac) or [stackstac](https://github.com/gjoseph92/stackstac))
or ``xarray.open_dataset`` itself, with the engine and other options pulled from the pystac object.

## Prior Art
Expand Down
3 changes: 2 additions & 1 deletion environment.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: xpystac-dev
name: xpystac-broken
channels:
- conda-forge
- nodefaults
Expand All @@ -11,6 +11,7 @@ dependencies:
- adlfs
- aiohttp
- fsspec
- odc-stac
- planetary-computer
- pystac-client
- requests
Expand Down
22 changes: 21 additions & 1 deletion tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
import dask.array
import pytest
import xarray as xr

from xpystac.core import to_xarray

Expand All @@ -9,10 +11,28 @@ def test_to_xarray_with_cog_asset(simple_cog):


def test_to_xarray_with_pystac_client_search(simple_search):
ds = to_xarray(simple_search, assets=["blue", "green", "red"])
ds = to_xarray(simple_search)
assert ds


def test_to_xarray_returns_dask_backed_object(simple_search):
    """The ``blue`` variable should be a dask array split into several chunks."""
    blue = to_xarray(simple_search).blue.data
    assert isinstance(blue, dask.array.Array)
    assert blue.npartitions > 1


def test_to_xarray_with_pystac_client_search_passes_kwargs_through(simple_search):
    """``bands`` and ``chunks`` given to ``to_xarray`` should shape the result."""
    ds = to_xarray(simple_search, bands=["red", "green", "blue"], chunks={})

    # Variable order follows the requested band order.
    loaded_vars = list(ds.data_vars)
    assert loaded_vars == ["red", "green", "blue"]

    # With ``chunks={}`` the variable ends up as a single partition.
    blue = ds.blue.data
    assert blue.npartitions == 1


@pytest.mark.parametrize("stacking_library", ["odc.stac", "stackstac"])
def test_to_xarray_with_different_stacking_library(simple_search, stacking_library):
    """Every supported ``stacking_library`` value should yield an ``xr.Dataset``."""
    result = to_xarray(simple_search, stacking_library=stacking_library)
    assert isinstance(result, xr.Dataset)


def test_to_xarray_with_drop_variables_raises(simple_search):
with pytest.raises(KeyError, match="not implemented for pystac items"):
to_xarray(simple_search, drop_variables=["blue"])
Expand Down
47 changes: 40 additions & 7 deletions xpystac/core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import functools
from typing import List, Mapping, Union
from typing import List, Literal, Mapping, Union

import pystac
import xarray
Expand All @@ -8,11 +8,20 @@


@functools.singledispatch
def to_xarray(obj, **kwargs) -> xarray.Dataset:
"""Given a pystac object return an xarray dataset"""
def to_xarray(
    obj,
    stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None,
    **kwargs,
) -> xarray.Dataset:
    """Given a pystac object return an xarray dataset.

    When stacking multiple items, an optional ``stacking_library`` argument
    is accepted. It defaults to ``odc.stac`` if available and otherwise
    ``stackstac``. Control the behavior by setting ``stacking_library``.

    This is the generic fallback of the single-dispatch function: it only
    handles objects recognised by ``_is_item_search`` and rejects the rest.

    Parameters
    ----------
    obj
        The object to convert.
    stacking_library
        Which library to stack items with; ``None`` means auto-detect.
    **kwargs
        Forwarded unchanged to the handler that ends up loading the data.

    Raises
    ------
    TypeError
        If ``obj`` is not an item search.
    """
    if _is_item_search(obj):
        # Resolve the search and dispatch again on its item collection,
        # keeping the caller's ``stacking_library`` choice.
        item_collection = obj.item_collection()
        return to_xarray(
            item_collection, stacking_library=stacking_library, **kwargs
        )
    raise TypeError(
        f"Cannot convert object of type {type(obj).__name__!r} to an xarray dataset"
    )


Expand All @@ -21,16 +30,40 @@ def to_xarray(obj, **kwargs) -> xarray.Dataset:
def _(
    obj: Union[pystac.Item, pystac.ItemCollection],
    drop_variables: Union[str, List[str], None] = None,
    stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None,
    **kwargs,
) -> xarray.Dataset:
    """Load a pystac item or item collection with a stacking library.

    Parameters
    ----------
    obj
        A single item or a collection of items to load.
    drop_variables
        Not supported for pystac items; must be left as ``None``.
    stacking_library
        ``"odc.stac"`` or ``"stackstac"``. When ``None``, ``odc.stac`` is
        tried first and ``stackstac`` is used if that import fails.
    **kwargs
        Passed to ``odc.stac.load`` or ``stackstac.stack``. For ``odc.stac``
        a default of ``chunks={"x": 1024, "y": 1024}`` is applied unless the
        caller supplies ``chunks``.

    Raises
    ------
    KeyError
        If ``drop_variables`` is given.
    ValueError
        If ``stacking_library`` is not one of the supported names.
    """
    if drop_variables is not None:
        raise KeyError("``drop_variables`` not implemented for pystac items")

    if stacking_library is None:
        # Prefer odc.stac; fall back to stackstac only if it cannot be
        # imported. NOTE(review): relies on _import_optional_dependency
        # raising ImportError for a missing package; confirm.
        try:
            _import_optional_dependency("odc.stac")
        except ImportError:
            _import_optional_dependency("stackstac")
            stacking_library = "stackstac"
        else:
            stacking_library = "odc.stac"
    elif stacking_library not in ("odc.stac", "stackstac"):
        raise ValueError(f"{stacking_library=} is not a valid option")

    if stacking_library == "odc.stac":
        odc_stac = _import_optional_dependency("odc.stac")
        # odc.stac.load is handed a list, so wrap a lone item.
        items = [obj] if isinstance(obj, pystac.Item) else list(obj)
        # Caller-supplied kwargs take precedence over the default chunking.
        return odc_stac.load(items, **{"chunks": {"x": 1024, "y": 1024}, **kwargs})

    # Only "stackstac" can reach this point after the validation above.
    stackstac = _import_optional_dependency("stackstac")
    return stackstac.stack(obj, **kwargs).to_dataset(dim="band", promote_attrs=True)


@to_xarray.register
def _(obj: pystac.Asset, **kwargs) -> xarray.Dataset:
def _(
obj: pystac.Asset,
stacking_library: Union[Literal["odc.stac", "stackstac"], None] = None,
**kwargs,
) -> xarray.Dataset:
default_kwargs: Mapping = {"chunks": {}}
open_kwargs = obj.extra_fields.get("xarray:open_kwargs", {})

Expand Down

0 comments on commit 342c197

Please sign in to comment.