Commit

Implemented a messy OpenTopo search and test for NOAA and NCALM

Jack-Hayes committed Dec 16, 2024
1 parent 4bafcab commit 3a96130
Showing 8 changed files with 243 additions and 3 deletions.
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -54,6 +54,8 @@ dev = [
"pytest >=6",
"pytest-cov >=3",
"sliderule>=4.7.1,<5",
"requests>=2.32.3,<3",
"types-requests>=2.32.0.20241016,<3"
]
docs = [
"folium", # comes w/ geopandas on conda-forge but not pypi
@@ -214,6 +216,7 @@ odc-stac = "*"
planetary-computer = "*"
pystac-client = "*"
requests = "*"
types-requests = "*"
rioxarray = "*"
# stac-asset = "*" # not on conda-forge
stac-geoparquet = "*"
6 changes: 4 additions & 2 deletions src/coincident/datasets/__init__.py
@@ -12,7 +12,7 @@

from __future__ import annotations

-from coincident.datasets import csda, maxar, nasa, planetary_computer, usgs
+from coincident.datasets import csda, maxar, nasa, opentopo, planetary_computer, usgs
from coincident.datasets.general import Dataset

# Convenience mapping of string aliases to supported dataset classes
@@ -24,9 +24,11 @@
planetary_computer.COP30(),
planetary_computer.WorldCover(),
csda.TDX(),
opentopo.NOAA(),
opentopo.NCALM(),
]

aliases = [x.alias for x in _datasets]
_alias_to_Dataset = dict(zip(aliases, _datasets, strict=False))

__all__ = ["Dataset", "usgs", "maxar", "nasa", "planetary_computer", "csda"]
__all__ = ["Dataset", "usgs", "maxar", "nasa", "planetary_computer", "csda", "opentopo"]
38 changes: 38 additions & 0 deletions src/coincident/datasets/opentopo.py
@@ -0,0 +1,38 @@
"""
OpenTopography Datasets (NOAA, NCALM)
NOAA: https://www.opentopography.org/
NCALM: https://www.ncalm.org/
"""

from __future__ import annotations

from dataclasses import dataclass

from coincident.datasets.general import Dataset


@dataclass
class NOAA(Dataset):
"""Essential metadata for OpenTopography NOAA LiDAR"""

has_stac_api: bool = False
# otCatalog URL template; {coords} is filled with a comma-separated lon,lat ring.
# include_federated=true also returns federated (e.g., NOAA-hosted) collections.
search: str = "https://portal.opentopography.org/API/otCatalog?productFormat=PointCloud&polygon={coords}&detail=true&outputFormat=json&include_federated=true"
start: str = "1996-10-09"
end: str | None = None
type: str = "lidar"
alias: str = "noaa"
provider: str = "opentopography"


@dataclass
class NCALM(Dataset):
"""Essential metadata for OpenTopography NCALM LiDAR"""

has_stac_api: bool = False
# Same template but include_federated=false, restricting results to
# OpenTopography-hosted (non-federated) collections such as NCALM.
search: str = "https://portal.opentopography.org/API/otCatalog?productFormat=PointCloud&polygon={coords}&detail=true&outputFormat=json&include_federated=false"
start: str = "2003-05-15"
end: str | None = None
type: str = "lidar"
alias: str = "ncalm"
provider: str = "opentopography"
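For illustration, a minimal sketch of how the {coords} placeholder in the search template gets filled; opentopo_api.py below builds the same comma-separated x,y ring from a polygon's exterior (the AOI here is hypothetical):

from shapely.geometry import box

from coincident.datasets.opentopo import NOAA

aoi = box(-122.1, 36.5, -121.7, 36.9)  # hypothetical lon/lat AOI (EPSG:4326)
coords = ",".join(f"{x},{y}" for x, y in aoi.exterior.coords)
url = NOAA().search.format(coords=coords)  # ready to GET against otCatalog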
16 changes: 15 additions & 1 deletion src/coincident/search/main.py
@@ -11,7 +11,7 @@
from coincident.datasets import _alias_to_Dataset
from coincident.datasets.general import Dataset
from coincident.overlaps import subset_by_minimum_area
-from coincident.search import stac, wesm
+from coincident.search import opentopo_api, stac, wesm

_pystac_client = _ItemSearch("no_url")

@@ -147,6 +147,20 @@ def search(
**kwargs,
)

elif dataset.provider == "opentopography":
# TODO: remove ValueErrors and implement a catalog-wide search
# if intersects and/or datetime is None
if intersects is None:
msg_intersects = "For OpenTopography, the intersects parameter is required."
raise ValueError(msg_intersects)
if datetime is None:
msg_datetime = "For OpenTopography, the datetime parameter is required."
raise ValueError(msg_datetime)

gf = opentopo_api.search_opentopo(
intersects=intersects, datetime=datetime, dataset=dataset.alias
)

# Keep track of dataset alias in geodataframe metadata
# NOTE: attrs not always retained and not saved to file
# https://github.com/geopandas/geopandas/issues/3320
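In practice this branch is reached through the top-level search function, mirroring the tests below. A minimal sketch, with a hypothetical AOI file:

import geopandas as gpd

import coincident

aoi = gpd.read_file("aoi.geojson")  # hypothetical AOI; intersects and datetime are both required
gf = coincident.search.search(
    dataset="noaa",
    intersects=aoi,
    datetime=["2019-01-01", "2023-12-31"],
)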
148 changes: 148 additions & 0 deletions src/coincident/search/opentopo_api.py
@@ -0,0 +1,148 @@
"""
Searching for NCALM and NOAA data with an input search polygon is made easy via the OpenTopography API
https://portal.opentopography.org/apidocs/
There is also a GitHub repo containing flight extent polygons for NOAA
https://github.com/OpenTopography/Data_Catalog_Spatial_Boundaries/tree/main/NOAA_Coastal_Lidar
"""

from __future__ import annotations

from datetime import datetime

import geopandas as gpd
import requests
from shapely.geometry import shape


def search_ncalm_noaa(
aoi: gpd.GeoDataFrame | gpd.GeoSeries,
start: str,
end: str,
dataset: str | None,  # "noaa" or "ncalm"
) -> gpd.GeoDataFrame:
"""
Perform a search for geospatial data using OpenTopography API.
This function dynamically adjusts the API URL based on the dataset.
Parameters
----------
aoi : gpd.GeoDataFrame | gpd.GeoSeries
A GeoDataFrame or GeoSeries containing a geometry to restrict the search area.
start : str
The start datetime in ISO 8601 format (e.g., "2018-01-01").
end : str
The end datetime in ISO 8601 format (e.g., "2024-12-31").
dataset : str | None
    The dataset type (either "noaa" or "ncalm").
Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing the search results.
Raises
------
ValueError
If no valid `aoi` is provided or the API request fails.
"""
# convex hull for search
# convex_hull works better than simplify for more-complex geometry (ie. Louisiana)
# https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/LA/shape.geojson
search_poly = aoi.to_crs(4326).union_all()
search_poly_chull = search_poly.convex_hull
coords = ",".join([f"{x},{y}" for x, y in search_poly_chull.exterior.coords])

# alter the API URL based on the dataset
if dataset == "noaa":
url = f"https://portal.opentopography.org/API/otCatalog?productFormat=PointCloud&polygon={coords}&detail=true&outputFormat=json&include_federated=true"
elif dataset == "ncalm":
url = f"https://portal.opentopography.org/API/otCatalog?productFormat=PointCloud&polygon={coords}&detail=true&outputFormat=json&include_federated=false"
else:
msg = f"Unsupported dataset: {dataset}"
raise ValueError(msg)

response = requests.get(url, timeout=30)
if response.status_code != 200:
msg = f"Error querying OpenTopography API: {response.status_code}"
raise ValueError(msg)

catalog = response.json()
if dataset == "noaa":
catalog = [
ds
for ds in catalog["Datasets"]
if "NOAA" in ds["Dataset"]["identifier"]["propertyID"]
]
else:
catalog = catalog["Datasets"]

# temporal filtering
start_dt, end_dt = datetime.fromisoformat(start), datetime.fromisoformat(end)
filtered_catalog = [
entry
for entry in catalog
if any(
start_dt <= datetime.fromisoformat(date) <= end_dt
for date in (entry["Dataset"]["temporalCoverage"].split(" / "))
)
]

# convert the filtered catalog into a GeoDataFrame
gf_lidar = gpd.GeoDataFrame(
[
{
"id": entry["Dataset"]["identifier"]["value"],
"title": entry["Dataset"]["name"],
"start_datetime": datetime.fromisoformat(
entry["Dataset"]["temporalCoverage"].split(" / ")[0]
),
"end_datetime": datetime.fromisoformat(
entry["Dataset"]["temporalCoverage"].split(" / ")[-1]
),
"geometry": shape(
entry["Dataset"]["spatialCoverage"]["geo"]["geojson"]["features"][
0
]["geometry"]
),
}
for entry in filtered_catalog
],
crs="EPSG:4326",
)

# filter the GeoDataFrame to only include datasets that intersect with the search polygon
return gf_lidar[gf_lidar.intersects(search_poly)].reset_index(drop=True)


# TODO: add meaningful error messages when search fails
def search_opentopo(
dataset: str | None,
intersects: gpd.GeoDataFrame | gpd.GeoSeries,
datetime: list[str] | str,
) -> gpd.GeoDataFrame:
"""
Integrate the OpenTopography dataset search into the main search function.
Parameters
----------
dataset : str | None
    Dataset alias ('noaa' or 'ncalm').
intersects : gpd.GeoDataFrame | gpd.GeoSeries
    The geometry to restrict the search.
datetime : list[str] | str
    The datetime range for the search in ISO 8601 format.
Returns
-------
gpd.GeoDataFrame
A GeoDataFrame containing the search results from the OpenTopography API.
"""
# extract start and end dates
start, end = datetime if isinstance(datetime, list) else [datetime, datetime]

# search using the OpenTopography API
return search_ncalm_noaa(intersects, start=start, end=end, dataset=dataset)
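The temporal filter above hinges on otCatalog's temporalCoverage convention: a single ISO date or a "start / end" pair. A small sketch of the same check against a hypothetical catalog value:

from datetime import datetime

coverage = "2019-06-01 / 2019-06-15"  # hypothetical temporalCoverage value
start_dt = datetime.fromisoformat("2019-01-01")
end_dt = datetime.fromisoformat("2023-12-31")
keep = any(
    start_dt <= datetime.fromisoformat(date) <= end_dt
    for date in coverage.split(" / ")
)  # True: both endpoints fall inside the requested window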
8 changes: 8 additions & 0 deletions tests/conftest.py
@@ -21,3 +21,11 @@ def large_aoi(scope="package"):
# 260 vertices, large area 269,590 km^2
aoi_url = "https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/CO/shape.geojson"
return gpd.read_file(aoi_url)


# TODO: add a small geojson AOI for testing bathy sites
@pytest.fixture(scope="package")
def bathy_aoi():
# 631 vertices, large area ~ 27,000 km^2
aoi_url = "https://raw.githubusercontent.com/unitedstates/districts/refs/heads/gh-pages/states/MA/shape.geojson"
return gpd.read_file(aoi_url)
7 changes: 7 additions & 0 deletions tests/test_datasets.py
@@ -18,3 +18,10 @@ def test_maxar_defaults():
def test_threedep_defaults():
ds = coincident.datasets.usgs.ThreeDEP()
assert ds.alias == "3dep"


def test_opentopo_defaults():
ds_noaa = coincident.datasets.opentopo.NOAA()
assert ds_noaa.alias == "noaa"
ds_ncalm = coincident.datasets.opentopo.NCALM()
assert ds_ncalm.alias == "ncalm"
20 changes: 20 additions & 0 deletions tests/test_search.py
@@ -183,3 +183,23 @@ def test_get_swath_polygon():
def test_swath_polygon_not_found():
with pytest.raises(ValueError, match="No swath polygons found for workunit="):
coincident.search.wesm.get_swath_polygons("AL_SWCentral_1_B22")


# opentopo (NCALM and NOAA)
# =======
# TODO: remove datetime argument constraint for opentopo search
# also, smaller aois
@network
def test_noaa_search(bathy_aoi):
gf = coincident.search.search(
dataset="noaa", intersects=bathy_aoi, datetime=["2019-01-01", "2023-12-31"]
)
assert len(gf) == 2


@network
def test_ncalm_search(large_aoi):
gf = coincident.search.search(
dataset="ncalm", intersects=large_aoi, datetime=["2019-01-01", "2023-12-31"]
)
assert len(gf) == 6

2 comments on commit 3a96130

@Jack-Hayes (Member Author)

Added an OpenTopo dataset (which uses the OpenTopo otCatalog API endpoint) to incorporate NOAA Coastal LiDAR and NCALM into the search.
Updated test_search and test_datasets accordingly. I'm still not certain about the pyproject.toml commits, @scottyhq. After adding requests and types-requests to [project.optional-dependencies], I'm not sure whether including

requests = "*"
types-requests = "*"

in [tool.pixi.dependencies] is necessary.

Note that searching these datasets via OpenTopo relies on the catalogs keeping a static structure, since I hardcode a lot of nested key-value lookups. I left a handful of # TODO:s covering the network tests' efficiency, the search-function argument structure, error messages, etc.

@Jack-Hayes (Member Author) commented on 3a96130 Dec 16, 2024

Some of the TODOs mentioned above, mainly for the temporal aspect of the search, were addressed in dd81ea5.
