From cf75217d42a67ebe7db19bb7bc9a4a0b9377b21c Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Fri, 10 Nov 2023 00:26:30 -0500 Subject: [PATCH 01/15] add shared stac-populator CLI for all populator impl + add directory crawler populator --- STACpopulator/cli.py | 104 ++++++++++++++++++ .../implementations/CMIP6_UofT/add_CMIP6.py | 45 ++++---- .../DirectoryLoader/__init__.py | 0 .../DirectoryLoader/crawl_directory.py | 62 +++++++++++ STACpopulator/input.py | 63 ++++++++++- STACpopulator/stac_utils.py | 4 +- pyproject.toml | 3 + 7 files changed, 256 insertions(+), 25 deletions(-) create mode 100644 STACpopulator/cli.py create mode 100644 STACpopulator/implementations/DirectoryLoader/__init__.py create mode 100644 STACpopulator/implementations/DirectoryLoader/crawl_directory.py diff --git a/STACpopulator/cli.py b/STACpopulator/cli.py new file mode 100644 index 0000000..10f35c8 --- /dev/null +++ b/STACpopulator/cli.py @@ -0,0 +1,104 @@ +import argparse +import glob +import importlib +import os +import sys +from typing import Callable, Optional + +from STACpopulator import __version__ + +POPULATORS = {} + + +def make_main_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(prog="stac-populator", description="STACpopulator operations.") + parser.add_argument("--version", "-V", action="version", version=f"%(prog)s {__version__}", + help="prints the version of the library and exits") + commands = parser.add_subparsers(title="command", dest="command", description="STAC populator command to execute.") + + run_cmd_parser = make_run_command_parser() + commands.add_parser( + "run", + prog=f"{parser.prog} {run_cmd_parser.prog}", parents=[run_cmd_parser], + formatter_class=run_cmd_parser.formatter_class, usage=run_cmd_parser.usage, + add_help=False, help=run_cmd_parser.description, description=run_cmd_parser.description + ) + + # add more commands as needed... 
+ + return parser + + +def make_run_command_parser() -> argparse.ArgumentParser: + """ + Groups all sub-populator CLI listed in :py:mod:`STACpopulator.implementations` as a common ``stac-populator`` CLI. + + Dispatches the provided arguments to the appropriate sub-populator CLI as requested. Each sub-populator CLI must + implement functions ``make_parser`` and ``main`` to generate the arguments and dispatch them to the corresponding + caller. The ``main`` function should accept a sequence of string arguments, which can be passed to the parser + obtained from ``make_parser``. + + An optional ``runner`` can also be defined in each populator module. If provided, the namespace arguments that have + already been parsed to resolve the populator to run will be used directly, avoiding parsing arguments twice. + """ + parser = argparse.ArgumentParser(prog="command", description="STACpopulator implementation runner.") + subparsers = parser.add_subparsers(title="populator", dest="populator", description="Implementation to run.") + populators_impl = "implementations" + populators_dir = os.path.join(os.path.dirname(__file__), populators_impl) + populator_mods = glob.glob(f"{populators_dir}/**/[!__init__]*.py", recursive=True) # potential candidate scripts + for populator_path in sorted(populator_mods): + populator_script = populator_path.split(populators_dir, 1)[1][1:] + populator_py_mod = os.path.splitext(populator_script)[0].replace(os.sep, ".") + populator_name, pop_mod_file = populator_py_mod.rsplit(".", 1) + populator_root = f"STACpopulator.{populators_impl}.{populator_name}" + pop_mod_file_loc = f"{populator_root}.{pop_mod_file}" + populator_module = importlib.import_module(pop_mod_file_loc, populator_root) + parser_maker: Callable[[], argparse.ArgumentParser] = getattr(populator_module, "make_parser", None) + populator_runner = getattr(populator_module, "runner", None) # optional, call main directly if not available + populator_caller = getattr(populator_module, "main", 
None) + if callable(parser_maker) and callable(populator_caller): + populator_parser = parser_maker() + populator_prog = f"{parser.prog} {populator_name}" + subparsers.add_parser( + populator_name, + prog=populator_prog, parents=[populator_parser], formatter_class=populator_parser.formatter_class, + add_help=False, # add help disabled otherwise conflicts with this main populator help + help=populator_parser.description, description=populator_parser.description, + usage=populator_parser.usage, + ) + POPULATORS[populator_name] = { + "name": populator_name, + "caller": populator_caller, + "parser": populator_parser, + "runner": populator_runner, + } + return parser + + +def main(*args: str) -> Optional[int]: + parser = make_main_parser() + args = args or sys.argv[1:] # same as was parse args does, but we must provide them to subparser + ns = parser.parse_args(args=args) # if 'command' or 'populator' unknown, auto prints the help message with exit(2) + params = vars(ns) + populator_cmd = params.pop("command") + if not populator_cmd: + parser.print_help() + return 0 + result = None + if populator_cmd == "run": + populator_name = params.pop("populator") + if not populator_name: + parser.print_help() + return 0 + populator_args = args[2:] # skip [command] [populator] + populator_caller = POPULATORS[populator_name]["caller"] + populator_runner = POPULATORS[populator_name]["runner"] + if populator_runner: + result = populator_runner(ns) + else: + result = populator_caller(*populator_args) + return 0 if result is None else result + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index ae86892..c36f5e2 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -2,11 +2,10 @@ import json import logging from datetime import datetime -from typing import Any, List, Literal, 
MutableMapping, Optional +from typing import Any, List, Literal, MutableMapping, NoReturn, Optional import pydantic_core import pyessv -from colorlog import ColoredFormatter from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator from pystac.extensions.datacube import DatacubeExtension @@ -14,16 +13,7 @@ from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader from STACpopulator.models import GeoJSONPolygon, STACItemProperties from STACpopulator.populator_base import STACpopulatorBase -from STACpopulator.stac_utils import STAC_item_from_metadata, collection2literal - -LOGGER = logging.getLogger(__name__) -LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" -formatter = ColoredFormatter(LOGFORMAT) -stream = logging.StreamHandler() -stream.setFormatter(formatter) -LOGGER.addHandler(stream) -LOGGER.setLevel(logging.INFO) -LOGGER.propagate = False +from STACpopulator.stac_utils import LOGGER, STAC_item_from_metadata, collection2literal # CMIP6 controlled vocabulary (CV) CV = pyessv.WCRP.CMIP6 @@ -169,23 +159,34 @@ def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) return json.loads(json.dumps(item.to_dict())) -if __name__ == "__main__": - parser = argparse.ArgumentParser(prog="CMIP6 STAC populator") +def make_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="CMIP6 STAC populator") parser.add_argument("stac_host", type=str, help="STAC API address") parser.add_argument("thredds_catalog_URL", type=str, help="URL to the CMIP6 THREDDS catalog") parser.add_argument("--update", action="store_true", help="Update collection and its items") + parser.add_argument("--mode", choices=["full", "single"], + help="Operation mode, processing the full dataset or only the single reference.") + return parser - args = parser.parse_args() - LOGGER.info(f"Arguments to call: {args}") +def runner(ns: argparse.Namespace) -> Optional[int] | 
NoReturn: + LOGGER.info(f"Arguments to call: {vars(ns)}") - mode = "full" - - if mode == "full": - data_loader = THREDDSLoader(args.thredds_catalog_URL) + if ns.mode == "full": + data_loader = THREDDSLoader(ns.thredds_catalog_URL) else: # To be implemented - data_loader = ErrorLoader(args.error_file) + data_loader = ErrorLoader() - c = CMIP6populator(args.stac_host, data_loader, args.update) + c = CMIP6populator(ns.stac_host, data_loader, ns.update) c.ingest() + + +def main(*args: str) -> Optional[int]: + parser = make_parser() + ns = parser.parse_args(args) + return runner(ns) + + +if __name__ == "__main__": + main() diff --git a/STACpopulator/implementations/DirectoryLoader/__init__.py b/STACpopulator/implementations/DirectoryLoader/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/STACpopulator/implementations/DirectoryLoader/crawl_directory.py b/STACpopulator/implementations/DirectoryLoader/crawl_directory.py new file mode 100644 index 0000000..b6a15dc --- /dev/null +++ b/STACpopulator/implementations/DirectoryLoader/crawl_directory.py @@ -0,0 +1,62 @@ +import argparse +from typing import NoReturn, Optional, MutableMapping, Any + +from STACpopulator.input import STACDirectoryLoader +from STACpopulator.models import GeoJSONPolygon, STACItemProperties +from STACpopulator.populator_base import STACpopulatorBase +from STACpopulator.stac_utils import LOGGER + + +class DirectoryPopulator(STACpopulatorBase): + item_properties_model = STACItemProperties + item_geometry_model = GeoJSONPolygon + + def __init__( + self, + stac_host: str, + loader: STACDirectoryLoader, + update: bool, + collection: MutableMapping[str, Any], + ) -> None: + self._collection_info = collection + super().__init__(stac_host, loader, update) + + def load_config(self): + pass # ignore + + def create_stac_collection(self) -> MutableMapping[str, Any]: + return self._collection_info + + def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> 
MutableMapping[str, Any]: + return item_data + + +def make_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser(description="Directory STAC populator") + parser.add_argument("stac_host", type=str, help="STAC API URL.") + parser.add_argument("directory", type=str, help="Path to a directory structure with STAC Collections and Items.") + parser.add_argument("--update", action="store_true", help="Update collection and its items.") + parser.add_argument( + "--prune", action="store_true", + help="Limit search of STAC Collections only to first top-most matches in the crawled directory structure." + ) + return parser + + +def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn: + LOGGER.info(f"Arguments to call: {vars(ns)}") + + for collection_path, collection_json in STACDirectoryLoader(ns.directory, "collection", ns.prune): + loader = STACDirectoryLoader(collection_path, "item", False) + populator = DirectoryPopulator(ns.stac_host, loader, ns.update, collection_json) + populator.ingest() + + +def main(*args: str) -> Optional[int]: + parser = make_parser() + ns = parser.parse_args(args) + return runner(ns) + + +if __name__ == "__main__": + main() diff --git a/STACpopulator/input.py b/STACpopulator/input.py index a5d2774..2ec8f2d 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -1,6 +1,8 @@ +import json import logging +import os from abc import ABC, abstractmethod -from typing import Any, Iterator, MutableMapping, Optional, Tuple +from typing import Any, Iterator, Literal, MutableMapping, Optional, Tuple, Union import pystac import requests @@ -131,6 +133,65 @@ def extract_metadata(self, ds: siphon.catalog.Dataset) -> MutableMapping[str, An return attrs +class STACDirectoryLoader(GenericLoader): + """ + Iterates through a directory structure looking for STAC Collections or Items. + + For each directory that gets crawled, if a file is named ``collection.json``, it assumed to be a STAC Collection. 
+ All other ``.json`` files under the directory where ``collection.json`` was found are assumed to be STAC Items. + These JSON STAC Items can be either at the same directory level as the STAC Collection, or under nested directories. + + Using the mode option, yielded results will be either the STAC Collections or the STAC Items. + This allows this class to be used in conjunction (2 nested loops) to find collections and their underlying items. + + .. code-block:: python + + for collection_path, collection_json in STACDirectoryLoader(dir_path, mode="collection"): + for item_path, item_json in STACDirectoryLoader(collection_path, mode="item"): + ... # do stuff + + For convenience, option ``prune`` can be used to stop crawling deeper once a STAC Collection is found. + Any collection files found further down the directory were a top-most match was found will not be yielded. + This can be useful to limit search, or to ignore nested directories using subsets of STAC Collections. + """ + + def __init__(self, path: str, mode: Literal["collection", "item"], prune: bool = False) -> None: + super().__init__() + self.path = path + self.iter = None + self.prune = prune + self.reset() + self._collection_mode = mode == "collection" + self._collection_name = "collection.json" + + def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: + for root, dirs, files in self.iter: + if self.prune and self._collection_mode and self._collection_name in files: + del dirs[:] + for name in files: + if self._collection_mode and self._is_collection(name): + col_path = os.path.join(root, name) + yield col_path, self._load_json(col_path) + elif not self._collection_mode and self._is_item(name): + item_path = os.path.join(root, name) + yield item_path, self._load_json(item_path) + + def _is_collection(self, path: Union[os.PathLike[str], str]) -> bool: + name = os.path.split(path)[-1] + return name == self._collection_name + + def _is_item(self, path: Union[os.PathLike[str], str]) -> 
bool: + name = os.path.split(path)[-1] + return name != self._collection_name and os.path.splitext(name)[-1] in [".json", ".geojson"] + + def _load_json(self, path: Union[os.PathLike[str], str]) -> MutableMapping[str, Any]: + with open(path, mode="r", encoding="utf-8") as file: + return json.load(file) + + def reset(self): + self.iter = os.walk(self.path) + + class STACLoader(GenericLoader): def __init__(self) -> None: super().__init__() diff --git a/STACpopulator/stac_utils.py b/STACpopulator/stac_utils.py index c8c8aaa..3997179 100644 --- a/STACpopulator/stac_utils.py +++ b/STACpopulator/stac_utils.py @@ -14,8 +14,8 @@ from STACpopulator.models import STACItem LOGGER = logging.getLogger(__name__) -LOGFORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" -formatter = ColoredFormatter(LOGFORMAT) +LOG_FORMAT = " %(log_color)s%(levelname)s:%(reset)s %(blue)s[%(name)-30s]%(reset)s %(message)s" +formatter = ColoredFormatter(LOG_FORMAT) stream = logging.StreamHandler() stream.setFormatter(formatter) LOGGER.addHandler(stream) diff --git a/pyproject.toml b/pyproject.toml index ac83d51..377aab4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -68,6 +68,9 @@ keywords = [ "CMIP6" ] +[project.scripts] +stac-populator = "STACpopulator.cli:main" + [project.urls] Repository = "https://github.com/crim-ca/stac-populator" Changelog = "https://github.com/crim-ca/stac-populator/blob/master/CHANGES.md" From c011c6119407e3fd8443cf23f8e1678f4b2c6ba8 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Fri, 10 Nov 2023 00:55:33 -0500 Subject: [PATCH 02/15] fix name of sub-CLIs --- STACpopulator/cli.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/STACpopulator/cli.py b/STACpopulator/cli.py index 10f35c8..d209757 100644 --- a/STACpopulator/cli.py +++ b/STACpopulator/cli.py @@ -16,7 +16,7 @@ def make_main_parser() -> argparse.ArgumentParser: help="prints the version of the library and exits") commands = 
parser.add_subparsers(title="command", dest="command", description="STAC populator command to execute.") - run_cmd_parser = make_run_command_parser() + run_cmd_parser = make_run_command_parser(parser.prog) commands.add_parser( "run", prog=f"{parser.prog} {run_cmd_parser.prog}", parents=[run_cmd_parser], @@ -29,7 +29,7 @@ def make_main_parser() -> argparse.ArgumentParser: return parser -def make_run_command_parser() -> argparse.ArgumentParser: +def make_run_command_parser(parent) -> argparse.ArgumentParser: """ Groups all sub-populator CLI listed in :py:mod:`STACpopulator.implementations` as a common ``stac-populator`` CLI. @@ -41,7 +41,7 @@ def make_run_command_parser() -> argparse.ArgumentParser: An optional ``runner`` can also be defined in each populator module. If provided, the namespace arguments that have already been parsed to resolve the populator to run will be used directly, avoiding parsing arguments twice. """ - parser = argparse.ArgumentParser(prog="command", description="STACpopulator implementation runner.") + parser = argparse.ArgumentParser(prog="run", description="STACpopulator implementation runner.") subparsers = parser.add_subparsers(title="populator", dest="populator", description="Implementation to run.") populators_impl = "implementations" populators_dir = os.path.join(os.path.dirname(__file__), populators_impl) @@ -58,7 +58,7 @@ def make_run_command_parser() -> argparse.ArgumentParser: populator_caller = getattr(populator_module, "main", None) if callable(parser_maker) and callable(populator_caller): populator_parser = parser_maker() - populator_prog = f"{parser.prog} {populator_name}" + populator_prog = f"{parent} {parser.prog} {populator_name}" subparsers.add_parser( populator_name, prog=populator_prog, parents=[populator_parser], formatter_class=populator_parser.formatter_class, From 1305b48ff3f33bb1d6bee2a7df68368c7d0431ca Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 14 Nov 2023 01:55:55 -0500 Subject: [PATCH 
03/15] add requests session capabilities, CLI args parsing for session auth + working STAC Collection/Items dir iter loading --- STACpopulator/api_requests.py | 48 +++++++----- STACpopulator/cli.py | 74 +++++++++++++++++++ .../implementations/CMIP6_UofT/add_CMIP6.py | 14 +++- .../DirectoryLoader/crawl_directory.py | 31 +++++--- STACpopulator/input.py | 20 +++-- STACpopulator/populator_base.py | 19 +++-- 6 files changed, 159 insertions(+), 47 deletions(-) diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index e7a5380..58f4d1e 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -3,6 +3,7 @@ from typing import Any, Optional import requests +from requests import Session from colorlog import ColoredFormatter LOGGER = logging.getLogger(__name__) @@ -15,27 +16,36 @@ LOGGER.propagate = False -def stac_host_reachable(url: str) -> bool: +def stac_host_reachable(url: str, session: Optional[Session] = None) -> bool: try: - registry = requests.get(url) - registry.raise_for_status() - return True - except (requests.exceptions.RequestException, requests.exceptions.ConnectionError): - return False - - -def stac_collection_exists(stac_host: str, collection_id: str) -> bool: + session = session or requests + response = session.get(url, headers={"Accept": "application/json"}) + response.raise_for_status() + body = response.json() + if body["type"] == "Catalog" and "stac_version" in body: + return True + except (requests.exceptions.RequestException, requests.exceptions.ConnectionError) as exc: + LOGGER.error("Could not validate STAC host. Not reachable [%s] due to [%s]", url, exc, exc_info=exc) + return False + + +def stac_collection_exists(stac_host: str, collection_id: str, session: Optional[Session] = None) -> bool: """ Get a STAC collection Returns the collection JSON. 
""" - r = requests.get(os.path.join(stac_host, "collections", collection_id), verify=False) - + session = session or requests + r = session.get(os.path.join(stac_host, "collections", collection_id), verify=False) return r.status_code == 200 -def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Optional[bool] = True) -> None: +def post_stac_collection( + stac_host: str, + json_data: dict[str, Any], + update: Optional[bool] = True, + session: Optional[Session] = None, +) -> None: """Post/create a collection on the STAC host :param stac_host: address of the STAC host @@ -44,16 +54,18 @@ def post_stac_collection(stac_host: str, json_data: dict[str, Any], update: Opti :type json_data: dict[str, Any] :param update: if True, update the collection on the host server if it is already present, defaults to True :type update: Optional[bool], optional + :param session: Session with additional configuration to perform requests. """ + session = session or requests collection_id = json_data["id"] - r = requests.post(os.path.join(stac_host, "collections"), json=json_data, verify=False) + r = session.post(os.path.join(stac_host, "collections"), json=json_data) if r.status_code == 200: LOGGER.info(f"Collection {collection_id} successfully created") elif r.status_code == 409: if update: LOGGER.info(f"Collection {collection_id} already exists. Updating.") - r = requests.put(os.path.join(stac_host, "collections"), json=json_data, verify=False) + r = session.put(os.path.join(stac_host, "collections"), json=json_data) r.raise_for_status() else: LOGGER.info(f"Collection {collection_id} already exists.") @@ -67,6 +79,7 @@ def post_stac_item( item_name: str, json_data: dict[str, dict], update: Optional[bool] = True, + session: Optional[Session] = None, ) -> None: """Post a STAC item to the host server. 
@@ -80,17 +93,18 @@ def post_stac_item( :type json_data: dict[str, dict] :param update: if True, update the item on the host server if it is already present, defaults to True :type update: Optional[bool], optional + :param session: Session with additional configuration to perform requests. """ item_id = json_data["id"] - - r = requests.post(os.path.join(stac_host, f"collections/{collection_id}/items"), json=json_data) + session = session or requests + r = session.post(os.path.join(stac_host, f"collections/{collection_id}/items"), json=json_data) if r.status_code == 200: LOGGER.info(f"Item {item_name} successfully added") elif r.status_code == 409: if update: LOGGER.info(f"Item {item_id} already exists. Updating.") - r = requests.put(os.path.join(stac_host, f"collections/{collection_id}/items/{item_id}"), json=json_data) + r = session.put(os.path.join(stac_host, f"collections/{collection_id}/items/{item_id}"), json=json_data) r.raise_for_status() else: LOGGER.info(f"Item {item_id} already exists.") diff --git a/STACpopulator/cli.py b/STACpopulator/cli.py index d209757..f91d97b 100644 --- a/STACpopulator/cli.py +++ b/STACpopulator/cli.py @@ -5,11 +5,85 @@ import sys from typing import Callable, Optional +import requests +from http import cookiejar +from requests.auth import AuthBase, HTTPBasicAuth, HTTPDigestAuth, HTTPProxyAuth +from requests.sessions import Session + from STACpopulator import __version__ POPULATORS = {} +class HTTPBearerTokenAuth(AuthBase): + def __init__(self, token: str) -> None: + self._token = token + + def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest: + r.headers["Authorization"] = f"Bearer {self._token}" + return r + + +class HTTPCookieAuth(AuthBase): + """ + Employ a cookie-jar file for authorization. + + Useful command: + + .. 
code-block:: shell + + curl --cookie-jar /path/to/cookie-jar.txt [authorization-provider-arguments] + + """ + def __init__(self, cookie_jar: str) -> None: + self._cookie_jar = cookie_jar + + def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest: + r.prepare_cookies(cookiejar.FileCookieJar(self._cookie_jar)) + return r + + +def add_request_options(parser: argparse.ArgumentParser) -> None: + """ + Adds arguments to a parser to allow update of a request session definition used across a populator procedure. + """ + parser.add_argument( + "--no-verify", "--no-ssl", "--no-ssl-verify", dest="verify", action="store_false", + help="Disable SSL verification (not recommended unless for development/test servers)." + ) + parser.add_argument( + "--cert", type=argparse.FileType(), required=False, help="Path to a certificate file to use." + ) + parser.add_argument( + "--auth-handler", choices=["basic", "digest", "bearer", "proxy", "cookie"], required=False, + help="Authentication strategy to employ for the requests session." + ) + parser.add_argument( + "--auth-identity", required=False, + help="Bearer token, cookie-jar file or proxy/digest/basic username:password for selected authorization handler." + ) + + +def apply_request_options(session: Session, namespace: argparse.Namespace) -> None: + """ + Applies the relevant request session options from parsed input arguments. 
+ """ + session.verify = namespace.verify + session.cert = namespace.cert + if namespace.auth_handler in ["basic", "digest", "proxy"]: + usr, pwd = namespace.auth_identity.split(":", 1) + if namespace.auth_handler == "basic": + session.auth = HTTPBasicAuth(usr, pwd) + elif namespace.auth_handler == "digest": + session.auth = HTTPDigestAuth(usr, pwd) + else: + session.auth = HTTPProxyAuth(usr, pwd) + elif namespace.auth_handler == "bearer": + session.auth = HTTPBearerTokenAuth(namespace.auth_identity) + elif namespace.auth_handler == "cookie": + session.auth = HTTPCookieAuth(namespace.auth_identity) + + def make_main_parser() -> argparse.ArgumentParser: parser = argparse.ArgumentParser(prog="stac-populator", description="STACpopulator operations.") parser.add_argument("--version", "-V", action="version", version=f"%(prog)s {__version__}", diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index c36f5e2..6dd30d6 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -1,11 +1,11 @@ import argparse import json -import logging from datetime import datetime from typing import Any, List, Literal, MutableMapping, NoReturn, Optional import pydantic_core import pyessv +from requests.sessions import Session from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator from pystac.extensions.datacube import DatacubeExtension @@ -98,7 +98,13 @@ class CMIP6populator(STACpopulatorBase): item_properties_model = CMIP6ItemProperties item_geometry_model = GeoJSONPolygon - def __init__(self, stac_host: str, data_loader: GenericLoader, update: Optional[bool] = False) -> None: + def __init__( + self, + stac_host: str, + data_loader: GenericLoader, + update: Optional[bool] = False, + session: Optional[Session] = None, + ) -> None: """Constructor :param stac_host: URL to the STAC API @@ -106,7 +112,7 @@ def 
__init__(self, stac_host: str, data_loader: GenericLoader, update: Optional[ :param thredds_catalog_url: the URL to the THREDDS catalog to ingest :type thredds_catalog_url: str """ - super().__init__(stac_host, data_loader, update) + super().__init__(stac_host, data_loader, update=update, session=session) @staticmethod def make_cmip6_item_id(attrs: MutableMapping[str, Any]) -> str: @@ -184,7 +190,7 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn: def main(*args: str) -> Optional[int]: parser = make_parser() - ns = parser.parse_args(args) + ns = parser.parse_args(args or None) return runner(ns) diff --git a/STACpopulator/implementations/DirectoryLoader/crawl_directory.py b/STACpopulator/implementations/DirectoryLoader/crawl_directory.py index b6a15dc..d2517d8 100644 --- a/STACpopulator/implementations/DirectoryLoader/crawl_directory.py +++ b/STACpopulator/implementations/DirectoryLoader/crawl_directory.py @@ -1,6 +1,10 @@ import argparse +import os.path from typing import NoReturn, Optional, MutableMapping, Any +from requests.sessions import Session + +from STACpopulator.cli import add_request_options, apply_request_options from STACpopulator.input import STACDirectoryLoader from STACpopulator.models import GeoJSONPolygon, STACItemProperties from STACpopulator.populator_base import STACpopulatorBase @@ -16,15 +20,18 @@ def __init__( stac_host: str, loader: STACDirectoryLoader, update: bool, - collection: MutableMapping[str, Any], + collection: dict[str, Any], + session: Optional[Session] = None, ) -> None: - self._collection_info = collection - super().__init__(stac_host, loader, update) + self._collection = collection + super().__init__(stac_host, loader, update=update, session=session) - def load_config(self): - pass # ignore + def load_config(self) -> MutableMapping[str, Any]: + self._collection_info = self._collection + return self._collection_info def create_stac_collection(self) -> MutableMapping[str, Any]: + 
self.publish_stac_collection(self._collection_info) return self._collection_info def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) -> MutableMapping[str, Any]: @@ -40,21 +47,25 @@ def make_parser() -> argparse.ArgumentParser: "--prune", action="store_true", help="Limit search of STAC Collections only to first top-most matches in the crawled directory structure." ) + add_request_options(parser) return parser def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn: LOGGER.info(f"Arguments to call: {vars(ns)}") - for collection_path, collection_json in STACDirectoryLoader(ns.directory, "collection", ns.prune): - loader = STACDirectoryLoader(collection_path, "item", False) - populator = DirectoryPopulator(ns.stac_host, loader, ns.update, collection_json) - populator.ingest() + with Session() as session: + apply_request_options(session, ns) + for collection_path, collection_json in STACDirectoryLoader(ns.directory, "collection", ns.prune): + collection_dir = os.path.dirname(collection_path) + loader = STACDirectoryLoader(collection_dir, "item", False) + populator = DirectoryPopulator(ns.stac_host, loader, ns.update, collection_json, session=session) + populator.ingest() def main(*args: str) -> Optional[int]: parser = make_parser() - ns = parser.parse_args(args) + ns = parser.parse_args(args or None) return runner(ns) diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 59861ee..62ebcfe 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -147,7 +147,7 @@ class STACDirectoryLoader(GenericLoader): .. code-block:: python for collection_path, collection_json in STACDirectoryLoader(dir_path, mode="collection"): - for item_path, item_json in STACDirectoryLoader(collection_path, mode="item"): + for item_path, item_json in STACDirectoryLoader(os.path.dirname(collection_path), mode="item"): ... # do stuff For convenience, option ``prune`` can be used to stop crawling deeper once a STAC Collection is found. 
@@ -166,20 +166,18 @@ def __init__(self, path: str, mode: Literal["collection", "item"], prune: bool = def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: for root, dirs, files in self.iter: - if self.prune and self._collection_mode and self._collection_name in files: - del dirs[:] + # since there can ever be only one 'collection' file name in a same directory + # directly retrieve it instead of looping through all other files + if self._collection_mode and self._collection_name in files: + if self.prune: # stop recursive search if requested + del dirs[:] + col_path = os.path.join(root, self._collection_name) + yield col_path, self._load_json(col_path) for name in files: - if self._collection_mode and self._is_collection(name): - col_path = os.path.join(root, name) - yield col_path, self._load_json(col_path) - elif not self._collection_mode and self._is_item(name): + if not self._collection_mode and self._is_item(name): item_path = os.path.join(root, name) yield item_path, self._load_json(item_path) - def _is_collection(self, path: Union[os.PathLike[str], str]) -> bool: - name = os.path.split(path)[-1] - return name == self._collection_name - def _is_item(self, path: Union[os.PathLike[str], str]) -> bool: name = os.path.split(path)[-1] return name != self._collection_name and os.path.splitext(name)[-1] in [".json", ".geojson"] diff --git a/STACpopulator/populator_base.py b/STACpopulator/populator_base.py index 55db015..a2f1cb3 100644 --- a/STACpopulator/populator_base.py +++ b/STACpopulator/populator_base.py @@ -6,6 +6,7 @@ import pystac from colorlog import ColoredFormatter +from requests.sessions import Session from STACpopulator.api_requests import ( post_stac_collection, @@ -31,6 +32,7 @@ def __init__( stac_host: str, data_loader: GenericLoader, update: Optional[bool] = False, + session: Optional[Session] = None, ) -> None: """Constructor @@ -43,15 +45,15 @@ def __init__( super().__init__() self._collection_info = None + self._session = 
session self.load_config() self._ingest_pipeline = data_loader self._stac_host = self.validate_host(stac_host) self.update = update - self._collection_id = self.collection_name LOGGER.info("Initialization complete") - LOGGER.info(f"Collection {self.collection_name} is assigned id {self._collection_id}") + LOGGER.info(f"Collection {self.collection_name} is assigned ID {self.collection_id}") self.create_stac_collection() def load_config(self): @@ -90,7 +92,7 @@ def create_stac_item(self, item_name: str, item_data: dict[str, Any]) -> dict[st def validate_host(self, stac_host: str) -> str: if not url_validate(stac_host): raise ValueError("stac_host URL is not appropriately formatted") - if not stac_host_reachable(stac_host): + if not stac_host_reachable(stac_host, session=self._session): raise RuntimeError("stac_host is not reachable") return stac_host @@ -126,7 +128,7 @@ def create_stac_collection(self) -> dict[str, Any]: return collection_data def publish_stac_collection(self, collection_data: dict[str, Any]) -> None: - post_stac_collection(self.stac_host, collection_data, self.update) + post_stac_collection(self.stac_host, collection_data, self.update, session=self._session) def ingest(self) -> None: LOGGER.info("Data ingestion") @@ -134,4 +136,11 @@ def ingest(self) -> None: LOGGER.info(f"Creating STAC representation for {item_name}") stac_item = self.create_stac_item(item_name, item_data) if stac_item != -1: - post_stac_item(self.stac_host, self.collection_id, item_name, stac_item, self.update) + post_stac_item( + self.stac_host, + self.collection_id, + item_name, + stac_item, + update=self.update, + session=self._session, + ) From 9bbe51f02dfbab2f2bd181afd3a6457abe4ccb49 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 14 Nov 2023 10:35:04 -0500 Subject: [PATCH 04/15] fix issues with cookie-auth --- STACpopulator/api_requests.py | 8 +++++--- STACpopulator/cli.py | 25 ++++++++++++++++--------- 2 files changed, 21 insertions(+), 12 deletions(-) 
diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index 58f4d1e..69fa292 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -58,7 +58,8 @@ def post_stac_collection( """ session = session or requests collection_id = json_data["id"] - r = session.post(os.path.join(stac_host, "collections"), json=json_data) + collection_url = os.path.join(stac_host, "collections") + r = session.post(collection_url, json=json_data) if r.status_code == 200: LOGGER.info(f"Collection {collection_id} successfully created") @@ -95,9 +96,10 @@ def post_stac_item( :type update: Optional[bool], optional :param session: Session with additional configuration to perform requests. """ - item_id = json_data["id"] session = session or requests - r = session.post(os.path.join(stac_host, f"collections/{collection_id}/items"), json=json_data) + item_id = json_data["id"] + item_url = os.path.join(stac_host, f"collections/{collection_id}/items") + r = session.post(item_url, json=json_data) if r.status_code == 200: LOGGER.info(f"Item {item_name} successfully added") diff --git a/STACpopulator/cli.py b/STACpopulator/cli.py index f91d97b..2a2dad2 100644 --- a/STACpopulator/cli.py +++ b/STACpopulator/cli.py @@ -24,23 +24,29 @@ def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest: return r -class HTTPCookieAuth(AuthBase): +class HTTPCookieAuth(cookiejar.MozillaCookieJar): """ Employ a cookie-jar file for authorization. - Useful command: + Examples of useful command: .. code-block:: shell curl --cookie-jar /path/to/cookie-jar.txt [authorization-provider-arguments] + curl \ + -k \ + -X POST \ + --cookie-jar /tmp/magpie-cookie.txt \ + -d '{"user_name":"...","password":"..."}' \ + -H 'Accept:application/json' \ + -H 'Content-Type:application/json' \ + 'https://{hostname}/magpie/signin' + + .. 
note:: + Due to implementation details with :mod:`requests`, this must be passed directly to the ``cookies`` + attribute rather than ``auth`` as in the case for other authorization handlers. """ - def __init__(self, cookie_jar: str) -> None: - self._cookie_jar = cookie_jar - - def __call__(self, r: requests.PreparedRequest) -> requests.PreparedRequest: - r.prepare_cookies(cookiejar.FileCookieJar(self._cookie_jar)) - return r def add_request_options(parser: argparse.ArgumentParser) -> None: @@ -81,7 +87,8 @@ def apply_request_options(session: Session, namespace: argparse.Namespace) -> No elif namespace.auth_handler == "bearer": session.auth = HTTPBearerTokenAuth(namespace.auth_identity) elif namespace.auth_handler == "cookie": - session.auth = HTTPCookieAuth(namespace.auth_identity) + session.cookies = HTTPCookieAuth(namespace.auth_identity) + session.cookies.load(namespace.auth_identity) def make_main_parser() -> argparse.ArgumentParser: From 99a741928bb7b8420ee30ccdf87646fc232d1afd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 14 Nov 2023 10:56:14 -0500 Subject: [PATCH 05/15] fix session forwarding to THREDDS catalog siphon impl --- CHANGES.md | 10 +++++- .../implementations/CMIP6_UofT/add_CMIP6.py | 21 ++++++------ STACpopulator/input.py | 32 +++++++++++++++++-- 3 files changed, 51 insertions(+), 12 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index 0b6b22d..ced6fd4 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,7 +2,15 @@ ## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) - +* Add request ``session`` keyword to all request-related functions and populator methods to allow sharing a common set + of settings (`auth`, SSL `verify`, `cert`) across requests toward the STAC Catalog. +* Add `DirectoryLoader` that allows populating a STAC Catalog with Collections and Items loaded from a crawled directory + hierarchy that contains `collection.json` files and other `.json`/`.geojson` items.
+* Add a generic CLI `stac-populator` that can be called to run populator implementations directly + using command `stac-populator run [impl-args]`. +* Remove hardcoded `verify=False` from requests calls. + If needed for testing purposes, users should use a custom `requests.sessions.Session` with `verify=False` passed to + the populator, or alternatively, employ the CLI argument `--no-verify` that will accomplish the same behavior. ## [0.2.0](https://github.com/crim-ca/stac-populator/tree/0.2.0) (2023-11-10) diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index 6dd30d6..5174f4a 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -9,6 +9,7 @@ from pydantic import AnyHttpUrl, ConfigDict, Field, FieldValidationInfo, field_validator from pystac.extensions.datacube import DatacubeExtension +from STACpopulator.cli import add_request_options, apply_request_options from STACpopulator.implementations.CMIP6_UofT.extensions import DataCubeHelper from STACpopulator.input import GenericLoader, ErrorLoader, THREDDSLoader from STACpopulator.models import GeoJSONPolygon, STACItemProperties @@ -155,9 +156,9 @@ def create_stac_item(self, item_name: str, item_data: MutableMapping[str, Any]) # Add datacube extension try: - dchelper = DataCubeHelper(item_data) + dc_helper = DataCubeHelper(item_data) dc_ext = DatacubeExtension.ext(item, add_if_missing=True) - dc_ext.apply(dimensions=dchelper.dimensions, variables=dchelper.variables) + dc_ext.apply(dimensions=dc_helper.dimensions, variables=dc_helper.variables) except Exception: LOGGER.warning(f"Failed to add Datacube extension to item {item_name}") @@ -178,14 +179,16 @@ def make_parser() -> argparse.ArgumentParser: def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn: LOGGER.info(f"Arguments to call: {vars(ns)}") - if ns.mode == "full": - data_loader =
THREDDSLoader(ns.thredds_catalog_URL) - else: - # To be implemented - data_loader = ErrorLoader() + with Session() as session: + apply_request_options(session, ns) + if ns.mode == "full": + data_loader = THREDDSLoader(ns.thredds_catalog_URL, session=session) + else: + # To be implemented + data_loader = ErrorLoader() - c = CMIP6populator(ns.stac_host, data_loader, ns.update) - c.ingest() + c = CMIP6populator(ns.stac_host, data_loader, update=ns.update, session=session) + c.ingest() def main(*args: str) -> Optional[int]: diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 62ebcfe..1328b05 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -9,7 +9,8 @@ import siphon import xncml from colorlog import ColoredFormatter -from siphon.catalog import TDSCatalog +from requests.sessions import Session +from siphon.catalog import TDSCatalog, session_manager from STACpopulator.stac_utils import numpy_to_python_datatypes, url_validate @@ -52,8 +53,35 @@ def reset(self): raise NotImplementedError +class THREDDSCatalog(TDSCatalog): + """ + Patch to apply a custom request session. + + Because of how :class:`TDSCatalog` automatically loads and parses right away from ``__init__`` call, + we need to hack around how the ``session`` attribute gets defined. + """ + def __init__(self, catalog_url: str, session: Optional[Session] = None) -> None: + self._session = session + super().__init__(catalog_url) + + @property + def session(self) -> Session: + if self._session is None: + self._session = session_manager.create_session() + return self._session + + @session.setter + def session(self, session: Session) -> None: + pass # ignore to bypass TDSCatalog.__init__ enforcing create_session ! 
+ + class THREDDSLoader(GenericLoader): - def __init__(self, thredds_catalog_url: str, depth: Optional[int] = None) -> None: + def __init__( + self, + thredds_catalog_url: str, + depth: Optional[int] = None, + session: Optional[Session] = None, + ) -> None: """Constructor :param thredds_catalog_url: the URL to the THREDDS catalog to ingest From fd316fdabd29fd0c472e1ad6e5b0221283852d02 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 14 Nov 2023 12:24:36 -0500 Subject: [PATCH 06/15] add directory loader impl test --- .../DirectoryLoader/crawl_directory.py | 2 +- STACpopulator/input.py | 7 + pyproject.toml | 1 + tests/data/test_directory/collection.json | 547 ++++++++++++++++++ tests/data/test_directory/item-0.json | 356 ++++++++++++ tests/data/test_directory/item-1.json | 356 ++++++++++++ .../test_directory/nested/collection.json | 347 +++++++++++ tests/data/test_directory/nested/item-0.json | 356 ++++++++++++ tests/data/test_directory/nested/item-1.json | 356 ++++++++++++ tests/test_directory_loader.py | 86 +++ 10 files changed, 2413 insertions(+), 1 deletion(-) create mode 100644 tests/data/test_directory/collection.json create mode 100644 tests/data/test_directory/item-0.json create mode 100644 tests/data/test_directory/item-1.json create mode 100644 tests/data/test_directory/nested/collection.json create mode 100644 tests/data/test_directory/nested/item-0.json create mode 100644 tests/data/test_directory/nested/item-1.json create mode 100644 tests/test_directory_loader.py diff --git a/STACpopulator/implementations/DirectoryLoader/crawl_directory.py b/STACpopulator/implementations/DirectoryLoader/crawl_directory.py index d2517d8..a8ddec1 100644 --- a/STACpopulator/implementations/DirectoryLoader/crawl_directory.py +++ b/STACpopulator/implementations/DirectoryLoader/crawl_directory.py @@ -58,7 +58,7 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn: apply_request_options(session, ns) for collection_path, collection_json in 
STACDirectoryLoader(ns.directory, "collection", ns.prune): collection_dir = os.path.dirname(collection_path) - loader = STACDirectoryLoader(collection_dir, "item", False) + loader = STACDirectoryLoader(collection_dir, "item", prune=ns.prune) populator = DirectoryPopulator(ns.stac_host, loader, ns.update, collection_json, session=session) populator.ingest() diff --git a/STACpopulator/input.py b/STACpopulator/input.py index 1328b05..0004d81 100644 --- a/STACpopulator/input.py +++ b/STACpopulator/input.py @@ -193,6 +193,7 @@ def __init__(self, path: str, mode: Literal["collection", "item"], prune: bool = self._collection_name = "collection.json" def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: + is_root = True for root, dirs, files in self.iter: # since there can ever be only one 'collection' file name in a same directory # directly retrieve it instead of looping through all other files @@ -201,6 +202,12 @@ def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]: del dirs[:] col_path = os.path.join(root, self._collection_name) yield col_path, self._load_json(col_path) + # if a collection is found deeper when not expected for items parsing + # drop the nested directories to avoid over-crawling nested collections + elif not self._collection_mode and not is_root and self._collection_name in files: + del dirs[:] + continue + is_root = False # for next loop for name in files: if not self._collection_mode and self._is_item(name): item_path = os.path.join(root, name) diff --git a/pyproject.toml b/pyproject.toml index 1f36c77..c42ae8c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -80,6 +80,7 @@ dev = [ "pytest", "pytest-cov", "coverage", + "responses", "bump-my-version", ] diff --git a/tests/data/test_directory/collection.json b/tests/data/test_directory/collection.json new file mode 100644 index 0000000..40dbba0 --- /dev/null +++ b/tests/data/test_directory/collection.json @@ -0,0 +1,547 @@ +{ + "stac_version": "1.0.0", + 
"stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.1.0/schema.json", + "https://stac-extensions.github.io/ml-aoi/v0.1.0/schema.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/version/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json" + ], + "type": "Collection", + "id": "EuroSAT-subset-train", + "title": "EuroSAT subset train", + "description": "EuroSAT dataset with labeled annotations for land-cover classification and associated imagery. This collection represents the samples part of the train split set for training machine learning algorithms.", + "version": "0.4.0", + "experimental": true, + "license": "MIT", + "extent": { + "spatial": { + "bbox": [ + [ + -13.488814500989974, + 0.0003349561845232938, + 22.511506175937363, + 0.0005250899573945398 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2015-06-27T10:25:31.456Z", + "2017-06-14T00:00:00Z" + ] + ] + } + }, + "summaries": { + "sci:doi": [ + "10.1109/JSTARS.2019.2918242" + ], + "sci:citation": [ + "Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification. Patrick Helber, Benjamin Bischke, Andreas Dengel, Damian Borth. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing, 2019." + ], + "sci:publications": [ + { + "doi": "10.1109/IGARSS.2018.8519248", + "citation": "Introducing EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification. Patrick Helber, Benjamin Bischke, Andreas Dengel. 2018 IEEE International Geoscience and Remote Sensing Symposium, 2018." 
+ } + ], + "ml-aoi:split": [ + "train" + ], + "constellation": [ + "sentinel-2" + ], + "instruments": [ + "msi" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B05", + "center_wavelength": 0.7039, + "full_width_half_max": 0.019, + "common_name": "rededge" + }, + { + "name": "B06", + "center_wavelength": 0.7402, + "full_width_half_max": 0.018, + "common_name": "rededge" + }, + { + "name": "B07", + "center_wavelength": 0.7825, + "full_width_half_max": 0.028, + "common_name": "rededge" + }, + { + "name": "B08", + "common_name": "nir", + "center_wavelength": 0.8351, + "full_width_half_max": 0.145 + }, + { + "name": "B08A", + "center_wavelength": 0.8648, + "full_width_half_max": 0.033, + "common_name": "nir08" + }, + { + "name": "B09", + "center_wavelength": 0.945, + "full_width_half_max": 0.026, + "common_name": "nir09" + }, + { + "name": "B10", + "common_name": "cirrus", + "center_wavelength": 1.3735, + "full_width_half_max": 0.075 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137, + "full_width_half_max": 0.143 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.22024, + "full_width_half_max": 0.242 + } + ], + "view:off_nadir": [ + 0 + ], + "gsd": [ + 10 + ] + }, + "assets": { + "source": { + "href": "https://github.com/phelber/EuroSAT/", + "type": "text/html", + "roles": [ + "data", + "source", + "scientific", + "citation" + ], + "title": "GitHub repository", + "description": "Source GitHub repository of the EuroSAT dataset.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + }, + 
"paper": { + "href": "https://www.researchgate.net/publication/319463676", + "type": "text/html", + "roles": [ + "paper", + "scientific", + "citation" + ], + "title": "Scientific Paper", + "description": "ResearchGate page with embedded PDF of the scientific paper supporting the dataset.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + }, + "thumbnail": { + "href": "https://raw.githubusercontent.com/phelber/EuroSAT/master/eurosat_overview_small.jpg", + "type": "image/jpeg", + "roles": [ + "thumbnail", + "overview" + ], + "description": "Preview of dataset samples.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + }, + "license": { + "href": "https://raw.githubusercontent.com/phelber/EuroSAT/master/LICENSE", + "type": "text/plain", + "roles": [ + "legal", + "license" + ], + "title": "License", + "description": "License contents associated to the EuroSAT dataset.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + } + }, + "links": [ + { + "rel": "cite-as", + "href": "https://arxiv.org/abs/1709.00029", + "type": "text/html", + "title": "EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification" + }, + { + "rel": "license", + "href": "https://raw.githubusercontent.com/phelber/EuroSAT/master/LICENSE", + "type": "text/html", + "title": "EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-0.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-1.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-2.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-3.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-4.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-5.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-6.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-7.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-8.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-9.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-10.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-11.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-12.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-13.json", + "type": 
"application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-14.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-15.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-16.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-17.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-18.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-19.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-20.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-21.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-22.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-23.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-24.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-25.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-26.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-27.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-28.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-29.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-30.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-31.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-32.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-33.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-34.json", + "type": 
"application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-35.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-36.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-37.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-38.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-39.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-40.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-41.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-42.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-43.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-44.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-45.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-46.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-47.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-48.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-49.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-50.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-51.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-52.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-53.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-54.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-55.json", + "type": 
"application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-56.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-57.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-58.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-59.json", + "type": "application/geo+json" + }, + { + "rel": "root", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "train" + }, + { + "rel": "collection", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "train" + }, + { + "rel": "related", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/validate/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "validate" + }, + { + "rel": "related", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": 
"application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "test" + }, + { + "rel": "parent", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json", + "title": "STAC Catalog" + } + ] +} \ No newline at end of file diff --git a/tests/data/test_directory/item-0.json b/tests/data/test_directory/item-0.json new file mode 100644 index 0000000..9774c02 --- /dev/null +++ b/tests/data/test_directory/item-0.json @@ -0,0 +1,356 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.1.0/schema.json", + "https://stac-extensions.github.io/file/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/label/v1.0.1/schema.json", + "https://stac-extensions.github.io/ml-aoi/v0.1.0/schema.json", + "https://stac-extensions.github.io/version/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "EuroSAT-subset-train-sample-0-class-AnnualCrop", + "title": "EuroSAT subset train sample 0 class AnnualCrop", + "description": "Annotated sample from the EuroSAT-subset-train collection.", + "bbox": [ + 4.677967553021795, + 48.94867558368056, + 4.686897851300048, + 48.95455470210916 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -1.488701894616339, + 0.0004414865260322278 + ], + [ + -1.488701894616371, + 0.00044153955201383445 + ], + [ + -1.4887019746228884, + 0.0004415395519651046 + ], + [ + -1.4887019746228558, + 0.0004414865259835036 + ], + [ + -1.488701894616339, + 0.0004414865260322278 + ] + ] + ] + }, + "assets": { + "labels": { + "title": "Labels for image AnnualCrop_1175 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/label/AnnualCrop/AnnualCrop_1175.geojson", + "type": 
"application/geo+json", + "roles": [ + "data" + ], + "file:size": 763, + "ml-aoi:role": "label" + }, + "raster": { + "title": "Raster AnnualCrop_1175 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1175.tif", + "type": "image/tiff; application=geotiff", + "raster:bands": [ + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + } + ], + "roles": [ + "data" + ], + "file:size": 107244, + "ml-aoi:role": "feature", + "ml-aoi:reference-grid": true + }, + "thumbnail": { + "title": "Preview of AnnualCrop_1175.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_1175.png", + "type": 
"image/png", + "roles": [ + "thumbnail", + "visual" + ], + "eo:bands": [ + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + } + ], + "file:size": 5816 + } + }, + "collection": "EuroSAT-subset-train", + "properties": { + "datetime": "2023-11-10T01:51:20.089361+00:00", + "license": "MIT", + "version": "0.4.0", + "label:properties": [ + "class" + ], + "label:tasks": [ + "segmentation", + "classification" + ], + "label:type": "vector", + "label:methods": [ + "manual" + ], + "label:description": "Land-cover area classification on Sentinel-2 image.", + "label:classes": [ + { + "name": "class", + "classes": [ + "AnnualCrop", + "0" + ] + } + ], + "label:overviews": [ + { + "property_key": "class", + "counts": [ + { + "name": "AnnualCrop", + "count": 1 + } + ] + } + ], + "ml-aoi:split": "train", + "constellation": "sentinel-2", + "instruments": [ + "msi" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B05", + "center_wavelength": 0.7039, + "full_width_half_max": 0.019, + "common_name": "rededge" + }, + { + "name": "B06", + "center_wavelength": 0.7402, + "full_width_half_max": 0.018, + "common_name": "rededge" + }, + { + "name": "B07", + "center_wavelength": 0.7825, + "full_width_half_max": 0.028, + "common_name": "rededge" + }, + { + "name": "B08", + 
"common_name": "nir", + "center_wavelength": 0.8351, + "full_width_half_max": 0.145 + }, + { + "name": "B08A", + "center_wavelength": 0.8648, + "full_width_half_max": 0.033, + "common_name": "nir08" + }, + { + "name": "B09", + "center_wavelength": 0.945, + "full_width_half_max": 0.026, + "common_name": "nir09" + }, + { + "name": "B10", + "common_name": "cirrus", + "center_wavelength": 1.3735, + "full_width_half_max": 0.075 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137, + "full_width_half_max": 0.143 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.22024, + "full_width_half_max": 0.242 + } + ], + "view:off_nadir": 0, + "gsd": 10 + }, + "links": [ + { + "title": "Preview of AnnualCrop_1175.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_1175.png", + "type": "image/png", + "rel": "thumbnail" + }, + { + "title": "Raster AnnualCrop_1175 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1175.tif", + "type": "image/tiff; application=geotiff", + "rel": "source", + "label:assets": [ + "labels", + "raster" + ], + "ml-aoi:role": "label" + }, + { + "title": "Raster AnnualCrop_1175 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1175.tif", + "type": "image/tiff; application=geotiff", + "rel": "derived_from", + "ml-aoi:role": "feature" + }, + { + "rel": "root", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "train" + }, + { + "rel": "collection", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "train" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-0.json", + "type": "application/geo+json" + } + ] +} \ No newline at end of file diff --git a/tests/data/test_directory/item-1.json b/tests/data/test_directory/item-1.json new file mode 100644 index 0000000..f5eb8a5 --- /dev/null +++ b/tests/data/test_directory/item-1.json @@ -0,0 +1,356 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.1.0/schema.json", + "https://stac-extensions.github.io/file/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/label/v1.0.1/schema.json", + "https://stac-extensions.github.io/ml-aoi/v0.1.0/schema.json", + "https://stac-extensions.github.io/version/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "EuroSAT-subset-train-sample-1-class-AnnualCrop", + "title": "EuroSAT subset train sample 1 class AnnualCrop", + "description": "Annotated sample from the EuroSAT-subset-train collection.", + "bbox": [ + 27.617520089059845, + 43.664570997491296, + 27.625524008488643, + 43.67037279068068 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 22.51150361252739, + 0.0003938273098336916 + ], + [ + 22.511503612527367, + 0.00039387963840469985 + ], + [ + 22.511503540820254, + 0.0003938796383657409 + ], + [ + 22.511503540820282, + 
0.0003938273097947379 + ], + [ + 22.51150361252739, + 0.0003938273098336916 + ] + ] + ] + }, + "assets": { + "labels": { + "title": "Labels for image AnnualCrop_1210 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/label/AnnualCrop/AnnualCrop_1210.geojson", + "type": "application/geo+json", + "roles": [ + "data" + ], + "file:size": 759, + "ml-aoi:role": "label" + }, + "raster": { + "title": "Raster AnnualCrop_1210 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1210.tif", + "type": "image/tiff; application=geotiff", + "raster:bands": [ + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + 
"data_type": "uint16" + } + ], + "roles": [ + "data" + ], + "file:size": 107244, + "ml-aoi:role": "feature", + "ml-aoi:reference-grid": true + }, + "thumbnail": { + "title": "Preview of AnnualCrop_1210.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_1210.png", + "type": "image/png", + "roles": [ + "thumbnail", + "visual" + ], + "eo:bands": [ + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + } + ], + "file:size": 5127 + } + }, + "collection": "EuroSAT-subset-train", + "properties": { + "datetime": "2023-11-10T01:51:20.167167+00:00", + "license": "MIT", + "version": "0.4.0", + "label:properties": [ + "class" + ], + "label:tasks": [ + "segmentation", + "classification" + ], + "label:type": "vector", + "label:methods": [ + "manual" + ], + "label:description": "Land-cover area classification on Sentinel-2 image.", + "label:classes": [ + { + "name": "class", + "classes": [ + "AnnualCrop", + "0" + ] + } + ], + "label:overviews": [ + { + "property_key": "class", + "counts": [ + { + "name": "AnnualCrop", + "count": 1 + } + ] + } + ], + "ml-aoi:split": "train", + "constellation": "sentinel-2", + "instruments": [ + "msi" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + 
"full_width_half_max": 0.038 + }, + { + "name": "B05", + "center_wavelength": 0.7039, + "full_width_half_max": 0.019, + "common_name": "rededge" + }, + { + "name": "B06", + "center_wavelength": 0.7402, + "full_width_half_max": 0.018, + "common_name": "rededge" + }, + { + "name": "B07", + "center_wavelength": 0.7825, + "full_width_half_max": 0.028, + "common_name": "rededge" + }, + { + "name": "B08", + "common_name": "nir", + "center_wavelength": 0.8351, + "full_width_half_max": 0.145 + }, + { + "name": "B08A", + "center_wavelength": 0.8648, + "full_width_half_max": 0.033, + "common_name": "nir08" + }, + { + "name": "B09", + "center_wavelength": 0.945, + "full_width_half_max": 0.026, + "common_name": "nir09" + }, + { + "name": "B10", + "common_name": "cirrus", + "center_wavelength": 1.3735, + "full_width_half_max": 0.075 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137, + "full_width_half_max": 0.143 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.22024, + "full_width_half_max": 0.242 + } + ], + "view:off_nadir": 0, + "gsd": 10 + }, + "links": [ + { + "title": "Preview of AnnualCrop_1210.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_1210.png", + "type": "image/png", + "rel": "thumbnail" + }, + { + "title": "Raster AnnualCrop_1210 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1210.tif", + "type": "image/tiff; application=geotiff", + "rel": "source", + "label:assets": [ + "labels", + "raster" + ], + "ml-aoi:role": "label" + }, + { + "title": "Raster AnnualCrop_1210 with AnnualCrop class", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1210.tif", + "type": "image/tiff; application=geotiff", + "rel": "derived_from", + "ml-aoi:role": "feature" + }, + { + "rel": "root", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "train" + }, + { + "rel": "collection", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'train' split.", + "ml-aoi:split": "train" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/item-1.json", + "type": "application/geo+json" + } + ] +} \ No newline at end of file diff --git a/tests/data/test_directory/nested/collection.json b/tests/data/test_directory/nested/collection.json new file mode 100644 index 0000000..4629645 --- /dev/null +++ b/tests/data/test_directory/nested/collection.json @@ -0,0 +1,347 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.1.0/schema.json", + "https://stac-extensions.github.io/ml-aoi/v0.1.0/schema.json", + "https://stac-extensions.github.io/scientific/v1.0.0/schema.json", + "https://stac-extensions.github.io/version/v1.0.0/schema.json", + "https://stac-extensions.github.io/view/v1.0.0/schema.json" + ], + "type": "Collection", + "id": "EuroSAT-subset-test", + "title": "EuroSAT subset test", + "description": "EuroSAT 
dataset with labeled annotations for land-cover classification and associated imagery. This collection represents the samples part of the test split set for training machine learning algorithms.", + "version": "0.4.0", + "experimental": true, + "license": "MIT", + "extent": { + "spatial": { + "bbox": [ + [ + -13.488814500989974, + 0.00031627870509240793, + 28.51155096021155, + 0.0005250899573945398 + ] + ] + }, + "temporal": { + "interval": [ + [ + "2015-06-27T10:25:31.456Z", + "2017-06-14T00:00:00Z" + ] + ] + } + }, + "summaries": { + "sci:doi": [ + "10.1109/JSTARS.2019.2918242" + ], + "sci:citation": [ + "Eurosat: A novel dataset and deep learning benchmark for land use and land cover classification. Patrick Helber, Benjamin Bischke, Andreas Dengel, Damian Borth. IEEE Journal of Selected Topics in Applied Earth Observations and Remote Sensing, 2019." + ], + "sci:publications": [ + { + "doi": "10.1109/IGARSS.2018.8519248", + "citation": "Introducing EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification. Patrick Helber, Benjamin Bischke, Andreas Dengel. 2018 IEEE International Geoscience and Remote Sensing Symposium, 2018." 
+ } + ], + "ml-aoi:split": [ + "test" + ], + "constellation": [ + "sentinel-2" + ], + "instruments": [ + "msi" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B05", + "center_wavelength": 0.7039, + "full_width_half_max": 0.019, + "common_name": "rededge" + }, + { + "name": "B06", + "center_wavelength": 0.7402, + "full_width_half_max": 0.018, + "common_name": "rededge" + }, + { + "name": "B07", + "center_wavelength": 0.7825, + "full_width_half_max": 0.028, + "common_name": "rededge" + }, + { + "name": "B08", + "common_name": "nir", + "center_wavelength": 0.8351, + "full_width_half_max": 0.145 + }, + { + "name": "B08A", + "center_wavelength": 0.8648, + "full_width_half_max": 0.033, + "common_name": "nir08" + }, + { + "name": "B09", + "center_wavelength": 0.945, + "full_width_half_max": 0.026, + "common_name": "nir09" + }, + { + "name": "B10", + "common_name": "cirrus", + "center_wavelength": 1.3735, + "full_width_half_max": 0.075 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137, + "full_width_half_max": 0.143 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.22024, + "full_width_half_max": 0.242 + } + ], + "view:off_nadir": [ + 0 + ], + "gsd": [ + 10 + ] + }, + "assets": { + "source": { + "href": "https://github.com/phelber/EuroSAT/", + "type": "text/html", + "roles": [ + "data", + "source", + "scientific", + "citation" + ], + "title": "GitHub repository", + "description": "Source GitHub repository of the EuroSAT dataset.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + }, + 
"paper": { + "href": "https://www.researchgate.net/publication/319463676", + "type": "text/html", + "roles": [ + "paper", + "scientific", + "citation" + ], + "title": "Scientific Paper", + "description": "ResearchGate page with embedded PDF of the scientific paper supporting the dataset.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + }, + "thumbnail": { + "href": "https://raw.githubusercontent.com/phelber/EuroSAT/master/eurosat_overview_small.jpg", + "type": "image/jpeg", + "roles": [ + "thumbnail", + "overview" + ], + "description": "Preview of dataset samples.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + }, + "license": { + "href": "https://raw.githubusercontent.com/phelber/EuroSAT/master/LICENSE", + "type": "text/plain", + "roles": [ + "legal", + "license" + ], + "title": "License", + "description": "License contents associated to the EuroSAT dataset.", + "sci:doi": "10.1109/JSTARS.2019.2918242" + } + }, + "links": [ + { + "rel": "cite-as", + "href": "https://arxiv.org/abs/1709.00029", + "type": "text/html", + "title": "EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification" + }, + { + "rel": "license", + "href": "https://raw.githubusercontent.com/phelber/EuroSAT/master/LICENSE", + "type": "text/html", + "title": "EuroSAT: A Novel Dataset and Deep Learning Benchmark for Land Use and Land Cover Classification" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-0.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-1.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-2.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-3.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-4.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-5.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-6.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-7.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-8.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-9.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-10.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-11.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-12.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-13.json", + "type": 
"application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-14.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-15.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-16.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-17.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-18.json", + "type": "application/geo+json" + }, + { + "rel": "item", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-19.json", + "type": "application/geo+json" + }, + { + "rel": "root", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "test" + }, + { + "rel": "collection", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "test" + }, + { + "rel": "related", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/train/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "train" + }, + { + "rel": "related", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/validate/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "validate" + }, + { + "rel": "parent", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json", + "title": "STAC Catalog" + } + ] +} diff --git a/tests/data/test_directory/nested/item-0.json b/tests/data/test_directory/nested/item-0.json new file mode 100644 index 0000000..e62d3b4 --- /dev/null +++ b/tests/data/test_directory/nested/item-0.json @@ -0,0 +1,356 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.1.0/schema.json", + "https://stac-extensions.github.io/file/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/label/v1.0.1/schema.json", + "https://stac-extensions.github.io/ml-aoi/v0.1.0/schema.json", + "https://stac-extensions.github.io/version/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "EuroSAT-subset-test-sample-0-class-AnnualCrop", + "title": "EuroSAT subset test sample 0 class AnnualCrop", + "description": "Annotated sample from the EuroSAT-subset-test collection.", + "bbox": [ + 22.854989915168744, + 49.836213455735006, + 22.864099797549315, + 49.84210863494964 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + 16.511460954898432, + 0.00044949166508093715 + ], + [ + 16.5114609548984, + 0.00044954483593276134 + ], + [ + 16.51146087328297, + 0.000449544835882152 + ], + [ + 16.511460873283003, + 
0.0004494916650303338 + ], + [ + 16.511460954898432, + 0.00044949166508093715 + ] + ] + ] + }, + "assets": { + "labels": { + "title": "Labels for image AnnualCrop_1636 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/label/AnnualCrop/AnnualCrop_1636.geojson", + "type": "application/geo+json", + "roles": [ + "data" + ], + "file:size": 759, + "ml-aoi:role": "label" + }, + "raster": { + "title": "Raster AnnualCrop_1636 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1636.tif", + "type": "image/tiff; application=geotiff", + "raster:bands": [ + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + 
"data_type": "uint16" + } + ], + "roles": [ + "data" + ], + "file:size": 107244, + "ml-aoi:role": "feature", + "ml-aoi:reference-grid": true + }, + "thumbnail": { + "title": "Preview of AnnualCrop_1636.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_1636.png", + "type": "image/png", + "roles": [ + "thumbnail", + "visual" + ], + "eo:bands": [ + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + } + ], + "file:size": 7517 + } + }, + "collection": "EuroSAT-subset-test", + "properties": { + "datetime": "2023-11-10T01:51:27.661769+00:00", + "license": "MIT", + "version": "0.4.0", + "label:properties": [ + "class" + ], + "label:tasks": [ + "segmentation", + "classification" + ], + "label:type": "vector", + "label:methods": [ + "manual" + ], + "label:description": "Land-cover area classification on Sentinel-2 image.", + "label:classes": [ + { + "name": "class", + "classes": [ + "AnnualCrop", + "0" + ] + } + ], + "label:overviews": [ + { + "property_key": "class", + "counts": [ + { + "name": "AnnualCrop", + "count": 1 + } + ] + } + ], + "ml-aoi:split": "test", + "constellation": "sentinel-2", + "instruments": [ + "msi" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + 
"full_width_half_max": 0.038 + }, + { + "name": "B05", + "center_wavelength": 0.7039, + "full_width_half_max": 0.019, + "common_name": "rededge" + }, + { + "name": "B06", + "center_wavelength": 0.7402, + "full_width_half_max": 0.018, + "common_name": "rededge" + }, + { + "name": "B07", + "center_wavelength": 0.7825, + "full_width_half_max": 0.028, + "common_name": "rededge" + }, + { + "name": "B08", + "common_name": "nir", + "center_wavelength": 0.8351, + "full_width_half_max": 0.145 + }, + { + "name": "B08A", + "center_wavelength": 0.8648, + "full_width_half_max": 0.033, + "common_name": "nir08" + }, + { + "name": "B09", + "center_wavelength": 0.945, + "full_width_half_max": 0.026, + "common_name": "nir09" + }, + { + "name": "B10", + "common_name": "cirrus", + "center_wavelength": 1.3735, + "full_width_half_max": 0.075 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137, + "full_width_half_max": 0.143 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.22024, + "full_width_half_max": 0.242 + } + ], + "view:off_nadir": 0, + "gsd": 10 + }, + "links": [ + { + "title": "Preview of AnnualCrop_1636.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_1636.png", + "type": "image/png", + "rel": "thumbnail" + }, + { + "title": "Raster AnnualCrop_1636 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1636.tif", + "type": "image/tiff; application=geotiff", + "rel": "source", + "label:assets": [ + "labels", + "raster" + ], + "ml-aoi:role": "label" + }, + { + "title": "Raster AnnualCrop_1636 with AnnualCrop class", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_1636.tif", + "type": "image/tiff; application=geotiff", + "rel": "derived_from", + "ml-aoi:role": "feature" + }, + { + "rel": "root", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "test" + }, + { + "rel": "collection", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "test" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-0.json", + "type": "application/geo+json" + } + ] +} diff --git a/tests/data/test_directory/nested/item-1.json b/tests/data/test_directory/nested/item-1.json new file mode 100644 index 0000000..cafe69e --- /dev/null +++ b/tests/data/test_directory/nested/item-1.json @@ -0,0 +1,356 @@ +{ + "stac_version": "1.0.0", + "stac_extensions": [ + "https://stac-extensions.github.io/eo/v1.1.0/schema.json", + "https://stac-extensions.github.io/file/v1.0.0/schema.json", + "https://stac-extensions.github.io/raster/v1.1.0/schema.json", + "https://stac-extensions.github.io/label/v1.0.1/schema.json", + "https://stac-extensions.github.io/ml-aoi/v0.1.0/schema.json", + "https://stac-extensions.github.io/version/v1.0.0/schema.json" + ], + "type": "Feature", + "id": "EuroSAT-subset-test-sample-1-class-AnnualCrop", + "title": "EuroSAT 
subset test sample 1 class AnnualCrop", + "description": "Annotated sample from the EuroSAT-subset-test collection.", + "bbox": [ + 4.220941302587967, + 48.832221787376334, + 4.229804014722728, + 48.838073637305776 + ], + "geometry": { + "type": "Polygon", + "coordinates": [ + [ + [ + -1.4887059897180999, + 0.00044043618293010514 + ], + [ + -1.4887059897181307, + 0.0004404889629665576 + ], + [ + -1.4887060691191447, + 0.00044048896291831166 + ], + [ + -1.488706069119112, + 0.0004404361828818648 + ], + [ + -1.4887059897180999, + 0.00044043618293010514 + ] + ] + ] + }, + "assets": { + "labels": { + "title": "Labels for image AnnualCrop_367 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/label/AnnualCrop/AnnualCrop_367.geojson", + "type": "application/geo+json", + "roles": [ + "data" + ], + "file:size": 767, + "ml-aoi:role": "label" + }, + "raster": { + "title": "Raster AnnualCrop_367 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_367.tif", + "type": "image/tiff; application=geotiff", + "raster:bands": [ + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + 
"spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + }, + { + "nodata": 0, + "unit": "m", + "spatial_resolution": 10, + "data_type": "uint16" + } + ], + "roles": [ + "data" + ], + "file:size": 107244, + "ml-aoi:role": "feature", + "ml-aoi:reference-grid": true + }, + "thumbnail": { + "title": "Preview of AnnualCrop_367.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_367.png", + "type": "image/png", + "roles": [ + "thumbnail", + "visual" + ], + "eo:bands": [ + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + } + ], + "file:size": 6080 + } + }, + "collection": "EuroSAT-subset-test", + "properties": { + "datetime": "2023-11-10T01:51:27.752557+00:00", + "license": "MIT", + "version": "0.4.0", + "label:properties": [ + "class" + ], + "label:tasks": [ + "segmentation", + "classification" + ], + "label:type": "vector", + "label:methods": [ + "manual" + ], + "label:description": "Land-cover area classification on Sentinel-2 image.", + "label:classes": [ + { + "name": "class", + "classes": [ + "AnnualCrop", + "0" + ] + } + ], + "label:overviews": [ + { + "property_key": "class", + "counts": [ + { + "name": "AnnualCrop", + "count": 1 + } + ] + } + ], + "ml-aoi:split": "test", + "constellation": 
"sentinel-2", + "instruments": [ + "msi" + ], + "eo:bands": [ + { + "name": "B01", + "common_name": "coastal", + "center_wavelength": 0.4439, + "full_width_half_max": 0.027 + }, + { + "name": "B02", + "common_name": "blue", + "center_wavelength": 0.4966, + "full_width_half_max": 0.098 + }, + { + "name": "B03", + "common_name": "green", + "center_wavelength": 0.56, + "full_width_half_max": 0.045 + }, + { + "name": "B04", + "common_name": "red", + "center_wavelength": 0.6645, + "full_width_half_max": 0.038 + }, + { + "name": "B05", + "center_wavelength": 0.7039, + "full_width_half_max": 0.019, + "common_name": "rededge" + }, + { + "name": "B06", + "center_wavelength": 0.7402, + "full_width_half_max": 0.018, + "common_name": "rededge" + }, + { + "name": "B07", + "center_wavelength": 0.7825, + "full_width_half_max": 0.028, + "common_name": "rededge" + }, + { + "name": "B08", + "common_name": "nir", + "center_wavelength": 0.8351, + "full_width_half_max": 0.145 + }, + { + "name": "B08A", + "center_wavelength": 0.8648, + "full_width_half_max": 0.033, + "common_name": "nir08" + }, + { + "name": "B09", + "center_wavelength": 0.945, + "full_width_half_max": 0.026, + "common_name": "nir09" + }, + { + "name": "B10", + "common_name": "cirrus", + "center_wavelength": 1.3735, + "full_width_half_max": 0.075 + }, + { + "name": "B11", + "common_name": "swir16", + "center_wavelength": 1.6137, + "full_width_half_max": 0.143 + }, + { + "name": "B12", + "common_name": "swir22", + "center_wavelength": 2.22024, + "full_width_half_max": 0.242 + } + ], + "view:off_nadir": 0, + "gsd": 10 + }, + "links": [ + { + "title": "Preview of AnnualCrop_367.", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/png/AnnualCrop/AnnualCrop_367.png", + "type": "image/png", + "rel": "thumbnail" + }, + { + "title": "Raster AnnualCrop_367 with AnnualCrop class", + "href": 
"https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_367.tif", + "type": "image/tiff; application=geotiff", + "rel": "source", + "label:assets": [ + "labels", + "raster" + ], + "ml-aoi:role": "label" + }, + { + "title": "Raster AnnualCrop_367 with AnnualCrop class", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/data/subset/ds/images/remote_sensing/otherDatasets/sentinel_2/tif/AnnualCrop/AnnualCrop_367.tif", + "type": "image/tiff; application=geotiff", + "rel": "derived_from", + "ml-aoi:role": "feature" + }, + { + "rel": "root", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/catalog.json", + "type": "application/json" + }, + { + "rel": "parent", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "test" + }, + { + "rel": "collection", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/collection.json", + "type": "application/json", + "title": "EuroSAT STAC Collection with samples from 'test' split.", + "ml-aoi:split": "test" + }, + { + "rel": "self", + "href": "https://raw.githubusercontent.com/ai-extensions/stac-data-loader/0.4.0/data/EuroSAT/stac/subset/test/item-1.json", + "type": "application/geo+json" + } + ] +} diff --git a/tests/test_directory_loader.py b/tests/test_directory_loader.py new file mode 100644 index 0000000..361f123 --- /dev/null +++ b/tests/test_directory_loader.py @@ -0,0 +1,86 @@ +import argparse +import json +import os +import pytest +import responses + +from STACpopulator.implementations.DirectoryLoader import crawl_directory + +CUR_DIR = os.path.dirname(__file__) 
+ + +@pytest.mark.parametrize( + "prune_option", + [True, False] +) +def test_directory_loader_populator_runner(prune_option: bool): + ns = argparse.Namespace() + stac_host = "http://test-host.com/stac/" + setattr(ns, "verify", False) + setattr(ns, "cert", None) + setattr(ns, "auth_handler", None) + setattr(ns, "stac_host", stac_host) + setattr(ns, "directory", os.path.join(CUR_DIR, "data/test_directory")) + setattr(ns, "prune", prune_option) + setattr(ns, "update", True) + + file_id_map = { + "collection.json": "EuroSAT-subset-train", + "item-0.json": "EuroSAT-subset-train-sample-0-class-AnnualCrop", + "item-1.json": "EuroSAT-subset-train-sample-1-class-AnnualCrop", + "nested/collection.json": "EuroSAT-subset-test", + "nested/item-0.json": "EuroSAT-subset-test-sample-0-class-AnnualCrop", + "nested/item-1.json": "EuroSAT-subset-test-sample-1-class-AnnualCrop", + } + file_contents = {} + for file_name in file_id_map: + ref_file = os.path.join(CUR_DIR, "data/test_directory", file_name) + with open(ref_file, mode="r", encoding="utf-8") as f: + json_data = json.load(f) + file_contents[file_name] = json.dumps(json_data, indent=None).encode() + + with responses.RequestsMock(assert_all_requests_are_fired=False) as request_mock: + request_mock.add("GET", stac_host, json={"stac_version": "1.0.0", "type": "Catalog"}) + request_mock.add( + "POST", + f"{stac_host}collections", + headers={"Content-Type": "application/json"}, + ) + request_mock.add( + "POST", + f"{stac_host}collections/{file_id_map['collection.json']}/items", + headers={"Content-Type": "application/json"}, + ) + request_mock.add( + "POST", + f"{stac_host}collections/{file_id_map['nested/collection.json']}/items", + headers={"Content-Type": "application/json"}, + ) + + crawl_directory.runner(ns) + + assert len(request_mock.calls) == (4 if prune_option else 8) + assert request_mock.calls[0].request.url == stac_host + + base_col = file_id_map['collection.json'] + assert request_mock.calls[1].request.path_url == 
"/stac/collections" + assert request_mock.calls[1].request.body == file_contents["collection.json"] + + assert request_mock.calls[2].request.path_url == f"/stac/collections/{base_col}/items" + assert request_mock.calls[2].request.body == file_contents["item-0.json"] + + assert request_mock.calls[3].request.path_url == f"/stac/collections/{base_col}/items" + assert request_mock.calls[3].request.body == file_contents["item-1.json"] + + if not prune_option: + assert request_mock.calls[4].request.url == stac_host + + nested_col = file_id_map["nested/collection.json"] + assert request_mock.calls[5].request.path_url == "/stac/collections" + assert request_mock.calls[5].request.body == file_contents["nested/collection.json"] + + assert request_mock.calls[6].request.path_url == f"/stac/collections/{nested_col}/items" + assert request_mock.calls[6].request.body == file_contents["nested/item-0.json"] + + assert request_mock.calls[7].request.path_url == f"/stac/collections/{nested_col}/items" + assert request_mock.calls[7].request.body == file_contents["nested/item-1.json"] From 7d22bd3da945aa2a77a99fef4f529a704a7b51f6 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 14 Nov 2023 12:46:47 -0500 Subject: [PATCH 07/15] add missing request arguments for CMIP6 impl --- STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py | 1 + 1 file changed, 1 insertion(+) diff --git a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py index 5174f4a..d6b5a25 100644 --- a/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py +++ b/STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py @@ -173,6 +173,7 @@ def make_parser() -> argparse.ArgumentParser: parser.add_argument("--update", action="store_true", help="Update collection and its items") parser.add_argument("--mode", choices=["full", "single"], help="Operation mode, processing the full dataset or only the single reference.") + add_request_options(parser) return 
parser From bafebbb2ed3b05d0de960bb4e3de4b4edfdb0489 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Tue, 14 Nov 2023 12:47:09 -0500 Subject: [PATCH 08/15] ensure pydantic>=2 is instance as required by references in CMIP6 impl --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index c42ae8c..bdd7914 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,7 @@ dependencies = [ "siphon", "pystac", "xncml>=0.3.1", # python 3.12 support - "pydantic", + "pydantic>=2", "pyessv", "requests", "lxml", From 7215ca6a1a541a1cc41a9432aafff7e06307648a Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Tue, 14 Nov 2023 19:38:30 -0500 Subject: [PATCH 09/15] Update STACpopulator/api_requests.py Co-authored-by: David Huard --- STACpopulator/api_requests.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/STACpopulator/api_requests.py b/STACpopulator/api_requests.py index 69fa292..793fd03 100644 --- a/STACpopulator/api_requests.py +++ b/STACpopulator/api_requests.py @@ -22,8 +22,8 @@ def stac_host_reachable(url: str, session: Optional[Session] = None) -> bool: response = session.get(url, headers={"Accept": "application/json"}) response.raise_for_status() body = response.json() - if body["type"] == "Catalog" and "stac_version" in body: - return True + return body["type"] == "Catalog" and "stac_version" in body + except (requests.exceptions.RequestException, requests.exceptions.ConnectionError) as exc: LOGGER.error("Could not validate STAC host. 
Not reachable [%s] due to [%s]", url, exc, exc_info=exc) return False From eb525fa73c08dd499c58ab954e4dc61314824b2f Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Wed, 15 Nov 2023 21:57:51 -0500 Subject: [PATCH 10/15] add make git clone/update switch of pyessv archive --- Makefile | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 5f80f6f..62c1787 100644 --- a/Makefile +++ b/Makefile @@ -15,10 +15,15 @@ CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datase # CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datasets/CMIP6/CMIP/NOAA-GFDL/catalog.html # CATALOG = https://daccs.cs.toronto.edu/twitcher/ows/proxy/thredds/catalog/datasets/CMIP6/CMIP/AS-RCEC/catalog.html +PYESSV_ARCHIVE_DIR ?= ~/.esdoc/pyessv-archive +PYESSV_ARCHIVE_REF ?= https://github.com/ES-DOC/pyessv-archive + ## -- Testing targets -------------------------------------------------------------------------------------------- ## setup-pyessv-archive: - git clone "https://github.com/ES-DOC/pyessv-archive" ~/.esdoc/pyessv-archive + @echo "Updating pyessv archive [$(shell realpath $(PYESSV_ARCHIVE_DIR))]..." 
+ @[ -d $(PYESSV_ARCHIVE_DIR) ] || git clone "$(PYESSV_ARCHIVE_REF)" $(PYESSV_ARCHIVE_DIR) + @cd $(PYESSV_ARCHIVE_DIR) && git pull test-cmip6: python $(IMP_DIR)/CMIP6_UofT/add_CMIP6.py $(STAC_HOST) $(CATALOG) From aabff79fe4c2d0130b5bcb1733f790430bfa0d19 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 16 Nov 2023 13:21:01 -0500 Subject: [PATCH 11/15] Version updated from 0.2.0 to 0.3.0 --- CHANGES.md | 5 +++++ Makefile | 2 +- README.md | 6 +++--- STACpopulator/__init__.py | 2 +- docker/Dockerfile | 2 +- pyproject.toml | 4 ++-- 6 files changed, 13 insertions(+), 8 deletions(-) diff --git a/CHANGES.md b/CHANGES.md index ced6fd4..7d6e158 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -2,6 +2,11 @@ ## [Unreleased](https://github.com/crim-ca/stac-populator) (latest) + + +## [0.3.0](https://github.com/crim-ca/stac-populator/tree/0.3.0) (2023-11-16) + + * Add request ``session`` keyword to all request-related functions and populator methods to allow sharing a common set of settings (`auth`, SSL `verify`, `cert`) across requests toward the STAC Catalog. * Add `DirectoryLoader` that allows populating a STAC Catalog with Collections and Items loaded from a crawled directory diff --git a/Makefile b/Makefile index 62c1787..5cbbe9c 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,7 @@ MAKEFILE_NAME := $(word $(words $(MAKEFILE_LIST)),$(MAKEFILE_LIST)) -include Makefile.config APP_ROOT := $(abspath $(lastword $(MAKEFILE_NAME))/..) 
APP_NAME := STACpopulator -APP_VERSION ?= 0.2.0 +APP_VERSION ?= 0.3.0 DOCKER_COMPOSE_FILES := -f "$(APP_ROOT)/docker/docker-compose.yml" DOCKER_TAG := ghcr.io/crim-ca/stac-populator:$(APP_VERSION) diff --git a/README.md b/README.md index 6d5893b..158ec58 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ # STAC Catalog Populator -![Latest Version](https://img.shields.io/badge/latest%20version-0.2.0-blue?logo=github) -![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.2.0.svg?logo=github) +![Latest Version](https://img.shields.io/badge/latest%20version-0.3.0-blue?logo=github) +![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.3.0.svg?logo=github) This repository contains a framework [STACpopulator](STACpopulator) that can be used to implement concrete populators (see [implementations](STACpopulator/implementations)) @@ -43,7 +43,7 @@ make install-dev You can also employ the pre-built Docker: ```shell -docker run -ti ghcr.io/crim-ca/stac-populator:0.2.0 [command] +docker run -ti ghcr.io/crim-ca/stac-populator:0.3.0 [command] ``` ## Testing diff --git a/STACpopulator/__init__.py b/STACpopulator/__init__.py index d3ec452..493f741 100644 --- a/STACpopulator/__init__.py +++ b/STACpopulator/__init__.py @@ -1 +1 @@ -__version__ = "0.2.0" +__version__ = "0.3.0" diff --git a/docker/Dockerfile b/docker/Dockerfile index 235d708..4ff0b05 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -3,7 +3,7 @@ LABEL description.short="STAC Populator" LABEL description.long="Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources." 
LABEL maintainer="Francis Charette-Migneault " LABEL vendor="CRIM" -LABEL version="0.2.0" +LABEL version="0.3.0" # setup paths ENV APP_DIR=/opt/local/src/stac-populator diff --git a/pyproject.toml b/pyproject.toml index bdd7914..0a1a65e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,7 @@ exclude = [ [project] name = "STACpopulator" -version = "0.2.0" +version = "0.3.0" description = "Utility to populate STAC Catalog, Collections and Items from various dataset/catalog sources." requires-python = ">=3.10" dependencies = [ @@ -114,7 +114,7 @@ directory = "reports/coverage/html" output = "reports/coverage.xml" [tool.bumpversion] -current_version = "0.2.0" +current_version = "0.3.0" commit = true commit_args = "--no-verify" tag = true From 785c665bd7deabc67142823229a474bad7d811d9 Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 16 Nov 2023 13:25:56 -0500 Subject: [PATCH 12/15] fix invalid tag reference for CI docker build/push --- .github/workflows/release.yml | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index ec87644..12bef03 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ jobs: release: name: release runs-on: ubuntu-latest - if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} + # if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} steps: - name: Checkout uses: actions/checkout@v2 @@ -38,16 +38,10 @@ jobs: registry: ${{ env.REGISTRY }} username: ${{ github.actor }} password: ${{ secrets.GITHUB_TOKEN }} - # - name: Build and push image using tag - # uses: docker/build-push-action@v3 - # with: - # context: . 
- # push: true - # tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.version.outputs.TAG_VERSION }} - name: Build and push image using branch name uses: docker/build-push-action@v3 with: context: . file: docker/Dockerfile push: true - tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.extract_branch.outputs.branch }} + tags: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}:${{ steps.version.outputs.TAG_VERSION }} From 481c99a0fb7bcaab2914bdc1f027e6ca6ba56c8d Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 16 Nov 2023 13:28:17 -0500 Subject: [PATCH 13/15] apply back the filter for docker tags on release/latest --- .github/workflows/release.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 12bef03..8716722 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -15,7 +15,7 @@ jobs: release: name: release runs-on: ubuntu-latest - # if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} + if: ${{ success() && (contains(github.ref, 'refs/tags') || github.ref == 'refs/heads/master') }} steps: - name: Checkout uses: actions/checkout@v2 From 3ac36d1f865e35ad595dffe85bc8ae51b68bd0fd Mon Sep 17 00:00:00 2001 From: Francis Charette Migneault Date: Thu, 16 Nov 2023 14:23:22 -0500 Subject: [PATCH 14/15] update reamde with CLI commands and general usage notes --- README.md | 41 ++++++++++++++++++++++++++++++++++++++--- 1 file changed, 38 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 158ec58..cf5f4a0 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,7 @@ ![Latest Version](https://img.shields.io/badge/latest%20version-0.3.0-blue?logo=github) ![Commits Since Latest](https://img.shields.io/github/commits-since/crim-ca/stac-populator/0.3.0.svg?logo=github) +![GitHub License](https://img.shields.io/github/license/crim-ca/stac-populator) This repository contains a framework 
[STACpopulator](STACpopulator) that can be used to implement concrete populators (see [implementations](STACpopulator/implementations)) @@ -40,16 +41,44 @@ pip install .[dev] make install-dev ``` -You can also employ the pre-built Docker: +You should then be able to call the STAC populator CLI with the following commands: + +```shell +# obtain the installed version of the STAC populator +stac-populator --version + +# obtain general help about available commands +stac-populator --help + +# obtain general help about available STAC populator implementations +stac-populator run --help + +# obtain help specifically for the execution of a STAC populator implementation +stac-populator run [implementation] --help +``` + +You can also employ the pre-built Docker, which can be called as follows, +where `[command]` corresponds to any of the above example operations. ```shell docker run -ti ghcr.io/crim-ca/stac-populator:0.3.0 [command] ``` +*Note*:
+If files need to be provided as input or obtained as output when using a command with `docker`, you will need to either +mount files individually or mount a workspace directory using `-v {local-path}:{docker-path}` inside the Docker +container to make them accessible to the command. + ## Testing The provided [`docker-compose`](docker/docker-compose.yml) configuration file can be used to launch a test STAC server. -For example, the [CMIP6_UofT][CMIP6_UofT] script can be run as: +Consider using `make docker-start` to start this server, and `make docker-stop` to stop it. +Alternatively, you can also use your own STAC server accessible from any remote location. + +To run the STAC populator, follow the steps from [Installation and Execution](#installation-and-execution). + +Alternatively, you can call the relevant populator Python scripts individually. +For example, using the [CMIP6_UofT][CMIP6_UofT] implementation, the script can be run as: ```shell python STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py \ @@ -58,5 +87,11 @@ python STACpopulator/implementations/CMIP6_UofT/add_CMIP6.py \ "STACpopulator/implementations/CMIP6_UofT/collection_config.yml" ``` -*Note*: +*Note*:
In the script above, a sample THREDDS catalog URL is employed and not one relevant to the global scale CMIP6 data. + +For further test validation, you can also run the test suite with coverage analysis. + +```shell +make test-cov +``` From 668c80ad5485595e5fad9f9432a2e36fe62c9190 Mon Sep 17 00:00:00 2001 From: Francis Charette-Migneault Date: Fri, 17 Nov 2023 14:44:42 -0500 Subject: [PATCH 15/15] add CODEOWNERS --- .github/CODEOWNERS | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .github/CODEOWNERS diff --git a/.github/CODEOWNERS b/.github/CODEOWNERS new file mode 100644 index 0000000..cecdcca --- /dev/null +++ b/.github/CODEOWNERS @@ -0,0 +1,4 @@ +# see documentation +# https://docs.github.com/en/repositories/managing-your-repositorys-settings-and-features/customizing-your-repository/about-code-owners + +* @fmigneault @huard @dchandan