Skip to content

Commit

Permalink
PR 44 fixes
Browse files Browse the repository at this point in the history
  • Loading branch information
dchandan committed Jan 19, 2024
1 parent d07d5e6 commit e58e5d2
Show file tree
Hide file tree
Showing 3 changed files with 18 additions and 7 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ def runner(ns: argparse.Namespace) -> Optional[int] | NoReturn:

with Session() as session:
apply_request_options(session, ns)
for collection_path, _, collection_json in STACDirectoryLoader(ns.directory, "collection", ns.prune):
for _, collection_path, collection_json in STACDirectoryLoader(ns.directory, "collection", ns.prune):
collection_dir = os.path.dirname(collection_path)
loader = STACDirectoryLoader(collection_dir, "item", prune=ns.prune)
populator = DirectoryPopulator(ns.stac_host, loader, ns.update, collection_json, session=session)
Expand Down
18 changes: 14 additions & 4 deletions STACpopulator/input.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,11 @@ def reset(self):
self.catalog_head = self.catalog

def __iter__(self) -> Iterator[Tuple[str, str, MutableMapping[str, Any]]]:
"""Return a generator walking a THREDDS data catalog for datasets."""
"""Return a generator walking a THREDDS data catalog for datasets.
:yield: Returns three quantities: name of the item, location of the item, and its attributes
:rtype: Iterator[Tuple[str, str, MutableMapping[str, Any]]]
"""

if self._depth > self._max_depth:
return
Expand Down Expand Up @@ -198,7 +202,13 @@ def __init__(self, path: str, mode: Literal["collection", "item"], prune: bool =
self._collection_mode = mode == "collection"
self._collection_name = "collection.json"

def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]:
def __iter__(self) -> Iterator[Tuple[str, str, MutableMapping[str, Any]]]:
"""Return a generator that walks through a directory structure looking for STAC Collections or Items.
:yield: Returns three quantities: name of the item, location of the item, and its attributes
:rtype: Iterator[Tuple[str, str, MutableMapping[str, Any]]]
"""

is_root = True
for root, dirs, files in self.iter:
# since there can only ever be one 'collection' file name in the same directory
Expand All @@ -207,7 +217,7 @@ def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]:
if self.prune: # stop recursive search if requested
del dirs[:]
col_path = os.path.join(root, self._collection_name)
yield col_path, "", self._load_json(col_path)
yield self._collection_name, col_path, self._load_json(col_path)
# if a collection is found deeper when not expected for items parsing
# drop the nested directories to avoid over-crawling nested collections
elif not self._collection_mode and not is_root and self._collection_name in files:
Expand All @@ -217,7 +227,7 @@ def __iter__(self) -> Iterator[Tuple[str, MutableMapping[str, Any]]]:
for name in files:
if not self._collection_mode and self._is_item(name):
item_path = os.path.join(root, name)
yield item_path, "", self._load_json(item_path)
yield self._collection_name, item_path, self._load_json(item_path)

def _is_item(self, path: Union[os.PathLike[str], str]) -> bool:
name = os.path.split(path)[-1]
Expand Down
5 changes: 3 additions & 2 deletions STACpopulator/populator_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,8 +147,7 @@ def ingest(self) -> None:
LOGGER.info("Data ingestion")
for item_name, item_loc, item_data in self._ingest_pipeline:
LOGGER.info(f"New data item: {item_name}")
if item_loc:
LOGGER.info(f"Data location: {item_loc}")
LOGGER.info(f"Data location: {item_loc}")
stac_item = self.create_stac_item(item_name, item_data)
if stac_item:
post_stac_item(
Expand All @@ -161,3 +160,5 @@ def ingest(self) -> None:
)
counter += 1
LOGGER.info(f"Processed {counter} data items")
else:
LOGGER.error("Failed to create STAC representation")

0 comments on commit e58e5d2

Please sign in to comment.