Skip to content

Commit

Permalink
Tools for dataset management
Browse files Browse the repository at this point in the history
  • Loading branch information
avalentino committed Oct 20, 2024
1 parent 7b0352e commit 2362237
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 1 deletion.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -133,3 +133,6 @@ dmypy.json

# VSCode
/.vscode

# project
docs/notebooks/data
11 changes: 10 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ PYTHON=python3
SPHINX_APIDOC=sphinx-apidoc
TARGET=s1etad

.PHONY: default help dist lint api docs clean cleaner distclean
.PHONY: default help dist lint data api docs clean cleaner distclean

default: help

Expand All @@ -31,13 +31,18 @@ lint:
$(PYTHON) -m black --check $(TARGET) tests
# $(PYTHON) -m mypy $(TARGET) tests

# Download the test datasets by calling tests.dataset.download_all().
# PYTHONPATH=. makes the top-level "tests" package importable from the
# repository root.
data:
	PYTHONPATH=. $(PYTHON) -c "from tests.dataset import download_all; download_all()"

# Regenerate the API reference sources under docs/api from scratch using
# sphinx-apidoc; the trailing $(TARGET)/tests argument is an exclude pattern.
api:
	$(RM) -r docs/api
	$(SPHINX_APIDOC) \
		--module-first \
		--separate \
		--no-toc \
		-o docs/api \
		--doc-project "$(TARGET) API" \
		--templatedir docs/_templates/apidoc \
		$(TARGET) $(TARGET)/tests

# Build the HTML documentation.
#
# The notebooks expect the test datasets in docs/notebooks/data, so a
# symbolic link to tests/data is (re)created first:
#   * a symlink target is resolved relative to the directory that contains
#     the link (docs/notebooks), hence the "../../tests/data" target;
#   * any stale link is removed beforehand so that "make docs" can be run
#     repeatedly ("ln -s" fails if the link name already exists).
docs:
	$(RM) docs/notebooks/data
	ln -s ../../tests/data docs/notebooks/data
	mkdir -p docs/_static
	$(MAKE) -C docs html

Expand All @@ -53,6 +58,10 @@ cleaner: clean
$(RM) -r .pytest_cache .tox
$(RM) -r .mypy_cache .ruff_cache
$(RM) -r .ipynb_checkpoints
$(RM) docs/notebooks/data

# Deepest cleanup level: everything removed by "cleaner", plus the built
# distributions, the files handled by tests.dataset.clean_cache() and all
# __pycache__ directories.  The find command runs last so that bytecode
# caches written by the Python call above are removed as well.
distclean: cleaner
	$(RM) -r dist
	PYTHONPATH=. $(PYTHON) -c "from tests.dataset import clean_cache; clean_cache()"
	find . -name __pycache__ -type d -exec $(RM) -r {} +
Empty file added tests/__init__.py
Empty file.
2 changes: 2 additions & 0 deletions tests/data/registry.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
S1_SETAP_2.1.0_2023-03-10_ETAD_IW.tar.gz sha256:ca7042371f72081ff35e9b511fc0aee3155dcbe3383aaefb49b50d75a098e26c
S1_SETAP_2.1.0_2023-03-10_ETAD_SM.tar.gz sha256:50cc3080038aa438d03801513a5c6182ee17df2e0381fb7a2195687800941af9
40 changes: 40 additions & 0 deletions tests/dataset.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Utility functions for unit testing."""

import shutil
import pathlib

import pooch

BASEURL = "https://sentiwiki.copernicus.eu/__attachments/1673968"
# BASEURL = "file:///home/antonio/projects/esa/s1-etad/_local_test_data_repo"
DATADIR = pathlib.Path(__file__).parent / "data"
REGISTRY = DATADIR.joinpath("registry.txt")


# Module-level pooch manager for the test datasets.
# Files are stored in DATADIR and fetched from BASEURL; the name of the
# S1ETAD_TEST_DATA_DIR environment variable is passed to pooch as ``env``.
# The registry is not given inline: it is loaded from REGISTRY right after
# the manager is created.
# Currently unused options, kept for reference:
#   retry_if_failed=3
#   version="0.5"
datarepo = pooch.create(
    DATADIR,
    BASEURL,
    env="S1ETAD_TEST_DATA_DIR",
    registry=None,
)
datarepo.load_registry(REGISTRY)


def download_all(datarepo: pooch.Pooch = datarepo):
    """Fetch every file listed in the registry of `datarepo`.

    Each file is fetched with a progress bar and post-processed by a
    ``pooch.Untar`` processor created with ``extract_dir=""``.

    :param datarepo:
        the pooch manager to use (default: the module-level ``datarepo``)
    """
    for filename in datarepo.registry_files:
        untar = pooch.Untar(extract_dir="")
        datarepo.fetch(filename, processor=untar, progressbar=True)


def clean_cache(datarepo: pooch.Pooch = datarepo):
    """Remove the downloaded test data from the local storage of `datarepo`.

    Two kinds of items are removed from ``datarepo.path``:

    * directory trees whose name matches ``S1?_??_ETA_*.SAFE``
    * the files listed in the registry (missing files are ignored)

    Any other content of the directory is left untouched.
    If the storage directory does not exist there is nothing to clean and
    the function returns silently.

    :param datarepo:
        the pooch manager to clean (default: the module-level ``datarepo``)
    """
    # Normalize to a Path so that the code works regardless of the type
    # (str or path-like) used by pooch to store the location.
    cache_dir = pathlib.Path(datarepo.path)
    if not cache_dir.is_dir():
        return

    # Take a snapshot of the directory content: entries are deleted while
    # looping, which is not safe on a lazy directory iterator.
    for item in list(cache_dir.iterdir()):
        if item.match("S1?_??_ETA_*.SAFE"):
            shutil.rmtree(item)

    for item in datarepo.registry_files:
        cache_dir.joinpath(item).unlink(missing_ok=True)

0 comments on commit 2362237

Please sign in to comment.