diff --git a/.github/workflows/ci_pip.yml b/.github/workflows/ci_pip.yml
index eaf07fb55..f4e41bc4f 100644
--- a/.github/workflows/ci_pip.yml
+++ b/.github/workflows/ci_pip.yml
@@ -35,6 +35,20 @@ jobs:
       - name: Install some testing dependencies (hard-coded)
         run: python -m pip install pytest devtools jsonschema requests wget
 
+      - name: Cache Zenodo data
+        id: cache-zenodo-data
+        uses: actions/cache@v3
+        with:
+          # Cache only the downloaded records, not tracked files in tests/data/
+          path: |
+            tests/data/10_5281_zenodo_7059515
+            tests/data/10_5281_zenodo_8091756
+          key: zenodo-data
+
+      - name: Download Zenodo data
+        if: steps.cache-zenodo-data.outputs.cache-hit != 'true'
+        run: bash tests/data/download_zenodo_data.sh
+
       - name: Test core library with pytest
         run: pytest tests --ignore tests/tasks
 
@@ -65,5 +79,19 @@ jobs:
       - name: Install some testing dependencies (hard-coded)
         run: python -m pip install pytest devtools jsonschema requests wget
 
+      - name: Cache Zenodo data
+        id: cache-zenodo-data
+        uses: actions/cache@v3
+        with:
+          # Cache only the downloaded records, not tracked files in tests/data/
+          path: |
+            tests/data/10_5281_zenodo_7059515
+            tests/data/10_5281_zenodo_8091756
+          key: zenodo-data
+
+      - name: Download Zenodo data
+        if: steps.cache-zenodo-data.outputs.cache-hit != 'true'
+        run: bash tests/data/download_zenodo_data.sh
+
       - name: Test tasks with pytest
         run: pytest tests tests/tasks
diff --git a/.github/workflows/ci_poetry.yml b/.github/workflows/ci_poetry.yml
index cf32e7850..4ae2f78d0 100644
--- a/.github/workflows/ci_poetry.yml
+++ b/.github/workflows/ci_poetry.yml
@@ -42,6 +42,20 @@ jobs:
       - name: Install dependencies (without extras)
         run: poetry install --with dev --without docs --no-interaction
 
+      - name: Cache Zenodo data
+        id: cache-zenodo-data
+        uses: actions/cache@v3
+        with:
+          # Cache only the downloaded records, not tracked files in tests/data/
+          path: |
+            tests/data/10_5281_zenodo_7059515
+            tests/data/10_5281_zenodo_8091756
+          key: zenodo-data
+
+      - name: Download Zenodo data
+        if: steps.cache-zenodo-data.outputs.cache-hit != 'true'
+        run: bash tests/data/download_zenodo_data.sh
+
       - name: Test core library with pytest
         run: poetry run coverage run -m pytest tests --ignore tests/tasks
 
@@ -88,6 +102,20 @@ jobs:
       - name: Check manifest task metadata
         run: poetry run python fractal_tasks_core/dev/check_manifest.py
 
+      - name: Cache Zenodo data
+        id: cache-zenodo-data
+        uses: actions/cache@v3
+        with:
+          # Cache only the downloaded records, not tracked files in tests/data/
+          path: |
+            tests/data/10_5281_zenodo_7059515
+            tests/data/10_5281_zenodo_8091756
+          key: zenodo-data
+
+      - name: Download Zenodo data
+        if: steps.cache-zenodo-data.outputs.cache-hit != 'true'
+        run: bash tests/data/download_zenodo_data.sh
+
       - name: Test tasks with pytest
         run: poetry run coverage run -m pytest tests/tasks
 
diff --git a/CHANGELOG.md b/CHANGELOG.md
index c4810eeaa..d6946bb08 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,10 @@
 **Note**: Numbers like (\#123) point to closed Pull Requests on the fractal-tasks-core repository.
 
+# Unreleased
+
+* Testing:
+    * Cache Zenodo data, within GitHub actions (\#585).
+
 # 0.13.0
 
 * Tasks:
diff --git a/tests/conftest.py b/tests/conftest.py
index 931acc172..f2a0eeac6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -1,13 +1,19 @@
 import json
+import logging
 import os
 import shutil
 import time
 from pathlib import Path
-from urllib.parse import unquote
 
+import anndata as ad
 import pytest
 import requests  # type: ignore
 import wget
+import zarr
+from devtools import debug
+
+from fractal_tasks_core.lib_regions_of_interest import reset_origin
+from fractal_tasks_core.lib_write import write_table
 
 
 @pytest.fixture(scope="session")
@@ -16,54 +22,45 @@ def testdata_path() -> Path:
     return TEST_DIR / "data/"
 
 
-@pytest.fixture(scope="function")
-def zenodo_images(testdata_path, capsys):
+@pytest.fixture(scope="session")
+def zenodo_images(testdata_path):
     """
     Inspired by
     https://github.com/dvolgyes/zenodo_get/blob/master/zenodo_get/zget.py
-
-    See https://docs.pytest.org/en/7.4.x/how-to/capture-stdout-stderr.html for
-    the use of capsys
     """
-
     t_start = time.perf_counter()
 
-    url = "10.5281/zenodo.7059515"
-    folder = str(testdata_path / (url.replace(".", "_").replace("/", "_")))
+    # Download images and metadata files
+    recordID = "7059515"
+    folder = str(testdata_path / f"10_5281_zenodo_{recordID}")
     if os.path.isdir(folder):
-        print(f"{folder} already exists, skip")
-        return folder
-    os.makedirs(folder)
-    url = "https://doi.org/" + url
-    print(f"I will download {url} files to {folder}")
-
-    r = requests.get(url)
-    recordID = r.url.split("/")[-1]
-    url = "https://zenodo.org/api/records/"
-    r = requests.get(url + recordID)
-
-    js = json.loads(r.text)
-    files = js["files"]
-    for f in files:
-        fname = f["filename"]
-        link = f"https://zenodo.org/record/{recordID}/files/{fname}"
-        print(link)
-        link = unquote(link)
-        wget.download(link, out=folder)
-        print()
+        print(f"{folder} already exists, skip download")
+    else:
+        os.makedirs(folder)
+        url = f"https://zenodo.org/api/records/{recordID}"
+        r = requests.get(url)
+        r.raise_for_status()
+        js = json.loads(r.text)
+        files = js["files"]
+        for f in files:
+            file_url = f["links"]["download"]
+            # Download links end in "/content": the name is the part before it
+            file_name = file_url.split("/")[-2]
+            wget.download(file_url, out=f"{folder}/{file_name}", bar=None)
 
     # Add an image with invalid name, that should be skipped during parsing
     with open(f"{folder}/invalid_path.png", "w") as f:
         f.write("This file has an invalid filename, which cannot be parsed.")
 
     t_end = time.perf_counter()
-    with capsys.disabled():
-        print(f"\n    Time spent in zenodo_images: {t_end-t_start:.2f} s")
+    logging.warning(
+        f"\n    Time spent in zenodo_images: {t_end-t_start:.2f} s"
+    )
 
     return folder
 
 
-@pytest.fixture(scope="function")
+@pytest.fixture(scope="session")
 def zenodo_images_multiplex(testdata_path, zenodo_images):
     folder = str(testdata_path / "fake_multiplex")
     cycle_folder_1 = str(Path(folder) / "cycle1")
@@ -78,69 +75,65 @@ def zenodo_images_multiplex(testdata_path, zenodo_images):
     return cycle_folders
 
 
-@pytest.fixture(scope="function")
-def zenodo_zarr(testdata_path, tmpdir_factory, capsys):
-    """
-    See https://docs.pytest.org/en/7.4.x/how-to/capture-stdout-stderr.html for
-    the use of capsys
-    """
+@pytest.fixture(scope="session")
+def zenodo_zarr(testdata_path, tmpdir_factory):
 
     t_start = time.perf_counter()
 
     doi = "10.5281/zenodo.8091756"
     rootfolder = testdata_path / (doi.replace(".", "_").replace("/", "_"))
     platenames = ["plate.zarr", "plate_mip.zarr"]
     folders = [rootfolder / plate for plate in platenames]
 
+    zarrnames = [
+        "20200812-CardiomyocyteDifferentiation14-Cycle1.zarr",
+        "20200812-CardiomyocyteDifferentiation14-Cycle1_mip.zarr",
+    ]
+
+    # Download dataset
     if rootfolder.exists():
-        print(f"{str(rootfolder)} already exists, skip")
-        folders = [str(f) for f in folders]
-        return folders
+        print(f"{str(rootfolder)} already exists, skip download part")
     else:
-
-        import zarr
-        import anndata as ad
-        import logging
-
-        from fractal_tasks_core.lib_regions_of_interest import reset_origin
-        from fractal_tasks_core.lib_write import write_table
-
         rootfolder.mkdir()
         tmp_path = tmpdir_factory.mktemp("zenodo_zarr")
-        zarrnames = [
-            "20200812-CardiomyocyteDifferentiation14-Cycle1.zarr",
-            "20200812-CardiomyocyteDifferentiation14-Cycle1_mip.zarr",
-        ]
-        for zarrname, folder in zip(zarrnames, folders):
+        for zarrname in zarrnames:
             zipname = f"{zarrname}.zip"
             url = f"https://zenodo.org/record/8091756/files/{zipname}"
+            debug(url)
             wget.download(url, out=str(tmp_path / zipname), bar=None)
+            time.sleep(0.5)
             shutil.unpack_archive(
-                str(tmp_path / zipname), extract_dir=rootfolder, format="zip"
+                str(tmp_path / zipname),
+                extract_dir=rootfolder,
+                format="zip",
+            )
+
+    # Based on the Zenodo OME-Zarrs, create the appropriate OME-Zarrs to be
+    # used in tests
+    for zarrname, folder in zip(zarrnames, folders):
+        if os.path.isdir(str(folder)):
+            shutil.rmtree(str(folder))
+        shutil.copytree(str(rootfolder / zarrname), str(folder))
+
+        # Update well/FOV ROI tables, by shifting their origin to 0
+        # TODO: remove this fix, by uploading new zarrs to zenodo (ref
+        # issue 526)
+        image_group_path = folder / "B/03/0"
+        group_image = zarr.open_group(str(image_group_path))
+        for table_name in ["FOV_ROI_table", "well_ROI_table"]:
+            table_path = str(image_group_path / "tables" / table_name)
+            old_table = ad.read_zarr(table_path)
+            new_table = reset_origin(old_table)
+            write_table(
+                group_image,
+                table_name,
+                new_table,
+                overwrite=True,
+                logger=logging.getLogger(),
             )
-            shutil.move(str(rootfolder / zarrname), str(folder))
-
-            # Update well/FOV ROI tables, by shifting their origin to 0
-            # TODO: remove this fix, by uploading new zarrs to zenodo (ref
-            # issue 526)
-            image_group_path = folder / "B/03/0"
-            group_image = zarr.open_group(str(image_group_path))
-            for table_name in ["FOV_ROI_table", "well_ROI_table"]:
-                table_path = str(image_group_path / "tables" / table_name)
-                old_table = ad.read_zarr(table_path)
-                new_table = reset_origin(old_table)
-                write_table(
-                    group_image,
-                    table_name,
-                    new_table,
-                    overwrite=True,
-                    logger=logging.getLogger(),
-                )
 
     folders = [str(f) for f in folders]
 
     t_end = time.perf_counter()
-    with capsys.disabled():
-        print(f"\n    Time spent in zenodo_zarr: {t_end-t_start:.2f} s")
-
+    logging.warning(f"\n    Time spent in zenodo_zarr: {t_end-t_start:.2f} s")
     return folders
diff --git a/tests/data/download_zenodo_data.sh b/tests/data/download_zenodo_data.sh
new file mode 100644
index 000000000..c6e868c34
--- /dev/null
+++ b/tests/data/download_zenodo_data.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+LIST_RECORD_ID="7059515 8091756"
+
+for RECORD_ID in $LIST_RECORD_ID; do
+    echo "****************************"
+    echo "START RECORD_ID=$RECORD_ID"
+    OUTPUT_FOLDER=tests/data/10_5281_zenodo_$RECORD_ID
+    echo "OUTPUT_FOLDER: $OUTPUT_FOLDER"
+
+    if [ -d "$OUTPUT_FOLDER" ]; then
+        echo "OUTPUT_FOLDER already exists. Skip."
+    else
+        mkdir "$OUTPUT_FOLDER"
+        # Run curl on its own, so that the reported exit code is curl's (not jq's)
+        RECORD_JSON=$(curl "https://zenodo.org/api/records/$RECORD_ID")
+        echo "curl exit code: $?"
+        FILES=$(jq -r ".files[].links.download" <<< "$RECORD_JSON")
+        echo
+        for FILE in $FILES; do
+            FILEPATH=${FILE%"/content"}
+            FILENAME=$(basename "$FILEPATH")
+            echo "FILE: $FILE"
+            echo "FILEPATH: $FILEPATH"
+            echo "FILENAME: $FILENAME"
+            echo
+            wget --no-verbose "$FILE" --output-document="${OUTPUT_FOLDER}/${FILENAME}"
+            echo
+        done
+
+        if [ "$RECORD_ID" == "8091756" ]; then
+            unzip "$OUTPUT_FOLDER/20200812-CardiomyocyteDifferentiation14-Cycle1.zarr.zip" -d "$OUTPUT_FOLDER"
+            unzip "$OUTPUT_FOLDER/20200812-CardiomyocyteDifferentiation14-Cycle1_mip.zarr.zip" -d "$OUTPUT_FOLDER"
+        fi
+    fi
+
+    echo "END RECORD_ID=$RECORD_ID"
+    echo "****************************"
+    echo
+done