diff --git a/CHANGES.md b/CHANGES.md index 2ef1bb0..b849bd3 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -8,6 +8,10 @@ - Don't crash if the local `dataset_description.json` file is empty when trying to resume an aborted download. +- We now by default exclude certain files from the download that are known to + be invalid for specific datasets. Once the datasets have been fixed on + OpenNeuro, we will revert these exclusions. + ## 2021.8 - Retry downloads if a `ReadError` has occurred. diff --git a/MANIFEST.in b/MANIFEST.in index c1a7121..c37f75f 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,2 +1,3 @@ include LICENSE include README.md +include openneuro/default_excludes.json diff --git a/openneuro/default_excludes.json b/openneuro/default_excludes.json new file mode 100644 index 0000000..264350d --- /dev/null +++ b/openneuro/default_excludes.json @@ -0,0 +1,22 @@ +[ + { + "datasetName": "ds003104", + "excludeFiles": [ + "derivatives/freesurfer/subjects/01/mri/aparc+aseg.mgz", + "derivatives/freesurfer/subjects/01/mri/aparc.DKTatlas+aseg.mgz", + "derivatives/freesurfer/subjects/01/mri/aparc.a2009s+aseg.mgz" + ] + }, + { + "datasetName": "ds000248", + "excludeFiles": [ + "derivatives/freesurfer/subjects/fsaverage/mri/aparc.a2005s+aseg.mgz", + "derivatives/freesurfer/subjects/fsaverage/mri/aparc+aseg.mgz", + "derivatives/freesurfer/subjects/fsaverage/mri/aparc.a2009s+aseg.mgz", + "derivatives/freesurfer/subjects/fsaverage/xhemi/mri/aparc+aseg.mgz", + "derivatives/freesurfer/subjects/sub-01/mri/aparc+aseg.mgz", + "derivatives/freesurfer/subjects/sub-01/mri/aparc.DKTatlas+aseg.mgz", + "derivatives/freesurfer/subjects/sub-01/mri/aparc.a2009s+aseg.mgz" + ] + } +] diff --git a/openneuro/download.py b/openneuro/download.py index 04b9d21..d71d621 100644 --- a/openneuro/download.py +++ b/openneuro/download.py @@ -29,6 +29,11 @@ from . 
import __version__ from .config import default_base_url +DEFAULT_EXCLUDES = json.loads( + (Path(__file__).parent / 'default_excludes.json') + .read_text(encoding='utf-8') +) + if sys.stdout.encoding.lower() == 'utf-8': stdout_unicode = True @@ -573,6 +578,18 @@ def download(*, exclude = [exclude] if isinstance(exclude, str) else exclude exclude = [] if exclude is None else list(exclude) + for dataset_default_excludes in DEFAULT_EXCLUDES: + if dataset_default_excludes['datasetName'] == dataset: + msg = f'Adding default excludes for dataset {dataset}: \n ' + msg += '\n '.join(dataset_default_excludes['excludeFiles']) + if stdout_unicode: + msg = f'🤕 {msg}' + tqdm.write(msg) + exclude = list( + set(exclude + dataset_default_excludes['excludeFiles']) + ) + break + retry_backoff = 0.5 # seconds metadata = _get_download_metadata(base_url=default_base_url, dataset_id=dataset, diff --git a/openneuro/tests/test_download.py b/openneuro/tests/test_download.py index 79a3ddd..63ac885 100644 --- a/openneuro/tests/test_download.py +++ b/openneuro/tests/test_download.py @@ -77,3 +77,13 @@ def test_resume_download(tmp_path: Path): include = ['sub-0001/meg/sub-0001_coordsystem.json'] download(dataset=dataset, tag=tag, target_dir=tmp_path, include=include) + + +def test_ds000248(tmp_path: Path): + """Test a dataset for which we ship default excludes.""" + dataset = 'ds000248' + download( + dataset=dataset, + include=['participants.tsv'], + target_dir=tmp_path + ) diff --git a/setup.cfg b/setup.cfg index 13735d5..ecf5588 100644 --- a/setup.cfg +++ b/setup.cfg @@ -31,6 +31,7 @@ install_requires = sgqlc importlib-metadata; python_version < "3.8" typing-extensions; python_version < "3.8" +include_package_data = True [options.entry_points] console_scripts =