From 4d9704d6d24376ab7d97255b01e6d18a7db3564c Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Thu, 1 Feb 2024 13:20:23 -0500 Subject: [PATCH] Improve documentation (#290) * Improve documentation. * More improvements. * Add glossary. * Keep working on docs. * Add API page. * More work. * Run isort. * Fix more stuff. * Fix module name. * Update conf.py * Reorg docs a bit. * Fix import. * Try this. * Revert attempted link. * Update example.rst * Address review. * Fix paths. --- README.rst | 24 ++- cubids/__about__.py | 17 ++ cubids/__init__.py | 29 ++- cubids/tests/test_bond.py | 2 +- cubids/workflows.py | 2 +- docs/README.rst | 31 --- docs/about.rst | 101 ++++----- docs/api.rst | 74 +++++++ docs/cli.rst | 14 ++ docs/conf.py | 120 +++++++++-- docs/example.rst | 390 ++++++++++++++++++---------------- docs/glossary.rst | 40 ++++ docs/index.rst | 7 +- docs/installation.rst | 53 +++-- docs/links.rst | 1 + docs/sphinxext/github_link.py | 90 ++++++++ docs/usage.rst | 280 +++++++++++++----------- pyproject.toml | 3 + 18 files changed, 815 insertions(+), 463 deletions(-) create mode 100644 cubids/__about__.py delete mode 100644 docs/README.rst create mode 100644 docs/api.rst create mode 100644 docs/cli.rst create mode 100644 docs/glossary.rst create mode 100644 docs/links.rst create mode 100644 docs/sphinxext/github_link.py diff --git a/README.rst b/README.rst index db0440b24..7fd6c6ec5 100644 --- a/README.rst +++ b/README.rst @@ -2,34 +2,36 @@ CuBIDS: Curation of BIDS ======================== - .. image:: https://img.shields.io/pypi/v/cubids.svg - :target: https://pypi.python.org/pypi/cubids + :target: https://pypi.python.org/pypi/cubids .. image:: https://circleci.com/gh/PennLINC/CuBIDS.svg?style=svg - :target: https://circleci.com/gh/PennLINC/CuBIDS + :target: https://circleci.com/gh/PennLINC/CuBIDS .. 
image:: https://readthedocs.org/projects/cubids/badge/?version=latest - :target: https://cubids.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status + :target: https://cubids.readthedocs.io/en/latest/?badge=latest + :alt: Documentation Status + About ----- -Curation of BIDS, or ``CuBIDS``, is a workflow and software package designed to facilitate +``CuBIDS`` (Curation of BIDS) is a workflow and software package designed to facilitate reproducible curation of neuroimaging `BIDS `_ datasets. CuBIDS breaks down BIDS dataset curation into four main components and addresses each one using various command line programs complete with version control capabilities. These components are not necessarily linear but all are critical in the process of preparing BIDS data for successful preprocessing and analysis pipeline runs. - 1. CuBIDS facilitates the validation of BIDS data. - 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. - 3. CuBIDS helps users test pipelines on the entire parameter space of a BIDS dataset. - 4. CuBIDS allows users to perform metadata-based quality control on their BIDS data. + 1. CuBIDS facilitates the validation of BIDS data. + 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. + 3. CuBIDS helps users test pipelines on the entire parameter space of a BIDS dataset. + 4. CuBIDS allows users to perform metadata-based quality control on their BIDS data. + 5. CuBIDS helps users clean protected information in BIDS datasets, + in order to prepare them for public sharing. .. image:: https://github.com/PennLINC/CuBIDS/raw/main/docs/_static/cubids_workflow.png :width: 600 For full documentation, please visit our -`ReadTheDocs `_ \ No newline at end of file +`ReadTheDocs `_. 
diff --git a/cubids/__about__.py b/cubids/__about__.py new file mode 100644 index 000000000..d4341aebe --- /dev/null +++ b/cubids/__about__.py @@ -0,0 +1,17 @@ +# emacs: -*- mode: python; py-indent-offset: 4; indent-tabs-mode: nil -*- +# vi: set ft=python sts=4 ts=4 sw=4 et: +"""Base module variables.""" +try: + from cubids._version import __version__ +except ImportError: + __version__ = "0+unknown" + +__packagename__ = "CuBIDS" +__copyright__ = "Copyright 2023, The CuBIDS Developers" +__credits__ = ( + "Contributors: please check the ``.zenodo.json`` file at the top-level folder " + "of the repository." +) +__url__ = "https://github.com/PennLINC/CuBIDS" + +DOWNLOAD_URL = f"https://github.com/PennLINC/{__packagename__}/archive/{__version__}.tar.gz" diff --git a/cubids/__init__.py b/cubids/__init__.py index dd11a6c3c..ff9b1d3d5 100644 --- a/cubids/__init__.py +++ b/cubids/__init__.py @@ -1,11 +1,28 @@ """Top-level package for CuBIDS.""" -__author__ = """PennLINC""" -__email__ = "PennLINC@gmail.com" -__version__ = "0.1.0" - -from cubids.cubids import CuBIDS +from cubids import ( + cli, + config, + constants, + cubids, + metadata_merge, + utils, + validator, + workflows, +) +from cubids.__about__ import __copyright__, __credits__, __packagename__, __version__ __all__ = [ - "CuBIDS", + "__copyright__", + "__credits__", + "__packagename__", + "__version__", + "cli", + "config", + "constants", + "cubids", + "metadata_merge", + "utils", + "validator", + "workflows", ] diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index 15bfc35f0..867f22cd2 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -10,7 +10,7 @@ import pandas as pd import pytest -from cubids import CuBIDS +from cubids.cubids import CuBIDS from cubids.metadata_merge import merge_json_into_json, merge_without_overwrite from cubids.tests.utils import ( _add_deletion, diff --git a/cubids/workflows.py b/cubids/workflows.py index 7984216b6..307be51bf 100644 --- 
a/cubids/workflows.py +++ b/cubids/workflows.py @@ -13,7 +13,7 @@ import pandas as pd import tqdm -from cubids import CuBIDS +from cubids.cubids import CuBIDS from cubids.metadata_merge import merge_json_into_json from cubids.utils import _get_container_type from cubids.validator import ( diff --git a/docs/README.rst b/docs/README.rst deleted file mode 100644 index 2517f8a0f..000000000 --- a/docs/README.rst +++ /dev/null @@ -1,31 +0,0 @@ -======================== -CuBIDS: Curation of BIDS -======================== - - -.. image:: https://img.shields.io/pypi/v/cubids.svg - :target: https://pypi.python.org/pypi/cubids - -.. image:: https://circleci.com/gh/PennLINC/CuBIDS.svg?style=svg - :target: https://circleci.com/gh/PennLINC/CuBIDS - -.. image:: https://readthedocs.org/projects/cubids/badge/?version=latest - :target: https://cubids.readthedocs.io/en/latest/?badge=latest - :alt: Documentation Status - -About ------ - -Curation of BIDS, or ``CuBIDS``, is a workflow and software package designed to facilitate -reproducible curation of neuroimaging `BIDS `_ datasets. -CuBIDS breaks down BIDS dataset curation into four main components and addresses each one using -various command line programs complete with version control capabilities. These components are not necessarily linear but all are critical -in the process of preparing BIDS data for successful preprocessing and analysis pipeline runs. - - 1. CuBIDS facilitates the validation of BIDS data. - 2. CuBIDS visualizes and summarizes the heterogeneity in a BIDS dataset. - 3. CuBIDS helps users test pipelines on the entire parameter space of a BIDS dataset. - 4. CuBIDS allows users to perform metadata-based quality control on their BIDS data. - -.. 
image:: _static/cubids_workflow.png - :width: 600 \ No newline at end of file diff --git a/docs/about.rst b/docs/about.rst index 6dee284b0..80e55f950 100644 --- a/docs/about.rst +++ b/docs/about.rst @@ -1,85 +1,72 @@ -=================== +========== Background -=================== +========== Motivation -------------- +---------- The Brain Imaging Data Structure (BIDS) is a simple and intuitive way to -organize and describe MRI data [#f1]_. Because of its ease of use, a wide array of -preprocessing and analysis tools and pipelines have been developed specifically -to operate on data curated in BIDS [#f2]_. These tools are able to automatically -self-configure to the user's BIDS dataset, which saves time and effort on the -part of the user. However, as datasets increase in size and complexity, it -can be dangerous to blindly run these pipelines without a careful understanding of -what's really in your BIDS data. Having knowledge of this potential **heterogeneity** -ahead of time gives researchers the ability to **predict pipeline configurations**, -**predict potential errors**, avoid running **unwanted or unusable data**, and **budget -their computational time and resources** effectively. - -``CuBIDS`` is designed to facilitate the curation of large, neuroimaging data so -that users can infer useful information from descriptive and accurate BIDS labels -before running pipelines *en masse*. ``CuBIDS`` accomplishes this by summarizing -BIDS data using :ref:`keygroup`, :ref:`paramgroup`, and :ref:`acquisitiongroup` categorizations in your data (we'll explain what these -are in more detail in the next section). +organize and describe MRI data [#f1]_. +Because of its ease of use, a wide array of preprocessing and analysis tools and +pipelines have been developed specifically to operate on data curated in BIDS [#f2]_. +These tools are able to automatically self-configure to the user's BIDS dataset, +which saves time and effort on the part of the user. 
+ +However, as datasets increase in size and complexity, +it can be dangerous to blindly run these pipelines without a careful understanding of +what's really in your BIDS data. +Having knowledge of this potential **heterogeneity** ahead of time gives researchers +the ability to **predict pipeline configurations**, **predict potential errors**, +avoid running **unwanted or unusable data**, +and **budget their computational time and resources** effectively. + +``CuBIDS`` is designed to facilitate the curation of large, +neuroimaging datasets so that users can infer useful information from descriptive and +accurate BIDS labels before running pipelines *en masse*. +``CuBIDS`` accomplishes this by summarizing BIDS data using :ref:`keygroup`, +:ref:`paramgroup`, and :ref:`acquisitiongroup` categorizations in your data +(we'll explain what these are in more detail in the next section). The image below demonstrates the ``CuBIDS`` workflow that we'll discuss on the next page. .. image:: _static/cubids_workflow.png :width: 600 -``CuBIDS`` also incorporates ``DataLad`` as an optional dependency for maintaining data provenance, enhancing -reproducibility, and supporting collaboration [#f3]_. +``CuBIDS`` also incorporates ``DataLad`` as an optional dependency for maintaining data provenance, +enhancing reproducibility, and supporting collaboration [#f3]_. -Definitions ------------- +What CuBIDS Is Not +------------------ +``CuBIDS`` is not designed to convert raw data into BIDS format. +For that, we recommend using `conversion tools `_. +``CuBIDS`` then takes over once you have a valid BIDS dataset, +prior to running any preprocessing or analysis pipelines, or to sharing the dataset. -.. topic:: Key Group +.. 
note:: - * A set of scans whose filenames share all `BIDS filename key-value pairs `_, excluding subject and session - * Derived from the BIDS Filename - * Example structure: ``acquisition-*_datatype-*_run-*_task-*_suffix`` + CuBIDS *should* work on BIDS-ish (not quite BIDS compliant, but in a similar format) datasets, + but this is by no means guaranteed. -.. topic:: Parameter (Param) Group - - * The set of scans with identical metadata parameters in their sidecars - * Defined within a Key Group - * Numerically identified (each Key Group will have n Param Groups, where n is the number of unique sets of scanning parameters present in that Key Group. e.g. 1, 2, etc.) - -.. topic:: Dominant Group - - * The Param Group that contains the most scans in its Key Group - -.. topic:: Variant Group - - * Any Param Group that is non-dominant - -.. topic:: Rename Key Group - - * Auto-generated, recommended new Key Group name for Variant Groups - * Based on the metadata parameters that cause scans in Variant Groups to vary from those in their respective Dominant Groups - -.. 
topic:: Acquisition Group - - * A collection of sessions across participants that contains the exact same set of Key and Param Groups Examples """""""" Dominant Group resting state BOLD: - * Example Filename: ``sub-01_ses-A_task-rest_acq-singleband_bold.nii.gz`` - * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` - * Param Group: ``1`` (Dominaint Group) + + * Example Filename: ``sub-01_ses-A_task-rest_acq-singleband_bold.nii.gz`` + * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` + * Param Group: ``1`` (Dominant Group) Variant Group resting state BOLD (all scans in this Param Group are missing a fieldmap) - * Example Filename: ``sub-02_ses-A_task-rest_acq-singleband_bold.nii.gz`` - * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` - * Param Group: ``2`` (Variant Group) - * Rename Key Group: ``acquisition-singlebandVARIANTNoFmap_datatype-func_suffix-bold_task-rest`` -In the next section, we'll discuss these definitions in more detail and demonstrate ``CuBIDS`` usage. + * Example Filename: ``sub-02_ses-A_task-rest_acq-singleband_bold.nii.gz`` + * Key Group: ``acquisition-singleband_datatype-func_suffix-bold_task-rest`` + * Param Group: ``2`` (Variant Group) + * Rename Key Group: ``acquisition-singlebandVARIANTNoFmap_datatype-func_suffix-bold_task-rest`` + +These definitions are described in more detail in :doc:`glossary` and :doc:`usage`. .. rubric:: Footnotes diff --git a/docs/api.rst b/docs/api.rst new file mode 100644 index 000000000..097c81d5f --- /dev/null +++ b/docs/api.rst @@ -0,0 +1,74 @@ +.. include:: links.rst + +=== +API +=== + +********************************* +:mod:`cubids.cubids`: Main Module +********************************* + +.. currentmodule:: cubids + +.. 
autosummary:: + :toctree: generated/ + :template: class.rst + + cubids.cubids.CuBIDS + + +******************************************* +:mod:`cubids.workflows`: Workflow Functions +******************************************* + +.. currentmodule:: cubids + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + cubids.workflows.validate + cubids.workflows.bids_sidecar_merge + cubids.workflows.group + cubids.workflows.apply + cubids.workflows.datalad_save + cubids.workflows.undo + cubids.workflows.copy_exemplars + cubids.workflows.add_nifti_info + cubids.workflows.purge + cubids.workflows.remove_metadata_fields + cubids.workflows.print_metadata_fields + + +********************************************** +:mod:`cubids.metadata_merge`: Merging Metadata +********************************************** + +.. currentmodule:: cubids + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + cubids.metadata_merge.check_merging_operations + cubids.metadata_merge.merge_without_overwrite + cubids.metadata_merge.merge_json_into_json + cubids.metadata_merge.get_acq_dictionary + cubids.metadata_merge.group_by_acquisition_sets + + +*********************************** +:mod:`cubids.validator`: Validation +*********************************** + +.. currentmodule:: cubids + +.. autosummary:: + :toctree: generated/ + :template: function.rst + + cubids.validator.build_validator_call + cubids.validator.build_subject_paths + cubids.validator.run_validator + cubids.validator.parse_validator_output + cubids.validator.get_val_dictionary diff --git a/docs/cli.rst b/docs/cli.rst new file mode 100644 index 000000000..7a1f9db18 --- /dev/null +++ b/docs/cli.rst @@ -0,0 +1,14 @@ +====================== +Command Line Interface +====================== + +.. code-block:: bash + + cubids --help + +This will print the instructions for using the command line interface in your command line. + +.. 
argparse:: + :ref: cubids.cli._get_parser + :prog: cubids + :func: _get_parser diff --git a/docs/conf.py b/docs/conf.py index 6b1bae97c..ae90cf8ce 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -23,6 +23,10 @@ import cubids +sys.path.insert(0, os.path.abspath("sphinxext")) + +from github_link import make_linkcode_resolve + # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. @@ -39,15 +43,19 @@ # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ + "hoverxref.extension", "nbsphinx", "sphinx.ext.autodoc", + "sphinx.ext.autosummary", # standard "sphinx.ext.doctest", - "sphinx.ext.intersphinx", - "sphinx.ext.coverage", - "sphinx.ext.mathjax", - "sphinxarg.ext", # argparse extension - "sphinx.ext.viewcode", + "sphinx.ext.intersphinx", # links code to other packages + "sphinx.ext.linkcode", # links to code from api + "sphinx.ext.mathjax", # include formulae in html + "sphinx.ext.napoleon", # alternative to numpydoc + "sphinx_copybutton", # for copying code snippets "sphinx_gallery.load_style", + "sphinxarg.ext", # argparse extension + "sphinxcontrib.bibtex", # bibtex-based bibliographies ] # Mock modules in autodoc: @@ -96,14 +104,34 @@ exclude_patterns = ["_build", "Thumbs.db", ".DS_Store"] # The name of the Pygments (syntax highlighting) style to use. -pygments_style = "sphinx" +pygments_style = "default" # If true, `todo` and `todoList` produce output, else they produce nothing. 
todo_include_todos = False - -# -- Options for HTML output ------------------------------------------- - +# ----------------------------------------------------------------------------- +# Napoleon settings +# ----------------------------------------------------------------------------- +napoleon_google_docstring = False +napoleon_numpy_docstring = True +napoleon_custom_sections = ["License"] +napoleon_include_init_with_doc = True +napoleon_include_private_with_doc = False +napoleon_include_special_with_doc = False +napoleon_use_admonition_for_examples = True +napoleon_use_admonition_for_notes = True +napoleon_use_admonition_for_references = True +napoleon_use_ivar = True +napoleon_use_param = True +napoleon_use_keyword = True +napoleon_use_rtype = True +napoleon_preprocess_types = False +napoleon_type_aliases = None +napoleon_attr_annotations = True + +# ----------------------------------------------------------------------------- +# HTML output +# ----------------------------------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # @@ -123,15 +151,50 @@ # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] - -# -- Options for HTMLHelp output --------------------------------------- - +# ----------------------------------------------------------------------------- +# HTMLHelp output +# ----------------------------------------------------------------------------- # Output file base name for HTML help builder. 
htmlhelp_basename = "cubidsdoc" +# The following is used by sphinx.ext.linkcode to provide links to github +linkcode_resolve = make_linkcode_resolve( + "cubids", + "https://github.com/PennLINC/cubids/blob/{revision}/{package}/{path}#L{lineno}", +) + +# ----------------------------------------------------------------------------- +# intersphinx +# ----------------------------------------------------------------------------- +_python_version_str = f"{sys.version_info.major}.{sys.version_info.minor}" +_python_doc_base = "https://docs.python.org/" + _python_version_str +intersphinx_mapping = { + "python": (_python_doc_base, None), + "numpy": ("https://numpy.org/doc/stable/", (None, "./_intersphinx/numpy-objects.inv")), + "scipy": ( + "https://docs.scipy.org/doc/scipy/reference", + (None, "./_intersphinx/scipy-objects.inv"), + ), + "sklearn": ("https://scikit-learn.org/stable", (None, "./_intersphinx/sklearn-objects.inv")), + "matplotlib": ("https://matplotlib.org/", (None, "https://matplotlib.org/objects.inv")), + "pandas": ("https://pandas.pydata.org/pandas-docs/stable/", None), + "pybids": ("https://bids-standard.github.io/pybids/", None), + "nibabel": ("https://nipy.org/nibabel/", None), + "nilearn": ("http://nilearn.github.io/stable/", None), + "datalad": ("https://docs.datalad.org/en/stable/", None), +} -# -- Options for LaTeX output ------------------------------------------ - +# ----------------------------------------------------------------------------- +# sphinxcontrib-bibtex +# ----------------------------------------------------------------------------- +bibtex_bibfiles = ["../cubids/data/references.bib"] +bibtex_style = "unsrt" +bibtex_reference_style = "author_year" +bibtex_footbibliography_header = "" + +# ----------------------------------------------------------------------------- +# Options for LaTeX output +# ----------------------------------------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). 
# @@ -154,16 +217,16 @@ (master_doc, "cubids.tex", "CuBIDS Documentation", "PennLINC", "manual"), ] - -# -- Options for manual page output ------------------------------------ - +# ----------------------------------------------------------------------------- +# Options for manual page output +# ----------------------------------------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [(master_doc, "cubids", "CuBIDS Documentation", [author], 1)] - -# -- Options for Texinfo output ---------------------------------------- - +# ----------------------------------------------------------------------------- +# Options for Texinfo output +# ----------------------------------------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) @@ -179,5 +242,20 @@ ), ] -# -- Fix automodule config +# ----------------------------------------------------------------------------- +# Automodule +# ----------------------------------------------------------------------------- add_module_names = False + +# ----------------------------------------------------------------------------- +# Hoverxref +# ----------------------------------------------------------------------------- +hoverxref_auto_ref = True +hoverxref_mathjax = True +hoverxref_roles = [ + "numref", + "confval", + "setting", + "term", + "footcite", +] diff --git a/docs/example.rst b/docs/example.rst index c977d4e4a..d562dc931 100644 --- a/docs/example.rst +++ b/docs/example.rst @@ -1,72 +1,78 @@ +.. 
include:: links.rst + =================== Example Walkthrough =================== -The ``CuBIDS`` workflow is currently being used in neuroimaging labs at a number of institutions -including University of Pennsylvania, Children's Hospital of Philadelphia, the Child Mind Institute, -and University of Minnesota's Masonic Institute for the Developing Brain. To demonstrate the utility of -``CuBIDS``, here we apply the software to a small example dataset that is included `in our Github repo `_. -This example dataset does not contain any PHI. +The ``CuBIDS`` workflow is currently being used in neuroimaging labs at a number of institutions +including University of Pennsylvania, Children's Hospital of Philadelphia, the Child Mind Institute, +and University of Minnesota's Masonic Institute for the Developing Brain. +To demonstrate the utility of ``CuBIDS``, +here we apply the software to a small example dataset that is included +`in our Github repo `_. +This example dataset does not contain any PHI. Following the installation instructions at :doc:`the installation page `, you should have successfully installed ``CuBIDS``, ``DataLad``, and the ``bids-validator`` inside a -conda environment titled "cubids". In this example, we use validator version ``1.7.2``. -Using a different version of the validator may result in slightly different validation -tsv outputs, but the example should still be useful. - -Throughout this example, we use ``DataLad`` for version control. Although ``DataLad`` -is an optional dependency of ``CuBIDS``, we use it here to demonstrate its -powerful integration with ``CuBIDS`` and the benefits it can provide its users. +conda environment titled "cubids". +In this example, we use validator version ``1.7.2``. +Using a different version of the validator may result in slightly different validation tsv outputs, +but the example should still be useful. 
-Now that we have installed CuBIDS and all necessary dependencies, we are ready to begin the curation -process on our example dataset. We create a ``CuBIDS_Test`` directory to function as our working directory -and navigate to it as follows: +Throughout this example, we use ``DataLad`` for version control. +Although ``DataLad`` is an optional dependency of ``CuBIDS``, +we use it here to demonstrate its powerful integration with ``CuBIDS`` and the benefits +it can provide its users. -mkdir $PWD/CuBIDS_Test -cd CuBIDS_Test +Now that we have installed CuBIDS and all necessary dependencies, +we are ready to begin the curation process on our example dataset. +We create a ``CuBIDS_Test`` directory to function as our working directory and navigate to it as follows. .. code-block:: console - $ conda activate cubids - $ mkdir -p $PWD/CuBIDS_Test + $ mkdir $PWD/CuBIDS_Test $ cd CuBIDS_Test + $ conda activate cubids Now, download and unzip the example data (you can also do this in your Finder window): .. code-block:: console - + $ curl -sSLO https://github.com/PennLINC/CuBIDS/raw/main/cubids/testdata/BIDS_Dataset.zip $ unzip BIDS_Dataset.zip $ rm BIDS_Dataset.zip -Identifying and removing PHI ------------------------------------------- +Identifying and removing PHI +---------------------------- As a first step, we use CuBIDS to identify the metadata fields present in the dataset, -and remove any protected health information (PHI) or other sensitive fields. We want to do this *before* implementing any -``DataLad`` commands, as we must ensure PHI is not tracked as part of version control. +and remove any protected health information (PHI) or other sensitive fields. +We want to do this *before* implementing any ``DataLad`` commands, +as we must ensure PHI is not tracked as part of version control. This is accomplished with the following command: .. 
code-block:: console - $ cubids-print-metadata-fields BIDS_Dataset + $ cubids print-metadata-fields BIDS_Dataset -This command returns a total of 66 fields, including acquisition parameters and other metadata -fields present in the dataset's JSON sidecars. From the output we can see that the dataset contains -(simulated) PHI — the `PatientName` field, which we wish to remove. +This command returns a total of 66 fields, including acquisition parameters and other metadata +fields present in the dataset's JSON sidecars. +From the output we can see that the dataset contains (simulated) PHI — the `PatientName` field, +which we wish to remove. To remove the `PatientName` field from the sidecars, we can use the command: .. code-block:: console - $ cubids-remove-metadata-fields BIDS_Dataset --fields PatientName + $ cubids remove-metadata-fields BIDS_Dataset --fields PatientName This command should succeed silently. + Checking the BIDS dataset into DataLad -------------------------------------------- +-------------------------------------- -Now that all PHI has been removed from the metadata, we are ready to check our dataset into ``datalad``. +Now that all PHI has been removed from the metadata, we are ready to check our dataset into ``datalad``. To do this, we run the following command: .. code-block:: console @@ -76,45 +82,47 @@ To do this, we run the following command: This command creates a new directory called ``BIDS_Dataset_DataLad`` where ``DataLad`` will begin implementing version control and provenance tracking while we implement the rest of our ``CuBIDS`` workflow. -The creation of our ``datalad`` dataset is accordingly reflected in the dataset's version control -history, accessible with ``git log``. At any point in the ``CuBIDS`` workflow, +The creation of our ``datalad`` dataset is accordingly reflected in the dataset's version control history, +accessible with ``git log``. 
+At any point in the ``CuBIDS`` workflow, we can view a summary of our dataset's version history by running the following commands: -.. code-block:: console +.. code-block:: console $ cd BIDS_Dataset_DataLad $ git log --oneline $ cd .. -This command will write the following to the terminal: +This command will write the following to the terminal: .. image:: _static/screenshot_1.png -Next, we copy the contents of our BIDS dataset into the newly created and currently empty DataLad -dataset and save the changes. +Next, we copy the contents of our BIDS dataset into the newly created and currently empty DataLad +dataset and save the changes. .. code-block:: console $ cd .. $ cp -r BIDS_Dataset/* BIDS_Dataset_DataLad -In addition to being able to access the version history of our data, any point in this workflow, we can -also check the status of untracked (not yet saved) changes using the datalad status command, as seen -below: +In addition to being able to access the version history of our data, at any point in this workflow, +we can also check the status of untracked (not yet saved) changes using the ``datalad status`` command, +as seen below: -.. code-block:: console +.. code-block:: console $ cd BIDS_Dataset_DataLad && datalad status $ cd .. -This command produces a description of the changes we have made to the data since the last commit +This command produces a description of the changes we have made to the data since the last commit (see below) .. image:: _static/screenshot_2.png -The command above shows all files untracked, as we have copied the BIDS data into -``~/CuBIDS_Test/BIDS_Dataset_DataLad`` but have not yet saved those changes. Our next step is to -run save. It is best practice to provide a detailed commit message, for example: +The command above shows all files untracked, as we have copied the BIDS data into +``~/CuBIDS_Test/BIDS_Dataset_DataLad`` but have not yet saved those changes. +Our next step is to run save. 
+It is best practice to provide a detailed commit message, for example: .. code-block:: console @@ -122,8 +130,9 @@ run save. It is best practice to provide a detailed commit message, for example: At this stage, we also recommend removing the ``BIDS_Dataset`` directory — its contents are safely copied into and tracked in ``BIDS_Dataset_DataLad``. -We can check our ``git`` history to be sure, which will display the version history of our dataset -thus far, with the following command: +We can check our ``git`` history to be sure, +which will display the version history of our dataset thus far, +with the following command: .. code-block:: console @@ -131,51 +140,56 @@ thus far, with the following command: $ git log --oneline $ cd .. -which will produce the following: +which will produce the following: .. image:: _static/screenshot_3.png -As seen above, the creation of our DataLad dataset is now reflected in the dataset’s version control -history. Note that it is best practice to provide a detailed commit message with each change made to -the data. +As seen above, +the creation of our DataLad dataset is now reflected in the dataset's version control history. +Note that it is best practice to provide a detailed commit message with each change made to the data. Adding NIfTI Information to JSON Sidecars -------------------------------------------- +----------------------------------------- Next, we seek to add more image parameters to our sidecars so that we can better define our Key Groups. Historically, only a subset of parameters in the NIfTI image header have been included in a BIDS sidecar... -Parameters such as image dimensions, number of volumes, image obliquity, and voxel sizes — all important -data that can change how our pipelines will eventually run! +Parameters such as image dimensions, number of volumes, image obliquity, and voxel sizes — +all important data that can change how our pipelines will eventually run! To add them to the sidecar metadata, run: .. 
code-block:: console - $ cubids-add-nifti-info BIDS_Dataset_DataLad --use-datalad - -This command adds the NIfTI header information to the JSON sidecars and saves those changes. In order -to ensure that this command has been executed properly, we can run ``cubids-print-metadata-fields`` -once more, which reveals that new NIfTI header information has been successfully included in the metadata. -Since we ran ``cubids-add-nifti-info`` with the ``--use-datalad`` flag set, ``CuBIDS`` automatically saves -the changes made to the dataset to the git log as follows: + $ cubids add-nifti-info BIDS_Dataset_DataLad --use-datalad +This command adds the NIfTI header information to the JSON sidecars and saves those changes. +In order to ensure that this command has been executed properly, +we can run ``cubids print-metadata-fields`` once more, +which reveals that new NIfTI header information has been successfully included in the metadata. +Since we ran ``cubids add-nifti-info`` with the ``--use-datalad`` flag set, +``CuBIDS`` automatically saves the changes made to the dataset to the git log as follows: .. image:: _static/screenshot_4.png -BIDS validation ----------------- + +BIDS validation +--------------- The next step in the ``CuBIDS`` workflow is to run BIDS validation -to detect potential curation errors using ``cubids-validate``. +to detect potential curation errors using ``cubids validate``. .. code-block:: console - $ cubids-validate BIDS_Dataset_DataLad v0 --sequential + $ cubids validate BIDS_Dataset_DataLad v0 --sequential -.. note:: The use of the ``--sequential`` flag forces the validator to treat each participant as its own BIDS dataset. This can be helpful for identifying heterogeneous elements, but can be slowed down by extremely large datasets. +.. note:: + The use of the ``--sequential`` flag forces the validator to treat each participant as its + own BIDS dataset. 
+ This can be helpful for identifying heterogeneous elements, + but can be slowed down by extremely large datasets. -This command produces the following tsv: +This command produces the following tsv: .. csv-table:: v0_validation.tsv :file: _static/v0_validation.csv @@ -183,59 +197,64 @@ This command produces the following tsv: :header-rows: 1 This initial validation run reveals firstly that Phase Encoding Direction (PED) is not specified -for one of the task-rest BOLD scans. This is an important parameter -for `fieldmap correction in fMRIPRep `_, +for one of the task-rest BOLD scans. +This is an important parameter for +`fieldmap correction in fMRIPrep `_, so knowing this ahead of time is valuable information. -To resolve this, we could either find the PED for this scan elsewhere and -edit its sidecar to include it, or remove that scan from the dataset. -For the purpose of this demonstration, we elect to remove -the scan. To do this, we run the ``cubids-purge`` command. - -``cubids-purge`` requires as input a list of files to cleanly -"purge" from the dataset. You can create this file in any -text editor, as long as it is saved as plain text ``.txt``. For this example, we created the following file: +To resolve this, +we could either find the PED for this scan elsewhere and edit its sidecar to include it, +or remove that scan from the dataset. +For the purposes of this demonstration, +we elect to remove the scan. +To do this, we run the ``cubids purge`` command. + +``cubids purge`` requires as input a list of files to cleanly "purge" from the dataset. +You can create this file in any text editor, +as long as it is saved as plain text ``.txt``. +For this example, we created the following file: .. code-block:: console - + $ cat no_ped.txt - + /AN/EXAMPLE/PATH/CuBIDS_Test/BIDS_Dataset_Datalad/sub-02/ses-phdiff/func/sub-02_ses-phdiff_task-rest_bold.nii.gz -and saved it in our ``CuBIDS_Test directory``. +and saved it in our ``CuBIDS_Test`` directory.
To safely purge this file from the dataset, run: .. code-block:: console - $ cubids-purge BIDS_Dataset_DataLad no_ped.txt --use-datalad + $ cubids purge BIDS_Dataset_DataLad no_ped.txt --use-datalad -We elect to use ``cubids-purge`` instead of simply removing the scan -due to the fact that purge will ensure all associations, -such as sidecars and IntendedFor references in fieldmaps, are -also safely deleted. ``CuBIDS`` will reflect these deletions in the -``git`` history: +We elect to use ``cubids purge`` instead of simply removing the scan due to the fact that +purge will ensure all associations, +such as sidecars and IntendedFor references in fieldmaps, +are also safely deleted. +``CuBIDS`` will reflect these deletions in the ``git`` history: .. image:: _static/screenshot_5.png -Returning again to ``v0_validation.tsv``, we can also see that there is one DWI scan missing -TotalReadoutTime, a metadata field necessary for -`fieldmap correction `_. -After conferring with our MR physicist and the scanner technician, we determine -that TotalReadoutTime (TRT) was erroneously omitted from the DWI sidecars! -After some digging, the technician provided us with the correct value, so it's now our job to manually -add it to the sidecar for which it is missing. Once we have this value, we manually add it to the sidecar -for which it is missing by opening ``BIDS_Dataset_DataLad/sub-03/ses-phdiff/dwi/sub-03_ses-phdiff_acq-HASC55AP_dwi.json`` -in an editor and adding the following line: +Returning again to ``v0_validation.tsv``, +we can also see that there is one DWI scan missing TotalReadoutTime, +a metadata field necessary for `fieldmap correction `_. +After conferring with our MR physicist and the scanner technician, +we determine that TotalReadoutTime (TRT) was erroneously omitted from the DWI sidecars! +After some digging, the technician provided us with the correct value, +so it's now our job to manually add it to the sidecar for which it is missing. 
+Once we have this value, we manually add it to the sidecar for which it is missing by opening +``BIDS_Dataset_DataLad/sub-03/ses-phdiff/dwi/sub-03_ses-phdiff_acq-HASC55AP_dwi.json`` +in an editor and adding the following line: -.. code-block:: console +.. code-block:: console "TotalReadoutTime": 0.0717598, -on a new line anywhere inside the curly braces between lines containing parameters and their values, -save the changes, and close the JSON file. We then save the latest changes to the dataset with a -detailed commit message as follows: +on a new line anywhere inside the curly braces between lines containing parameters and their values, +save the changes, and close the JSON file. +We then save the latest changes to the dataset with a detailed commit message as follows: .. code-block:: console @@ -249,132 +268,139 @@ To verify that there are no remaining validation errors, we rerun validation wit .. code-block:: console - $ cubids-validate BIDS_Dataset_DataLad v1 --sequential + $ cubids validate BIDS_Dataset_DataLad v1 --sequential -This command should produce no tsv output, and instead print “No issues/warnings parsed, your dataset is -BIDS valid” to the terminal, which indicates that the dataset is now free from BIDS validation errors -and warnings. +This command should produce no tsv output, and instead print “No issues/warnings parsed, +your dataset is BIDS valid” to the terminal, +which indicates that the dataset is now free from BIDS validation errors and warnings. Visualizing metadata heterogeneity ------------------------------------ - -Next, we'll use ``CuBIDS`` to gain some insight on the -dataset's structure, heterogeneity, and metadata errors. -We'll do this with ``cubids-group``. - -Large datasets almost inevitably contain multiple validation and metadata -errors, so it's useful to run both ``cubids-validate`` and ``cubids-group`` -in parallel, as validation errors are better understood within the context of a dataset's heterogeneity. 
Being able to see -both metadata errors (such as missing or incorrectly specified -sidecar parameters) that grouping reveals alongside BIDS errors that -the validator catches, gives users a more comprehensive view of -the issues they will need to fix during the curation process. Note that if users -choose to provide just a pass in just a filename prefix (e.g. V1) for the second argument, -then CuBIDS will put the four grouping outputs in ``bids_dir/code/CuBIDS``. If users -provide a path (e.g. ``/Users/scovitz/BIDS/V1``), then output files will go to the -specified location. An example command for running the grouping function as follows: +---------------------------------- + +Next, we'll use ``CuBIDS`` to gain some insight on the dataset's structure, heterogeneity, +and metadata errors. +We'll do this with ``cubids group``. + +Large datasets almost inevitably contain multiple validation and metadata errors, +so it's useful to run both ``cubids validate`` and ``cubids group`` in parallel, +as validation errors are better understood within the context of a dataset's heterogeneity. +Being able to see both metadata errors +(such as missing or incorrectly specified sidecar parameters) +that grouping reveals alongside BIDS errors that the validator catches, +gives users a more comprehensive view of the issues they will need to fix during the curation process. +Note that if users choose to pass in just a filename prefix (e.g., V1) +for the second argument, +then CuBIDS will put the four grouping outputs in ``bids_dir/code/CuBIDS``. +If users provide a path (e.g., ``/Users/scovitz/BIDS/V1``), +then output files will go to the specified location. +An example command for running the grouping function is as follows: .. code-block:: console - $ cubids-group BIDS_Dataset_DataLad v0 + $ cubids group BIDS_Dataset_DataLad v0 -This command will produce four tables that describe the dataset's -heterogeneity in different ways.
+This command will produce four tables that describe the dataset's heterogeneity in different ways. -#. ``v0_summary.tsv`` contains all detected Key and Parameter groups and provides a high-level overview of the heterogeneity in the entire dataset. -#. ``v0_files.tsv`` maps each imaging file in the BIDS directory to a Key and Parameter group. -#. ``v0_AcqGrouping.tsv`` maps each session in the dataset to an Acquisition Group. -#. ``v0_AcqGroupInfo.txt`` lists the set of scanning parameters present in each Acquisition Group. +#. ``v0_summary.tsv`` contains all detected Key and Parameter groups and provides a high-level + overview of the heterogeneity in the entire dataset. +#. ``v0_files.tsv`` maps each imaging file in the BIDS directory to a Key and Parameter group. +#. ``v0_AcqGrouping.tsv`` maps each session in the dataset to an Acquisition Group. +#. ``v0_AcqGroupInfo.txt`` lists the set of scanning parameters present in each Acquisition Group. -By first examining ``v0_summary.tsv`` users are given he opportunity to -conduct metadata quality assurance (QA). The file can help identify -instances of incomplete, incorrect, or unusable parameter groups, -based on acquisition fields such as dimension and voxel sizes, number of volumes, obliquity, and more. +By first examining ``v0_summary.tsv``, users are given the opportunity to conduct metadata +quality assurance (QA). +The file can help identify instances of incomplete, incorrect, or unusable parameter groups, +based on acquisition fields such as dimension and voxel sizes, number of volumes, obliquity, and more. -While ``v0_validation.tsv`` identified all the BIDS validation errors -present in the dataset, it did not identify any potential issues that -might be present within the sidecars' metadata. Below, we see insances of missing -metadata fields in a handful of sidecars, which may impact successful execution of BIDS Apps.
+While ``v0_validation.tsv`` identified all the BIDS validation errors present in the dataset, +it did not identify any potential issues that might be present within the sidecars' metadata. +Below, we see instances of missing metadata fields in a handful of sidecars, +which may impact successful execution of BIDS Apps. .. csv-table:: v0_summary.tsv :file: _static/v0_summary.csv :widths: 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 :header-rows: 1 -Examining ``v0_summary.tsv`` we can see that one DWI Parameter Group — ``acquisition-HASC55AP_datatype-dwi_suffix-dwi__2`` — contains -only one scan (see "Counts" column) with only 10 volumes (see -"NumVolumes" column). Since the majority of DWI scans in this dataset -have 61 volumes, ``CuBIDS`` assigns this single scan to a "Variant" -(i.e. non-dominant) Parameter Group, and automatically populates -that Parameter Group's "RenameKeyGroup" column in ``v0_summary.tsv`` +Examining ``v0_summary.tsv`` we can see that one DWI Parameter Group — +``acquisition-HASC55AP_datatype-dwi_suffix-dwi__2`` — +contains only one scan (see "Counts" column) with only 10 volumes +(see "NumVolumes" column). +Since the majority of DWI scans in this dataset have 61 volumes, +``CuBIDS`` assigns this single scan to a "Variant" (i.e. non-dominant) Parameter Group, +and automatically populates that Parameter Group's "RenameKeyGroup" column in ``v0_summary.tsv`` with a suggested name: ``acquisition-HASC55APVARIANTNumVolumes_datatype-dwi_suffix-dwi``. -This time, though, we elect to remove this scan because it does not have enough volumes to be usable for most analyses. -To do this, we can either use ``cubids-purge`` again, *or* we could -edit v0_summary.tsv by adding ``0`` to the ``MergeInto`` column -in the row (Parameter Group) we want to remove. This will ensure all -scans in that Parameter Group (in this example, just one scan) are removed.
+This time, though, +we elect to remove this scan because it does not have enough volumes to be usable for most analyses. +To do this, we can either use ``cubids purge`` again, +*or* we could edit v0_summary.tsv by adding ``0`` to the ``MergeInto`` column in the row +(Parameter Group) we want to remove. +This will ensure all scans in that Parameter Group (in this example, just one scan) are removed. -Make this change and save this edited version of ``v0_summary.tsv`` as ``v0_edited_summary.tsv``, which will be passed to ``cubids-apply`` in our next -curation step. +Make this change and save this edited version of ``v0_summary.tsv`` as ``v0_edited_summary.tsv``, +which will be passed to ``cubids apply`` in our next curation step. .. csv-table:: v0_edited_summary.tsv :file: _static/v0_edited_summary.csv :widths: 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4 :header-rows: 1 + Applying changes ------------------ - -Now that all metadata issues have been addressed — both validation and -``CuBIDS`` summary — we are ready to rename our files based on their -RenameKeyGroup values and apply the requested deletion in ``v0_edited_summary.tsv``. The ``cubids-apply`` -function renames scans in each Variant Parameter Group according to the metadata parameters with a flag “VARIANT”, which is useful -because the user will then be able to see, in each scan’s filename, which metadata parameters associated with that scan vary from -those in the acquisition’s Dominant Group. If the edited summary and files tsvs are located in -the ``bids_dir/code/CuBIDS`` directory, the user may just pass in those filenames. +---------------- + +Now that all metadata issues have been addressed — +both validation and ``CuBIDS`` summary — +we are ready to rename our files based on their RenameKeyGroup values and +apply the requested deletion in ``v0_edited_summary.tsv``. 
+The ``cubids apply`` function renames scans in each Variant Parameter Group according +to the metadata parameters with a flag “VARIANT”, +which is useful because the user will then be able to see, in each scan's filename, +which metadata parameters associated with that scan vary from those in the acquisition's Dominant Group. +If the edited summary and files tsvs are located in the ``bids_dir/code/CuBIDS`` directory, +the user may just pass in those filenames. Otherwise, specifying the path to those files is necessary. -We can execute cubids-apply with the following command: +We can execute ``cubids apply`` with the following command: .. code-block:: console - $ cubids-apply BIDS_Dataset_DataLad v0_edited_summary.tsv v0_files.tsv v1 --use-datalad - + $ cubids apply BIDS_Dataset_DataLad v0_edited_summary.tsv v0_files.tsv v1 --use-datalad Checking our git log, we can see that our changes from apply have been saved. .. image:: _static/screenshot_7.png -We can check the four grouping tsvs ``cubids-apply`` produces (``v1_*``) to ensure they look as -expected — that all files with variant scanning parameters have been renamed to indicate the parameters +We can check the four grouping tsvs ``cubids apply`` produces (``v1_*``) to ensure they look as expected — +that all files with variant scanning parameters have been renamed to indicate the parameters that vary in the acquisition fields of their filenames. + Exemplar testing ------------------ +---------------- -The curation of the dataset is complete; finally, it's time -for pre-processing. To streamline this step, and as an added measure -for reproducibility and quality assurance, ``CuBIDS`` facilitates this -subsequent step through the creation of an *Exemplar Dataset*: a subset -of the full dataset that spans the full variation of acquisitions and +The curation of the dataset is complete; finally, +it's time for pre-processing. 
+To streamline this step, and as an added measure for reproducibility and quality assurance, +``CuBIDS`` facilitates this subsequent step through the creation of an *Exemplar Dataset*: +a subset of the full dataset that spans the full variation of acquisitions and parameters by including one subject from each Acquisition Group. -By testing only one subject per Acquisition Group, users are able to -pinpoint specific metadata values and scans that may trigger -pipeline failures. These acquisition groups could then be evaluated in -more detail and flagged for remediation or exclusion. The *Exemplar -Dataset* can easily be created with the ``cubids-copy-exemplars`` -command, to which we pass in ``v2_AcqGrouping.tsv`` as input -(the post ``cubids-apply`` acquisition grouping tsv). +By testing only one subject per Acquisition Group, +users are able to pinpoint specific metadata values and scans that may trigger pipeline failures. +These acquisition groups could then be evaluated in more detail and flagged for remediation or exclusion. +The *Exemplar Dataset* can easily be created with the ``cubids copy-exemplars`` command, +to which we pass in ``v1_AcqGrouping.tsv`` as input +(the post ``cubids apply`` acquisition grouping tsv). .. code-block:: console - $ cubids-copy-exemplars BIDS_Dataset_DataLad Exemplar_Dataset v1_AcqGrouping.tsv --use-datalad + $ cubids copy-exemplars BIDS_Dataset_DataLad Exemplar_Dataset v1_AcqGrouping.tsv --use-datalad -Since we used the ``use-datalad`` flag, ``Exemplar_Dataset`` is a DataLad dataset with the version history -tracked in its git log (see below): +Since we used the ``--use-datalad`` flag, +``Exemplar_Dataset`` is a DataLad dataset with the version history tracked in its git log (see below): ..
image:: _static/screenshot_8.png -Once a preprocessing pipeline completes successfully on the Exemplar Dataset, -the full dataset can be executed with confidence, as a pipeline's -behavior on the full range of metadata heterogeneity in the dataset -will have already been discovered during exemplar testing. \ No newline at end of file +Once a preprocessing pipeline completes successfully on the Exemplar Dataset, +the full dataset can be executed with confidence, +as a pipeline's behavior on the full range of metadata heterogeneity in the dataset +will have already been discovered during exemplar testing. diff --git a/docs/glossary.rst b/docs/glossary.rst new file mode 100644 index 000000000..8a2b3a4f3 --- /dev/null +++ b/docs/glossary.rst @@ -0,0 +1,40 @@ +.. include:: links.rst + +Glossary +======== + +.. glossary:: + + Key Group + A set of scans whose filenames share all `BIDS filename key-value pairs`_, + excluding subject and session. + The key group is derived from the common BIDS filename elements. + For example, ``acquisition-*_datatype-*_run-*_task-*_suffix``. + + Parameter Group + A set of scans with identical metadata parameters in their sidecars. + Defined within a Key Group. + Numerically identified, meaning that each Key Group will have *n* Param Groups, + where *n* is the number of unique sets of scanning parameters present in that Key Group + (e.g., 1, 2, etc.). + + Dominant Group + The Param Group that contains the most scans in its Key Group. + + Variant Group + Any Param Group that is non-dominant. + + Rename Key Group + Auto-generated, recommended new Key Group name for Variant Groups. + Based on the metadata parameters that cause scans in Variant Groups to vary from those + in their respective Dominant Groups. + + Acquisition Group + A collection of sessions across participants that contains the exact same set of Key + and Param Groups. + + +References +---------- + +.. 
footbibliography:: diff --git a/docs/index.rst b/docs/index.rst index a492452e6..0b4ba8858 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -1,6 +1,6 @@ .. CuBIDS documentation master file -.. include:: README.rst +.. include:: ../README.rst Contents -------- @@ -9,10 +9,13 @@ Contents :maxdepth: 2 about - usage installation + usage + cli example examples ../CONTRIBUTING ../AUTHORS ../HISTORY + glossary + api diff --git a/docs/installation.rst b/docs/installation.rst index 8b9ddbf53..d55b84a40 100644 --- a/docs/installation.rst +++ b/docs/installation.rst @@ -6,18 +6,21 @@ Installation ============ -.. note:: We **strongly recommend** using ``CuBIDS`` with environment management. For this, we recommend - `miniconda `_ - (`miniforge `_ for M1 Chip Mac Machines). +.. note:: + We **strongly recommend** using ``CuBIDS`` with environment management. + For this, we recommend `miniconda `_ + (`miniforge `_ for M1 Chip Mac Machines). -Once you've installed conda, initialize a new conda environment (for example, named ``cubids``) as follows: +Once you've installed conda, +initialize a new conda environment (for example, named ``cubids``) as follows: .. code-block:: console - $ conda create -n cubids python=3.8 + $ conda create -n cubids python=3.8 pip $ conda activate cubids -You are now ready to install CuBIDS. You can do so in one of two ways. +You are now ready to install CuBIDS. +You can do so in one of two ways. To obtain ``CuBIDS`` locally, we can use ``pip`` to download our software from the `Python Package Manager (Pypi) `_ by running the following commands: @@ -26,8 +29,8 @@ To obtain ``CuBIDS`` locally, we can use ``pip`` to download our software from t $ pip install CuBIDS -Alternatively, you can clone the source code for ``CuBIDS`` from our `GitHub repository`_ -using the following command: +Alternatively, +you can clone the source code for ``CuBIDS`` from our `GitHub repository`_ using the following command: .. 
code-block:: console @@ -40,28 +43,32 @@ Once you have a copy of the source, you can install it with: $ cd CuBIDS $ pip install -e . - -We will now need to install some dependencies of ``CuBIDS``. To do this, we first must install -nodejs. We can accomplish this using the following command: +We will now need to install some dependencies of ``CuBIDS``. +To do this, we first must install nodejs. +We can accomplish this using the following command: .. code-block:: console $ conda install nodejs -Now that we have npm installed, we can install the ``bids-validator`` using the following command: +Now that we have npm installed, we can install ``bids-validator`` using the following command: .. code-block:: console $ npm install -g bids-validator@1.7.2 -In our example walkthrough, we use ``bids-validator`` v1.7.2. using a different version of the -validator may result in slightly different validation tsv printouts, but ``CuBIDS`` is compatible with all -versions of the validator at or above v1.6.2. - -We also recommend using ``CuBIDS`` with the optional ``DatLad`` version control capabilities. -We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on :doc:`the Example Walkthrough page `. -To leverage the version control capabilities, you will need to install both ``DataLad`` and ``git-annex``, -the large file storage software ``DataLad`` runs under the hood. Installation instructions -for ``DataLad`` and ``git-annex`` can be found `here `_ - -.. _GitHub repository: https://github.com/PennLINC/CuBIDS \ No newline at end of file +In our example walkthrough, +we use ``bids-validator`` v1.7.2. Using a different version of the +validator may result in slightly different validation tsv printouts, +but ``CuBIDS`` is compatible with all versions of the validator at or above v1.6.2. + +We also recommend using ``CuBIDS`` with the optional ``DataLad`` version control capabilities.
+We use ``DataLad`` throughout our walkthrough of the CuBIDS Workflow on +:doc:`the Example Walkthrough page `. +To leverage the version control capabilities, +you will need to install both ``DataLad`` and ``git-annex``, +the large file storage software ``DataLad`` runs under the hood. +Installation instructions for ``DataLad`` and ``git-annex`` can be found +`here `_. + +.. _GitHub repository: https://github.com/PennLINC/CuBIDS diff --git a/docs/links.rst b/docs/links.rst new file mode 100644 index 000000000..23abf76f6 --- /dev/null +++ b/docs/links.rst @@ -0,0 +1 @@ +.. _`BIDS filename key-value pairs`: https://bids-specification.readthedocs.io/en/stable/02-common-principles.html#file-name-key-value-pairs diff --git a/docs/sphinxext/github_link.py b/docs/sphinxext/github_link.py new file mode 100644 index 000000000..2b50cb1b9 --- /dev/null +++ b/docs/sphinxext/github_link.py @@ -0,0 +1,90 @@ +""" +This script comes from scikit-learn: +https://github.com/scikit-learn/scikit-learn/blob/master/doc/sphinxext/github_link.py +""" + +import inspect +import os +import subprocess +import sys +from functools import partial +from operator import attrgetter + +REVISION_CMD = "git rev-parse --short HEAD" + + +def _get_git_revision(): + try: + revision = subprocess.check_output(REVISION_CMD.split()).strip() + except (subprocess.CalledProcessError, OSError): + print("Failed to execute git to get revision") + return None + return revision.decode("utf-8") + + +def _linkcode_resolve(domain, info, package, url_fmt, revision): + """Determine a link to online source for a class/method/function + + This is called by sphinx.ext.linkcode + + An example with a long-untouched module that everyone has + >>> _linkcode_resolve('py', {'module': 'tty', + ... 'fullname': 'setraw'}, + ... package='tty', + ... url_fmt='http://hg.python.org/cpython/file/' + ... '{revision}/Lib/{package}/{path}#L{lineno}', + ... 
revision='xxxx') + 'http://hg.python.org/cpython/file/xxxx/Lib/tty/tty.py#L18' + """ + + if revision is None: + return + if domain not in ("py", "pyx"): + return + if not info.get("module") or not info.get("fullname"): + return + + class_name = info["fullname"].split(".")[0] + if isinstance(class_name, str): + # Python 2 only + class_name = class_name.encode("utf-8") + try: + module = __import__(info["module"], fromlist=[class_name]) + obj = attrgetter(info["fullname"])(module) + except Exception: + fn = None + return + + try: + fn = inspect.getsourcefile(obj) + except Exception: + fn = None + if not fn: + try: + fn = inspect.getsourcefile(sys.modules[obj.__module__]) + except Exception: + fn = None + if not fn: + return + + fn = os.path.relpath(fn, start=os.path.dirname(__import__(package).__file__)) + try: + lineno = inspect.getsourcelines(obj)[1] + except Exception: + lineno = "" + return url_fmt.format(revision=revision, package=package, path=fn, lineno=lineno) + + +def make_linkcode_resolve(package, url_fmt): + """Returns a linkcode_resolve function for the given URL format + + revision is a git commit reference (hash or name) + + package is the name of the root module of the package + + url_fmt is along the lines of ('https://github.com/USER/PROJECT/' + 'blob/{revision}/{package}/' + '{path}#L{lineno}') + """ + revision = _get_git_revision() + return partial(_linkcode_resolve, revision=revision, package=package, url_fmt=url_fmt) diff --git a/docs/usage.rst b/docs/usage.rst index ff8fa367d..f86b4e386 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -1,36 +1,40 @@ -========================================== -Commands & Actions -========================================== +======================== +General Usage Guidelines +======================== Before we implement a ``CuBIDS`` workflow, let's define the terminology and take a look at some of the commands available in the software. + More definitions ------------------ +---------------- + .. 
_keygroup: Key Group ~~~~~~~~~ -A *Key Group* is a unique set of BIDS key-value pairs, excluding identifiers such as -subject and session. For example the files:: +A :term:`Key Group` is a unique set of BIDS key-value pairs, +excluding identifiers such as subject and session. +For example, the files:: bids-root/sub-1/ses-1/func/sub-1_ses-1_acq-mb_dir-PA_task-rest_bold.nii.gz bids-root/sub-1/ses-2/func/sub-1_ses-2_acq-mb_dir_PA_task-rest_bold.nii.gz bids-root/sub-2/ses-1/func/sub-2_ses-1_acq-mb_dir-PA_task-rest_bold.nii.gz -Would all share the same Key Group. If these scans were all acquired as a part of the same -study on the same scanner with exactly the same acquisition parameters, this -naming convention would suffice. +Would all share the same Key Group. +If these scans were all acquired as a part of the same study on the same scanner with +exactly the same acquisition parameters, +this naming convention would suffice. -However, in large multi-scanner, multi-site, or longitudinal studies where acquisition -parameters change over time, it's possible that the same Key Group could comprise of -scans that differ in important ways. +However, in large multi-scanner, multi-site, +or longitudinal studies where acquisition parameters change over time, +it's possible that the same Key Group could contain scans that differ in important ways. ``CuBIDS`` examines all acquisitions within a Key Group to see if there are any images -that differ in a set of important acquisition parameters. The subsets of consistent -acquisition parameter sets within a Key Group are called a :ref:`paramgroup`. +that differ in a set of important acquisition parameters. +The subsets of consistent acquisition parameter sets within a Key Group are called a :ref:`paramgroup`. .. _paramgroup: @@ -38,46 +42,54 @@ acquisition parameter sets within a Key Group are called a :ref:`paramgroup`. 
Parameter Group ~~~~~~~~~~~~~~~ -Even though two images may belong to the same Key Group and are valid BIDS, they -may have images with different acquisition parameters. There is nothing fundamentally -wrong with this — the ``bids-validator`` will often simply flag these differences, -with a ``Warning``, but not necessarily suggest changes. That being said, +A :term:`Parameter Group` is a subset of a Key Group that contains images with the same +acquisition parameters. + +Even though two images may belong to the same Key Group and are valid BIDS, +they may have images with different acquisition parameters. +There is nothing fundamentally wrong with this — +the ``bids-validator`` will often simply flag these differences with a ``Warning``, +but not necessarily suggest changes. +That being said, there can be detrimental consequences downstream if the different parameters cause the same preprocessing pipelines to configure differently to images of the same Key Group. -Acquisition Group -~~~~~~~~~~~~~~~~~ - -Acquisition Groups are sets of subjects who's images belong to all the same Key and Parameter Groups. The Acquistion Groups that subjects belong to are listed in ``_AcqGrouping.csv``, while the Key Groups and Parameter Groups that define each Acquisition Group are noted in ``_AcqGroupingInfo.txt``. - .. _acquisitiongroup: Acquisition Group -~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~ + +We define an :term:`Acquisition Group` as a collection of sessions across participants that +contain the exact same set of Key and Parameter Groups. +Since Key Groups are based on the BIDS filenames— +and therefore both MRI image type and acquisition specific— +each BIDS session directory contains images that belong to a set of Parameter Groups. 
+CuBIDS assigns each session, or set of Parameter Groups, +to an Acquisition Group such that all sessions in an Acquisition Group possess an identical set of +scan acquisitions and metadata parameters across all image modalities present in the dataset. +We find Acquisition Groups to be a particularly useful categorization of BIDS data, +as they identify homogeneous sets of sessions (not individual scans) in a large dataset. +They are also useful for expediting the testing of pipelines; +if a BIDS App runs successfully on a single subject from each Acquisition Group, +one can be confident that it will handle all combinations of scanning parameters in the entire dataset. + +The Acquisition Groups that subjects belong to are listed in ``_AcqGrouping.tsv``, +while the Key Groups and Parameter Groups that define each Acquisition Group are noted in +``_AcqGroupInfo.txt``. -We define an “Acquisition Group” as a collection of sessions across participants that contain the exact -same set of Key and Parameter Groups. Since Key Groups are based on the BIDS filenames—and therefore both -MRI image type and acquisition specific—each BIDS session directory contains images that belong to a set of -Parameter Groups. CuBIDS assigns each session––or set of Parameter Groups––to an Acquisition Group -such that all sessions in an Acquisition Group possesses an identical set of scan acquisitions and -metadata parameters across all image modalities present in the dataset. We find Acquisition Groups to be -a particularly useful categorization of BIDS data, as they identify homogeneous sets of sessions (not -individual scans) in a large dataset. They are also useful for expediting the testing of pipelines; if a -BIDS App runs successfully on a single subject from each Acquisition Group, one can be confident that it -will handle all combinations of scanning parameters in the entire dataset. .. 
_summaryfile: The ``_summary.tsv`` File ~~~~~~~~~~~~~~~~~~~~~~~~~ -This file contains all the detected Key Groups and Parameter Groups. It provides -an opportunity to evaluate your data and decide how to handle heterogeneity. +This file contains all the detected Key Groups and Parameter Groups. +It provides an opportunity to evaluate your data and decide how to handle heterogeneity. -Below is an example ``_summary.tsv`` of the run-1 DWI Key Group in the PNC [#f1]_. This -reflects the original data that has been converted to BIDS using a heuristic. It is -similar to what you will see when you first use this functionality: +Below is an example ``_summary.tsv`` of the run-1 DWI Key Group in the PNC [#f1]_. +This reflects the original data that has been converted to BIDS using a heuristic. +It is similar to what you will see when you first use this functionality: .. csv-table:: Pre Apply Groupings :file: _static/PNC_pre_apply_summary_dwi_run1.csv @@ -88,10 +100,11 @@ similar to what you will see when you first use this functionality: .. _filelistfile: The ``_files.tsv`` file -~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~ -This file contains one row per imaging file in the BIDS directory. You won't need to edit this file -directly, but it keeps track of every file's assignment to Key and Parameter Groups. +This file contains one row per imaging file in the BIDS directory. +You won't need to edit this file directly, +but it keeps track of every file's assignment to Key and Parameter Groups. .. _acqgrouptsv: @@ -100,80 +113,96 @@ Modifying Key and Parameter Group Assignments --------------------------------------------- Sometimes we see that there are important differences in acquisition parameters within a Key Group. -If these differences impact how a pipeline will process the data, it makes sense to assign the scans -in that Parameter Group to a different Key Group (i.e. assign them a different BIDS name). 
This can -be accomplished by editing the empty columns in the `_summary.csv` file produced by ``cubids-group``. +If these differences impact how a pipeline will process the data, +it makes sense to assign the scans in that Parameter Group to a different Key Group +(i.e., assign them a different BIDS name). +This can be accomplished by editing the empty columns in the ``_summary.csv`` file produced by +``cubids group``. Once the columns have been edited you can apply the changes to BIDS data using .. code-block:: console - $ cubids-apply /bids/dir keyparam_edited new_keyparam_prefix + $ cubids apply /bids/dir keyparam_edited new_keyparam_prefix The changes in ``keyparam_edited_summary.csv`` will be applied to the BIDS data in ``/bids/dir`` -and the new Key and Parameter groups will be saved to csv files starting with ``new_keyparam_prefix``. Note: -fieldmaps keygroups with variant parameters will be identified but not renamed. - +and the new Key and Parameter groups will be saved to csv files starting with ``new_keyparam_prefix``. +Note: fieldmap Key Groups with variant parameters will be identified but not renamed. The ``_AcqGrouping.tsv`` file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +The ``_AcqGrouping.tsv`` file organizes the dataset by session and tags each one with its +Acquisition Group number. -The ``_AcqGrouping.tsv`` file organizes the dataset by session and tags each one with its Acquisition Group number. .. _acqgrouptxt: The ``_AcqGroupInfo.txt`` file -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +The ``_AcqGroupInfo.txt`` file lists all Key Groups that belong to a given Acquisition Group +along with the number of sessions each group possesses. -The ``_AcqGroupInfo.txt`` file lists all Key Groups that belong to a given Acquisition Group along with \ -the number of sessions each group possesses. 
-Visualizing and summarizing metadata heterogenaity ---------------------------------------------------- +Visualizing and summarizing metadata heterogeneity +-------------------------------------------------- -Use ``cubids-group`` to generate your dataset's Key Groups and Parameter Groups: +Use ``cubids group`` to generate your dataset's Key Groups and Parameter Groups: .. code-block:: console - $ cubids-group FULL/PATH/TO/BIDS/DIR FULL/PATH/TO/v0 + $ cubids group FULL/PATH/TO/BIDS/DIR FULL/PATH/TO/v0 This will output four files, including the summary and files tsvs described above, prefixed by the second argument ``v0``. + Applying changes ------------------- +---------------- -The ``cubids-apply`` program provides an easy way for users to manipulate their datasets. -Specifically, ``cubids-apply`` can rename files according to the users’ specification in a tracked -and organized way. Here, the summary.tsv functions as an interface modifications; users can mark +The ``cubids apply`` program provides an easy way for users to manipulate their datasets. +Specifically, +``cubids apply`` can rename files according to the users' specification in a tracked and organized way. +Here, the summary.tsv functions as an interface for modifications; users can mark ``Parameter Groups`` they want to rename (or delete) in a dedicated column of the summary.tsv and -pass that edited tsv as an argument to ``cubids-apply``. +pass that edited tsv as an argument to ``cubids apply``. + Detecting Variant Groups -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - -Additionally, cubids-apply can automatically rename files in ``Variant Groups`` based on their -scanning parameters that vary from those in their Key Groups’ Dominant Parameter Groups. Renaming -is automatically suggested when the summary.tsv is generated from a cubids-group run, with the suggested -new name listed in the tsv’s “Rename Key Group” column. 
CuBIDS populates this column for all Variant -Groups—e.g., every Parameter Group except the Dominant one. Specifically, CuBIDS will suggest renaming -all non-dominant Parameter Group to include VARIANT* in their acquisition field where * is the reason -the Parameter Group varies from the Dominant Group. For example, when CuBIDS encounters a Parameter -Group with a repetition time that varies from the one present in the Dominant Group, it will automatically -suggest renaming all scans in that Variant Group to include ``acquisition-VARIANTRepetitionTime`` in their -filenames. When the user runs ``cubids-apply``, filenames will get renamed according to the auto-generated -names in the “Rename Key Group” column in the summary.tsv +~~~~~~~~~~~~~~~~~~~~~~~~ + +Additionally, ``cubids apply`` can automatically rename files in :term:`Variant Groups <Variant Group>` +based on their scanning parameters that vary from those in their Key Groups' +:term:`Dominant Parameter Groups <Dominant Group>`. +Renaming is automatically suggested when the summary.tsv is generated from a ``cubids group`` run, +with the suggested new name listed in the tsv's :term:`Rename Key Group` column. +CuBIDS populates this column for all Variant Groups +(i.e., every Parameter Group except the Dominant one). +Specifically, CuBIDS will suggest renaming all non-dominant Parameter Groups to include ``VARIANT*`` +in their acquisition field where ``*`` is the reason +the Parameter Group varies from the Dominant Group. +For example, when CuBIDS encounters a Parameter Group with a repetition time that varies from +the one present in the Dominant Group, +it will automatically suggest renaming all scans in that Variant Group to include +``acquisition-VARIANTRepetitionTime`` in their filenames. 
+When the user runs ``cubids apply``, +filenames will get renamed according to the auto-generated names in the “Rename Key Group” column +in the summary.tsv. + Deleting a mistake -~~~~~~~~~~~~~~~~~~~~~~ +~~~~~~~~~~~~~~~~~~ -To remove files in a Parameter Group from your BIDS data, you simply set the ``MergeInto`` value -to ``0``. We see in our data that there is a strange scan that has a ``RepetitionTime`` of 12.3 -seconds and is also variant with respect to EffectiveEchoSpacing and EchoTime. We elect to remove this scan from -our dataset because we do not want these parameters to affect our analyses. -To remove these files from your BIDS data, add a ``0`` to ``MergeInto`` and save the new tsv as ``v0_edited_summary.tsv`` +To remove files in a Parameter Group from your BIDS data, +you simply set the ``MergeInto`` value to ``0``. +We see in our data that there is a strange scan that has a ``RepetitionTime`` of 12.3 +seconds and is also variant with respect to EffectiveEchoSpacing and EchoTime. +We elect to remove this scan from our dataset because we do not want these parameters to affect our +analyses. +To remove these files from your BIDS data, +add a ``0`` to ``MergeInto`` and save the new tsv as ``v0_edited_summary.tsv``: .. csv-table:: Pre Apply Groupings with Deletion Requested :file: _static/PNC_pre_apply_summary_dwi_run1_deletion.csv @@ -184,7 +213,7 @@ In this example, users can apply the changes to BIDS data using the following co .. code-block:: console - $ cubids-apply FULL/PATH/TO/BIDS/DIR FULL/PATH/TO/v0_edited_summary.tsv FULL/PATH/TO/v0_files.tsv FULL/PATH/TO/v1 + $ cubids apply FULL/PATH/TO/BIDS/DIR FULL/PATH/TO/v0_edited_summary.tsv FULL/PATH/TO/v0_files.tsv FULL/PATH/TO/v1 The changes in ``v0_edited_summary.tsv`` will be applied to the BIDS data and the new Key and Parameter Groups will be saved to tsv files starting with ``v1``. 
@@ -197,59 +226,54 @@ Applying these changes we would see: :header-rows: 1 -Command line tools -------------------- - -With that brief introduction done, we can introduce the full gamut -of ``CuBIDS`` command line tools: - -.. code-block:: bash - - cubids --help - -This will print the instructions for using the command line interface in your command line. - -.. argparse:: - :ref: cubids.cli._get_parser - :prog: cubids - :func: _get_parser - - Customizable configuration ---------------------------- +-------------------------- + ``CuBIDS`` also features an optional, customizable, MRI image type-specific configuration file. -This file can be passed as an argument to cubids-group and cubids-apply using the ``–-config`` flag -and allows users to customize grouping settings based on MRI image type and parameter. Each ``Key Group`` -is associated with one (and only one) MRI image type, as BIDS filenames include MRI image type-specific values -as their suffixes. This easy-to-modify configuration file provides several benefits to curation. +This file can be passed as an argument to ``cubids group`` and ``cubids apply`` +using the ``--config`` flag and allows users to customize grouping settings based on +MRI image type and parameter. +Each ``Key Group`` is associated with one (and only one) MRI image type, +as BIDS filenames include MRI image type-specific values as their suffixes. + +This easy-to-modify configuration file provides several benefits to curation. First, it allows users to add and remove metadata parameters from the set that determines groupings. This can be very useful if a user deems a specific metadata parameter irrelevant and wishes to collapse -variation based on that parameter into a single Parameter Group. Second, the configuration file allows -users to apply tolerances for parameters with numerical values. 
This functionality allows users to avoid -very small differences in scanning parameters (i.e., a TR of 3.0s vs 3.0001s) being split into different -``Parameter Groups``. Third, the configuration file allows users to determine which scanning parameters +variation based on that parameter into a single Parameter Group. +Second, the configuration file allows users to apply tolerances for parameters with numerical values. +This functionality allows users to avoid very small differences in scanning parameters +(e.g., a TR of 3.0s vs 3.0001s) +being split into different ``Parameter Groups``. +Third, the configuration file allows users to determine which scanning parameters are listed in the acquisition field when auto-renaming is applied to ``Variant Groups``. Exemplar testing ------------------ -In addition to facilitating curation of large, heterogeneous BIDS datasets, ``CuBIDS`` also prepares -datasets for testing BIDS Apps. This portion of the ``CuBIDS`` workflow relies on the concept of the -Acquisition Group: a set of sessions that have identical scan types and metadata across all imaging -modalities present in the session set. Specifically, ``cubids-copy-exemplars`` copies one subject from each -Acquisition Group into a separate directory, which we call an ``Exemplar Dataset``. Since the ``Exemplar Dataset`` -contains one randomly selected subject from each unique Acquisition Group in the dataset, it will be a -valid BIDS dataset that spans the entire metadata parameter space of the full study. If users run -``cubids-copy-exemplars`` with the ``–-use-datalad`` flag, the program will ensure that the ``Exemplar Dataset`` -is tracked and saved in ``DataLad``. If the user chooses to forgo this flag, the ``Exemplar Dataset`` -will be a standard directory located on the filesystem. 
Once the ``Exemplar Dataset`` has been created, -a user can test it with a BIDS App (e.g., fMRIPrep or QSIPrep) to ensure that each unique set of scanning -parameters will pass through the pipelines successfully. Because BIDS Apps auto-configure workflows based -on the metadata encountered, they will process all scans in each ``Acquisition Group`` in the same way. By -first verifying that BIDS Apps perform as intended on the small sub-sample of participants present in the -``Exemplar Dataset`` (that spans the full variation of the metadata), users can confidently move forward -processing the data of the complete BIDS dataset. - +---------------- + +In addition to facilitating curation of large, heterogeneous BIDS datasets, +``CuBIDS`` also prepares datasets for testing BIDS Apps. +This portion of the ``CuBIDS`` workflow relies on the concept of the Acquisition Group: +a set of sessions that have identical scan types and metadata across all imaging +modalities present in the session set. +Specifically, ``cubids copy-exemplars`` copies one subject from each +Acquisition Group into a separate directory, +which we call an ``Exemplar Dataset``. +Since the ``Exemplar Dataset`` contains one randomly selected subject from each unique +Acquisition Group in the dataset, +it will be a valid BIDS dataset that spans the entire metadata parameter space of the full study. +If users run ``cubids copy-exemplars`` with the ``--use-datalad`` flag, +the program will ensure that the ``Exemplar Dataset`` is tracked and saved in ``DataLad``. +If the user chooses to forgo this flag, +the ``Exemplar Dataset`` will be a standard directory located on the filesystem. +Once the ``Exemplar Dataset`` has been created, +a user can test it with a BIDS App (e.g., fMRIPrep or QSIPrep) +to ensure that each unique set of scanning parameters will pass through the pipelines successfully. 
+Because BIDS Apps auto-configure workflows based on the metadata encountered, +they will process all scans in each ``Acquisition Group`` in the same way. +By first verifying that BIDS Apps perform as intended on the small sub-sample of participants +present in the ``Exemplar Dataset`` (that spans the full variation of the metadata), +users can confidently move forward processing the data of the complete BIDS dataset. In the next section, we'll introduce ``DataLad`` and walk through a real example. diff --git a/pyproject.toml b/pyproject.toml index 5ee74c42f..2d9027c97 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -44,9 +44,12 @@ doc = [ "recommonmark", "sphinx >= 2.2", "sphinx-argparse", + "sphinx-copybutton", "sphinx_gallery", + "sphinx-hoverxref", "sphinx_markdown_tables", "sphinx_rtd_theme", + "sphinxcontrib-bibtex", ] tests = [ "codespell",