From 64b209766acd2d0cde27143f545adb6f92b463f4 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 11:48:29 -0500 Subject: [PATCH 01/10] Add is_longitudinal attribute to the CuBIDS class --- cubids/cubids.py | 88 ++++++++++++++++++++++++++++++-------- cubids/metadata_merge.py | 36 ++++++++++------ cubids/tests/test_apply.py | 27 +++++++----- cubids/tests/test_bond.py | 7 +-- cubids/workflows.py | 4 ++ 5 files changed, 119 insertions(+), 43 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 158e97d05..3329444ff 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -84,6 +84,8 @@ class CuBIDS(object): A data dictionary for TSV outputs. use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. + is_longitudinal : :obj:`bool` + If True, includes "ses" in filepath. Default is False. """ def __init__( @@ -93,6 +95,7 @@ def __init__( acq_group_level="subject", grouping_config=None, force_unlock=False, + is_longitudinal=False, ): self.path = os.path.abspath(data_root) self._layout = None @@ -110,10 +113,11 @@ def __init__( self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset + self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset if self.use_datalad: self.init_datalad() - if self.acq_group_level == "session": + if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") @property @@ -452,7 +456,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # remove renames file that gets created under the hood subprocess.run(["rm", "-rf", "renames"]) - def change_filename(self, filepath, entities): + def change_filename(self, filepath, entities, is_longitudinal=False): """Apply changes to a filename based on the renamed entity sets. This function takes into account the new entity set names @@ -464,6 +468,8 @@ def change_filename(self, filepath, entities): Path prefix to a file in the affected entity set change. entities : :obj:`dict` A pybids dictionary of entities parsed from the new entity set name. + is_longitudinal : :obj:`bool`, optional + If True, includes "ses" in filepath. Default is False. Notes ----- @@ -473,6 +479,7 @@ def change_filename(self, filepath, entities): filepath=filepath, entities=entities, out_dir=str(self.path), + is_longitudinal=self.is_longitudinal ) exts = Path(filepath).suffixes @@ -481,7 +488,8 @@ def change_filename(self, filepath, entities): suffix = entities["suffix"] sub = get_entity_value(filepath, "sub") - ses = get_entity_value(filepath, "ses") + if self.is_longitudinal: + ses = get_entity_value(filepath, "ses") # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -577,7 +585,10 @@ def change_filename(self, filepath, entities): self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! - ses_path = self.path + "/" + sub + "/" + ses + if self.is_longitudinal: + ses_path = self.path + "/" + sub + "/" + ses + elif not self.is_longitudinal: + ses_path = self.path + "/" + sub files_with_if = [] files_with_if += Path(ses_path).rglob("fmap/*.json") files_with_if += Path(ses_path).rglob("perf/*_m0scan.json") @@ -595,7 +606,7 @@ def change_filename(self, filepath, entities): # Coerce IntendedFor to a list. 
data["IntendedFor"] = listify(data["IntendedFor"]) for item in data["IntendedFor"]: - if item == _get_participant_relative_path(filepath): + if item == _get_participant_relative_path(filepath): # remove old filename data["IntendedFor"].remove(item) # add new filename @@ -1363,6 +1374,7 @@ def get_layout(self): return self.layout +# XXX: Remove _validate_json? def _validate_json(): """Validate a JSON file's contents. @@ -1402,8 +1414,29 @@ def _get_participant_relative_path(scan): This is what will appear in the IntendedFor field of any association. + Examples: + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", + ... ) + 'ses-01/func/sub-01_ses-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", + ... ) + 'func/sub-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", + ... ) + Traceback (most recent call last): + ValueError: Could not find subject in ... """ - return "/".join(Path(scan).parts[-3:]) + parts = Path(scan).parts + # Find the first part that starts with "sub-" + for i, part in enumerate(parts): + if part.startswith("sub-"): + return "/".join(parts[i+1:]) + raise ValueError(f"Could not find subject in {scan}") def _get_bidsuri(filename, dataset_root): @@ -1734,7 +1767,7 @@ def get_entity_value(path, key): return part -def build_path(filepath, entities, out_dir): +def build_path(filepath, entities, out_dir, is_longitudinal=False): """Build a new path for a file based on its BIDS entities. Parameters @@ -1746,6 +1779,8 @@ def build_path(filepath, entities, out_dir): This should include all of the entities in the filename *except* for subject and session. out_dir : str The output directory for the new file. + is_longitudinal : bool, optional + If True, add "ses" to file path. Default is False. Returns ------- @@ -1758,6 +1793,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz", ... {"acquisition": "VAR", "suffix": "T2w"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz' @@ -1766,6 +1802,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1775,6 +1812,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz", ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz' @@ -1784,6 +1822,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1792,6 +1831,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... 
) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1801,6 +1841,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1809,6 +1850,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1817,6 +1859,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1825,19 +1868,19 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz", ... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"}, ... "/output", + ... True, ... ) WARNING: DATATYPE CHANGE DETECTED '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz' - It expects a longitudinal structure, so providing a cross-sectional filename won't work. - XXX: This is a bug. + It also works for cross-sectional filename. >>> build_path( ... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, + ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... False, ... ) - Traceback (most recent call last): - ValueError: Could not extract subject or session from ... + '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz' """ exts = Path(filepath).suffixes old_ext = "".join(exts) @@ -1853,9 +1896,13 @@ def build_path(filepath, entities, out_dir): entity_file_keys.append(key) sub = get_entity_value(filepath, "sub") - ses = get_entity_value(filepath, "ses") - if sub is None or ses is None: - raise ValueError(f"Could not extract subject or session from {filepath}") + if sub is None: + raise ValueError(f"Could not extract subject from {filepath}") + + if is_longitudinal: + ses = get_entity_value(filepath, "ses") + if ses is None: + raise ValueError(f"Could not extract session from {filepath}") # Add leading zeros to run entity if it's an integer. # If it's a string, respect the value provided. 
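
For reference, the hunks above and below reduce to a single branch in how the
new path gets assembled. A minimal standalone sketch of that behavior
(illustrative values only, not the verbatim implementation):

    from pathlib import Path

    def assemble(out_dir, sub, ses, dtype, filename, is_longitudinal):
        # Insert the session directory only for longitudinal datasets.
        if is_longitudinal:
            return str(Path(out_dir) / sub / ses / dtype / filename)
        return str(Path(out_dir) / sub / dtype / filename)

    # Longitudinal:    /output/sub-01/ses-01/func/sub-01_ses-01_..._bold.nii.gz
    # Cross-sectional: /output/sub-01/func/sub-01_..._bold.nii.gz
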
@@ -1874,7 +1921,10 @@ def build_path(filepath, entities, out_dir): .replace("reconstruction", "rec") ) if len(filename) > 0: - filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + if is_longitudinal: + filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + elif not is_longitudinal: + filename = f"{sub}_{filename}_{suffix}{old_ext}" else: raise ValueError(f"Could not construct new filename for {filepath}") @@ -1894,5 +1944,9 @@ def build_path(filepath, entities, out_dir): dtype_new = dtype_orig # Construct the new filename - new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + if is_longitudinal: + new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + elif not is_longitudinal: + new_path = str(Path(out_dir) / sub / dtype_new / filename) + return new_path diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6562f35b7..0779db090 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -243,13 +243,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False): return 0 -def get_acq_dictionary(): +def get_acq_dictionary(is_longitudinal=False): """Create a BIDS data dictionary from dataframe columns. Parameters ---------- - df : :obj:`pandas.DataFrame` - Pre export TSV that will be converted to a json dictionary. + is_longitudinal : :obj:`bool`, optional + If True, add "session" to acq_dict. Default is False. Returns ------- @@ -258,7 +258,8 @@ def get_acq_dictionary(): """ acq_dict = {} acq_dict["subject"] = {"Description": "Participant ID"} - acq_dict["session"] = {"Description": "Session ID"} + if is_longitudinal: + acq_dict["session"] = {"Description": "Session ID"} docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions" desc = "Acquisition Group. See Read the Docs for more information" acq_dict["AcqGroup"] = {"Description": desc + docs} @@ -266,7 +267,7 @@ def get_acq_dictionary(): return acq_dict -def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): +def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False): """Find unique sets of Key/Param groups across subjects. This writes out the following files: @@ -284,6 +285,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): Prefix for output files. acq_group_level : {"subject", "session"} Level at which to group acquisitions. + is_longitudinal : :obj:`bool`, optional + If True, add "session" to acq_dict. Default is False. 
""" from bids import config from bids.layout import parse_file_entities @@ -298,9 +301,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), file_entities.get("session")) + if is_longitudinal: + acq_id = (file_entities.get("subject"), file_entities.get("session")) + elif not is_longitudinal: + acq_id = (file_entities.get("subject")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) - else: + elif is_longitudinal and acq_group_level == "session": acq_id = (file_entities.get("subject"), None) acq_groups[acq_id].append( (row.EntitySet, row.ParamGroup, file_entities.get("session")) @@ -326,17 +332,23 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) - for subject, session in contents_to_subjects[content_id]: - grouped_sub_sess.append( - {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} - ) + if is_longitudinal: + for subject, session in contents_to_subjects[content_id]: + grouped_sub_sess.append( + {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} + ) + elif not is_longitudinal: + for subject in contents_to_subjects[content_id]: + grouped_sub_sess.append( + {"subject": "sub-" + subject, "AcqGroup": groupnum} + ) # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv - acq_dict = get_acq_dictionary() + acq_dict = get_acq_dictionary(is_longitudinal) with open(output_prefix + "_AcqGrouping.json", "w") as outfile: json.dump(acq_dict, outfile, indent=4) diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index ba92b6034..ebe67818b 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -237,33 +237,35 @@ def summary_data(): @pytest.mark.parametrize( - ("name", "skeleton", "intended_for", "expected"), + ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ - ( + ( # doesn't have acq-VAR "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", + True, "pass", ), - ( + ( # doesn't have ses-01 "bidsuri_long", bidsuri_intendedfor_long, "bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", + True, "pass", ), - ( + ( # doesn't have acq-VAR "relpath_cs", relpath_intendedfor_cs, - # XXX: CuBIDS enforces longitudinal dataset, so this fails. "dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - ValueError, + False, + "pass", ), - ( + ( # pass "bidsuri_cs", bidsuri_intendedfor_cs, - # XXX: CuBIDS enforces longitudinal dataset, so this fails. "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - ValueError, + False, + "pass", ), ], ) @@ -274,6 +276,7 @@ def test_cubids_apply_intendedfor( name, skeleton, intended_for, + is_longitudinal, expected, ): """Test cubids apply with different IntendedFor types. 
@@ -308,7 +311,7 @@ def test_cubids_apply_intendedfor( bids_dir = tmpdir / name generate_bids_skeleton(str(bids_dir), skeleton) - if "long" in name: + if is_longitudinal: fdata = files_data["longitudinal"] fmap_json = bids_dir / "sub-01/ses-01/fmap/sub-01_ses-01_dir-AP_epi.json" else: @@ -336,6 +339,7 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, + is_longitudinal=is_longitudinal, ) with open(fmap_json) as f: @@ -353,4 +357,5 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, - ) + is_longitudinal=is_longitudinal, + ) \ No newline at end of file diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index a4da48a23..fdcde2eee 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -489,7 +489,7 @@ def test_tsv_merge_changes(tmp_path): The temporary path where the test data will be copied. """ data_root = get_data(tmp_path) - bod = CuBIDS(data_root / "inconsistent", use_datalad=True) + bod = CuBIDS(data_root / "inconsistent", use_datalad=True, is_longitudinal=True) bod.datalad_save() assert bod.is_datalad_clean() @@ -946,7 +946,8 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", + use_datalad=True, is_longitudinal=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1193,6 +1194,7 @@ def test_bids_version(tmp_path): ), f"Schema version {schema_version} is less than minimum {min_schema_version}" +<<<<<<< HEAD def test_docker(): """Verify that docker is installed and the user has permission to run docker images. @@ -1218,7 +1220,6 @@ def test_docker(): return_status = 0 assert return_status - # def test_image(image='pennlinc/bond:latest'): # """Check whether image is present on local system.""" # ret = subprocess.run(['docker', 'images', '-q', image], diff --git a/cubids/workflows.py b/cubids/workflows.py index c09366d1e..07d03bfe9 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -432,6 +432,7 @@ def apply( files_tsv, new_tsv_prefix, container, + is_longitudinal=False, ): """Apply the tsv changes. @@ -453,6 +454,8 @@ def apply( Path to the new tsv prefix. container : :obj:`str` Container in which to run the workflow. + is_longitudinal : :obj:`bool` + If True, includes "ses" in filepath. Default is False. """ # Run directly from python using if container is None: @@ -461,6 +464,7 @@ def apply( use_datalad=use_datalad, acq_group_level=acq_group_level, grouping_config=config, + is_longitudinal=is_longitudinal, ) if use_datalad: if not bod.is_datalad_clean(): From 8f1b3c4cd966f287cd1f843d1a09bdfdbfe12fc3 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 13:08:32 -0500 Subject: [PATCH 02/10] fix lint issues --- cubids/config.py | 1 + cubids/cubids.py | 12 ++++++------ cubids/metadata_merge.py | 6 ++---- cubids/tests/test_apply.py | 12 ++++++------ cubids/tests/test_bond.py | 34 ++++++---------------------------- 5 files changed, 21 insertions(+), 44 deletions(-) diff --git a/cubids/config.py b/cubids/config.py index a863d4eab..6c9b67887 100644 --- a/cubids/config.py +++ b/cubids/config.py @@ -4,6 +4,7 @@ import importlib.resources import yaml + def load_config(config_file): """Load a YAML file containing a configuration for param groups. 
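
Stepping back to the grouping change in PATCH 01: acquisition-group keys now
depend on the dataset layout. A short sketch with illustrative values (the
entities dict stands in for a hypothetical parse_file_entities result):

    from collections import defaultdict

    acq_groups = defaultdict(list)
    entities = {"subject": "01", "session": "01"}
    is_longitudinal = True

    if is_longitudinal:
        # One key per subject/session pair (per visit).
        acq_id = (entities.get("subject"), entities.get("session"))
    else:
        # One key per subject.
        acq_id = entities.get("subject")

    # Each entry is an (EntitySet, ParamGroup) pair.
    acq_groups[acq_id].append(("datatype-func_suffix-bold_task-rest", 1))
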
diff --git a/cubids/cubids.py b/cubids/cubids.py index 3329444ff..3dd554ef5 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -113,7 +113,7 @@ def __init__( self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset - self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset + self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset if self.use_datalad: self.init_datalad() @@ -479,7 +479,7 @@ def change_filename(self, filepath, entities, is_longitudinal=False): filepath=filepath, entities=entities, out_dir=str(self.path), - is_longitudinal=self.is_longitudinal + is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes @@ -606,7 +606,7 @@ def change_filename(self, filepath, entities, is_longitudinal=False): # Coerce IntendedFor to a list. data["IntendedFor"] = listify(data["IntendedFor"]) for item in data["IntendedFor"]: - if item == _get_participant_relative_path(filepath): + if item == _get_participant_relative_path(filepath): # remove old filename data["IntendedFor"].remove(item) # add new filename @@ -1419,12 +1419,12 @@ def _get_participant_relative_path(scan): ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", ... ) 'ses-01/func/sub-01_ses-01_bold.nii.gz' - + >>> _get_participant_relative_path( ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", ... ) 'func/sub-01_bold.nii.gz' - + >>> _get_participant_relative_path( ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", ... ) @@ -1435,7 +1435,7 @@ def _get_participant_relative_path(scan): # Find the first part that starts with "sub-" for i, part in enumerate(parts): if part.startswith("sub-"): - return "/".join(parts[i+1:]) + return "/".join(parts[i + 1 :]) raise ValueError(f"Could not find subject in {scan}") diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 0779db090..3dc02bbf0 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -304,7 +304,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_long if is_longitudinal: acq_id = (file_entities.get("subject"), file_entities.get("session")) elif not is_longitudinal: - acq_id = (file_entities.get("subject")) + acq_id = file_entities.get("subject") acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) elif is_longitudinal and acq_group_level == "session": acq_id = (file_entities.get("subject"), None) @@ -339,9 +339,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_long ) elif not is_longitudinal: for subject in contents_to_subjects[content_id]: - grouped_sub_sess.append( - {"subject": "sub-" + subject, "AcqGroup": groupnum} - ) + grouped_sub_sess.append({"subject": "sub-" + subject, "AcqGroup": groupnum}) # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index ebe67818b..a44ebf451 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -239,28 +239,28 @@ def summary_data(): @pytest.mark.parametrize( ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ - ( # doesn't have acq-VAR + ( # doesn't have acq-VAR "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", True, "pass", ), - ( # doesn't have ses-01 + ( # doesn't have ses-01 "bidsuri_long", bidsuri_intendedfor_long, 
"bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - True, + True, "pass", ), - ( # doesn't have acq-VAR + ( # doesn't have acq-VAR "relpath_cs", relpath_intendedfor_cs, "dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", False, "pass", ), - ( # pass + ( # pass "bidsuri_cs", bidsuri_intendedfor_cs, "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", @@ -358,4 +358,4 @@ def test_cubids_apply_intendedfor( new_tsv_prefix=None, container=None, is_longitudinal=is_longitudinal, - ) \ No newline at end of file + ) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index fdcde2eee..bc57593c0 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -946,8 +946,12 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", - use_datalad=True, is_longitudinal=True) + ses_cubids = CuBIDS( + data_root / "inconsistent", + acq_group_level="session", + use_datalad=True, + is_longitudinal=True, + ) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1194,32 +1198,6 @@ def test_bids_version(tmp_path): ), f"Schema version {schema_version} is less than minimum {min_schema_version}" -<<<<<<< HEAD -def test_docker(): - """Verify that docker is installed and the user has permission to run docker images. - - Returns - ------- - int - -1 if Docker can't be found. - 0 if Docker is found, but the user can't connect to the daemon. - 1 if the test run is OK. - """ - try: - return_status = 1 - ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except OSError as e: - from errno import ENOENT - - if e.errno == ENOENT: - print("Cannot find Docker engine!") - return_status = 0 - raise e - if ret.stderr.startswith(b"Cannot connect to the Docker daemon."): - print("Cannot connect to Docker daemon!") - return_status = 0 - assert return_status - # def test_image(image='pennlinc/bond:latest'): # """Check whether image is present on local system.""" # ret = subprocess.run(['docker', 'images', '-q', image], From 4aacea48e7a9493aec2d7902c563dd3072d15940 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 13:16:01 -0500 Subject: [PATCH 03/10] still fixing lint issues --- cubids/cubids.py | 5 +---- cubids/tests/test_apply.py | 8 ++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 3dd554ef5..a985d695c 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -456,7 +456,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # remove renames file that gets created under the hood subprocess.run(["rm", "-rf", "renames"]) - def change_filename(self, filepath, entities, is_longitudinal=False): + def change_filename(self, filepath, entities): """Apply changes to a filename based on the renamed entity sets. This function takes into account the new entity set names @@ -468,8 +468,6 @@ def change_filename(self, filepath, entities, is_longitudinal=False): Path prefix to a file in the affected entity set change. entities : :obj:`dict` A pybids dictionary of entities parsed from the new entity set name. - is_longitudinal : :obj:`bool`, optional - If True, includes "ses" in filepath. Default is False. 
Notes ----- @@ -479,7 +477,6 @@ def change_filename(self, filepath, entities, is_longitudinal=False): filepath=filepath, entities=entities, out_dir=str(self.path), - is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index a44ebf451..3f0e2b5af 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -239,28 +239,28 @@ def summary_data(): @pytest.mark.parametrize( ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ - ( # doesn't have acq-VAR + ( "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", True, "pass", ), - ( # doesn't have ses-01 + ( "bidsuri_long", bidsuri_intendedfor_long, "bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", True, "pass", ), - ( # doesn't have acq-VAR + ( "relpath_cs", relpath_intendedfor_cs, "dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", False, "pass", ), - ( # pass + ( "bidsuri_cs", bidsuri_intendedfor_cs, "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", From fe68deaee83d1eed809e387683b71f1d5d35ba80 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 17 Jan 2025 13:37:35 -0500 Subject: [PATCH 04/10] Update cubids.py --- cubids/cubids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cubids/cubids.py b/cubids/cubids.py index a985d695c..d6ca6818d 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -477,6 +477,7 @@ def change_filename(self, filepath, entities): filepath=filepath, entities=entities, out_dir=str(self.path), + is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes @@ -608,6 +609,7 @@ def change_filename(self, filepath, entities): data["IntendedFor"].remove(item) # add new filename data["IntendedFor"].append(_get_participant_relative_path(new_path)) + if item == _get_bidsuri(filepath, self.path): # remove old filename data["IntendedFor"].remove(item) From 1dcde70d5c3e1fa0e406bdebe849a085f6d97421 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 14:39:47 -0500 Subject: [PATCH 05/10] unset default value for is_longitudinal and add a method to infer is_longitudinal from data structure --- cubids/cubids.py | 18 ++++++++++++------ cubids/tests/test_apply.py | 4 ++-- cubids/tests/test_bond.py | 3 +-- cubids/workflows.py | 4 ---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index d6ca6818d..f958f04b3 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -85,7 +85,7 @@ class CuBIDS(object): use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. is_longitudinal : :obj:`bool` - If True, includes "ses" in filepath. Default is False. + If True, adds "ses" in filepath. 
""" def __init__( @@ -95,7 +95,6 @@ def __init__( acq_group_level="subject", grouping_config=None, force_unlock=False, - is_longitudinal=False, ): self.path = os.path.abspath(data_root) self._layout = None @@ -113,12 +112,15 @@ def __init__( self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset - self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset + self.is_longitudinal = self._infer_longitudinal() # inferred from dataset structure + if self.use_datalad: self.init_datalad() if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") + elif not self.is_longitudinal and self.acq_group_level == "session": + raise ValueError('Data is not longitudinal, so "session" is not a valid grouping level.') @property def layout(self): @@ -132,6 +134,10 @@ def layout(self): # print("LAYOUT OBJECT SET") return self._layout + def _infer_longitudinal(self): + """Infer if the dataset is longitudinal based on its structure.""" + return any("ses-" in str(f) for f in Path(self.path).rglob("*")) + def reset_bids_layout(self, validate=False): """Reset the BIDS layout. @@ -1766,7 +1772,7 @@ def get_entity_value(path, key): return part -def build_path(filepath, entities, out_dir, is_longitudinal=False): +def build_path(filepath, entities, out_dir, is_longitudinal): """Build a new path for a file based on its BIDS entities. Parameters @@ -1778,8 +1784,8 @@ def build_path(filepath, entities, out_dir, is_longitudinal=False): This should include all of the entities in the filename *except* for subject and session. out_dir : str The output directory for the new file. - is_longitudinal : bool, optional - If True, add "ses" to file path. Default is False. + is_longitudinal : bool + If True, add "ses" to file path. Returns ------- diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index 3f0e2b5af..51afa64f0 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -295,6 +295,8 @@ def test_cubids_apply_intendedfor( BIDS skeleton structure. intended_for : str IntendedFor field value. + is_longitudinal : bool + Indicate whether the data structure is longitudinal or cross-sectional. expected : str or Exception Expected result or exception. @@ -339,7 +341,6 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, - is_longitudinal=is_longitudinal, ) with open(fmap_json) as f: @@ -357,5 +358,4 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, - is_longitudinal=is_longitudinal, ) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index bc57593c0..9f7f5b399 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -489,7 +489,7 @@ def test_tsv_merge_changes(tmp_path): The temporary path where the test data will be copied. 
""" data_root = get_data(tmp_path) - bod = CuBIDS(data_root / "inconsistent", use_datalad=True, is_longitudinal=True) + bod = CuBIDS(data_root / "inconsistent", use_datalad=True) bod.datalad_save() assert bod.is_datalad_clean() @@ -950,7 +950,6 @@ def test_session_apply(tmp_path): data_root / "inconsistent", acq_group_level="session", use_datalad=True, - is_longitudinal=True, ) ses_cubids.get_tsvs(str(tmp_path / "originals")) diff --git a/cubids/workflows.py b/cubids/workflows.py index 07d03bfe9..c09366d1e 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -432,7 +432,6 @@ def apply( files_tsv, new_tsv_prefix, container, - is_longitudinal=False, ): """Apply the tsv changes. @@ -454,8 +453,6 @@ def apply( Path to the new tsv prefix. container : :obj:`str` Container in which to run the workflow. - is_longitudinal : :obj:`bool` - If True, includes "ses" in filepath. Default is False. """ # Run directly from python using if container is None: @@ -464,7 +461,6 @@ def apply( use_datalad=use_datalad, acq_group_level=acq_group_level, grouping_config=config, - is_longitudinal=is_longitudinal, ) if use_datalad: if not bod.is_datalad_clean(): From 9db0657697736095f606e8daa96d8d461652673a Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 14:43:10 -0500 Subject: [PATCH 06/10] fix lint issues --- cubids/cubids.py | 4 +++- docs/conf.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index f958f04b3..b666656ac 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -120,7 +120,9 @@ def __init__( if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") elif not self.is_longitudinal and self.acq_group_level == "session": - raise ValueError('Data is not longitudinal, so "session" is not a valid grouping level.') + raise ValueError( + 'Data is not longitudinal, so "session" is not a valid grouping level.' + ) @property def layout(self): diff --git a/docs/conf.py b/docs/conf.py index 5512a64d6..9da1b27fb 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ "sphinx_gallery.load_style", "sphinxarg.ext", # argparse extension "sphinxcontrib.bibtex", # bibtex-based bibliographies - "sphinx_design", # for adding in-line badges etc + "sphinx_design", # for adding in-line badges etc ] # Mock modules in autodoc: @@ -266,4 +266,6 @@ # ----------------------------------------------------------------------------- # Configuration for sphinx_copybutton to remove shell prompts, i.e. $ copybutton_prompt_text = "$ " -copybutton_only_copy_prompt_lines = False # ensures all lines are copied, even those without a prompt \ No newline at end of file +copybutton_only_copy_prompt_lines = ( + False # ensures all lines are copied, even those without a prompt +) From ad265ef8c585840fb7bda6bbf079fdc504fee181 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 14:56:42 -0500 Subject: [PATCH 07/10] remove is_longitudinal from CuBIDS class docstring as Taylor suggested --- cubids/cubids.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index b666656ac..8cbb4dc49 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -84,8 +84,6 @@ class CuBIDS(object): A data dictionary for TSV outputs. use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. - is_longitudinal : :obj:`bool` - If True, adds "ses" in filepath. 
""" def __init__( From ac11eb3ad16df4fa8967721c8eec2b0abe6dc83c Mon Sep 17 00:00:00 2001 From: singlesp Date: Fri, 17 Jan 2025 15:44:45 -0500 Subject: [PATCH 08/10] DOCSTRINGS --- cubids/cubids.py | 443 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 405 insertions(+), 38 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 8cbb4dc49..00d0b76c1 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1,4 +1,8 @@ -"""Main module.""" +"""Main module for CuBIDS. + +This module provides the core functionalities of the CuBIDS package, including +operations for handling BIDS datasets, clustering, and metadata merging. +""" import csv import json @@ -126,7 +130,10 @@ def __init__( def layout(self): """Return the BIDSLayout object. - If the BIDSLayout object has not been created, create it. + Returns + ------- + BIDSLayout + The BIDSLayout object associated with the current instance. """ if self._layout is None: # print("SETTING LAYOUT OBJECT") @@ -135,7 +142,18 @@ def layout(self): return self._layout def _infer_longitudinal(self): - """Infer if the dataset is longitudinal based on its structure.""" + """Infer if the dataset is longitudinal based on its structure. + + This method checks if any file or directory within the dataset path + contains the substring "ses-" in its name, which is a common convention + used to denote session identifiers in longitudinal datasets. + + Returns + ------- + bool + True if the dataset is longitudinal (i.e., contains session identifiers), + False otherwise. + """ return any("ses-" in str(f) for f in Path(self.path).rglob("*")) def reset_bids_layout(self, validate=False): @@ -185,11 +203,24 @@ def create_cubids_code_dir(self): return self.cubids_code_dir def init_datalad(self): - """Initialize a datalad Dataset at self.path. + """Initialize a datalad Dataset at the specified path. + + This method creates a datalad dataset at the path specified by `self.path`. + It sets the `datalad_ready` attribute to True and assigns the datalad.Dataset + object to the `datalad_handle` attribute. + + Attributes + ---------- + datalad_ready : bool + Indicates whether the datalad dataset has been successfully initialized. + datalad_handle : datalad.api.Dataset + The datalad dataset object associated with the specified path. - This creates a datalad dataset at self.path and sets the - ``datalad_ready`` attribute to True. - It also sets the ``datalad_handle`` attribute to the datalad.Dataset object. + Notes + ----- + If the dataset is not already installed at the specified path, this method + will create a new dataset with the configuration process "text2git" and + enable the annex feature. """ self.datalad_ready = True @@ -202,13 +233,19 @@ def init_datalad(self): def datalad_save(self, message=None): """Perform a DataLad Save operation on the BIDS tree. - Additionally a check for an active datalad handle and that the - status of all objects after the save is "ok". + This method checks for an active DataLad handle and ensures that the + status of all objects after the save operation is "ok". + + Parameters + ---------- + message : str or None, optional + Commit message to use with DataLad save. If None, a default message + "CuBIDS Save" will be used. - Parameters: - ----------- - message : str or None - Commit message to use with datalad save. + Raises + ------ + Exception + If DataLad has not been initialized or if the save operation fails. """ if not self.datalad_ready: raise Exception("DataLad has not been initialized. 
use datalad_init()") @@ -252,7 +289,34 @@ def datalad_undo_last_commit(self): reset_proc.check_returncode() def add_nifti_info(self): - """Add info from nifti files to json sidecars.""" + """ + Add information from NIfTI files to their corresponding JSON sidecars. + + This method processes all NIfTI files in the BIDS directory specified by `self.path`. + It extracts relevant metadata from each NIfTI file and updates the corresponding JSON + sidecar files with this information. If `self.force_unlock` is set, it will unlock + the dataset using `datalad` before processing the files. + + Metadata added to the JSON sidecars includes: + - Obliquity + - Voxel sizes (dimensions 1, 2, and 3) + - Matrix dimensions (sizes of dimensions 1, 2, and 3) + - Number of volumes (for 4D images) + - Image orientation + + If `self.use_datalad` is set, the changes will be saved using `datalad`. + + Raises + ------ + Exception + If there is an error loading a NIfTI file or parsing a JSON sidecar file. + + Notes + ----- + - This method assumes that the NIfTI files are organized in a BIDS-compliant directory structure. + - The method will skip any files in hidden directories (directories starting with a dot). + - If a JSON sidecar file does not exist for a NIfTI file, it will be skipped. + """ # check if force_unlock is set if self.force_unlock: # CHANGE TO SUBPROCESS.CALL IF NOT BLOCKING @@ -370,7 +434,6 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands - # delete_commands = [] to_remove = [] for rm_id in deletions: files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] @@ -378,7 +441,6 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): to_remove.append(self.path + rm_me) - # delete_commands.append("rm " + rm_me) # call purge associations on list of files to remove self._purge_associations(to_remove) @@ -847,8 +909,15 @@ def _purge_associations(self, scans): def get_nifti_associations(self, nifti): """Get nifti associations. - This uses globbing to find files with the same path, entities, and suffix as the NIfTI, - but with a different extension. + Parameters + ---------- + nifti : str or Path + The path to the NIfTI file for which to find associated files. + + Returns + ------- + associations : list of str + A list of paths to files associated with the given NIfTI file, excluding the NIfTI file itself. """ # get all assocation files of a nifti image no_ext_file = str(nifti).split("/")[-1].split(".")[0] @@ -860,7 +929,18 @@ def get_nifti_associations(self, nifti): return associations def _cache_fieldmaps(self): - """Search all fieldmaps and create a lookup for each file.""" + """Search all fieldmaps and create a lookup for each file. + + This method scans for fieldmap files with specific suffixes and extensions, + retrieves their metadata, and creates a lookup dictionary that maps each + file to its corresponding fieldmap(s). If a fieldmap file does not have an + "IntendedFor" field in its metadata, it is added to a list of misfits. + + Returns + ------- + misfits : list + A list of fieldmap files that do not have an "IntendedFor" field in their metadata. 
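
As a companion to the docstring above, a rough sketch of the lookup that
_cache_fieldmaps is described as building; the input shape here is assumed
for illustration:

    from collections import defaultdict

    def build_fieldmap_lookup(fmap_metadata):
        # fmap_metadata: {fieldmap_path: sidecar_dict} (hypothetical shape)
        lookup = defaultdict(list)  # target file -> fieldmaps covering it
        misfits = []                # fieldmaps lacking IntendedFor
        for fmap_path, meta in fmap_metadata.items():
            targets = meta.get("IntendedFor", [])
            targets = [targets] if isinstance(targets, str) else targets
            if not targets:
                misfits.append(fmap_path)
            for target in targets:
                lookup[target].append(fmap_path)
        return lookup, misfits
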
+ """ suffix = "(phase1|phasediff|epi|fieldmap)" fmap_files = self.layout.get( suffix=suffix, regex_search=True, extension=[".nii.gz", ".nii"] @@ -958,7 +1038,41 @@ def get_param_groups_from_entity_set(self, entity_set): return tup_ret def create_data_dictionary(self): - """Create a data dictionary.""" + """Create a data dictionary for scanning parameters and other metadata. + + This method populates the `data_dict` attribute with descriptions for various + scanning parameters, relational parameters, and derived parameters based on + the `grouping_config` attribute. Additionally, it manually adds descriptions + for non-sidecar columns. + + Attributes + ---------- + grouping_config : dict + Configuration dictionary containing `sidecar_params`, `relational_params`, + and `derived_params` which are used to populate the `data_dict`. + + data_dict : dict + Dictionary to be populated with parameter descriptions. + + Sidecar Parameters + ------------------ + - Scanning Parameter: Parameters extracted from sidecar files. + - NIfTI Header Parameter: Parameters derived from NIfTI headers. + + Manually Added Columns + ---------------------- + - ManualCheck: Column where users mark groups to manually check. + - Notes: Column to mark notes about the parameter group. + - RenameEntitySet: Auto-generated suggested rename of Non-Dominant Groups based on variant scanning parameters. + - Counts: Number of files in the parameter group. + - Modality: MRI image type. + - MergeInto: Column to mark groups to remove with a '0'. + - FilePath: Location of file. + - EntitySetCount: Number of participants in an Entity Set. + - EntitySet: A set of scans whose filenames share all BIDS filename key-value pairs, excluding subject and session. + - ParamGroup: The set of scans with identical metadata parameters in their sidecars (defined within an Entity Set and denoted numerically). + - KeyParamGroup: Entity Set name and Param Group number separated by a double underscore. + """ sidecar_params = self.grouping_config.get("sidecar_params") for mod in sidecar_params.keys(): mod_dict = sidecar_params[mod] @@ -1058,7 +1172,19 @@ def get_data_dictionary(self, df): return json_dict def get_param_groups_dataframes(self): - """Create DataFrames of files x param groups and a summary.""" + """Create DataFrames of files x param groups and a summary. + + This method processes entity sets to generate two DataFrames: + one containing labeled file parameters and another summarizing + parameter groups. It also suggests renaming based on variant parameters. + + Returns + ------- + tuple of pandas.DataFrame + A tuple containing two DataFrames: + - big_df: DataFrame with labeled file parameters. + - summary: DataFrame summarizing parameter groups with suggested renaming. + """ entity_sets = self.get_entity_sets() labeled_files = [] param_group_summaries = [] @@ -1267,7 +1393,18 @@ def get_tsvs(self, path_prefix): print(f"CuBIDS detected {len(summary)} Parameter Groups.") def get_entity_sets(self): - """Identify the entity sets for the bids dataset.""" + """Identify the entity sets for the BIDS dataset. + + This method scans the dataset directory for files matching the BIDS + specification and identifies unique entity sets based on the filenames. + It also populates a dictionary mapping each entity set to a list of + corresponding file paths. + + Returns + ------- + list of str + A sorted list of unique entity sets found in the dataset. 
+ """ # reset self.keys_files self.keys_files = {} @@ -1292,9 +1429,32 @@ def get_entity_sets(self): return sorted(entity_sets) def change_metadata(self, filters, metadata): - """Change metadata. + """Change metadata of BIDS files based on provided filters. + + This method updates the metadata of BIDS files that match the given filters. + It retrieves the associated JSON sidecar files, updates them with the provided + metadata, and writes the changes back to the JSON files. + + Parameters + ---------- + filters : dict + A dictionary of filters to apply when searching for BIDS files. + The keys should correspond to BIDS entity names (e.g., 'subject', 'session'). + metadata : dict + A dictionary containing the metadata to update in the JSON sidecar files. + The keys should correspond to the metadata fields to be updated. - NOTE: Appears unused. + Raises + ------ + FileNotFoundError + If no JSON sidecar files are found for the BIDS files. + ValueError + If irregular associations are found (i.e., more than one JSON file is + associated with a BIDS file). + + Notes + ----- + This method appears to be unused in the current codebase. """ files_to_change = self.layout.get(return_type="object", **filters) @@ -1321,7 +1481,24 @@ def change_metadata(self, filters, metadata): _update_json(json_file.path, sidecar) def get_all_metadata_fields(self): - """Return all metadata fields in a bids directory.""" + """Return all metadata fields in a BIDS directory. + + This method searches through all JSON files in the specified BIDS directory + and collects all unique metadata fields present in those files. It skips + files within any ".git" directory and handles empty files and JSON decoding + errors gracefully. + + Returns + ------- + list of str + A sorted list of all unique metadata fields found in the BIDS directory. + + Raises + ------ + UserWarning + If there is an error decoding a JSON file or any unexpected error occurs + while processing a file. + """ found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): @@ -1342,7 +1519,20 @@ def get_all_metadata_fields(self): return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): - """Remove specific fields from all metadata files.""" + """Remove specific fields from all metadata files in the directory. + + This method iterates through all JSON files in the specified directory + and removes the specified fields from each file's metadata. + + Parameters + ---------- + fields_to_remove : list of str + A list of field names to be removed from the metadata files. + + Returns + ------- + None + """ remove_fields = set(fields_to_remove) if not remove_fields: return @@ -1390,6 +1580,22 @@ def _validate_json(): def _update_json(json_file, metadata): + """Update a JSON file with the provided metadata. + + This function writes the given metadata to the specified JSON file if the + JSON data is valid. If the JSON data is invalid, it prints an error message. + + Parameters + ---------- + json_file : str + The path to the JSON file to be updated. + metadata : dict + The metadata to be written to the JSON file. + + Returns + ------- + None + """ if _validate_json(): with open(json_file, "w", encoding="utf-8") as f: json.dump(metadata, f, ensure_ascii=False, indent=4) @@ -1398,18 +1604,59 @@ def _update_json(json_file, metadata): def _entity_set_to_entities(entity_set): - """Split a entity_set name into a pybids dictionary of entities.""" + """Split an entity_set name into a pybids dictionary of entities. 
+ + Parameters + ---------- + entity_set : str + A string representing a set of entities, where each entity is + separated by an underscore and each key-value pair is separated by a hyphen. + + Returns + ------- + dict + A dictionary where the keys are entity names and the values are entity values. + + Examples + -------- + >>> _entity_set_to_entities("sub-01_ses-02_task-rest") + {'sub': '01', 'ses': '02', 'task': 'rest'} + """ return dict([group.split("-") for group in entity_set.split("_")]) def _entities_to_entity_set(entities): - """Convert a pybids entities dictionary into a entity set name.""" + """Convert a pybids entities dictionary into an entity set name. + + Parameters + ---------- + entities : dict + A dictionary containing pybids entities where keys are entity names + and values are entity values. + + Returns + ------- + str + A string representing the entity set name, constructed by joining + the sorted entity keys and their corresponding values, separated by hyphens. + """ group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) return "_".join([f"{key}-{entities[key]}" for key in group_keys]) def _file_to_entity_set(filename): - """Identify and return the entity set of a bids valid filename.""" + """Identify and return the entity set of a BIDS valid filename. + + Parameters + ---------- + filename : str + The filename to parse for BIDS entities. + + Returns + ------- + set + A set of entities extracted from the filename. + """ entities = parse_file_entities(str(filename)) return _entities_to_entity_set(entities) @@ -1417,9 +1664,23 @@ def _file_to_entity_set(filename): def _get_participant_relative_path(scan): """Build the relative-from-subject version of a Path to a file. - This is what will appear in the IntendedFor field of any association. + Parameters + ---------- + scan : str + The full path to the scan file. - Examples: + Returns + ------- + str + The relative path from the subject directory. + + Raises + ------ + ValueError + If the subject directory cannot be found in the path. + + Examples + -------- >>> _get_participant_relative_path( ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", ... ) @@ -1445,7 +1706,24 @@ def _get_participant_relative_path(scan): def _get_bidsuri(filename, dataset_root): - """Convert a file path to a bidsuri. + """Convert a file path to a BIDS URI. + + Parameters + ---------- + filename : str + The full path to the file within the BIDS dataset. + dataset_root : str + The root directory of the BIDS dataset. + + Returns + ------- + str + The BIDS URI corresponding to the given file path. + + Raises + ------ + ValueError + If the filename is not within the dataset_root. Examples -------- @@ -1624,7 +1902,22 @@ def _get_param_groups( def round_params(param_group_df, config, modality): - """Round columns' values in DataFrame according to requested precision.""" + """Round columns' values in a DataFrame according to requested precision. + + Parameters + ---------- + param_group_df : pandas.DataFrame + DataFrame containing the parameters to be rounded. + config : dict + Configuration dictionary containing rounding precision information. + modality : str + The modality key to access the relevant rounding precision settings in the config. + + Returns + ------- + pandas.DataFrame + DataFrame with the specified columns' values rounded to the requested precision. 
+ """ to_format = config["sidecar_params"][modality] to_format.update(config["derived_params"][modality]) @@ -1644,7 +1937,23 @@ def round_params(param_group_df, config, modality): def get_sidecar_metadata(json_file): """Get all metadata values in a file's sidecar. - Transform json dictionary to Python dictionary. + Transform JSON dictionary to Python dictionary. + + Parameters + ---------- + json_file : str + Path to the JSON sidecar file. + + Returns + ------- + dict or str + Returns a dictionary containing the metadata if the file is successfully read, + otherwise returns the string "Erroneous sidecar". + + Raises + ------ + Exception + If there is an error loading the JSON file. """ try: with open(json_file) as json_file: @@ -1726,8 +2035,19 @@ def format_params(param_group_df, config, modality): def _order_columns(df): """Organize columns of the summary and files DataFrames. - This ensures that EntitySet and ParamGroup are the first two columns, - FilePath is the last, and the others are sorted alphabetically. + + Parameters + ---------- + df : pandas.DataFrame + The DataFrame whose columns need to be organized. + + Returns + ------- + pandas.DataFrame + The DataFrame with columns organized such that 'EntitySet' and + 'ParamGroup' are the first two columns, 'FilePath' is the last + column (if present), and the remaining columns are sorted + alphabetically. Notes ----- @@ -1747,7 +2067,30 @@ def _order_columns(df): def img_to_new_ext(img_path, new_ext): - """Convert img to new extension. + """Convert an image file path to a new extension. + + Parameters + ---------- + img_path : str + The file path of the image to be converted. + new_ext : str + The new extension to be applied to the image file path. + + Returns + ------- + str + The file path with the new extension applied. + + Examples + -------- + >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv') + '/path/to/image_events.tsv' + + >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv.gz') + '/path/to/image_physio.tsv.gz' + + >>> img_to_new_ext('/path/to/image.nii.gz', '.json') + '/path/to/image.json' Notes ----- @@ -1765,7 +2108,31 @@ def img_to_new_ext(img_path, new_ext): def get_entity_value(path, key): - """Given a filepath and BIDS key name, return value.""" + """Given a filepath and BIDS key name, return the value associated with the key. + + Parameters + ---------- + path : str + The file path to be parsed. + key : str + The BIDS key name to search for in the file path. + + Returns + ------- + str or None + The value associated with the BIDS key if found, otherwise None. 
+ + Examples + -------- + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'sub') + '01' + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'ses') + '02' + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'task') + 'rest' + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'run') + None + """ parts = Path(path).parts for part in parts: if part.startswith(key + "-"): From 7204dd4d55e36871512f23513cfddf2bd41e4eef Mon Sep 17 00:00:00 2001 From: singlesp Date: Fri, 17 Jan 2025 15:54:15 -0500 Subject: [PATCH 09/10] lint --- cubids/cubids.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 00d0b76c1..e91b23194 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -313,7 +313,8 @@ def add_nifti_info(self): Notes ----- - - This method assumes that the NIfTI files are organized in a BIDS-compliant directory structure. + - This method assumes that the NIfTI files are organized in a BIDS-compliant + directory structure. - The method will skip any files in hidden directories (directories starting with a dot). - If a JSON sidecar file does not exist for a NIfTI file, it will be skipped. """ @@ -917,7 +918,8 @@ def get_nifti_associations(self, nifti): Returns ------- associations : list of str - A list of paths to files associated with the given NIfTI file, excluding the NIfTI file itself. + A list of paths to files associated with the given NIfTI file, excluding + the NIfTI file itself. """ # get all assocation files of a nifti image no_ext_file = str(nifti).split("/")[-1].split(".")[0] @@ -1063,15 +1065,19 @@ def create_data_dictionary(self): ---------------------- - ManualCheck: Column where users mark groups to manually check. - Notes: Column to mark notes about the parameter group. - - RenameEntitySet: Auto-generated suggested rename of Non-Dominant Groups based on variant scanning parameters. + - RenameEntitySet: Auto-generated suggested rename of Non-Dominant Groups + based on variant scanning parameters. - Counts: Number of files in the parameter group. - Modality: MRI image type. - MergeInto: Column to mark groups to remove with a '0'. - FilePath: Location of file. - EntitySetCount: Number of participants in an Entity Set. - - EntitySet: A set of scans whose filenames share all BIDS filename key-value pairs, excluding subject and session. - - ParamGroup: The set of scans with identical metadata parameters in their sidecars (defined within an Entity Set and denoted numerically). - - KeyParamGroup: Entity Set name and Param Group number separated by a double underscore. + - EntitySet: A set of scans whose filenames share all BIDS filename key-value + pairs, excluding subject and session. + - ParamGroup: The set of scans with identical metadata parameters in their + sidecars (defined within an Entity Set and denoted numerically). + - KeyParamGroup: Entity Set name and Param Group number separated by a double + underscore. """ sidecar_params = self.grouping_config.get("sidecar_params") for mod in sidecar_params.keys(): @@ -2035,7 +2041,6 @@ def format_params(param_group_df, config, modality): def _order_columns(df): """Organize columns of the summary and files DataFrames. 
-
     Parameters
     ----------
     df : pandas.DataFrame

From 03c54912e70b51820a51bccd7580faa7453ab37b Mon Sep 17 00:00:00 2001
From: singlesp
Date: Fri, 24 Jan 2025 15:26:05 -0500
Subject: [PATCH 10/10] fixed doctests

---
 cubids/cubids.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/cubids/cubids.py b/cubids/cubids.py
index 7a7ed1e31..a6c288c11 100644
--- a/cubids/cubids.py
+++ b/cubids/cubids.py
@@ -2090,14 +2090,14 @@ def img_to_new_ext(img_path, new_ext):
 
     Examples
     --------
-    >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv')
-    '/path/to/image_events.tsv'
+    >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv')
+    '/path/to/file_events.tsv'
 
-    >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv.gz')
-    '/path/to/image_physio.tsv.gz'
+    >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv.gz')
+    '/path/to/file_physio.tsv.gz'
 
-    >>> img_to_new_ext('/path/to/image.nii.gz', '.json')
-    '/path/to/image.json'
+    >>> img_to_new_ext('/path/to/file_image.nii.gz', '.json')
+    '/path/to/file_image.json'
 
     Notes
     -----
@@ -2131,14 +2131,10 @@ def get_entity_value(path, key):
 
     Examples
    --------
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'sub')
-    '01'
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'ses')
-    '02'
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'task')
-    'rest'
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'run')
-    None
+    >>> get_entity_value('/path/to/sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz', 'sub')
+    'sub-01'
+    >>> get_entity_value('/path/to/sub-01/ses-02/func/sub-01_ses-02_task-rest_bold.nii.gz', 'ses')
+    'ses-02'
     """
     parts = Path(path).parts
     for part in parts:
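
Taken together, the series turns is_longitudinal into an attribute inferred
from the dataset rather than a constructor argument. A closing usage sketch
of that end state (paths are hypothetical):

    from cubids.cubids import CuBIDS

    bod = CuBIDS("/data/cross_sectional_dset", use_datalad=False)
    assert bod.is_longitudinal is False  # no "ses-" anywhere in the tree

    # Session-level grouping on cross-sectional data now raises:
    #   ValueError: Data is not longitudinal, so "session" is not a valid
    #   grouping level.
    # CuBIDS("/data/cross_sectional_dset", acq_group_level="session")
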