From 64b209766acd2d0cde27143f545adb6f92b463f4 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 11:48:29 -0500 Subject: [PATCH 01/10] Add is_longitudinal attribute to the CuBIDS class --- cubids/cubids.py | 88 ++++++++++++++++++++++++++++++-------- cubids/metadata_merge.py | 36 ++++++++++------ cubids/tests/test_apply.py | 27 +++++++----- cubids/tests/test_bond.py | 7 +-- cubids/workflows.py | 4 ++ 5 files changed, 119 insertions(+), 43 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 158e97d05..3329444ff 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -84,6 +84,8 @@ class CuBIDS(object): A data dictionary for TSV outputs. use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. + is_longitudinal : :obj:`bool` + If True, includes "ses" in filepath. Default is False. """ def __init__( @@ -93,6 +95,7 @@ def __init__( acq_group_level="subject", grouping_config=None, force_unlock=False, + is_longitudinal=False, ): self.path = os.path.abspath(data_root) self._layout = None @@ -110,10 +113,11 @@ def __init__( self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset + self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset if self.use_datalad: self.init_datalad() - if self.acq_group_level == "session": + if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") @property @@ -452,7 +456,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # remove renames file that gets created under the hood subprocess.run(["rm", "-rf", "renames"]) - def change_filename(self, filepath, entities): + def change_filename(self, filepath, entities, is_longitudinal=False): """Apply changes to a filename based on the renamed entity sets. This function takes into account the new entity set names @@ -464,6 +468,8 @@ def change_filename(self, filepath, entities): Path prefix to a file in the affected entity set change. entities : :obj:`dict` A pybids dictionary of entities parsed from the new entity set name. + is_longitudinal : :obj:`bool`, optional + If True, includes "ses" in filepath. Default is False. Notes ----- @@ -473,6 +479,7 @@ def change_filename(self, filepath, entities): filepath=filepath, entities=entities, out_dir=str(self.path), + is_longitudinal=self.is_longitudinal ) exts = Path(filepath).suffixes @@ -481,7 +488,8 @@ def change_filename(self, filepath, entities): suffix = entities["suffix"] sub = get_entity_value(filepath, "sub") - ses = get_entity_value(filepath, "ses") + if self.is_longitudinal: + ses = get_entity_value(filepath, "ses") # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -577,7 +585,10 @@ def change_filename(self, filepath, entities): self.new_filenames.append(new_labeling) # RENAME INTENDED FORS! - ses_path = self.path + "/" + sub + "/" + ses + if self.is_longitudinal: + ses_path = self.path + "/" + sub + "/" + ses + elif not self.is_longitudinal: + ses_path = self.path + "/" + sub files_with_if = [] files_with_if += Path(ses_path).rglob("fmap/*.json") files_with_if += Path(ses_path).rglob("perf/*_m0scan.json") @@ -595,7 +606,7 @@ def change_filename(self, filepath, entities): # Coerce IntendedFor to a list. 
data["IntendedFor"] = listify(data["IntendedFor"]) for item in data["IntendedFor"]: - if item == _get_participant_relative_path(filepath): + if item == _get_participant_relative_path(filepath): # remove old filename data["IntendedFor"].remove(item) # add new filename @@ -1363,6 +1374,7 @@ def get_layout(self): return self.layout +# XXX: Remove _validate_json? def _validate_json(): """Validate a JSON file's contents. @@ -1402,8 +1414,29 @@ def _get_participant_relative_path(scan): This is what will appear in the IntendedFor field of any association. + Examples: + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", + ... ) + 'ses-01/func/sub-01_ses-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", + ... ) + 'func/sub-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", + ... ) + Traceback (most recent call last): + ValueError: Could not find subject in ... """ - return "/".join(Path(scan).parts[-3:]) + parts = Path(scan).parts + # Find the first part that starts with "sub-" + for i, part in enumerate(parts): + if part.startswith("sub-"): + return "/".join(parts[i+1:]) + raise ValueError(f"Could not find subject in {scan}") def _get_bidsuri(filename, dataset_root): @@ -1734,7 +1767,7 @@ def get_entity_value(path, key): return part -def build_path(filepath, entities, out_dir): +def build_path(filepath, entities, out_dir, is_longitudinal=False): """Build a new path for a file based on its BIDS entities. Parameters @@ -1746,6 +1779,8 @@ def build_path(filepath, entities, out_dir): This should include all of the entities in the filename *except* for subject and session. out_dir : str The output directory for the new file. + is_longitudinal : bool, optional + If True, add "ses" to file path. Default is False. Returns ------- @@ -1758,6 +1793,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz", ... {"acquisition": "VAR", "suffix": "T2w"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz' @@ -1766,6 +1802,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1775,6 +1812,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz", ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz' @@ -1784,6 +1822,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1792,6 +1831,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... 
) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' @@ -1801,6 +1841,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1809,6 +1850,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1817,6 +1859,7 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, ... "/output", + ... True, ... ) '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' @@ -1825,19 +1868,19 @@ def build_path(filepath, entities, out_dir): ... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz", ... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"}, ... "/output", + ... True, ... ) WARNING: DATATYPE CHANGE DETECTED '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz' - It expects a longitudinal structure, so providing a cross-sectional filename won't work. - XXX: This is a bug. + It also works for cross-sectional filename. >>> build_path( ... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, + ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, ... "/output", + ... False, ... ) - Traceback (most recent call last): - ValueError: Could not extract subject or session from ... + '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz' """ exts = Path(filepath).suffixes old_ext = "".join(exts) @@ -1853,9 +1896,13 @@ def build_path(filepath, entities, out_dir): entity_file_keys.append(key) sub = get_entity_value(filepath, "sub") - ses = get_entity_value(filepath, "ses") - if sub is None or ses is None: - raise ValueError(f"Could not extract subject or session from {filepath}") + if sub is None: + raise ValueError(f"Could not extract subject from {filepath}") + + if is_longitudinal: + ses = get_entity_value(filepath, "ses") + if ses is None: + raise ValueError(f"Could not extract session from {filepath}") # Add leading zeros to run entity if it's an integer. # If it's a string, respect the value provided. 
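
For reference, the hunks above and below reduce to a single branch in how the
new path gets assembled. A minimal standalone sketch of that behavior
(illustrative values only, not the verbatim implementation):

    from pathlib import Path

    def assemble(out_dir, sub, ses, dtype, filename, is_longitudinal):
        # Insert the session directory only for longitudinal datasets.
        if is_longitudinal:
            return str(Path(out_dir) / sub / ses / dtype / filename)
        return str(Path(out_dir) / sub / dtype / filename)

    # Longitudinal:    /output/sub-01/ses-01/func/sub-01_ses-01_..._bold.nii.gz
    # Cross-sectional: /output/sub-01/func/sub-01_..._bold.nii.gz
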
@@ -1874,7 +1921,10 @@ def build_path(filepath, entities, out_dir): .replace("reconstruction", "rec") ) if len(filename) > 0: - filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + if is_longitudinal: + filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + elif not is_longitudinal: + filename = f"{sub}_{filename}_{suffix}{old_ext}" else: raise ValueError(f"Could not construct new filename for {filepath}") @@ -1894,5 +1944,9 @@ def build_path(filepath, entities, out_dir): dtype_new = dtype_orig # Construct the new filename - new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + if is_longitudinal: + new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + elif not is_longitudinal: + new_path = str(Path(out_dir) / sub / dtype_new / filename) + return new_path diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 6562f35b7..0779db090 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -243,13 +243,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False): return 0 -def get_acq_dictionary(): +def get_acq_dictionary(is_longitudinal=False): """Create a BIDS data dictionary from dataframe columns. Parameters ---------- - df : :obj:`pandas.DataFrame` - Pre export TSV that will be converted to a json dictionary. + is_longitudinal : :obj:`bool`, optional + If True, add "session" to acq_dict. Default is False. Returns ------- @@ -258,7 +258,8 @@ def get_acq_dictionary(): """ acq_dict = {} acq_dict["subject"] = {"Description": "Participant ID"} - acq_dict["session"] = {"Description": "Session ID"} + if is_longitudinal: + acq_dict["session"] = {"Description": "Session ID"} docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions" desc = "Acquisition Group. See Read the Docs for more information" acq_dict["AcqGroup"] = {"Description": desc + docs} @@ -266,7 +267,7 @@ def get_acq_dictionary(): return acq_dict -def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): +def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False): """Find unique sets of Key/Param groups across subjects. This writes out the following files: @@ -284,6 +285,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): Prefix for output files. acq_group_level : {"subject", "session"} Level at which to group acquisitions. + is_longitudinal : :obj:`bool`, optional + If True, add "session" to acq_dict. Default is False. 
""" from bids import config from bids.layout import parse_file_entities @@ -298,9 +301,12 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): file_entities = parse_file_entities(row.FilePath) if acq_group_level == "subject": - acq_id = (file_entities.get("subject"), file_entities.get("session")) + if is_longitudinal: + acq_id = (file_entities.get("subject"), file_entities.get("session")) + elif not is_longitudinal: + acq_id = (file_entities.get("subject")) acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) - else: + elif is_longitudinal and acq_group_level == "session": acq_id = (file_entities.get("subject"), None) acq_groups[acq_id].append( (row.EntitySet, row.ParamGroup, file_entities.get("session")) @@ -326,17 +332,23 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level): for groupnum, content_id_row in enumerate(descending_order, start=1): content_id = content_ids[content_id_row] acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id) - for subject, session in contents_to_subjects[content_id]: - grouped_sub_sess.append( - {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} - ) + if is_longitudinal: + for subject, session in contents_to_subjects[content_id]: + grouped_sub_sess.append( + {"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum} + ) + elif not is_longitudinal: + for subject in contents_to_subjects[content_id]: + grouped_sub_sess.append( + {"subject": "sub-" + subject, "AcqGroup": groupnum} + ) # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False) # Create data dictionary for acq group tsv - acq_dict = get_acq_dictionary() + acq_dict = get_acq_dictionary(is_longitudinal) with open(output_prefix + "_AcqGrouping.json", "w") as outfile: json.dump(acq_dict, outfile, indent=4) diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index ba92b6034..ebe67818b 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -237,33 +237,35 @@ def summary_data(): @pytest.mark.parametrize( - ("name", "skeleton", "intended_for", "expected"), + ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ - ( + ( # doesn't have acq-VAR "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", + True, "pass", ), - ( + ( # doesn't have ses-01 "bidsuri_long", bidsuri_intendedfor_long, "bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", + True, "pass", ), - ( + ( # doesn't have acq-VAR "relpath_cs", relpath_intendedfor_cs, - # XXX: CuBIDS enforces longitudinal dataset, so this fails. "dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - ValueError, + False, + "pass", ), - ( + ( # pass "bidsuri_cs", bidsuri_intendedfor_cs, - # XXX: CuBIDS enforces longitudinal dataset, so this fails. "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - ValueError, + False, + "pass", ), ], ) @@ -274,6 +276,7 @@ def test_cubids_apply_intendedfor( name, skeleton, intended_for, + is_longitudinal, expected, ): """Test cubids apply with different IntendedFor types. 
@@ -308,7 +311,7 @@ def test_cubids_apply_intendedfor( bids_dir = tmpdir / name generate_bids_skeleton(str(bids_dir), skeleton) - if "long" in name: + if is_longitudinal: fdata = files_data["longitudinal"] fmap_json = bids_dir / "sub-01/ses-01/fmap/sub-01_ses-01_dir-AP_epi.json" else: @@ -336,6 +339,7 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, + is_longitudinal=is_longitudinal, ) with open(fmap_json) as f: @@ -353,4 +357,5 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, - ) + is_longitudinal=is_longitudinal, + ) \ No newline at end of file diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index a4da48a23..fdcde2eee 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -489,7 +489,7 @@ def test_tsv_merge_changes(tmp_path): The temporary path where the test data will be copied. """ data_root = get_data(tmp_path) - bod = CuBIDS(data_root / "inconsistent", use_datalad=True) + bod = CuBIDS(data_root / "inconsistent", use_datalad=True, is_longitudinal=True) bod.datalad_save() assert bod.is_datalad_clean() @@ -946,7 +946,8 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", use_datalad=True) + ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", + use_datalad=True, is_longitudinal=True) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1193,6 +1194,7 @@ def test_bids_version(tmp_path): ), f"Schema version {schema_version} is less than minimum {min_schema_version}" +<<<<<<< HEAD def test_docker(): """Verify that docker is installed and the user has permission to run docker images. @@ -1218,7 +1220,6 @@ def test_docker(): return_status = 0 assert return_status - # def test_image(image='pennlinc/bond:latest'): # """Check whether image is present on local system.""" # ret = subprocess.run(['docker', 'images', '-q', image], diff --git a/cubids/workflows.py b/cubids/workflows.py index c09366d1e..07d03bfe9 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -432,6 +432,7 @@ def apply( files_tsv, new_tsv_prefix, container, + is_longitudinal=False, ): """Apply the tsv changes. @@ -453,6 +454,8 @@ def apply( Path to the new tsv prefix. container : :obj:`str` Container in which to run the workflow. + is_longitudinal : :obj:`bool` + If True, includes "ses" in filepath. Default is False. """ # Run directly from python using if container is None: @@ -461,6 +464,7 @@ def apply( use_datalad=use_datalad, acq_group_level=acq_group_level, grouping_config=config, + is_longitudinal=is_longitudinal, ) if use_datalad: if not bod.is_datalad_clean(): From 8f1b3c4cd966f287cd1f843d1a09bdfdbfe12fc3 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 13:08:32 -0500 Subject: [PATCH 02/10] fix lint issues --- cubids/config.py | 1 + cubids/cubids.py | 12 ++++++------ cubids/metadata_merge.py | 6 ++---- cubids/tests/test_apply.py | 12 ++++++------ cubids/tests/test_bond.py | 34 ++++++---------------------------- 5 files changed, 21 insertions(+), 44 deletions(-) diff --git a/cubids/config.py b/cubids/config.py index a863d4eab..6c9b67887 100644 --- a/cubids/config.py +++ b/cubids/config.py @@ -4,6 +4,7 @@ import importlib.resources import yaml + def load_config(config_file): """Load a YAML file containing a configuration for param groups. 
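
Stepping back to the grouping change in PATCH 01: acquisition-group keys now
depend on the dataset layout. A short sketch with illustrative values (the
entities dict stands in for a hypothetical parse_file_entities result):

    from collections import defaultdict

    acq_groups = defaultdict(list)
    entities = {"subject": "01", "session": "01"}
    is_longitudinal = True

    if is_longitudinal:
        # One key per subject/session pair (per visit).
        acq_id = (entities.get("subject"), entities.get("session"))
    else:
        # One key per subject.
        acq_id = entities.get("subject")

    # Each entry is an (EntitySet, ParamGroup) pair.
    acq_groups[acq_id].append(("datatype-func_suffix-bold_task-rest", 1))
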
diff --git a/cubids/cubids.py b/cubids/cubids.py index 3329444ff..3dd554ef5 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -113,7 +113,7 @@ def __init__( self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset - self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset + self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset if self.use_datalad: self.init_datalad() @@ -479,7 +479,7 @@ def change_filename(self, filepath, entities, is_longitudinal=False): filepath=filepath, entities=entities, out_dir=str(self.path), - is_longitudinal=self.is_longitudinal + is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes @@ -606,7 +606,7 @@ def change_filename(self, filepath, entities, is_longitudinal=False): # Coerce IntendedFor to a list. data["IntendedFor"] = listify(data["IntendedFor"]) for item in data["IntendedFor"]: - if item == _get_participant_relative_path(filepath): + if item == _get_participant_relative_path(filepath): # remove old filename data["IntendedFor"].remove(item) # add new filename @@ -1419,12 +1419,12 @@ def _get_participant_relative_path(scan): ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", ... ) 'ses-01/func/sub-01_ses-01_bold.nii.gz' - + >>> _get_participant_relative_path( ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", ... ) 'func/sub-01_bold.nii.gz' - + >>> _get_participant_relative_path( ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", ... ) @@ -1435,7 +1435,7 @@ def _get_participant_relative_path(scan): # Find the first part that starts with "sub-" for i, part in enumerate(parts): if part.startswith("sub-"): - return "/".join(parts[i+1:]) + return "/".join(parts[i + 1 :]) raise ValueError(f"Could not find subject in {scan}") diff --git a/cubids/metadata_merge.py b/cubids/metadata_merge.py index 0779db090..3dc02bbf0 100644 --- a/cubids/metadata_merge.py +++ b/cubids/metadata_merge.py @@ -304,7 +304,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_long if is_longitudinal: acq_id = (file_entities.get("subject"), file_entities.get("session")) elif not is_longitudinal: - acq_id = (file_entities.get("subject")) + acq_id = file_entities.get("subject") acq_groups[acq_id].append((row.EntitySet, row.ParamGroup)) elif is_longitudinal and acq_group_level == "session": acq_id = (file_entities.get("subject"), None) @@ -339,9 +339,7 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_long ) elif not is_longitudinal: for subject in contents_to_subjects[content_id]: - grouped_sub_sess.append( - {"subject": "sub-" + subject, "AcqGroup": groupnum} - ) + grouped_sub_sess.append({"subject": "sub-" + subject, "AcqGroup": groupnum}) # Write the mapping of subject/session to acq_group_df = pd.DataFrame(grouped_sub_sess) diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index ebe67818b..a44ebf451 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -239,28 +239,28 @@ def summary_data(): @pytest.mark.parametrize( ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ - ( # doesn't have acq-VAR + ( # doesn't have acq-VAR "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", True, "pass", ), - ( # doesn't have ses-01 + ( # doesn't have ses-01 "bidsuri_long", bidsuri_intendedfor_long, 
"bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", - True, + True, "pass", ), - ( # doesn't have acq-VAR + ( # doesn't have acq-VAR "relpath_cs", relpath_intendedfor_cs, "dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", False, "pass", ), - ( # pass + ( # pass "bidsuri_cs", bidsuri_intendedfor_cs, "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", @@ -358,4 +358,4 @@ def test_cubids_apply_intendedfor( new_tsv_prefix=None, container=None, is_longitudinal=is_longitudinal, - ) \ No newline at end of file + ) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index fdcde2eee..bc57593c0 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -946,8 +946,12 @@ def test_session_apply(tmp_path): data_root = get_data(tmp_path) - ses_cubids = CuBIDS(data_root / "inconsistent", acq_group_level="session", - use_datalad=True, is_longitudinal=True) + ses_cubids = CuBIDS( + data_root / "inconsistent", + acq_group_level="session", + use_datalad=True, + is_longitudinal=True, + ) ses_cubids.get_tsvs(str(tmp_path / "originals")) @@ -1194,32 +1198,6 @@ def test_bids_version(tmp_path): ), f"Schema version {schema_version} is less than minimum {min_schema_version}" -<<<<<<< HEAD -def test_docker(): - """Verify that docker is installed and the user has permission to run docker images. - - Returns - ------- - int - -1 if Docker can't be found. - 0 if Docker is found, but the user can't connect to the daemon. - 1 if the test run is OK. - """ - try: - return_status = 1 - ret = subprocess.run(["docker", "version"], stdout=subprocess.PIPE, stderr=subprocess.PIPE) - except OSError as e: - from errno import ENOENT - - if e.errno == ENOENT: - print("Cannot find Docker engine!") - return_status = 0 - raise e - if ret.stderr.startswith(b"Cannot connect to the Docker daemon."): - print("Cannot connect to Docker daemon!") - return_status = 0 - assert return_status - # def test_image(image='pennlinc/bond:latest'): # """Check whether image is present on local system.""" # ret = subprocess.run(['docker', 'images', '-q', image], From 4aacea48e7a9493aec2d7902c563dd3072d15940 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 13:16:01 -0500 Subject: [PATCH 03/10] still fixing lint issues --- cubids/cubids.py | 5 +---- cubids/tests/test_apply.py | 8 ++++---- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 3dd554ef5..a985d695c 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -456,7 +456,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # remove renames file that gets created under the hood subprocess.run(["rm", "-rf", "renames"]) - def change_filename(self, filepath, entities, is_longitudinal=False): + def change_filename(self, filepath, entities): """Apply changes to a filename based on the renamed entity sets. This function takes into account the new entity set names @@ -468,8 +468,6 @@ def change_filename(self, filepath, entities, is_longitudinal=False): Path prefix to a file in the affected entity set change. entities : :obj:`dict` A pybids dictionary of entities parsed from the new entity set name. - is_longitudinal : :obj:`bool`, optional - If True, includes "ses" in filepath. Default is False. 
Notes ----- @@ -479,7 +477,6 @@ def change_filename(self, filepath, entities, is_longitudinal=False): filepath=filepath, entities=entities, out_dir=str(self.path), - is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index a44ebf451..3f0e2b5af 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -239,28 +239,28 @@ def summary_data(): @pytest.mark.parametrize( ("name", "skeleton", "intended_for", "is_longitudinal", "expected"), [ - ( # doesn't have acq-VAR + ( "relpath_long", relpath_intendedfor_long, "ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", True, "pass", ), - ( # doesn't have ses-01 + ( "bidsuri_long", bidsuri_intendedfor_long, "bids::sub-01/ses-01/dwi/sub-01_ses-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", True, "pass", ), - ( # doesn't have acq-VAR + ( "relpath_cs", relpath_intendedfor_cs, "dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", False, "pass", ), - ( # pass + ( "bidsuri_cs", bidsuri_intendedfor_cs, "bids::sub-01/dwi/sub-01_acq-VAR_dir-AP_run-01_dwi.nii.gz", From fe68deaee83d1eed809e387683b71f1d5d35ba80 Mon Sep 17 00:00:00 2001 From: Taylor Salo Date: Fri, 17 Jan 2025 13:37:35 -0500 Subject: [PATCH 04/10] Update cubids.py --- cubids/cubids.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cubids/cubids.py b/cubids/cubids.py index a985d695c..d6ca6818d 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -477,6 +477,7 @@ def change_filename(self, filepath, entities): filepath=filepath, entities=entities, out_dir=str(self.path), + is_longitudinal=self.is_longitudinal, ) exts = Path(filepath).suffixes @@ -608,6 +609,7 @@ def change_filename(self, filepath, entities): data["IntendedFor"].remove(item) # add new filename data["IntendedFor"].append(_get_participant_relative_path(new_path)) + if item == _get_bidsuri(filepath, self.path): # remove old filename data["IntendedFor"].remove(item) From 1dcde70d5c3e1fa0e406bdebe849a085f6d97421 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 14:39:47 -0500 Subject: [PATCH 05/10] unset default value for is_longitudinal and add a method to infer is_longitudinal from data structure --- cubids/cubids.py | 18 ++++++++++++------ cubids/tests/test_apply.py | 4 ++-- cubids/tests/test_bond.py | 3 +-- cubids/workflows.py | 4 ---- 4 files changed, 15 insertions(+), 14 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index d6ca6818d..f958f04b3 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -85,7 +85,7 @@ class CuBIDS(object): use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. is_longitudinal : :obj:`bool` - If True, includes "ses" in filepath. Default is False. + If True, adds "ses" in filepath. 
""" def __init__( @@ -95,7 +95,6 @@ def __init__( acq_group_level="subject", grouping_config=None, force_unlock=False, - is_longitudinal=False, ): self.path = os.path.abspath(data_root) self._layout = None @@ -113,12 +112,15 @@ def __init__( self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir() self.data_dict = {} # data dictionary for TSV outputs self.use_datalad = use_datalad # True if flag set, False if flag unset - self.is_longitudinal = is_longitudinal # True if flag set, False if flag unset + self.is_longitudinal = self._infer_longitudinal() # inferred from dataset structure + if self.use_datalad: self.init_datalad() if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") + elif not self.is_longitudinal and self.acq_group_level == "session": + raise ValueError('Data is not longitudinal, so "session" is not a valid grouping level.') @property def layout(self): @@ -132,6 +134,10 @@ def layout(self): # print("LAYOUT OBJECT SET") return self._layout + def _infer_longitudinal(self): + """Infer if the dataset is longitudinal based on its structure.""" + return any("ses-" in str(f) for f in Path(self.path).rglob("*")) + def reset_bids_layout(self, validate=False): """Reset the BIDS layout. @@ -1766,7 +1772,7 @@ def get_entity_value(path, key): return part -def build_path(filepath, entities, out_dir, is_longitudinal=False): +def build_path(filepath, entities, out_dir, is_longitudinal): """Build a new path for a file based on its BIDS entities. Parameters @@ -1778,8 +1784,8 @@ def build_path(filepath, entities, out_dir, is_longitudinal=False): This should include all of the entities in the filename *except* for subject and session. out_dir : str The output directory for the new file. - is_longitudinal : bool, optional - If True, add "ses" to file path. Default is False. + is_longitudinal : bool + If True, add "ses" to file path. Returns ------- diff --git a/cubids/tests/test_apply.py b/cubids/tests/test_apply.py index 3f0e2b5af..51afa64f0 100644 --- a/cubids/tests/test_apply.py +++ b/cubids/tests/test_apply.py @@ -295,6 +295,8 @@ def test_cubids_apply_intendedfor( BIDS skeleton structure. intended_for : str IntendedFor field value. + is_longitudinal : bool + Indicate whether the data structure is longitudinal or cross-sectional. expected : str or Exception Expected result or exception. @@ -339,7 +341,6 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, - is_longitudinal=is_longitudinal, ) with open(fmap_json) as f: @@ -357,5 +358,4 @@ def test_cubids_apply_intendedfor( files_tsv=files_tsv, new_tsv_prefix=None, container=None, - is_longitudinal=is_longitudinal, ) diff --git a/cubids/tests/test_bond.py b/cubids/tests/test_bond.py index bc57593c0..9f7f5b399 100644 --- a/cubids/tests/test_bond.py +++ b/cubids/tests/test_bond.py @@ -489,7 +489,7 @@ def test_tsv_merge_changes(tmp_path): The temporary path where the test data will be copied. 
""" data_root = get_data(tmp_path) - bod = CuBIDS(data_root / "inconsistent", use_datalad=True, is_longitudinal=True) + bod = CuBIDS(data_root / "inconsistent", use_datalad=True) bod.datalad_save() assert bod.is_datalad_clean() @@ -950,7 +950,6 @@ def test_session_apply(tmp_path): data_root / "inconsistent", acq_group_level="session", use_datalad=True, - is_longitudinal=True, ) ses_cubids.get_tsvs(str(tmp_path / "originals")) diff --git a/cubids/workflows.py b/cubids/workflows.py index 07d03bfe9..c09366d1e 100644 --- a/cubids/workflows.py +++ b/cubids/workflows.py @@ -432,7 +432,6 @@ def apply( files_tsv, new_tsv_prefix, container, - is_longitudinal=False, ): """Apply the tsv changes. @@ -454,8 +453,6 @@ def apply( Path to the new tsv prefix. container : :obj:`str` Container in which to run the workflow. - is_longitudinal : :obj:`bool` - If True, includes "ses" in filepath. Default is False. """ # Run directly from python using if container is None: @@ -464,7 +461,6 @@ def apply( use_datalad=use_datalad, acq_group_level=acq_group_level, grouping_config=config, - is_longitudinal=is_longitudinal, ) if use_datalad: if not bod.is_datalad_clean(): From 9db0657697736095f606e8daa96d8d461652673a Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 14:43:10 -0500 Subject: [PATCH 06/10] fix lint issues --- cubids/cubids.py | 4 +++- docs/conf.py | 6 ++++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index f958f04b3..b666656ac 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -120,7 +120,9 @@ def __init__( if self.is_longitudinal and self.acq_group_level == "session": NON_KEY_ENTITIES.remove("session") elif not self.is_longitudinal and self.acq_group_level == "session": - raise ValueError('Data is not longitudinal, so "session" is not a valid grouping level.') + raise ValueError( + 'Data is not longitudinal, so "session" is not a valid grouping level.' + ) @property def layout(self): diff --git a/docs/conf.py b/docs/conf.py index 5512a64d6..9da1b27fb 100755 --- a/docs/conf.py +++ b/docs/conf.py @@ -56,7 +56,7 @@ "sphinx_gallery.load_style", "sphinxarg.ext", # argparse extension "sphinxcontrib.bibtex", # bibtex-based bibliographies - "sphinx_design", # for adding in-line badges etc + "sphinx_design", # for adding in-line badges etc ] # Mock modules in autodoc: @@ -266,4 +266,6 @@ # ----------------------------------------------------------------------------- # Configuration for sphinx_copybutton to remove shell prompts, i.e. $ copybutton_prompt_text = "$ " -copybutton_only_copy_prompt_lines = False # ensures all lines are copied, even those without a prompt \ No newline at end of file +copybutton_only_copy_prompt_lines = ( + False # ensures all lines are copied, even those without a prompt +) From ad265ef8c585840fb7bda6bbf079fdc504fee181 Mon Sep 17 00:00:00 2001 From: Tien Tong Date: Fri, 17 Jan 2025 14:56:42 -0500 Subject: [PATCH 07/10] remove is_longitudinal from CuBIDS class docstring as Taylor suggested --- cubids/cubids.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index b666656ac..8cbb4dc49 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -84,8 +84,6 @@ class CuBIDS(object): A data dictionary for TSV outputs. use_datalad : :obj:`bool` If True, use datalad to track changes to the BIDS dataset. - is_longitudinal : :obj:`bool` - If True, adds "ses" in filepath. 
""" def __init__( From ac11eb3ad16df4fa8967721c8eec2b0abe6dc83c Mon Sep 17 00:00:00 2001 From: singlesp Date: Fri, 17 Jan 2025 15:44:45 -0500 Subject: [PATCH 08/10] DOCSTRINGS --- cubids/cubids.py | 443 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 405 insertions(+), 38 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 8cbb4dc49..00d0b76c1 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -1,4 +1,8 @@ -"""Main module.""" +"""Main module for CuBIDS. + +This module provides the core functionalities of the CuBIDS package, including +operations for handling BIDS datasets, clustering, and metadata merging. +""" import csv import json @@ -126,7 +130,10 @@ def __init__( def layout(self): """Return the BIDSLayout object. - If the BIDSLayout object has not been created, create it. + Returns + ------- + BIDSLayout + The BIDSLayout object associated with the current instance. """ if self._layout is None: # print("SETTING LAYOUT OBJECT") @@ -135,7 +142,18 @@ def layout(self): return self._layout def _infer_longitudinal(self): - """Infer if the dataset is longitudinal based on its structure.""" + """Infer if the dataset is longitudinal based on its structure. + + This method checks if any file or directory within the dataset path + contains the substring "ses-" in its name, which is a common convention + used to denote session identifiers in longitudinal datasets. + + Returns + ------- + bool + True if the dataset is longitudinal (i.e., contains session identifiers), + False otherwise. + """ return any("ses-" in str(f) for f in Path(self.path).rglob("*")) def reset_bids_layout(self, validate=False): @@ -185,11 +203,24 @@ def create_cubids_code_dir(self): return self.cubids_code_dir def init_datalad(self): - """Initialize a datalad Dataset at self.path. + """Initialize a datalad Dataset at the specified path. + + This method creates a datalad dataset at the path specified by `self.path`. + It sets the `datalad_ready` attribute to True and assigns the datalad.Dataset + object to the `datalad_handle` attribute. + + Attributes + ---------- + datalad_ready : bool + Indicates whether the datalad dataset has been successfully initialized. + datalad_handle : datalad.api.Dataset + The datalad dataset object associated with the specified path. - This creates a datalad dataset at self.path and sets the - ``datalad_ready`` attribute to True. - It also sets the ``datalad_handle`` attribute to the datalad.Dataset object. + Notes + ----- + If the dataset is not already installed at the specified path, this method + will create a new dataset with the configuration process "text2git" and + enable the annex feature. """ self.datalad_ready = True @@ -202,13 +233,19 @@ def init_datalad(self): def datalad_save(self, message=None): """Perform a DataLad Save operation on the BIDS tree. - Additionally a check for an active datalad handle and that the - status of all objects after the save is "ok". + This method checks for an active DataLad handle and ensures that the + status of all objects after the save operation is "ok". + + Parameters + ---------- + message : str or None, optional + Commit message to use with DataLad save. If None, a default message + "CuBIDS Save" will be used. - Parameters: - ----------- - message : str or None - Commit message to use with datalad save. + Raises + ------ + Exception + If DataLad has not been initialized or if the save operation fails. """ if not self.datalad_ready: raise Exception("DataLad has not been initialized. 
use datalad_init()") @@ -252,7 +289,34 @@ def datalad_undo_last_commit(self): reset_proc.check_returncode() def add_nifti_info(self): - """Add info from nifti files to json sidecars.""" + """ + Add information from NIfTI files to their corresponding JSON sidecars. + + This method processes all NIfTI files in the BIDS directory specified by `self.path`. + It extracts relevant metadata from each NIfTI file and updates the corresponding JSON + sidecar files with this information. If `self.force_unlock` is set, it will unlock + the dataset using `datalad` before processing the files. + + Metadata added to the JSON sidecars includes: + - Obliquity + - Voxel sizes (dimensions 1, 2, and 3) + - Matrix dimensions (sizes of dimensions 1, 2, and 3) + - Number of volumes (for 4D images) + - Image orientation + + If `self.use_datalad` is set, the changes will be saved using `datalad`. + + Raises + ------ + Exception + If there is an error loading a NIfTI file or parsing a JSON sidecar file. + + Notes + ----- + - This method assumes that the NIfTI files are organized in a BIDS-compliant directory structure. + - The method will skip any files in hidden directories (directories starting with a dot). + - If a JSON sidecar file does not exist for a NIfTI file, it will be skipped. + """ # check if force_unlock is set if self.force_unlock: # CHANGE TO SUBPROCESS.CALL IF NOT BLOCKING @@ -370,7 +434,6 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") # Get the delete commands - # delete_commands = [] to_remove = [] for rm_id in deletions: files_to_rm = files_df.loc[(files_df[["ParamGroup", "EntitySet"]] == rm_id).all(1)] @@ -378,7 +441,6 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T for rm_me in files_to_rm.FilePath: if Path(self.path + rm_me).exists(): to_remove.append(self.path + rm_me) - # delete_commands.append("rm " + rm_me) # call purge associations on list of files to remove self._purge_associations(to_remove) @@ -847,8 +909,15 @@ def _purge_associations(self, scans): def get_nifti_associations(self, nifti): """Get nifti associations. - This uses globbing to find files with the same path, entities, and suffix as the NIfTI, - but with a different extension. + Parameters + ---------- + nifti : str or Path + The path to the NIfTI file for which to find associated files. + + Returns + ------- + associations : list of str + A list of paths to files associated with the given NIfTI file, excluding the NIfTI file itself. """ # get all assocation files of a nifti image no_ext_file = str(nifti).split("/")[-1].split(".")[0] @@ -860,7 +929,18 @@ def get_nifti_associations(self, nifti): return associations def _cache_fieldmaps(self): - """Search all fieldmaps and create a lookup for each file.""" + """Search all fieldmaps and create a lookup for each file. + + This method scans for fieldmap files with specific suffixes and extensions, + retrieves their metadata, and creates a lookup dictionary that maps each + file to its corresponding fieldmap(s). If a fieldmap file does not have an + "IntendedFor" field in its metadata, it is added to a list of misfits. + + Returns + ------- + misfits : list + A list of fieldmap files that do not have an "IntendedFor" field in their metadata. 
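
As a companion to the docstring above, a rough sketch of the lookup that
_cache_fieldmaps is described as building; the input shape here is assumed
for illustration:

    from collections import defaultdict

    def build_fieldmap_lookup(fmap_metadata):
        # fmap_metadata: {fieldmap_path: sidecar_dict} (hypothetical shape)
        lookup = defaultdict(list)  # target file -> fieldmaps covering it
        misfits = []                # fieldmaps lacking IntendedFor
        for fmap_path, meta in fmap_metadata.items():
            targets = meta.get("IntendedFor", [])
            targets = [targets] if isinstance(targets, str) else targets
            if not targets:
                misfits.append(fmap_path)
            for target in targets:
                lookup[target].append(fmap_path)
        return lookup, misfits
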
+ """ suffix = "(phase1|phasediff|epi|fieldmap)" fmap_files = self.layout.get( suffix=suffix, regex_search=True, extension=[".nii.gz", ".nii"] @@ -958,7 +1038,41 @@ def get_param_groups_from_entity_set(self, entity_set): return tup_ret def create_data_dictionary(self): - """Create a data dictionary.""" + """Create a data dictionary for scanning parameters and other metadata. + + This method populates the `data_dict` attribute with descriptions for various + scanning parameters, relational parameters, and derived parameters based on + the `grouping_config` attribute. Additionally, it manually adds descriptions + for non-sidecar columns. + + Attributes + ---------- + grouping_config : dict + Configuration dictionary containing `sidecar_params`, `relational_params`, + and `derived_params` which are used to populate the `data_dict`. + + data_dict : dict + Dictionary to be populated with parameter descriptions. + + Sidecar Parameters + ------------------ + - Scanning Parameter: Parameters extracted from sidecar files. + - NIfTI Header Parameter: Parameters derived from NIfTI headers. + + Manually Added Columns + ---------------------- + - ManualCheck: Column where users mark groups to manually check. + - Notes: Column to mark notes about the parameter group. + - RenameEntitySet: Auto-generated suggested rename of Non-Dominant Groups based on variant scanning parameters. + - Counts: Number of files in the parameter group. + - Modality: MRI image type. + - MergeInto: Column to mark groups to remove with a '0'. + - FilePath: Location of file. + - EntitySetCount: Number of participants in an Entity Set. + - EntitySet: A set of scans whose filenames share all BIDS filename key-value pairs, excluding subject and session. + - ParamGroup: The set of scans with identical metadata parameters in their sidecars (defined within an Entity Set and denoted numerically). + - KeyParamGroup: Entity Set name and Param Group number separated by a double underscore. + """ sidecar_params = self.grouping_config.get("sidecar_params") for mod in sidecar_params.keys(): mod_dict = sidecar_params[mod] @@ -1058,7 +1172,19 @@ def get_data_dictionary(self, df): return json_dict def get_param_groups_dataframes(self): - """Create DataFrames of files x param groups and a summary.""" + """Create DataFrames of files x param groups and a summary. + + This method processes entity sets to generate two DataFrames: + one containing labeled file parameters and another summarizing + parameter groups. It also suggests renaming based on variant parameters. + + Returns + ------- + tuple of pandas.DataFrame + A tuple containing two DataFrames: + - big_df: DataFrame with labeled file parameters. + - summary: DataFrame summarizing parameter groups with suggested renaming. + """ entity_sets = self.get_entity_sets() labeled_files = [] param_group_summaries = [] @@ -1267,7 +1393,18 @@ def get_tsvs(self, path_prefix): print(f"CuBIDS detected {len(summary)} Parameter Groups.") def get_entity_sets(self): - """Identify the entity sets for the bids dataset.""" + """Identify the entity sets for the BIDS dataset. + + This method scans the dataset directory for files matching the BIDS + specification and identifies unique entity sets based on the filenames. + It also populates a dictionary mapping each entity set to a list of + corresponding file paths. + + Returns + ------- + list of str + A sorted list of unique entity sets found in the dataset. 
+ """ # reset self.keys_files self.keys_files = {} @@ -1292,9 +1429,32 @@ def get_entity_sets(self): return sorted(entity_sets) def change_metadata(self, filters, metadata): - """Change metadata. + """Change metadata of BIDS files based on provided filters. + + This method updates the metadata of BIDS files that match the given filters. + It retrieves the associated JSON sidecar files, updates them with the provided + metadata, and writes the changes back to the JSON files. + + Parameters + ---------- + filters : dict + A dictionary of filters to apply when searching for BIDS files. + The keys should correspond to BIDS entity names (e.g., 'subject', 'session'). + metadata : dict + A dictionary containing the metadata to update in the JSON sidecar files. + The keys should correspond to the metadata fields to be updated. - NOTE: Appears unused. + Raises + ------ + FileNotFoundError + If no JSON sidecar files are found for the BIDS files. + ValueError + If irregular associations are found (i.e., more than one JSON file is + associated with a BIDS file). + + Notes + ----- + This method appears to be unused in the current codebase. """ files_to_change = self.layout.get(return_type="object", **filters) @@ -1321,7 +1481,24 @@ def change_metadata(self, filters, metadata): _update_json(json_file.path, sidecar) def get_all_metadata_fields(self): - """Return all metadata fields in a bids directory.""" + """Return all metadata fields in a BIDS directory. + + This method searches through all JSON files in the specified BIDS directory + and collects all unique metadata fields present in those files. It skips + files within any ".git" directory and handles empty files and JSON decoding + errors gracefully. + + Returns + ------- + list of str + A sorted list of all unique metadata fields found in the BIDS directory. + + Raises + ------ + UserWarning + If there is an error decoding a JSON file or any unexpected error occurs + while processing a file. + """ found_fields = set() for json_file in Path(self.path).rglob("*.json"): if ".git" not in str(json_file): @@ -1342,7 +1519,20 @@ def get_all_metadata_fields(self): return sorted(found_fields) def remove_metadata_fields(self, fields_to_remove): - """Remove specific fields from all metadata files.""" + """Remove specific fields from all metadata files in the directory. + + This method iterates through all JSON files in the specified directory + and removes the specified fields from each file's metadata. + + Parameters + ---------- + fields_to_remove : list of str + A list of field names to be removed from the metadata files. + + Returns + ------- + None + """ remove_fields = set(fields_to_remove) if not remove_fields: return @@ -1390,6 +1580,22 @@ def _validate_json(): def _update_json(json_file, metadata): + """Update a JSON file with the provided metadata. + + This function writes the given metadata to the specified JSON file if the + JSON data is valid. If the JSON data is invalid, it prints an error message. + + Parameters + ---------- + json_file : str + The path to the JSON file to be updated. + metadata : dict + The metadata to be written to the JSON file. + + Returns + ------- + None + """ if _validate_json(): with open(json_file, "w", encoding="utf-8") as f: json.dump(metadata, f, ensure_ascii=False, indent=4) @@ -1398,18 +1604,59 @@ def _update_json(json_file, metadata): def _entity_set_to_entities(entity_set): - """Split a entity_set name into a pybids dictionary of entities.""" + """Split an entity_set name into a pybids dictionary of entities. 
+ + Parameters + ---------- + entity_set : str + A string representing a set of entities, where each entity is + separated by an underscore and each key-value pair is separated by a hyphen. + + Returns + ------- + dict + A dictionary where the keys are entity names and the values are entity values. + + Examples + -------- + >>> _entity_set_to_entities("sub-01_ses-02_task-rest") + {'sub': '01', 'ses': '02', 'task': 'rest'} + """ return dict([group.split("-") for group in entity_set.split("_")]) def _entities_to_entity_set(entities): - """Convert a pybids entities dictionary into a entity set name.""" + """Convert a pybids entities dictionary into an entity set name. + + Parameters + ---------- + entities : dict + A dictionary containing pybids entities where keys are entity names + and values are entity values. + + Returns + ------- + str + A string representing the entity set name, constructed by joining + the sorted entity keys and their corresponding values, separated by hyphens. + """ group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) return "_".join([f"{key}-{entities[key]}" for key in group_keys]) def _file_to_entity_set(filename): - """Identify and return the entity set of a bids valid filename.""" + """Identify and return the entity set of a BIDS valid filename. + + Parameters + ---------- + filename : str + The filename to parse for BIDS entities. + + Returns + ------- + set + A set of entities extracted from the filename. + """ entities = parse_file_entities(str(filename)) return _entities_to_entity_set(entities) @@ -1417,9 +1664,23 @@ def _file_to_entity_set(filename): def _get_participant_relative_path(scan): """Build the relative-from-subject version of a Path to a file. - This is what will appear in the IntendedFor field of any association. + Parameters + ---------- + scan : str + The full path to the scan file. - Examples: + Returns + ------- + str + The relative path from the subject directory. + + Raises + ------ + ValueError + If the subject directory cannot be found in the path. + + Examples + -------- >>> _get_participant_relative_path( ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", ... ) @@ -1445,7 +1706,24 @@ def _get_participant_relative_path(scan): def _get_bidsuri(filename, dataset_root): - """Convert a file path to a bidsuri. + """Convert a file path to a BIDS URI. + + Parameters + ---------- + filename : str + The full path to the file within the BIDS dataset. + dataset_root : str + The root directory of the BIDS dataset. + + Returns + ------- + str + The BIDS URI corresponding to the given file path. + + Raises + ------ + ValueError + If the filename is not within the dataset_root. Examples -------- @@ -1624,7 +1902,22 @@ def _get_param_groups( def round_params(param_group_df, config, modality): - """Round columns' values in DataFrame according to requested precision.""" + """Round columns' values in a DataFrame according to requested precision. + + Parameters + ---------- + param_group_df : pandas.DataFrame + DataFrame containing the parameters to be rounded. + config : dict + Configuration dictionary containing rounding precision information. + modality : str + The modality key to access the relevant rounding precision settings in the config. + + Returns + ------- + pandas.DataFrame + DataFrame with the specified columns' values rounded to the requested precision. 
+ """ to_format = config["sidecar_params"][modality] to_format.update(config["derived_params"][modality]) @@ -1644,7 +1937,23 @@ def round_params(param_group_df, config, modality): def get_sidecar_metadata(json_file): """Get all metadata values in a file's sidecar. - Transform json dictionary to Python dictionary. + Transform JSON dictionary to Python dictionary. + + Parameters + ---------- + json_file : str + Path to the JSON sidecar file. + + Returns + ------- + dict or str + Returns a dictionary containing the metadata if the file is successfully read, + otherwise returns the string "Erroneous sidecar". + + Raises + ------ + Exception + If there is an error loading the JSON file. """ try: with open(json_file) as json_file: @@ -1726,8 +2035,19 @@ def format_params(param_group_df, config, modality): def _order_columns(df): """Organize columns of the summary and files DataFrames. - This ensures that EntitySet and ParamGroup are the first two columns, - FilePath is the last, and the others are sorted alphabetically. + + Parameters + ---------- + df : pandas.DataFrame + The DataFrame whose columns need to be organized. + + Returns + ------- + pandas.DataFrame + The DataFrame with columns organized such that 'EntitySet' and + 'ParamGroup' are the first two columns, 'FilePath' is the last + column (if present), and the remaining columns are sorted + alphabetically. Notes ----- @@ -1747,7 +2067,30 @@ def _order_columns(df): def img_to_new_ext(img_path, new_ext): - """Convert img to new extension. + """Convert an image file path to a new extension. + + Parameters + ---------- + img_path : str + The file path of the image to be converted. + new_ext : str + The new extension to be applied to the image file path. + + Returns + ------- + str + The file path with the new extension applied. + + Examples + -------- + >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv') + '/path/to/image_events.tsv' + + >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv.gz') + '/path/to/image_physio.tsv.gz' + + >>> img_to_new_ext('/path/to/image.nii.gz', '.json') + '/path/to/image.json' Notes ----- @@ -1765,7 +2108,31 @@ def img_to_new_ext(img_path, new_ext): def get_entity_value(path, key): - """Given a filepath and BIDS key name, return value.""" + """Given a filepath and BIDS key name, return the value associated with the key. + + Parameters + ---------- + path : str + The file path to be parsed. + key : str + The BIDS key name to search for in the file path. + + Returns + ------- + str or None + The value associated with the BIDS key if found, otherwise None. 
+ + Examples + -------- + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'sub') + '01' + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'ses') + '02' + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'task') + 'rest' + >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'run') + None + """ parts = Path(path).parts for part in parts: if part.startswith(key + "-"): From 7204dd4d55e36871512f23513cfddf2bd41e4eef Mon Sep 17 00:00:00 2001 From: singlesp Date: Fri, 17 Jan 2025 15:54:15 -0500 Subject: [PATCH 09/10] lint --- cubids/cubids.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cubids/cubids.py b/cubids/cubids.py index 00d0b76c1..e91b23194 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -313,7 +313,8 @@ def add_nifti_info(self): Notes ----- - - This method assumes that the NIfTI files are organized in a BIDS-compliant directory structure. + - This method assumes that the NIfTI files are organized in a BIDS-compliant + directory structure. - The method will skip any files in hidden directories (directories starting with a dot). - If a JSON sidecar file does not exist for a NIfTI file, it will be skipped. """ @@ -917,7 +918,8 @@ def get_nifti_associations(self, nifti): Returns ------- associations : list of str - A list of paths to files associated with the given NIfTI file, excluding the NIfTI file itself. + A list of paths to files associated with the given NIfTI file, excluding + the NIfTI file itself. """ # get all assocation files of a nifti image no_ext_file = str(nifti).split("/")[-1].split(".")[0] @@ -1063,15 +1065,19 @@ def create_data_dictionary(self): ---------------------- - ManualCheck: Column where users mark groups to manually check. - Notes: Column to mark notes about the parameter group. - - RenameEntitySet: Auto-generated suggested rename of Non-Dominant Groups based on variant scanning parameters. + - RenameEntitySet: Auto-generated suggested rename of Non-Dominant Groups + based on variant scanning parameters. - Counts: Number of files in the parameter group. - Modality: MRI image type. - MergeInto: Column to mark groups to remove with a '0'. - FilePath: Location of file. - EntitySetCount: Number of participants in an Entity Set. - - EntitySet: A set of scans whose filenames share all BIDS filename key-value pairs, excluding subject and session. - - ParamGroup: The set of scans with identical metadata parameters in their sidecars (defined within an Entity Set and denoted numerically). - - KeyParamGroup: Entity Set name and Param Group number separated by a double underscore. + - EntitySet: A set of scans whose filenames share all BIDS filename key-value + pairs, excluding subject and session. + - ParamGroup: The set of scans with identical metadata parameters in their + sidecars (defined within an Entity Set and denoted numerically). + - KeyParamGroup: Entity Set name and Param Group number separated by a double + underscore. """ sidecar_params = self.grouping_config.get("sidecar_params") for mod in sidecar_params.keys(): @@ -2035,7 +2041,6 @@ def format_params(param_group_df, config, modality): def _order_columns(df): """Organize columns of the summary and files DataFrames. 
-
     Parameters
     ----------
     df : pandas.DataFrame

From 03c54912e70b51820a51bccd7580faa7453ab37b Mon Sep 17 00:00:00 2001
From: singlesp
Date: Fri, 24 Jan 2025 15:26:05 -0500
Subject: [PATCH 10/10] fixed doctests

---
 cubids/cubids.py | 24 ++++++++++--------------
 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/cubids/cubids.py b/cubids/cubids.py
index 7a7ed1e31..a6c288c11 100644
--- a/cubids/cubids.py
+++ b/cubids/cubids.py
@@ -2090,14 +2090,14 @@ def img_to_new_ext(img_path, new_ext):
 
     Examples
     --------
-    >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv')
-    '/path/to/image_events.tsv'
+    >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv')
+    '/path/to/file_events.tsv'
 
-    >>> img_to_new_ext('/path/to/image.nii.gz', '.tsv.gz')
-    '/path/to/image_physio.tsv.gz'
+    >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv.gz')
+    '/path/to/file_physio.tsv.gz'
 
-    >>> img_to_new_ext('/path/to/image.nii.gz', '.json')
-    '/path/to/image.json'
+    >>> img_to_new_ext('/path/to/file_image.nii.gz', '.json')
+    '/path/to/file_image.json'
 
     Notes
     -----
@@ -2131,14 +2131,10 @@ def get_entity_value(path, key):
 
     Examples
    --------
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'sub')
-    '01'
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'ses')
-    '02'
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'task')
-    'rest'
-    >>> get_entity_value('/path/to/sub-01_ses-02_task-rest_bold.nii.gz', 'run')
-    None
+    >>> get_entity_value('/path/to/sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz', 'sub')
+    'sub-01'
+    >>> get_entity_value('/path/to/sub-01/ses-02/func/sub-01_ses-02_task-rest_bold.nii.gz', 'ses')
+    'ses-02'
     """
     parts = Path(path).parts
     for part in parts:
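
Taken together, the series turns is_longitudinal into an attribute inferred
from the dataset rather than a constructor argument. A closing usage sketch
of that end state (paths are hypothetical):

    from cubids.cubids import CuBIDS

    bod = CuBIDS("/data/cross_sectional_dset", use_datalad=False)
    assert bod.is_longitudinal is False  # no "ses-" anywhere in the tree

    # Session-level grouping on cross-sectional data now raises:
    #   ValueError: Data is not longitudinal, so "session" is not a valid
    #   grouping level.
    # CuBIDS("/data/cross_sectional_dset", acq_group_level="session")
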