Skip to content

Commit

Permalink
[ENH] Proposition of a function to translate study ids to bids ids (a…
Browse files Browse the repository at this point in the history
…ramis-lab#1220)

* Proposition for study_to_bids_id

* Factoring

* add test

* Add to converters

* Rename function

* Apply to all datasets

* Add nifd

* Add ADNI

* Changes upon suggestions

* Use for adni-json

* Add other uses

* todo

* changes upon suggestion
  • Loading branch information
AliceJoubert authored Jul 10, 2024
1 parent 3a34e68 commit 047065c
Show file tree
Hide file tree
Showing 14 changed files with 345 additions and 60 deletions.
262 changes: 248 additions & 14 deletions clinica/iotools/bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,12 @@

import json
import os
import re
from abc import ABC, abstractmethod
from collections import UserString
from enum import Enum
from pathlib import Path
from typing import BinaryIO, List, Optional, Union
from typing import BinaryIO, List, Optional, Type, Union

import pandas as pd

Expand Down Expand Up @@ -49,6 +52,246 @@ class StudyName(str, Enum):
}


class BIDSSubjectID(ABC, UserString):
    """Interface that study-specific BIDS subject IDs have to implement.

    Instances behave like strings (through ``UserString``). Their content is
    whatever ``validate`` returned for the value given to the constructor.
    """

    def __init__(self, value: str):
        """Validate ``value`` and store the result as the string content.

        Parameters
        ----------
        value : str
            Candidate BIDS subject ID. It is passed through ``validate``
            before being stored, so any exception raised by ``validate``
            propagates to the caller.
        """
        # An initializer must not return a value: the validated string is
        # simply handed over to ``UserString``, which keeps it in ``self.data``.
        super().__init__(self.validate(value))

    @abstractmethod
    def validate(self, value: str) -> str:
        """Return the value to store for ``value``, or raise if it is malformed."""
        raise NotImplementedError

    @classmethod
    @abstractmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS subject ID matching the study's own subject ID."""
        raise NotImplementedError

    @abstractmethod
    def to_original_study_id(self) -> str:
        """Return the study's own subject ID matching this BIDS subject ID."""
        raise NotImplementedError


def bids_id_factory(study: StudyName) -> Type[BIDSSubjectID]:
    """Return the BIDS subject ID class associated with a study.

    Parameters
    ----------
    study : StudyName
        The study for which a BIDS subject ID class is requested.

    Returns
    -------
    Type[BIDSSubjectID]
        The class (not an instance) implementing the ID conversions
        for the given study.

    Raises
    ------
    NotImplementedError
        If no BIDS subject ID class is associated with ``study``.
        Previously the function silently returned ``None`` in that case.
    """
    # Built at call time because the ID classes are defined further down
    # in the module than this function.
    id_classes = (
        (StudyName.ADNI, ADNIBIDSSubjectID),
        (StudyName.NIFD, NIFDBIDSSubjectID),
        (StudyName.AIBL, AIBLBIDSSubjectID),
        (StudyName.UKB, UKBBIDSSubjectID),
        (StudyName.GENFI, GENFIBIDSSubjectID),
        (StudyName.OASIS, OASISBIDSSubjectID),
        (StudyName.OASIS3, OASIS3BIDSSubjectID),
        (StudyName.HABS, HABSBIDSSubjectID),
    )
    # Equality (rather than a dict lookup) keeps the exact matching
    # semantics of the original chain of ``==`` comparisons.
    for known_study, id_class in id_classes:
        if study == known_study:
            return id_class
    raise NotImplementedError(
        f"No BIDS subject ID class is defined for study {study}."
    )


class ADNIBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the ADNI study.

    Converts between the source ID format ``XXX_S_XXXX`` and the BIDS ID
    format ``sub-ADNIXXXSXXXX``, where each X is a digit.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed ADNI BIDS ID, else raise ValueError."""
        if re.fullmatch(r"sub-ADNI\d{3}S\d{4}", value) is None:
            raise ValueError(
                f"BIDS ADNI subject ID {value} is not properly formatted. "
                "Expecting a 'sub-ADNIXXXSXXXX' format."
            )
        return value

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID such as ``123_S_4567``.

        Raises ValueError if ``study_id`` does not follow ``XXX_S_XXXX``.
        """
        if re.fullmatch(r"\d{3}_S_\d{4}", study_id) is None:
            raise ValueError(
                f"Raw ADNI subject ID {study_id} is not properly formatted. "
                "Expecting a 'XXX_S_XXXX' format."
            )
        compact_id = study_id.replace("_", "")
        return "sub-ADNI" + compact_id

    def to_original_study_id(self) -> str:
        """Return the source ID (``XXX_S_XXXX``) for this BIDS ID."""
        # Everything after the study tag, e.g. "123S4567".
        digits_around_s = self.split("ADNI")[1]
        return "_S_".join(digits_around_s.split("S"))


class NIFDBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the NIFD study.

    Converts between the source ID format ``X_S_XXXX`` and the BIDS ID
    format ``sub-NIFDXSXXXX``, where each X is a digit.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed NIFD BIDS ID, else raise ValueError."""
        if re.fullmatch(r"sub-NIFD\dS\d{4}", value) is None:
            raise ValueError(
                f"BIDS NIFD subject ID {value} is not properly formatted. "
                "Expecting a 'sub-NIFDXSXXXX' format."
            )
        return value

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID such as ``1_S_0001``.

        Raises ValueError if ``study_id`` does not follow ``X_S_XXXX``.
        """
        if re.fullmatch(r"\d_S_\d{4}", study_id) is None:
            raise ValueError(
                f"Raw NIFD subject ID {study_id} is not properly formatted. "
                "Expecting a 'X_S_XXXX' format."
            )
        compact_id = study_id.replace("_", "")
        return "sub-NIFD" + compact_id

    def to_original_study_id(self) -> str:
        """Return the source ID (``X_S_XXXX``) for this BIDS ID."""
        # Everything after the study tag, e.g. "1S0001".
        digits_around_s = self.split("NIFD")[1]
        return "_S_".join(digits_around_s.split("S"))


class AIBLBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the AIBL study.

    Converts between the source ID ``Y`` (a non-empty sequence of digits)
    and the BIDS ID ``sub-AIBLY``.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed AIBL BIDS ID, else raise ValueError."""
        # ``\d+`` (not ``\d*``): a bare "sub-AIBL" without any digit is not
        # a subject ID and must be rejected.
        if re.fullmatch(r"sub-AIBL\d+", value):
            return value
        raise ValueError(
            f"BIDS AIBL subject ID {value} is not properly formatted. "
            "Expecting a 'sub-AIBLY' format."
        )

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID made of digits.

        Raises ValueError if ``study_id`` is empty or contains anything
        other than digits.
        """
        if re.fullmatch(r"\d+", study_id):
            return "sub-AIBL" + study_id
        raise ValueError(
            f"Raw AIBL subject ID {study_id} is not properly formatted. "
            "Expecting a 'Y' format where Y is a combination of digits."
        )

    def to_original_study_id(self) -> str:
        """Return the source ID (the digits following ``sub-AIBL``)."""
        return self.split("AIBL")[1]


class UKBBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the UKB study.

    Converts between the source ID ``Y`` (a non-empty sequence of digits)
    and the BIDS ID ``sub-UKBY``.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed UKB BIDS ID, else raise ValueError."""
        # ``\d+`` (not ``\d*``): a bare "sub-UKB" without any digit is not
        # a subject ID and must be rejected.
        if re.fullmatch(r"sub-UKB\d+", value):
            return value
        raise ValueError(
            f"BIDS UKB subject ID {value} is not properly formatted. "
            "Expecting a 'sub-UKBY' format."
        )

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID made of digits.

        Raises ValueError if ``study_id`` is empty or contains anything
        other than digits.
        """
        if re.fullmatch(r"\d+", study_id):
            return "sub-UKB" + study_id
        raise ValueError(
            f"Raw UKB subject ID {study_id} is not properly formatted. "
            "Expecting a 'Y' format where Y is a combination of digits."
        )

    def to_original_study_id(self) -> str:
        """Return the source ID (the digits following ``sub-UKB``)."""
        return self.split("UKB")[1]


class GENFIBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the GENFI study.

    Converts between the source ID ``Y`` (a non-empty sequence of word
    characters) and the BIDS ID ``sub-Y``.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed GENFI BIDS ID, else raise ValueError."""
        # ``\w+`` (not ``\w*``): a bare "sub-" carries no subject label and
        # must be rejected.
        if re.fullmatch(r"sub-\w+", value):
            return value
        raise ValueError(
            f"BIDS GENFI subject ID {value} is not properly formatted. "
            "Expecting a 'sub-Y' format."
        )

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a GENFI source ID.

        Raises ValueError if ``study_id`` is empty or contains characters
        that are not matched by ``\\w``.
        """
        if re.fullmatch(r"\w+", study_id):
            return "sub-" + study_id
        raise ValueError(
            f"Raw GENFI subject ID {study_id} is not properly formatted. "
            "Expecting a 'Y' format where Y is a combination of letters and digits."
        )

    def to_original_study_id(self) -> str:
        """Return the source ID (everything following ``sub-``)."""
        # A validated ID cannot contain another "-" since ``\w`` excludes it.
        return self.split("-")[1]


class OASISBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the OASIS1 study.

    Converts between the source ID format ``OAS1_XXXX_MR1/2`` and the BIDS
    ID format ``sub-OASIS1XXXX``, where each X is a digit.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed OASIS1 BIDS ID, else raise ValueError."""
        if re.fullmatch(r"sub-OASIS1\d{4}", value):
            return value
        raise ValueError(
            f"BIDS OASIS1 subject ID {value} is not properly formatted. "
            "Expecting a 'sub-OASIS1XXXX' format."
        )

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID such as ``OAS1_0001_MR1``.

        Only the four-digit subject number is kept; the ``MR`` suffix is
        not part of the BIDS subject ID.

        Raises ValueError if ``study_id`` does not follow ``OAS1_XXXX_MRx``.
        """
        if re.fullmatch(r"OAS1_\d{4}_MR\d", study_id):
            return "sub-OASIS1" + study_id.split("_")[1]
        raise ValueError(
            f"Raw OASIS1 subject ID {study_id} is not properly formatted. "
            "Expecting a 'OAS1_XXXX_MR1/2' format."
        )

    def to_original_study_id(self) -> str:
        """Return a source ID of the form ``OAS1_XXXX_MR1`` for this BIDS ID.

        The BIDS ID does not record the original ``MR`` suffix, so ``MR1``
        is always used.
        """
        # The underscores are part of the source format: without them the
        # result would be rejected by ``from_original_study_id``.
        subject_number = self.split("OASIS1")[1]
        return f"OAS1_{subject_number}_MR1"


class OASIS3BIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the OASIS3 study.

    Converts between the source ID format ``OAS3XXXX`` and the BIDS ID
    format ``sub-OAS3XXXX``, where each X is a digit.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed OASIS3 BIDS ID, else raise ValueError."""
        if re.fullmatch(r"sub-OAS3\d{4}", value) is None:
            raise ValueError(
                f"BIDS OASIS3 subject ID {value} is not properly formatted. "
                "Expecting a 'sub-OAS3XXXX' format."
            )
        return value

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID such as ``OAS30001``.

        Raises ValueError if ``study_id`` does not follow ``OAS3XXXX``.
        """
        if re.fullmatch(r"OAS3\d{4}", study_id) is None:
            raise ValueError(
                f"Raw OASIS3 subject ID {study_id} is not properly formatted. "
                "Expecting a 'OAS3XXXX' format."
            )
        return "sub-" + study_id

    def to_original_study_id(self) -> str:
        """Return the source ID (everything following ``sub-``)."""
        pieces = self.split("-")
        return pieces[1]


class HABSBIDSSubjectID(BIDSSubjectID):
    """BIDS subject ID for the HABS study.

    Converts between the source ID format ``P_Y`` and the BIDS ID format
    ``sub-HABSY``, where Y is a non-empty sequence of word characters.
    """

    def validate(self, value: str) -> str:
        """Return ``value`` if it is a well-formed HABS BIDS ID, else raise ValueError."""
        # ``\w+`` (not ``\w*``): a bare "sub-HABS" carries no subject label.
        if re.fullmatch(r"sub-HABS\w+", value):
            return value
        raise ValueError(
            f"BIDS HABS subject ID {value} is not properly formatted. "
            "Expecting a 'sub-HABSY' format."
        )

    @classmethod
    def from_original_study_id(cls, study_id: str) -> str:
        """Return the BIDS ID for a source ID such as ``P_0A09``.

        Raises ValueError if ``study_id`` does not follow ``P_Y`` with a
        non-empty Y.
        """
        if re.fullmatch(r"P_\w+", study_id):
            # Only the leading "P_" is a prefix; ``\w`` allows further "P_"
            # occurrences inside the label, which must be left untouched.
            return "sub-HABS" + study_id[len("P_"):]
        raise ValueError(
            f"Raw HABS subject ID {study_id} is not properly formatted. "
            "Expecting a 'P_Y' format."
        )

    def to_original_study_id(self) -> str:
        """Return the source ID (``P_Y``) for this BIDS ID."""
        # Work on the underlying ``str``: ``UserString.replace`` would build
        # a new instance of this class from the "P_..." result, which fails
        # validation and raises ValueError.
        return "P_" + self.data[len("sub-HABS"):]


# -- Methods for the clinical data --
def create_participants_df(
study_name: StudyName,
Expand Down Expand Up @@ -166,15 +409,10 @@ def create_participants_df(

# Adding participant_id column with BIDS ids
for i in range(0, len(participant_df)):
if study_name == StudyName.OASIS:
value = (participant_df["alternative_id_1"][i].split("_"))[1]
elif study_name == StudyName.OASIS3:
value = participant_df["alternative_id_1"][i].replace("OAS3", "")
else:
value = remove_space_and_symbols(participant_df["alternative_id_1"][i])

value = bids_id_factory(study_name).from_original_study_id(
participant_df["alternative_id_1"][i]
)
bids_id = [s for s in bids_ids if value in s]

if len(bids_id) == 0:
index_to_drop.append(i)
subjects_to_drop.append(value)
Expand Down Expand Up @@ -289,11 +527,7 @@ def create_sessions_dict_oasis(
if subj_id.dtype == np.int64:
subj_id = str(subj_id)
# Removes all the - from
subj_id_alpha = remove_space_and_symbols(subj_id)
if study_name == StudyName.OASIS:
subj_id_alpha = str(subj_id[0:3] + "IS" + subj_id[3] + subj_id[5:9])
if study_name == StudyName.OASIS3:
subj_id_alpha = str(subj_id[0:3] + "IS" + subj_id[3:])
subj_id_alpha = str(subj_id[0:3] + "IS" + subj_id[3] + subj_id[5:9])

# Extract the corresponding BIDS id and create the output file if doesn't exist
subj_bids = [s for s in bids_ids if subj_id_alpha in s]
Expand Down
10 changes: 5 additions & 5 deletions clinica/iotools/converters/adni_to_bids/adni_json.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,12 @@ def _bids_id_to_loni(bids_id: str) -> Optional[str]:
"""Convert a subject id of the form sub-ADNI000S0000
back to original format 000_S_0000
"""
import re
from clinica.iotools.bids_utils import StudyName, bids_id_factory

ids = re.findall(r"\d+", bids_id)
if len(ids) == 2:
return ids[0] + "_S_" + ids[1]
return None
try:
return bids_id_factory(StudyName.ADNI)(bids_id).to_original_study_id()
except ValueError:
return None


def _read_xml_files(
Expand Down
6 changes: 5 additions & 1 deletion clinica/iotools/converters/adni_to_bids/adni_to_bids.py
Original file line number Diff line number Diff line change
Expand Up @@ -235,6 +235,8 @@ def _get_bids_subjects_info(
out_path: Path,
subjects: Optional[Path] = None,
) -> tuple[list[str], list[Path]]:
from clinica.iotools.bids_utils import StudyName, bids_id_factory

from .adni_utils import load_clinical_csv

# Read optional list of participants.
Expand All @@ -246,7 +248,9 @@ def _get_bids_subjects_info(
# Filter participants if requested.
participants = sorted(participants & subjects if subjects else participants)
# Compute their corresponding BIDS IDs and paths.
bids_ids = [f"sub-ADNI{p.replace('_', '')}" for p in participants]
bids_ids = [
bids_id_factory(StudyName.ADNI).from_original_study_id(p) for p in participants
]
bids_paths = [out_path / bids_id for bids_id in bids_ids]

return bids_ids, bids_paths
16 changes: 7 additions & 9 deletions clinica/iotools/converters/adni_to_bids/adni_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,6 @@ def _write_adni_sessions_tsv(
df_subj_sessions: global dataframe containing clinical sessions data for all subjects
bids_subjs_paths: a list with the path to all bids subjects
"""
import os
from os import path

df_subj_sessions["adas_memory"] = (
df_subj_sessions["adas_Q1"]
Expand Down Expand Up @@ -268,6 +266,7 @@ def _filter_subj_bids(

# Depending on the file that needs to be open, identify and
# preprocess the column that contains the subjects ids.
# todo : use id class here ?
bids_ids = [x[8:] for x in bids_ids if "sub-ADNI" in x]
if location == "ADNIMERGE.csv":
df_files["RID"] = df_files["PTID"].apply(
Expand Down Expand Up @@ -529,6 +528,7 @@ def create_adni_scans_files(conversion_path: Path, bids_subjs_paths: list[Path])
"""
from os import path

from clinica.iotools.bids_utils import StudyName, bids_id_factory
from clinica.utils.stream import cprint

scans_fields_bids = ["filename", "scan_id", "mri_field"]
Expand All @@ -552,7 +552,7 @@ def create_adni_scans_files(conversion_path: Path, bids_subjs_paths: list[Path])
for bids_subject_path in bids_subjs_paths:
# Create the file
bids_id = bids_subject_path.resolve().name
subject_id = "_S_".join(bids_id[8::].split("S"))
subject_id = bids_id_factory(StudyName.ADNI)(bids_id).to_original_study_id()
for session_path in bids_subject_path.glob("ses-*"):
viscode = _session_label_to_viscode(session_path.name[4::])
tsv_name = f"{bids_id}_{session_path.name}_scans.tsv"
Expand Down Expand Up @@ -768,7 +768,7 @@ def _create_file(
import numpy as np

from clinica.cmdline import setup_clinica_logging
from clinica.iotools.bids_utils import run_dcm2niix
from clinica.iotools.bids_utils import StudyName, bids_id_factory, run_dcm2niix
from clinica.iotools.converter_utils import viscode_to_session
from clinica.iotools.utils.data_handling import center_nifti_origin
from clinica.utils.stream import cprint
Expand Down Expand Up @@ -805,12 +805,10 @@ def _create_file(
# If the original image is a DICOM, check if contains two DICOM inside the same folder
if image.Is_Dicom:
image_path = _check_two_dcm_folder(image_path, bids_dir, image_id)
bids_subj = subject.replace("_", "")
output_path = (
bids_dir / f"sub-ADNI{bids_subj}" / session / _get_output_path(modality)
)
bids_id = bids_id_factory(StudyName.ADNI).from_original_study_id(subject)
output_path = bids_dir / bids_id / session / _get_output_path(modality)
output_filename = (
f"sub-ADNI{bids_subj}_{session}{_get_output_filename(modality, image_tracer)}"
f"{bids_id}_{session}{_get_output_filename(modality, image_tracer)}"
)
output_path.mkdir(parents=True, exist_ok=True)

Expand Down
Loading

0 comments on commit 047065c

Please sign in to comment.