Skip to content

Commit

Permalink
[FIX] ADNI-to-BIDS: incorporate the fix for KeyError "APGEN" (#1342)
Browse files Browse the repository at this point in the history
* Proposition

* Changes upon suggestions

* Modify unit test

* Small fix
  • Loading branch information
AliceJoubert authored and NicolasGensollen committed Oct 25, 2024
1 parent 1fad1c3 commit 876298c
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 8 deletions.
15 changes: 15 additions & 0 deletions clinica/iotools/bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -401,6 +401,21 @@ def create_participants_df(
file_to_read = load_clinical_csv(
clinical_data_dir, location.split(".")[0]
)
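# Handle the ADNI change to the layout of APOERES.csv: when the requested
# field is missing from the file but a single "GENOTYPE" column
# (e.g. "3/2") is present, rebuild "APGEN1"/"APGEN2" from it.
# See issue https://github.com/aramis-lab/clinica/issues/1294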
if study_name == StudyName.ADNI and location == "APOERES.csv":
if (
participant_fields_db[i] not in file_to_read.columns
and "GENOTYPE" in file_to_read.columns
):
# Split the 'GENOTYPE' column into 'APGEN1' and 'APGEN2'
genotype = file_to_read["GENOTYPE"].str.split(
"/", expand=True
)
file_to_read = file_to_read.assign(
APGEN1=genotype[0], APGEN2=genotype[1]
)

prev_location = location
prev_sheet = sheet

Expand Down
39 changes: 31 additions & 8 deletions test/unittests/iotools/test_bids_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pathlib import Path
from string import Template
from typing import Union
from typing import Optional, Union

import numpy as np
import pandas as pd
Expand Down Expand Up @@ -112,7 +112,7 @@ def test_bids_to_study(study, bids_id, source_id):
assert bids_id_factory(study)(bids_id).to_original_study_id() == source_id


def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
def create_participants_spec(tmp_path: Path) -> Path:
spec_df = pd.DataFrame(
{
"BIDS CLINICA": [
Expand Down Expand Up @@ -142,6 +142,12 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
)
spec_df.to_csv(tmp_path / "participant.tsv", sep="\t", index=False)

return tmp_path


def create_clinical_data(
tmp_path: Path, study_name: StudyName, adni_genotype: Optional[bool] = False
) -> Path:
clinical_path = tmp_path / "clinical_data"
clinical_path.mkdir()

Expand All @@ -160,14 +166,25 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
"AGE": ["40", "50", "60", "70", "80", None],
}
)

df_apoeres = pd.DataFrame(
{
"APGEN1": ["3", "3", "3", "3", None, "3"],
"GEN2": ["2", "2", "2", "2", None, "2"],
}
)

if adni_genotype:
df_apoeres = pd.DataFrame(
{
"GENOTYPE": ["3/2", "3/2", "3/2", "3/2", None, "3/2"],
"GEN2": ["2", "2", "2", "2", None, "2"],
}
)

df_adnimerge.to_csv(clinical_path / "ADNIMERGE.csv", index=False)
df_apoeres.to_csv(clinical_path / "APOERES.csv", index=False)

if study_name == StudyName.OASIS:
df_oasis = pd.DataFrame(
{
Expand All @@ -189,7 +206,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:


@pytest.mark.parametrize(
"study_name, bids_ids, expected",
"study_name, bids_ids, expected, adni_genotype",
[
(
StudyName.OASIS,
Expand All @@ -201,6 +218,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
"sex": ["F"],
}
),
False,
),
(
StudyName.ADNI,
Expand All @@ -210,9 +228,10 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
"participant_id": ["sub-ADNI001S0001"],
"alternative_id_1": ["001_S_0001"],
"sex": ["Male"],
"apoegen1": [3.0],
"apoegen1": ["3"],
}
),
True,
),
(
StudyName.OASIS,
Expand All @@ -224,6 +243,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
"sex": ["M", "M"],
}
),
False,
),
(
StudyName.ADNI,
Expand All @@ -236,6 +256,7 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
"apoegen1": ["n/a"],
}
),
False,
),
(
StudyName.ADNI,
Expand All @@ -248,18 +269,20 @@ def create_clinical_data(tmp_path: Path, study_name: StudyName) -> Path:
"apoegen1": [3.0],
}
),
False,
),
],
)
def test_create_participants_df(tmp_path, bids_ids, expected, study_name):
def test_create_participants_df(
tmp_path, bids_ids, expected, study_name, adni_genotype
):
from clinica.iotools.bids_utils import create_participants_df

clinical_path = create_clinical_data(tmp_path, study_name)
assert (
create_participants_df(
study_name,
clinical_specifications_folder=tmp_path,
clinical_data_dir=clinical_path,
clinical_specifications_folder=create_participants_spec(tmp_path),
clinical_data_dir=create_clinical_data(tmp_path, study_name, adni_genotype),
bids_ids=bids_ids,
)
.reset_index(drop=True)
Expand Down

0 comments on commit 876298c

Please sign in to comment.