Merge pull request #24 from catalystneuro/change_session_id_definition

Change session id definition
catalystneuro · Dec 10, 2024 · b1c3cdc · b1c3cdc
2 parents 1814e41 + fd5ef23
commit b1c3cdc
Show file tree

Hide file tree

Showing 6 changed files with 49 additions and 55 deletions.
diff --git a/src/cai_lab_to_nwb/zaki_2024/tutorials/zaki_2024_tutorial.ipynb b/src/cai_lab_to_nwb/zaki_2024/tutorials/zaki_2024_tutorial.ipynb
@@ -130,7 +130,7 @@
    "source": [
     "from pynwb import NWBHDF5IO\n",
     "\n",
-    "nwbfile_path = f\"D:/cai_lab_conversion_nwb/{subject_id}_{session_id}.nwb\"\n",
+    "nwbfile_path = f\"D:/cai_lab_conversion_nwb/sub-{subject_id}_ses-{session_id}.nwb\"\n",
     "io = NWBHDF5IO(path=nwbfile_path, load_namespaces=True)"
    ],
    "id": "f3c1df012cddd8b9",

diff --git a/src/cai_lab_to_nwb/zaki_2024/utils/define_conversion_parameters.py b/src/cai_lab_to_nwb/zaki_2024/utils/define_conversion_parameters.py
@@ -10,7 +10,7 @@ def update_conversion_parameters_yaml(
     data_dir_path: Union[str, Path],
     output_dir_path: Union[str, Path],
     experiment_design_file_path: Union[str, Path],
-    session_types: list = (),
+    session_ids: list = (),
 ):
     """
     Update a YAML file with parameters required for session-to-NWB conversion.
@@ -25,7 +25,7 @@ def update_conversion_parameters_yaml(
         Path to the output directory for NWB files.
     experiment_design_file_path : Union[str, Path]
         Path to the experiment design file.
-    session_types : list, optional
+    session_ids : list, optional
-        List of session types to process. Defaults to an empty list.
+        List of session IDs to process. Defaults to an empty tuple (no filtering).
 
     Returns:
@@ -34,12 +34,9 @@ def update_conversion_parameters_yaml(
     """
     yaml_file_path = Path(__file__).parent / "conversion_parameters.yaml"
     subjects_df = pd.read_excel(experiment_design_file_path)
-    session_times_df = get_session_times_df(
-        subject_id=subject_id, data_dir_path=data_dir_path, session_types=session_types
-    )
-    for session_type in session_times_df["Session"]:
-        session_id = subject_id + "_" + session_type
-        session_row = session_times_df[session_times_df["Session"] == session_type].iloc[0]
+    session_times_df = get_session_times_df(subject_id=subject_id, data_dir_path=data_dir_path, session_ids=session_ids)
+    for session_id in session_times_df["Session"]:
+        session_row = session_times_df[session_times_df["Session"] == session_id].iloc[0]
         date_str = session_row["Date"]
         time_str = session_row["Time"]
         experiment_dir_path = get_experiment_dir_path(subject_id, session_id, data_dir_path)
@@ -56,7 +53,7 @@ def update_conversion_parameters_yaml(
             sleep_classification_file_path = None
             video_file_path = get_video_file_path(subject_id, session_id, data_dir_path)
             freezing_output_file_path = get_freezing_output_file_path(subject_id, session_id, data_dir_path)
-            if session_type == "FC" or session_type == "Recall1":
+            if session_id == "FC":
                 shock_amplitude = subjects_df["Amplitude"][subjects_df["Mouse"] == subject_id].to_numpy()[0]
                 shock_amplitude = float(re.findall(r"[-+]?\d*\.\d+|\d+", shock_amplitude)[0])
                 shock_stimulus = dict(
@@ -68,7 +65,7 @@ def update_conversion_parameters_yaml(
         minian_folder_path = get_miniscope_folder_path(subject_id, session_id, data_dir_path)
 
         session_description = generate_session_description(
-            experiment_design_file_path=experiment_design_file_path, subject_id=subject_id, session_type=session_type
+            experiment_design_file_path=experiment_design_file_path, subject_id=subject_id, session_id=session_id
         )
         session_to_nwb_kwargs_per_session = {
             session_id: {
@@ -109,7 +106,7 @@ def update_conversion_parameters_yaml(
 
 if __name__ == "__main__":
     update_conversion_parameters_yaml(
-        subject_id="Ca_EEG3-4",
+        subject_id="Ca_EEG2-1",
         data_dir_path=Path("D:/"),
         output_dir_path=Path("D:/cai_lab_conversion_nwb/"),
         experiment_design_file_path=Path("D:/Ca_EEG_Design.xlsx"),

diff --git a/src/cai_lab_to_nwb/zaki_2024/utils/generate_session_description.py b/src/cai_lab_to_nwb/zaki_2024/utils/generate_session_description.py
@@ -4,7 +4,7 @@
 import re
 
 
-def generate_session_description(experiment_design_file_path: Union[Path, str], subject_id: str, session_type: str):
+def generate_session_description(experiment_design_file_path: Union[Path, str], subject_id: str, session_id: str):
     subjects_df = pd.read_excel(experiment_design_file_path)
     subject_df = subjects_df[subjects_df["Mouse"] == subject_id]
     shock_amplitude = subject_df["Amplitude"].to_numpy()[0]
@@ -24,17 +24,17 @@ def generate_session_description(experiment_design_file_path: Union[Path, str],
         "Recall3": f"Third Recall session: mouse was placed in {subject_df["Test_3"].to_numpy()[0]} context for 5 min. Context: {Contexts[subject_df["Test_3_ctx"].to_numpy()[0]]}",
         "Offline": f"After Neutral Exposure and Fear Conditioning sessions, mice were taken out of the testing chambers and immediately placed in their homecage (scope was not removed).The homecage was placed in a dark grey storage bin with a webcam on top of the bin, taped to a wooden plank, looking down into the homecage. Mouse behavior and calcium were recorded for an hour.",
     }
-    if "Offline" in session_type:
-        session_type = "Offline"
+    if "Offline" in session_id:
+        session_id = "Offline"
 
-    return session_descriptions[session_type]
+    return session_descriptions[session_id]
 
 
 if __name__ == "__main__":
     experiment_design_file_path = Path("D:/Ca_EEG_Design.xlsx")
     subject_id = "Ca_EEG3-4"
-    session_type = "NeutralExposure"
+    session_id = "NeutralExposure"
     session_description = generate_session_description(
-        experiment_design_file_path=experiment_design_file_path, subject_id=subject_id, session_type=session_type
+        experiment_design_file_path=experiment_design_file_path, subject_id=subject_id, session_id=session_id
     )
     print(session_description)
diff --git a/src/cai_lab_to_nwb/zaki_2024/utils/source_data_path_resolver.py b/src/cai_lab_to_nwb/zaki_2024/utils/source_data_path_resolver.py
@@ -5,7 +5,7 @@
 import warnings
 
 
-def get_session_times_df(subject_id: str, data_dir_path: Union[str, Path], session_types: list = ()) -> pd.DataFrame:
+def get_session_times_df(subject_id: str, data_dir_path: Union[str, Path], session_ids: list = ()) -> pd.DataFrame:
     """
     Retrieve a DataFrame containing session times for a given subject.
 
@@ -15,7 +15,7 @@ def get_session_times_df(subject_id: str, data_dir_path: Union[str, Path], sessi
         The ID of the subject.
     data_dir_path : Union[str, Path]
         Path to the base data directory.
-    session_types : list, optional
+    session_ids : list, optional
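-        List of session types to filter. Defaults to an empty list.
+        List of session IDs to keep. Defaults to an empty tuple, in which case no filtering is applied.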
 
     Returns:
@@ -30,11 +30,11 @@ def get_session_times_df(subject_id: str, data_dir_path: Union[str, Path], sessi
     """
 
     if "Ca_EEG3-" in subject_id:
-        session_times_file_path = data_dir_path / "Ca_EEG_Experiment" / subject_id / (subject_id + "_SessionTimes.csv")
+        session_times_file_path = data_dir_path / "Ca_EEG_Experiment" / subject_id / f"{subject_id}_SessionTimes.csv"
         assert session_times_file_path.is_file(), f"{session_times_file_path} does not exist"
         session_times_df = pd.read_csv(session_times_file_path)
-        if session_types:
-            session_times_df = session_times_df[session_times_df["Session"].isin(session_types)]
+        if session_ids:
+            session_times_df = session_times_df[session_times_df["Session"].isin(session_ids)]
         return session_times_df
 
     elif "Ca_EEG2-" in subject_id:
@@ -45,8 +45,8 @@ def get_session_times_df(subject_id: str, data_dir_path: Union[str, Path], sessi
         session_times = session_times_df_original.iloc[1, 1:].tolist()  # Exclude first column
         # Create the DataFrame
         session_times_df = pd.DataFrame({"Session": session_names, "Time": session_times, "Date": None})
-        if session_types:
-            session_times_df = session_times_df[session_times_df["Session"].isin(session_types)]
+        if session_ids:
+            session_times_df = session_times_df[session_times_df["Session"].isin(session_ids)]
         return session_times_df
 
     elif "Ca_EEG_Pilot" in subject_id:
@@ -82,9 +82,13 @@ def get_experiment_dir_path(subject_id: str, session_id: str, data_dir_path: Uni
 
     if "Offline" in session_id:
         offline_day = session_id.split("Session")[0]
-        experiment_dir_path = data_dir_path / "Ca_EEG_Experiment" / subject_id / (subject_id + "_Offline") / offline_day
+        experiment_dir_path = (
+            data_dir_path / "Ca_EEG_Experiment" / subject_id / f"{subject_id}_Offline" / f"{subject_id}_{offline_day}"
+        )
     else:
-        experiment_dir_path = data_dir_path / "Ca_EEG_Experiment" / subject_id / (subject_id + "_Sessions") / session_id
+        experiment_dir_path = (
+            data_dir_path / "Ca_EEG_Experiment" / subject_id / f"{subject_id}_Sessions" / f"{subject_id}_{session_id}"
+        )
     assert experiment_dir_path.is_dir(), f"{experiment_dir_path} does not exist"
     return experiment_dir_path
 
@@ -136,10 +140,8 @@ def get_edf_file_path(subject_id: str, date_str: str, data_dir_path: Union[str,
 
     try:
         datetime_obj = datetime.strptime(date_str, "%Y_%m_%d")
-        reformatted_date_str = datetime_obj.strftime("_%m%d%y")
-        edf_file_path = (
-            data_dir_path / "Ca_EEG_EDF" / (subject_id + "_EDF") / (subject_id + reformatted_date_str + ".edf")
-        )
+        reformatted_date_str = datetime_obj.strftime("%m%d%y")
+        edf_file_path = data_dir_path / "Ca_EEG_EDF" / f"{subject_id}_EDF" / f"{subject_id}_{reformatted_date_str}.edf"
         if not edf_file_path.is_file():
             warnings.warn(f"{edf_file_path} not found.")
             return None
@@ -169,7 +171,7 @@ def get_sleep_classification_file_path(
         Path to the sleep classification file, or None if not found.
     """
     sleep_classification_file_path = (
-        data_dir_path / "Ca_EEG_Sleep" / subject_id / "AlignedSleep" / (session_id + "_AlignedSleep.csv")
+        data_dir_path / "Ca_EEG_Sleep" / subject_id / "AlignedSleep" / f"{subject_id}_{session_id}_AlignedSleep.csv"
     )
     if not sleep_classification_file_path.is_file():
         warnings.warn(f"{sleep_classification_file_path} not found.")
@@ -185,7 +187,7 @@ def get_video_file_path(subject_id: str, session_id: str, data_dir_path: Union[s
     -----------
     subject_id : str
         The ID of the subject.
     session_id : str
         The ID of the session.
     data_dir_path : Union[str, Path]
         Path to the base data directory.
@@ -197,7 +199,7 @@ def get_video_file_path(subject_id: str, session_id: str, data_dir_path: Union[s
     """
 
     experiment_dir_path = get_experiment_dir_path(subject_id, session_id, data_dir_path)
-    video_file_path = Path(experiment_dir_path) / (session_id + ".wmv")
+    video_file_path = Path(experiment_dir_path) / f"{subject_id}_{session_id}.wmv"
     if not video_file_path.is_file():
         warnings.warn(f"{video_file_path} not found.")
         return None
@@ -226,7 +228,7 @@ def get_freezing_output_file_path(
     """
 
     experiment_dir_path = get_experiment_dir_path(subject_id, session_id, data_dir_path)
-    freezing_output_file_path = Path(experiment_dir_path) / (session_id + "_FreezingOutput.csv")
+    freezing_output_file_path = Path(experiment_dir_path) / f"{subject_id}_{session_id}_FreezingOutput.csv"
     if not freezing_output_file_path.is_file():
         warnings.warn(f"{freezing_output_file_path} not found.")
         return None
@@ -288,7 +290,7 @@ def get_miniscope_folder_path(subject_id: str, session_id: str, data_dir_path: U
         Path to the miniscope folder, or None if not found.
     """
 
-    minian_folder_path = data_dir_path / "Ca_EEG_Calcium" / subject_id / session_id / "minian"
+    minian_folder_path = data_dir_path / "Ca_EEG_Calcium" / subject_id / f"{subject_id}_{session_id}" / "minian"
     if not minian_folder_path.is_dir():
         warnings.warn(f"{minian_folder_path} not found.")
         return None

diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_session.py
@@ -21,6 +21,7 @@ def session_to_nwb(
     time_str: str,
     session_description: str,
     stub_test: bool = False,
+    overwrite: bool = False,
     verbose: bool = False,
     experiment_dir_path: Union[str, Path] = None,
     imaging_folder_path: Union[str, Path] = None,
@@ -110,15 +111,15 @@ def session_to_nwb(
     """
 
     if verbose:
-        print(f"Converting session {session_id}")
+        print(f"Converting session {session_id} for subject {subject_id}")
         start = time.time()
 
     output_dir_path = Path(output_dir_path)
     if stub_test:
         output_dir_path = output_dir_path / "nwb_stub"
     output_dir_path.mkdir(parents=True, exist_ok=True)
 
-    nwbfile_path = output_dir_path / f"{session_id}.nwb"
+    nwbfile_path = output_dir_path / f"sub-{subject_id}_ses-{session_id}.nwb"
 
     source_data = dict()
     conversion_options = dict()
@@ -183,7 +184,7 @@ def session_to_nwb(
             datetime_str = date_str + " " + time_str
             start_datetime_timestamp = datetime.strptime(datetime_str, "%Y_%m_%d %H_%M_%S")
 
-            txt_file_path = experiment_dir_path / f"{session_id}.txt"
+            txt_file_path = experiment_dir_path / f"{subject_id}_{session_id}.txt"
             assert txt_file_path.is_file(), f"{txt_file_path} does not exist"
 
             session_run_time = get_session_run_time(txt_file_path=txt_file_path)
@@ -242,10 +243,11 @@ def session_to_nwb(
 
     metadata["Subject"]["subject_id"] = subject_id
     metadata["NWBFile"]["session_description"] = session_description
+    metadata["NWBFile"]["session_id"] = session_id
 
     # Run conversion
     converter.run_conversion(
-        metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options, overwrite=True
+        metadata=metadata, nwbfile_path=nwbfile_path, conversion_options=conversion_options, overwrite=overwrite
     )
 
     if verbose:
@@ -262,17 +264,11 @@ def session_to_nwb(
 if __name__ == "__main__":
 
     subject_id = "Ca_EEG3-4"
-    session_type = "OfflineDay1Session16"  #
-    session_id = subject_id + "_" + session_type
-    stub_test = False
+    session_id = "NeutralExposure"
+    stub_test = True
     verbose = True
+    overwrite = True
     yaml_file_path = Path(__file__).parent / "utils/conversion_parameters.yaml"
     conversion_parameter_dict = load_dict_from_file(yaml_file_path)
     session_to_nwb_kwargs_per_session = conversion_parameter_dict[subject_id][session_id]
-    session_to_nwb_kwargs_per_session.update(
-        stub_test=stub_test,
-        verbose=verbose,
-    )
-    session_to_nwb(**session_to_nwb_kwargs_per_session)
-
-    # Alternatively one can get each path separately using the functions in utils and update the session_to_nwb_kwargs_per_session dictionary
+    session_to_nwb(**session_to_nwb_kwargs_per_session, stub_test=stub_test, verbose=verbose, overwrite=overwrite)
diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
@@ -5,7 +5,6 @@
 from pathlib import Path
 from typing import Union
 import re
-import pandas as pd
 from datetime import datetime
 from mne.io import read_raw_edf
 
@@ -41,7 +40,7 @@ def session_to_nwb(
         output_dir_path = output_dir_path / "nwb_stub"
     output_dir_path.mkdir(parents=True, exist_ok=True)
 
-    nwbfile_path = output_dir_path / f"{subject_id}_week_session.nwb"
+    nwbfile_path = output_dir_path / f"sub-{subject_id}_ses-Week.nwb"
 
     source_data = dict()
     conversion_options = dict()
@@ -86,6 +85,7 @@ def session_to_nwb(
     metadata = dict_deep_update(metadata, editable_metadata)
 
     metadata["Subject"]["subject_id"] = subject_id
+    metadata["NWBFile"]["session_id"] = "Week"
 
     edf_reader = read_raw_edf(input_fname=edf_file_paths[0], verbose=verbose)
     session_start_time = edf_reader.info["meas_date"]
@@ -100,10 +100,9 @@ def session_to_nwb(
     # Add columns to TimeIntervals
     nwbfile.add_epoch_column(name="session_ids", description="ID of the session")
 
-    for task, date_str, time_str in zip(
+    for session_id, date_str, time_str in zip(
         session_times_df["Session"], session_times_df["Date"], session_times_df["Time"]
     ):
-        session_id = subject_id + "_" + task
         experiment_dir_path = get_experiment_dir_path(
             subject_id=subject_id, session_id=session_id, data_dir_path=data_dir_path
         )
@@ -136,7 +135,7 @@ def session_to_nwb(
             datetime_str = date_str + " " + time_str
             start_datetime_timestamp = datetime.strptime(datetime_str, "%Y_%m_%d %H_%M_%S")
 
-            txt_file_path = experiment_dir_path / f"{session_id}.txt"
+            txt_file_path = experiment_dir_path / f"{subject_id}_{session_id}.txt"
             session_run_time = get_session_run_time(txt_file_path=txt_file_path)
 
             start_time = (start_datetime_timestamp - session_start_time.replace(tzinfo=None)).total_seconds()