From ee00923ed5e22ace8310aa464b286fdb221c906e Mon Sep 17 00:00:00 2001 From: alessandratrapani Date: Tue, 19 Nov 2024 19:22:14 +0100 Subject: [PATCH 1/5] add interface for cell registration --- .../zaki_2024/interfaces/__init__.py | 1 + .../zaki_2024_cell_registration_interface.py | 61 +++++++++++++++++++ .../zaki_2024/utils/cell_registration.py | 44 ------------- .../zaki_2024_convert_week_session.py | 17 +++++- .../zaki_2024/zaki_2024_nwbconverter.py | 46 ++------------ 5 files changed, 81 insertions(+), 88 deletions(-) create mode 100644 src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py delete mode 100644 src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py index 9ea686e..4babfbc 100644 --- a/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py +++ b/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py @@ -4,3 +4,4 @@ from .zaki_2024_sleep_classification_interface import Zaki2024SleepClassificationInterface from .miniscope_imaging_interface import MiniscopeImagingInterface from .zaki_2024_shock_stimuli_interface import Zaki2024ShockStimuliInterface +from .zaki_2024_cell_registration_interface import Zaki2024CellRegistrationInterface diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py new file mode 100644 index 0000000..0c91747 --- /dev/null +++ b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py @@ -0,0 +1,61 @@ +"""Primary class for converting experiment-specific cell registration output.""" + +from neuroconv.basedatainterface import BaseDataInterface +from neuroconv.utils import DeepDict +from typing import Optional +from pathlib import Path +from pynwb import NWBFile +from hdmf.common.table import DynamicTable, VectorData + +import pandas as pd + + +class Zaki2024CellRegistrationInterface(BaseDataInterface): + """Adds a table to store the output of CellReg.""" + + keywords = ["cross sessions cell registration"] + + def __init__(self, file_paths: list[Path], verbose: bool = False): + + self.verbose = verbose + self.file_paths = file_paths + super().__init__(file_paths=file_paths) + + def get_metadata(self) -> DeepDict: + # Automatically retrieve as much metadata as possible from the source files available + metadata = super().get_metadata() + + return metadata + + def add_to_nwbfile( + self, + nwbfile: NWBFile, + subject_id: str, + stub_test: bool = False, + metadata: Optional[dict] = None, + ): + processing_module = nwbfile.create_processing_module( + name="cell_registration", description="Processing module for cross session cell registration." + ) + + for file_path in self.file_paths: + offline_session_name = Path(file_path).stem.split(f"{subject_id}_")[-1] + name = offline_session_name + "vsConditioningSessions" + data = pd.read_csv(file_path) + + columns = [ + VectorData( + name=col, + description=f"ROI indexes from segmentation of session {col} imaging data", + data=data[col].tolist()[:100] if stub_test else data[col].tolist(), + ) + for col in data.columns + ] + + dynamic_table = DynamicTable( + name=name, + description=f"Table storing data from cross sessions cell registration: all conditioning sessions are registered with respect to {offline_session_name} ", + columns=columns, + ) + + processing_module.add(dynamic_table) diff --git a/src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py b/src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py deleted file mode 100644 index 76b1406..0000000 --- a/src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py +++ /dev/null @@ -1,44 +0,0 @@ -from neuroconv.tools import get_module -from pynwb import NWBFile -import pandas as pd -from roiextractors.extraction_tools import PathType - - -def add_cell_registration( - nwbfile: NWBFile, - global_roi_ids: list, - plane_segmentation_name: str, -) -> None: - """Add cell registration data to the NWBFile. - - The global roi ids for the segmentation data (identified by 'plane_segmentation_name' are added to the NWBFile as - an extra column of the PlaneSegmentation table). - - Parameters - ---------- - nwbfile: NWBFile - The NWBFile where the motion correction time series will be added to. - global_roi_ids: list - global roi ids for the segmentation data. - plane_segmentation_name: str - The name of the plane segmentation table in the NWBFile. - """ - ophys = get_module(nwbfile, "ophys") - assert ( - plane_segmentation_name in ophys["ImageSegmentation"].plane_segmentations.keys() - ), f"The plane segmentation '{plane_segmentation_name}' does not exist in the NWBFile." - - plane_segmentation = ophys["ImageSegmentation"][plane_segmentation_name] - plane_segmentation.add_column( - name="global_ids", - description="list of global ids of identified cells registered cross sessions", - data=global_roi_ids, - ) - - -def get_global_ids_from_csv( - file_path: PathType, - session_id: str, -): - df = pd.read_csv(file_path) - # TODO discuss with Joe how to identify global ids diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py index 690c16c..50a0b30 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py @@ -4,9 +4,7 @@ from natsort import natsorted from pathlib import Path from typing import Union -from datetime import datetime, timedelta -import pandas as pd -import json +import os from neuroconv.utils import load_dict_from_file, dict_deep_update from zaki_2024_nwbconverter import Zaki2024NWBConverter @@ -49,6 +47,19 @@ def session_to_nwb( else: print(f"No .edf file found in {edf_folder_path}") + # Add Cross session cell registration + main_folder = data_dir_path / f"/Ca_EEG_Calcium/{subject_id}/SpatialFootprints" + file_paths = [] + for folder in os.listdir(main_folder): + folder_path = os.path.join(main_folder, folder) + if os.path.isdir(folder_path): # Ensure it's a directory + filename = folder.split("_")[0] + f"_{subject_id}_" + folder.split("_")[-1] + csv_file = os.path.join(folder_path, f"{filename}.csv") + if os.path.isfile(csv_file): # Check if the file exists + file_paths.append(csv_file) + source_data.update(dict(CellRegistration=dict(file_paths=file_paths))) + conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id))) + converter = Zaki2024NWBConverter(source_data=source_data) # Add datetime to conversion diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py index 8ef2c16..7b68517 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py @@ -1,11 +1,10 @@ """Primary NWBConverter class for this dataset.""" -from copy import deepcopy - from neuroconv import NWBConverter -from neuroconv.utils import DeepDict +from pynwb import NWBFile from neuroconv.datainterfaces import VideoInterface -from typing import Dict +from typing import Optional +from pathlib import Path from interfaces import ( MinianSegmentationInterface, @@ -16,6 +15,7 @@ MiniscopeImagingInterface, MinianMotionCorrectionInterface, Zaki2024ShockStimuliInterface, + Zaki2024CellRegistrationInterface, ) @@ -32,41 +32,5 @@ class Zaki2024NWBConverter(NWBConverter): FreezingBehavior=EzTrackFreezingBehaviorInterface, Video=VideoInterface, ShockStimuli=Zaki2024ShockStimuliInterface, + CellRegistration=Zaki2024CellRegistrationInterface, ) - - -""" - # TODO decide which datastream set the session start time - def get_metadata(self) -> DeepDict: - if "" not in self.data_interface_objects: - return super().get_metadata() - - # Explicitly set session_start_time to ... start time - metadata = super().get_metadata() - session_start_time = self.data_interface_objects[""] - metadata["NWBFile"]["session_start_time"] = session_start_time - - return metadata - - # TODO Add cell global_ids - def add_to_nwbfile(self, nwbfile: NWBFile, metadata, conversion_options: Optional[dict] = None) -> None: - super().add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, conversion_options=conversion_options) - - if "MinianSegmentation" in self.data_interface_objects: - global_roi_ids = get_global_ids_from_csv() - add_cell_registration( - nwbfile=nwbfile, - global_roi_ids=global_roi_ids, - plane_segmentation_name="PlaneSegmentation", - ) - - # TODO discuss time alignment with author - def temporally_align_data_interfaces(self): - aligned_starting_time = 0 - if "MiniscopeImaging" in self.data_interface_classes: - miniscope_interface = self.data_interface_classes["MiniscopeImaging"] - miniscope_interface.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) - if "MinianSegmentation" in self.data_interface_classes: - minian_interface = self.data_interface_classes["MinianSegmentation"] - minian_interface.set_aligned_starting_time(aligned_starting_time=aligned_starting_time) -""" From a41037bc780787983c1831730d5b3254d71204e0 Mon Sep 17 00:00:00 2001 From: alessandratrapani Date: Mon, 25 Nov 2024 15:11:48 +0100 Subject: [PATCH 2/5] remove unnecessary imports --- src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py index 7b68517..90c5712 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py @@ -1,10 +1,7 @@ """Primary NWBConverter class for this dataset.""" from neuroconv import NWBConverter -from pynwb import NWBFile from neuroconv.datainterfaces import VideoInterface -from typing import Optional -from pathlib import Path from interfaces import ( MinianSegmentationInterface, From 568951d77c13bf4a551e36640d9c50f7315f3b0c Mon Sep 17 00:00:00 2001 From: alessandratrapani Date: Tue, 26 Nov 2024 10:45:40 +0100 Subject: [PATCH 3/5] replace os with pathlib --- .../zaki_2024/zaki_2024_convert_week_session.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py index f3f1575..3be1bee 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py @@ -4,7 +4,6 @@ from natsort import natsorted from pathlib import Path from typing import Union -import os from neuroconv.utils import load_dict_from_file, dict_deep_update from zaki_2024_nwbconverter import Zaki2024NWBConverter @@ -48,15 +47,16 @@ def session_to_nwb( conversion_options.update(dict(MultiEDFSignals=dict(stub_test=stub_test))) # Add Cross session cell registration - main_folder = data_dir_path / f"/Ca_EEG_Calcium/{subject_id}/SpatialFootprints" + main_folder = data_dir_path / f"Ca_EEG_Calcium/{subject_id}/SpatialFootprints" file_paths = [] - for folder in os.listdir(main_folder): - folder_path = os.path.join(main_folder, folder) - if os.path.isdir(folder_path): # Ensure it's a directory - filename = folder.split("_")[0] + f"_{subject_id}_" + folder.split("_")[-1] - csv_file = os.path.join(folder_path, f"{filename}.csv") - if os.path.isfile(csv_file): # Check if the file exists + + for folder in main_folder.iterdir(): + if folder.is_dir(): # Ensure it's a directory + filename = folder.name.split("_")[0] + f"_{subject_id}_" + folder.name.split("_")[-1] + csv_file = folder / f"{filename}.csv" + if csv_file.is_file(): # Check if the file exists file_paths.append(csv_file) + source_data.update(dict(CellRegistration=dict(file_paths=file_paths))) conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id))) From 6176097ebebe2e6892ef345fc91958c79615575e Mon Sep 17 00:00:00 2001 From: alessandratrapani Date: Tue, 26 Nov 2024 11:13:54 +0100 Subject: [PATCH 4/5] add better description for module and tables --- .../zaki_2024_cell_registration_interface.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py index 0c91747..f018ed1 100644 --- a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py +++ b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py @@ -35,7 +35,16 @@ def add_to_nwbfile( metadata: Optional[dict] = None, ): processing_module = nwbfile.create_processing_module( - name="cell_registration", description="Processing module for cross session cell registration." + name="cell_registration", + description="Processing module for cross session cell registration. " + "Cells recorded across sessions were cross-registered using a previously published open-source " + "cross-registration algorithm, CellReg, using the spatial correlations of nearby cells to " + "determine whether highly correlated footprints close in space are likely to be the same cell across sessions." + "Each offline recording was cross-registered with all the encoding and recall sessions, " + "but not with the other offline sessions because cross-registering between all sessions would lead to too many conflicts and, " + "therefore, to no cells cross-registered across all sessions." + "Each table represents the output of the cross-registration between one offline sessions and all the encoding and recall sessions. " + "A table maps the global ROI ids (row of the table) to the ROI ids in each of cross-registered session's plane segmentation.", ) for file_path in self.file_paths: @@ -46,7 +55,7 @@ def add_to_nwbfile( columns = [ VectorData( name=col, - description=f"ROI indexes from segmentation of session {col} imaging data", + description=f"ROI indexes of plane segmentation of session {col}", data=data[col].tolist()[:100] if stub_test else data[col].tolist(), ) for col in data.columns @@ -54,7 +63,9 @@ def add_to_nwbfile( dynamic_table = DynamicTable( name=name, - description=f"Table storing data from cross sessions cell registration: all conditioning sessions are registered with respect to {offline_session_name} ", + description="Table maps the global ROI ids (row of the table) to the ROI ids in each of cross-registered session's plane segmentation." + "The column names refer to the cross-registered session's ids" + "The values -9999 indicates no correspondence. ", columns=columns, ) From c8b037dccfb1612622d9e5488c2e9c6d59758a8e Mon Sep 17 00:00:00 2001 From: alessandratrapani Date: Thu, 28 Nov 2024 14:50:36 +0100 Subject: [PATCH 5/5] add checks for filepaths --- .../zaki_2024/zaki_2024_convert_week_session.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py index 3be1bee..77a4f22 100644 --- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py +++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py @@ -3,7 +3,9 @@ import time from natsort import natsorted from pathlib import Path +import warnings from typing import Union +import re from neuroconv.utils import load_dict_from_file, dict_deep_update from zaki_2024_nwbconverter import Zaki2024NWBConverter @@ -48,14 +50,17 @@ def session_to_nwb( # Add Cross session cell registration main_folder = data_dir_path / f"Ca_EEG_Calcium/{subject_id}/SpatialFootprints" - file_paths = [] + pattern = re.compile(r"^CellRegResults_OfflineDay(\d+)Session(\d+)$") + file_paths = [] for folder in main_folder.iterdir(): - if folder.is_dir(): # Ensure it's a directory - filename = folder.name.split("_")[0] + f"_{subject_id}_" + folder.name.split("_")[-1] - csv_file = folder / f"{filename}.csv" - if csv_file.is_file(): # Check if the file exists - file_paths.append(csv_file) + match = pattern.match(folder.name) + if folder.is_dir() and match: + offline_day, session_number = match.groups() + filename = f"CellRegResults_{subject_id}_OfflineDay{offline_day}Session{session_number}.csv" + csv_file = folder / filename + assert csv_file.is_file(), f"Expected file not found: {csv_file}" + file_paths.append(csv_file) source_data.update(dict(CellRegistration=dict(file_paths=file_paths))) conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id)))