From ee00923ed5e22ace8310aa464b286fdb221c906e Mon Sep 17 00:00:00 2001
From: alessandratrapani <alessandramaria.trapani@gmail.com>
Date: Tue, 19 Nov 2024 19:22:14 +0100
Subject: [PATCH 1/5] add interface for cell registration

---
 .../zaki_2024/interfaces/__init__.py          |  1 +
 .../zaki_2024_cell_registration_interface.py  | 61 +++++++++++++++++++
 .../zaki_2024/utils/cell_registration.py      | 44 -------------
 .../zaki_2024_convert_week_session.py         | 17 +++++-
 .../zaki_2024/zaki_2024_nwbconverter.py       | 46 ++------------
 5 files changed, 81 insertions(+), 88 deletions(-)
 create mode 100644 src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py
 delete mode 100644 src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py

diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py
index 9ea686e..4babfbc 100644
--- a/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py
+++ b/src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py
@@ -4,3 +4,4 @@
 from .zaki_2024_sleep_classification_interface import Zaki2024SleepClassificationInterface
 from .miniscope_imaging_interface import MiniscopeImagingInterface
 from .zaki_2024_shock_stimuli_interface import Zaki2024ShockStimuliInterface
+from .zaki_2024_cell_registration_interface import Zaki2024CellRegistrationInterface
diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py
new file mode 100644
index 0000000..0c91747
--- /dev/null
+++ b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py
@@ -0,0 +1,61 @@
+"""Primary class for converting experiment-specific cell registration output."""
+
+from neuroconv.basedatainterface import BaseDataInterface
+from neuroconv.utils import DeepDict
+from typing import Optional
+from pathlib import Path
+from pynwb import NWBFile
+from hdmf.common.table import DynamicTable, VectorData
+
+import pandas as pd
+
+
+class Zaki2024CellRegistrationInterface(BaseDataInterface):
+    """Adds a table to store the output of CellReg."""
+
+    keywords = ["cross sessions cell registration"]
+
+    def __init__(self, file_paths: list[Path], verbose: bool = False):
+        # Set attributes after the base initialiser, which may assign self.verbose itself (TODO confirm).
+        super().__init__(file_paths=file_paths)
+        self.verbose = verbose
+        self.file_paths = file_paths
+
+    def get_metadata(self) -> DeepDict:
+        # Nothing registration-specific is added: the parent class's metadata is returned unchanged.
+        metadata = super().get_metadata()
+
+        return metadata
+
+    def add_to_nwbfile(
+        self,
+        nwbfile: NWBFile,
+        subject_id: str,
+        stub_test: bool = False,
+        metadata: Optional[dict] = None,
+    ):
+        processing_module = nwbfile.create_processing_module(
+            name="cell_registration", description="Processing module for cross session cell registration."
+        )
+
+        for file_path in self.file_paths:
+            offline_session_name = Path(file_path).stem.split(f"{subject_id}_")[-1]
+            name = offline_session_name + "vsConditioningSessions"
+            data = pd.read_csv(file_path)
+
+            columns = [
+                VectorData(
+                    name=col,
+                    description=f"ROI indexes from segmentation of session {col} imaging data",
+                    data=data[col].tolist()[:100] if stub_test else data[col].tolist(),
+                )
+                for col in data.columns
+            ]
+
+            dynamic_table = DynamicTable(
+                name=name,
+                description=f"Table storing data from cross sessions cell registration: all conditioning sessions are registered with respect to {offline_session_name} ",
+                columns=columns,
+            )
+
+            processing_module.add(dynamic_table)
diff --git a/src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py b/src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py
deleted file mode 100644
index 76b1406..0000000
--- a/src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py
+++ /dev/null
@@ -1,44 +0,0 @@
-from neuroconv.tools import get_module
-from pynwb import NWBFile
-import pandas as pd
-from roiextractors.extraction_tools import PathType
-
-
-def add_cell_registration(
-    nwbfile: NWBFile,
-    global_roi_ids: list,
-    plane_segmentation_name: str,
-) -> None:
-    """Add cell registration data to the NWBFile.
-
-    The global roi ids for the segmentation data (identified by 'plane_segmentation_name' are added to the NWBFile as
-    an extra column of the PlaneSegmentation table).
-
-    Parameters
-    ----------
-    nwbfile: NWBFile
-        The NWBFile where the motion correction time series will be added to.
-    global_roi_ids: list
-        global roi ids for the segmentation data.
-    plane_segmentation_name: str
-        The name of the plane segmentation table in the NWBFile.
-    """
-    ophys = get_module(nwbfile, "ophys")
-    assert (
-        plane_segmentation_name in ophys["ImageSegmentation"].plane_segmentations.keys()
-    ), f"The plane segmentation '{plane_segmentation_name}' does not exist in the NWBFile."
-
-    plane_segmentation = ophys["ImageSegmentation"][plane_segmentation_name]
-    plane_segmentation.add_column(
-        name="global_ids",
-        description="list of global ids of identified cells registered cross sessions",
-        data=global_roi_ids,
-    )
-
-
-def get_global_ids_from_csv(
-    file_path: PathType,
-    session_id: str,
-):
-    df = pd.read_csv(file_path)
-    # TODO discuss with Joe how to identify global ids
diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
index 690c16c..50a0b30 100644
--- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
+++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
@@ -4,9 +4,7 @@
 from natsort import natsorted
 from pathlib import Path
 from typing import Union
-from datetime import datetime, timedelta
-import pandas as pd
-import json
+import os
 from neuroconv.utils import load_dict_from_file, dict_deep_update
 
 from zaki_2024_nwbconverter import Zaki2024NWBConverter
@@ -49,6 +47,19 @@ def session_to_nwb(
     else:
         print(f"No .edf file found in {edf_folder_path}")
 
+    # Add Cross session cell registration
+    main_folder = data_dir_path / f"/Ca_EEG_Calcium/{subject_id}/SpatialFootprints"
+    file_paths = []
+    for folder in os.listdir(main_folder):
+        folder_path = os.path.join(main_folder, folder)
+        if os.path.isdir(folder_path):  # Ensure it's a directory
+            filename = folder.split("_")[0] + f"_{subject_id}_" + folder.split("_")[-1]
+            csv_file = os.path.join(folder_path, f"{filename}.csv")
+            if os.path.isfile(csv_file):  # Check if the file exists
+                file_paths.append(csv_file)
+    source_data.update(dict(CellRegistration=dict(file_paths=file_paths)))
+    conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id)))
+
     converter = Zaki2024NWBConverter(source_data=source_data)
 
     # Add datetime to conversion
diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
index 8ef2c16..7b68517 100644
--- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
+++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
@@ -1,11 +1,10 @@
 """Primary NWBConverter class for this dataset."""
 
-from copy import deepcopy
-
 from neuroconv import NWBConverter
-from neuroconv.utils import DeepDict
+from pynwb import NWBFile
 from neuroconv.datainterfaces import VideoInterface
-from typing import Dict
+from typing import Optional
+from pathlib import Path
 
 from interfaces import (
     MinianSegmentationInterface,
@@ -16,6 +15,7 @@
     MiniscopeImagingInterface,
     MinianMotionCorrectionInterface,
     Zaki2024ShockStimuliInterface,
+    Zaki2024CellRegistrationInterface,
 )
 
 
@@ -32,41 +32,5 @@ class Zaki2024NWBConverter(NWBConverter):
         FreezingBehavior=EzTrackFreezingBehaviorInterface,
         Video=VideoInterface,
         ShockStimuli=Zaki2024ShockStimuliInterface,
+        CellRegistration=Zaki2024CellRegistrationInterface,
     )
-
-
-"""
-    # TODO decide which datastream set the session start time
-    def get_metadata(self) -> DeepDict:
-        if "" not in self.data_interface_objects:
-            return super().get_metadata()
-
-        # Explicitly set session_start_time to ... start time
-        metadata = super().get_metadata()
-        session_start_time = self.data_interface_objects[""]
-        metadata["NWBFile"]["session_start_time"] = session_start_time
-
-        return metadata
-    
-    # TODO Add cell global_ids
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata, conversion_options: Optional[dict] = None) -> None:
-        super().add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, conversion_options=conversion_options)
-
-        if "MinianSegmentation" in self.data_interface_objects:
-            global_roi_ids = get_global_ids_from_csv()
-            add_cell_registration(
-                nwbfile=nwbfile,
-                global_roi_ids=global_roi_ids,
-                plane_segmentation_name="PlaneSegmentation",
-            )
-
-    # TODO discuss time alignment with author
-    def temporally_align_data_interfaces(self):
-        aligned_starting_time = 0
-        if "MiniscopeImaging" in self.data_interface_classes:
-            miniscope_interface = self.data_interface_classes["MiniscopeImaging"]
-            miniscope_interface.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
-        if "MinianSegmentation" in self.data_interface_classes:
-            minian_interface = self.data_interface_classes["MinianSegmentation"]
-            minian_interface.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
-"""

From a41037bc780787983c1831730d5b3254d71204e0 Mon Sep 17 00:00:00 2001
From: alessandratrapani <alessandramaria.trapani@gmail.com>
Date: Mon, 25 Nov 2024 15:11:48 +0100
Subject: [PATCH 2/5] remove unnecessary imports

---
 src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
index 7b68517..90c5712 100644
--- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
+++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
@@ -1,10 +1,7 @@
 """Primary NWBConverter class for this dataset."""
 
 from neuroconv import NWBConverter
-from pynwb import NWBFile
 from neuroconv.datainterfaces import VideoInterface
-from typing import Optional
-from pathlib import Path
 
 from interfaces import (
     MinianSegmentationInterface,

From 568951d77c13bf4a551e36640d9c50f7315f3b0c Mon Sep 17 00:00:00 2001
From: alessandratrapani <alessandramaria.trapani@gmail.com>
Date: Tue, 26 Nov 2024 10:45:40 +0100
Subject: [PATCH 3/5] replace os with pathlib

---
 .../zaki_2024/zaki_2024_convert_week_session.py  | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
index f3f1575..3be1bee 100644
--- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
+++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
@@ -4,7 +4,6 @@
 from natsort import natsorted
 from pathlib import Path
 from typing import Union
-import os
 from neuroconv.utils import load_dict_from_file, dict_deep_update
 
 from zaki_2024_nwbconverter import Zaki2024NWBConverter
@@ -48,15 +47,16 @@ def session_to_nwb(
     conversion_options.update(dict(MultiEDFSignals=dict(stub_test=stub_test)))
 
     # Add Cross session cell registration
-    main_folder = data_dir_path / f"/Ca_EEG_Calcium/{subject_id}/SpatialFootprints"
+    main_folder = data_dir_path / f"Ca_EEG_Calcium/{subject_id}/SpatialFootprints"
     file_paths = []
-    for folder in os.listdir(main_folder):
-        folder_path = os.path.join(main_folder, folder)
-        if os.path.isdir(folder_path):  # Ensure it's a directory
-            filename = folder.split("_")[0] + f"_{subject_id}_" + folder.split("_")[-1]
-            csv_file = os.path.join(folder_path, f"{filename}.csv")
-            if os.path.isfile(csv_file):  # Check if the file exists
+
+    for folder in main_folder.iterdir():
+        if folder.is_dir():  # Ensure it's a directory
+            filename = folder.name.split("_")[0] + f"_{subject_id}_" + folder.name.split("_")[-1]
+            csv_file = folder / f"{filename}.csv"
+            if csv_file.is_file():  # Check if the file exists
                 file_paths.append(csv_file)
+
     source_data.update(dict(CellRegistration=dict(file_paths=file_paths)))
     conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id)))
 

From 6176097ebebe2e6892ef345fc91958c79615575e Mon Sep 17 00:00:00 2001
From: alessandratrapani <alessandramaria.trapani@gmail.com>
Date: Tue, 26 Nov 2024 11:13:54 +0100
Subject: [PATCH 4/5] add better description for module and tables

---
 .../zaki_2024_cell_registration_interface.py    | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py
index 0c91747..f018ed1 100644
--- a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py
+++ b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_cell_registration_interface.py
@@ -35,7 +35,16 @@ def add_to_nwbfile(
         metadata: Optional[dict] = None,
     ):
         processing_module = nwbfile.create_processing_module(
-            name="cell_registration", description="Processing module for cross session cell registration."
+            name="cell_registration",
+            description="Processing module for cross session cell registration. "
+            "Cells recorded across sessions were cross-registered using a previously published open-source "
+            "cross-registration algorithm, CellReg, using the spatial correlations of nearby cells to "
+            "determine whether highly correlated footprints close in space are likely to be the same cell across sessions. "
+            "Each offline recording was cross-registered with all the encoding and recall sessions, "
+            "but not with the other offline sessions because cross-registering between all sessions would lead to too many conflicts and, "
+            "therefore, to no cells cross-registered across all sessions. "
+            "Each table represents the output of the cross-registration between one offline session and all the encoding and recall sessions. "
+            "A table maps the global ROI ids (row of the table) to the ROI ids in each cross-registered session's plane segmentation.",
         )
 
         for file_path in self.file_paths:
@@ -46,7 +55,7 @@ def add_to_nwbfile(
             columns = [
                 VectorData(
                     name=col,
-                    description=f"ROI indexes from segmentation of session {col} imaging data",
+                    description=f"ROI indexes of plane segmentation of session {col}",
                     data=data[col].tolist()[:100] if stub_test else data[col].tolist(),
                 )
                 for col in data.columns
@@ -54,7 +63,9 @@ def add_to_nwbfile(
 
             dynamic_table = DynamicTable(
                 name=name,
-                description=f"Table storing data from cross sessions cell registration: all conditioning sessions are registered with respect to {offline_session_name} ",
+                description="Table maps the global ROI ids (row of the table) to the ROI ids in each cross-registered session's plane segmentation. "
+                "The column names refer to the cross-registered session's ids. "
+                "The value -9999 indicates no correspondence.",
                 columns=columns,
             )
 

From c8b037dccfb1612622d9e5488c2e9c6d59758a8e Mon Sep 17 00:00:00 2001
From: alessandratrapani <alessandramaria.trapani@gmail.com>
Date: Thu, 28 Nov 2024 14:50:36 +0100
Subject: [PATCH 5/5] add checks for filepaths

---
 .../zaki_2024/zaki_2024_convert_week_session.py | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
index 3be1bee..77a4f22 100644
--- a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
+++ b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
@@ -3,7 +3,9 @@
 import time
 from natsort import natsorted
 from pathlib import Path
+import warnings
 from typing import Union
+import re
 from neuroconv.utils import load_dict_from_file, dict_deep_update
 
 from zaki_2024_nwbconverter import Zaki2024NWBConverter
@@ -48,14 +50,17 @@ def session_to_nwb(
 
     # Add Cross session cell registration
     main_folder = data_dir_path / f"Ca_EEG_Calcium/{subject_id}/SpatialFootprints"
-    file_paths = []
+    pattern = re.compile(r"^CellRegResults_OfflineDay(\d+)Session(\d+)$")
 
+    file_paths = []
     for folder in main_folder.iterdir():
-        if folder.is_dir():  # Ensure it's a directory
-            filename = folder.name.split("_")[0] + f"_{subject_id}_" + folder.name.split("_")[-1]
-            csv_file = folder / f"{filename}.csv"
-            if csv_file.is_file():  # Check if the file exists
-                file_paths.append(csv_file)
+        match = pattern.match(folder.name)
+        if folder.is_dir() and match:
+            offline_day, session_number = match.groups()
+            csv_file = folder / f"CellRegResults_{subject_id}_OfflineDay{offline_day}Session{session_number}.csv"
+            if not csv_file.is_file():  # explicit raise: an assert would be skipped under `python -O`
+                raise FileNotFoundError(f"Expected file not found: {csv_file}")
+            file_paths.append(csv_file)
 
     source_data.update(dict(CellRegistration=dict(file_paths=file_paths)))
     conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id)))