Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add cell registration #19

Merged
merged 6 commits into from
Nov 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/cai_lab_to_nwb/zaki_2024/interfaces/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,3 +4,4 @@
from .zaki_2024_sleep_classification_interface import Zaki2024SleepClassificationInterface
from .miniscope_imaging_interface import MiniscopeImagingInterface
from .zaki_2024_shock_stimuli_interface import Zaki2024ShockStimuliInterface
from .zaki_2024_cell_registration_interface import Zaki2024CellRegistrationInterface
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
"""Primary class for converting experiment-specific cell registration output."""

from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.utils import DeepDict
from typing import Optional
from pathlib import Path
from pynwb import NWBFile
from hdmf.common.table import DynamicTable, VectorData

import pandas as pd


class Zaki2024CellRegistrationInterface(BaseDataInterface):
    """Add the output of CellReg (cross-session cell registration) to an NWB file.

    Each CSV file in ``file_paths`` is stored as one ``DynamicTable`` inside a
    ``cell_registration`` processing module. Every CSV column becomes a table
    column holding ROI indexes for the session named by the column header.
    """

    keywords = ["cross sessions cell registration"]

    def __init__(self, file_paths: list[Path], verbose: bool = False):
        """Store the CellReg CSV paths to convert.

        Parameters
        ----------
        file_paths : list of Path
            Paths to the CellReg result CSV files, one per offline session.
        verbose : bool, default: False
            Verbosity flag kept on the instance.
        """
        super().__init__(file_paths=file_paths)
        # Assigned after the base initializer so that it cannot be overwritten
        # by whatever default the base class may set for these attributes.
        self.verbose = verbose
        self.file_paths = file_paths

    def get_metadata(self) -> DeepDict:
        """Return the metadata provided by the base interface, unchanged."""
        metadata = super().get_metadata()
        return metadata

    def add_to_nwbfile(
        self,
        nwbfile: NWBFile,
        subject_id: str,
        stub_test: bool = False,
        metadata: Optional[dict] = None,
    ):
        """Create the ``cell_registration`` module and add one table per CSV file.

        Parameters
        ----------
        nwbfile : NWBFile
            The in-memory NWB file to which the processing module is added.
        subject_id : str
            Subject identifier; the part of each file stem following
            ``"{subject_id}_"`` is used to build the table name.
        stub_test : bool, default: False
            If True, only the first 100 rows of each CSV are written.
        metadata : dict, optional
            Accepted for interface compatibility; not used here.
        """
        # NOTE: adjacent string literals are concatenated verbatim, so every
        # fragment must carry its own trailing space.
        processing_module = nwbfile.create_processing_module(
            name="cell_registration",
            description="Processing module for cross session cell registration. "
            "Cells recorded across sessions were cross-registered using a previously published open-source "
            "cross-registration algorithm, CellReg, using the spatial correlations of nearby cells to "
            "determine whether highly correlated footprints close in space are likely to be the same cell across sessions. "
            "Each offline recording was cross-registered with all the encoding and recall sessions, "
            "but not with the other offline sessions because cross-registering between all sessions would lead to too many conflicts and, "
            "therefore, to no cells cross-registered across all sessions. "
            "Each table represents the output of the cross-registration between one offline sessions and all the encoding and recall sessions. "
            "A table maps the global ROI ids (row of the table) to the ROI ids in each of cross-registered session's plane segmentation.",
        )

        table_description = (
            "Table maps the global ROI ids (row of the table) to the ROI ids in each of cross-registered session's plane segmentation. "
            "The column names refer to the cross-registered session's ids. "
            "The values -9999 indicates no correspondence."
        )

        for file_path in self.file_paths:
            # Keep whatever follows "<subject_id>_" in the file stem as the session label.
            offline_session_name = Path(file_path).stem.split(f"{subject_id}_")[-1]
            name = offline_session_name + "vsConditioningSessions"

            data = pd.read_csv(file_path)
            if stub_test:
                # Truncate once here rather than slicing each column's full list.
                data = data.iloc[:100]

            columns = [
                VectorData(
                    name=col,
                    description=f"ROI indexes of plane segmentation of session {col}",
                    data=data[col].tolist(),
                )
                for col in data.columns
            ]

            dynamic_table = DynamicTable(
                name=name,
                description=table_description,
                columns=columns,
            )

            processing_module.add(dynamic_table)
44 changes: 0 additions & 44 deletions src/cai_lab_to_nwb/zaki_2024/utils/cell_registration.py

This file was deleted.

22 changes: 19 additions & 3 deletions src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_week_session.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,9 @@
import time
from natsort import natsorted
from pathlib import Path
import warnings
from typing import Union
from datetime import datetime, timedelta
import pandas as pd
import json
import re
from neuroconv.utils import load_dict_from_file, dict_deep_update

from zaki_2024_nwbconverter import Zaki2024NWBConverter
Expand Down Expand Up @@ -49,6 +48,23 @@ def session_to_nwb(
)
conversion_options.update(dict(MultiEDFSignals=dict(stub_test=stub_test)))

# Add Cross session cell registration
main_folder = data_dir_path / f"Ca_EEG_Calcium/{subject_id}/SpatialFootprints"
pattern = re.compile(r"^CellRegResults_OfflineDay(\d+)Session(\d+)$")

file_paths = []
for folder in main_folder.iterdir():
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What do you think about raising an error when the expected folder paths or file paths are not found? Right now they are skipped silently, but if the point of this interface is that these files should be added, it may be better to stop the process in its tracks and raise an informative error so the user is aware of the problem.

I am not sure, maybe you have a good reason to skip instead of stopping.

Copy link
Collaborator Author

@alessandratrapani alessandratrapani Nov 28, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right! I added a few lines to throw an error only when the expected file is missing from a folder that matches the pattern. The folder.is_dir() "check" just selects the folders among all the Path objects that iterdir() might iterate over (both folders and files), so there is no need to throw an error if an object is not a directory.

match = pattern.match(folder.name)
if folder.is_dir() and match:
offline_day, session_number = match.groups()
filename = f"CellRegResults_{subject_id}_OfflineDay{offline_day}Session{session_number}.csv"
csv_file = folder / filename
assert csv_file.is_file(), f"Expected file not found: {csv_file}"
file_paths.append(csv_file)

source_data.update(dict(CellRegistration=dict(file_paths=file_paths)))
conversion_options.update(dict(CellRegistration=dict(stub_test=stub_test, subject_id=subject_id)))

converter = Zaki2024NWBConverter(source_data=source_data)

# Add datetime to conversion
Expand Down
43 changes: 2 additions & 41 deletions src/cai_lab_to_nwb/zaki_2024/zaki_2024_nwbconverter.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,7 @@
"""Primary NWBConverter class for this dataset."""

from copy import deepcopy

from neuroconv import NWBConverter
from neuroconv.utils import DeepDict
from neuroconv.datainterfaces import VideoInterface
from typing import Dict

from interfaces import (
MinianSegmentationInterface,
Expand All @@ -16,6 +12,7 @@
MiniscopeImagingInterface,
MinianMotionCorrectionInterface,
Zaki2024ShockStimuliInterface,
Zaki2024CellRegistrationInterface,
)


Expand All @@ -32,41 +29,5 @@ class Zaki2024NWBConverter(NWBConverter):
FreezingBehavior=EzTrackFreezingBehaviorInterface,
Video=VideoInterface,
ShockStimuli=Zaki2024ShockStimuliInterface,
CellRegistration=Zaki2024CellRegistrationInterface,
)


"""
# TODO decide which datastream set the session start time
def get_metadata(self) -> DeepDict:
if "" not in self.data_interface_objects:
return super().get_metadata()

# Explicitly set session_start_time to ... start time
metadata = super().get_metadata()
session_start_time = self.data_interface_objects[""]
metadata["NWBFile"]["session_start_time"] = session_start_time

return metadata

# TODO Add cell global_ids
def add_to_nwbfile(self, nwbfile: NWBFile, metadata, conversion_options: Optional[dict] = None) -> None:
super().add_to_nwbfile(nwbfile=nwbfile, metadata=metadata, conversion_options=conversion_options)

if "MinianSegmentation" in self.data_interface_objects:
global_roi_ids = get_global_ids_from_csv()
add_cell_registration(
nwbfile=nwbfile,
global_roi_ids=global_roi_ids,
plane_segmentation_name="PlaneSegmentation",
)

# TODO discuss time alignment with author
def temporally_align_data_interfaces(self):
aligned_starting_time = 0
if "MiniscopeImaging" in self.data_interface_classes:
miniscope_interface = self.data_interface_classes["MiniscopeImaging"]
miniscope_interface.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
if "MinianSegmentation" in self.data_interface_classes:
minian_interface = self.data_interface_classes["MinianSegmentation"]
minian_interface.set_aligned_starting_time(aligned_starting_time=aligned_starting_time)
"""