Merge pull request #23 from catalystneuro/time_alignment

Time alignment
catalystneuro · Dec 10, 2024 · 120a764 · 120a764
2 parents e84014c + bc65d77
commit 120a764
Show file tree

Hide file tree

Showing 8 changed files with 197 additions and 78 deletions.
diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/eztrack_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/eztrack_interface.py
@@ -1,14 +1,16 @@
 """Primary class for converting experiment-specific behavior."""
 
 import numpy as np
+import pandas as pd
+
 from pynwb import TimeSeries
 from pynwb.epoch import TimeIntervals
 from pynwb.file import NWBFile
 
 from neuroconv.basedatainterface import BaseDataInterface
 from neuroconv.utils import DeepDict
 from pydantic import FilePath
-from typing import Optional
+from typing import Optional, List
 
 
 class EzTrackFreezingBehaviorInterface(BaseDataInterface):
@@ -22,29 +24,62 @@ def __init__(self, file_path: FilePath, video_sampling_frequency: float, verbose
         self.file_path = file_path
         self.verbose = verbose
         self.video_sampling_frequency = video_sampling_frequency
+        self._start_times = None
+        self._stop_times = None
+        self._starting_time = None
 
     def get_metadata(self) -> DeepDict:
         # Automatically retrieve as much metadata as possible from the source files available
         metadata = super().get_metadata()
 
         return metadata
 
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
+    def get_interval_times(self):
+        # Extract start and stop times of the freezing events
+        # From the discussion with the author, the freezing events are the frames where the freezing behavior is 100
+        freezing_behavior_df = pd.read_csv(self.file_path)
+        freezing_values = freezing_behavior_df["Freezing"].values
+        changes_in_freezing = np.diff(freezing_values)
+        freezing_start = np.where(changes_in_freezing == 100)[0] + 1
+        freezing_stop = np.where(changes_in_freezing == -100)[0] + 1
 
-        import pandas as pd
+        start_frames = freezing_behavior_df["Frame"].values[freezing_start]
+        stop_frames = freezing_behavior_df["Frame"].values[freezing_stop]
+
+        start_times = (
+            self._start_times if self._start_times is not None else start_frames / self.video_sampling_frequency
+        )
+        stop_times = self._stop_times if self._stop_times is not None else stop_frames / self.video_sampling_frequency
+        return start_times, stop_times
 
+    def get_starting_time(self) -> float:
         freezing_behavior_df = pd.read_csv(self.file_path)
+        return freezing_behavior_df["Frame"].values[0] / self.video_sampling_frequency
+
+    def set_aligned_interval_times(self, start_times: List[float], stop_times: List[float]) -> None:
+        self._start_times = start_times
+        self._stop_times = stop_times
+
+    def set_aligned_starting_time(self, aligned_start_time) -> None:
+        self._starting_time = aligned_start_time
+
+    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, stub_test: bool = False):
+
+        freezing_behavior_df = pd.read_csv(self.file_path)
+
+        start_times, stop_times = self.get_interval_times()
 
         # Extract motion data
         motion_data = freezing_behavior_df["Motion"].values
+        starting_time = self._starting_time if self._starting_time is not None else self.get_starting_time()
 
         motion_series = TimeSeries(
             name="MotionSeries",
             description="ezTrack measures the motion of the animal by assessing the number of pixels of the behavioral "
             "video whose grayscale change exceeds a particular threshold from one frame to the next.",
-            data=motion_data,
+            data=motion_data[:100] if stub_test else motion_data,
             unit="n.a",
-            starting_time=freezing_behavior_df["Frame"][0] / self.video_sampling_frequency,
+            starting_time=starting_time,
             rate=self.video_sampling_frequency,
         )
 
@@ -54,19 +89,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
         freeze_threshold = freezing_behavior_df["FreezeThresh"].unique()[0]
         min_freeze_duration = freezing_behavior_df["MinFreezeDuration"].unique()[0]
 
-        # Extract start and stop times of the freezing events
-        # From the discussion wih the author, the freezing events are the frames where the freezing behavior is 100
-        freezing_values = freezing_behavior_df["Freezing"].values
-        changes_in_freezing = np.diff(freezing_values)
-        freezing_start = np.where(changes_in_freezing == 100)[0] + 1
-        freezing_stop = np.where(changes_in_freezing == -100)[0] + 1
-
-        start_frames = freezing_behavior_df["Frame"].values[freezing_start]
-        stop_frames = freezing_behavior_df["Frame"].values[freezing_stop]
-
-        start_times = start_frames / self.video_sampling_frequency
-        stop_times = stop_frames / self.video_sampling_frequency
-
         description = f"""
             Freezing behavior intervals generated using EzTrack software for file {file}. 
             Parameters used include a motion cutoff of {motion_cutoff}, freeze threshold of {freeze_threshold}, 

diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/minian_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/minian_interface.py
@@ -154,7 +154,7 @@ def get_accepted_list(self) -> list:
         accepted_list: list
             List of accepted ROI ids.
         """
-        return list(range(self.get_num_rois()))
+        return self.get_roi_ids()
 
     def get_rejected_list(self) -> list:
         """Get a list of rejected ROI ids.
@@ -166,10 +166,6 @@ def get_rejected_list(self) -> list:
         """
         return list()
 
-    def get_roi_ids(self) -> list:
-        dataset = self._read_zarr_group("/A.zarr")
-        return list(dataset["unit_id"])
-
     def get_traces_dict(self) -> dict:
         """Get traces as a dictionary with key as the name of the ROiResponseSeries.
 
@@ -312,6 +308,28 @@ def get_dtype(self) -> DtypeType:
     def get_channel_names(self) -> list[str]:
         return ["OpticalChannel"]
 
+    def get_original_timestamps(self, stub_test: bool = False) -> list[np.ndarray]:
+        """
+        Retrieve the original unaltered timestamps for the data in this interface.
+
+        This function should retrieve the data on-demand by re-initializing the IO.
+
+        This method scans through each video, a process which can take some time to
+        complete. To limit that scan to a small number of frames, set `stub_test=True`
+        (bool, default: False).
+
+        Returns
+        -------
+        timestamps : numpy.ndarray
+            The timestamps for the data stream.
+        """
+        max_frames = 100 if stub_test else None
+        with self._video_capture(file_path=str(self.file_path)) as video:
+            # fps = video.get_video_fps()  # There is some debate about whether the OpenCV timestamp
+            # method is simply returning range(length) / fps 100% of the time for any given format
+            timestamps = video.get_video_timestamps(max_frames=max_frames)
+        return timestamps
+
     def get_video(
         self, start_frame: Optional[int] = None, end_frame: Optional[int] = None, channel: int = 0
     ) -> np.ndarray:
@@ -440,7 +458,7 @@ def add_to_nwbfile(
             xy_translation=xy_translation,
         )
 
-        ophys = get_module(nwbfile, "ophys")
+        ophys = get_module(nwbfile, name="ophys", description="Data processed with MiniAn software")
         if "MotionCorrection" not in ophys.data_interfaces:
             motion_correction = MotionCorrection(name="MotionCorrection")
             ophys.add(motion_correction)

diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/miniscope_imaging_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/miniscope_imaging_interface.py
@@ -370,11 +370,6 @@ def get_original_timestamps(self) -> np.ndarray:
         timestamps_file_path = self.miniscope_folder / "timeStamps.csv"
         assert timestamps_file_path.exists(), f"Miniscope timestamps file not found in {self.miniscope_folder}"
         timestamps_seconds = get_miniscope_timestamps(file_path=timestamps_file_path)
-        # Shift when the first timestamp is negative
-        # TODO: Figure why, I copied from Miniscope. Need to shift also session_start_time
-        if timestamps_seconds[0] < 0.0:
-            timestamps_seconds += abs(timestamps_seconds[0])
-
         return timestamps_seconds
 
     def add_to_nwbfile(
@@ -391,7 +386,7 @@ def add_to_nwbfile(
 
         from neuroconv.tools.roiextractors import add_photon_series_to_nwbfile
 
-        miniscope_timestamps = self.get_original_timestamps()
+        miniscope_timestamps = self.get_timestamps()
         imaging_extractor = self.imaging_extractor
 
         if stub_test:

diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_edf_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_edf_interface.py
@@ -5,7 +5,6 @@
 from pynwb.device import Device
 
 from neuroconv.basedatainterface import BaseDataInterface
-from neuroconv.utils import DeepDict
 
 from mne.io import read_raw_edf
 from datetime import datetime, timedelta
@@ -19,9 +18,13 @@ def __init__(
         verbose: bool = False,
     ):
         self.file_path = Path(file_path)
+        self._starting_time = None
         self.verbose = verbose
         super().__init__(file_path=file_path)
 
+    def set_aligned_starting_time(self, aligned_starting_time: float):
+        self._starting_time = aligned_starting_time
+
     def add_to_nwbfile(
         self,
         nwbfile: NWBFile,
@@ -49,8 +52,6 @@ def add_to_nwbfile(
         stop_datetime_timestamp : datetime, optional
             The ending timestamp for slicing the data. If specified, data will be included
             only up to this time. Default is None, which includes data until the end.
-        **conversion_options
-            Additional options for data conversion (not currently used directly in this function).
 
         Returns
         -------
@@ -73,8 +74,6 @@ def add_to_nwbfile(
                 "description": "EMG signal recorder with HD-X02 wireless telemetry probe",
                 "unit": "volts",
             },
-            # TODO: Figure out if the units of activity are correct, the raw format marks Volts
-            # TODO: My own reading of the header indicates that the physical units is counts
             "Activity": {
                 "name": "ActivitySignal",
                 "description": "Activity signal recorder with HD-X02 wireless telemetry probe. It refers to the motion of the probe relative to the receiver and it can be used as a proxy for locomotion.",
@@ -107,6 +106,8 @@ def add_to_nwbfile(
         else:
             data = data[:, start_idx:end_idx]
 
+        starting_time = self._starting_time if self._starting_time is not None else starting_time
+
         for channel_index, channel_name in enumerate(channels_dict.keys()):
             time_series_kwargs = channels_dict[channel_name].copy()
             time_series_kwargs.update(
@@ -132,8 +133,12 @@ def __init__(
     ):
         self.file_paths = file_paths
         self.verbose = verbose
+        self._starting_time = 0.0
         super().__init__(file_paths=file_paths)
 
+    def set_aligned_starting_time(self, aligned_starting_time: float):
+        self._starting_time = aligned_starting_time
+
     def add_to_nwbfile(
         self,
         nwbfile: NWBFile,
@@ -205,7 +210,7 @@ def add_to_nwbfile(
         for channel_index, channel_name in enumerate(channels_dict.keys()):
             time_series_kwargs = channels_dict[channel_name].copy()
             time_series_kwargs.update(
-                data=concatenated_data[channel_index], starting_time=0.0, rate=edf_reader.info["sfreq"]
+                data=concatenated_data[channel_index], starting_time=self._starting_time, rate=edf_reader.info["sfreq"]
             )
             time_series = TimeSeries(**time_series_kwargs)
             nwbfile.add_acquisition(time_series)

diff --git a/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_sleep_classification_interface.py b/src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_sleep_classification_interface.py
@@ -1,59 +1,69 @@
 """Primary class for converting experiment-specific behavior."""
 
-from pynwb.file import NWBFile
+import pandas as pd
+from pydantic import FilePath
+from typing import Optional, List
 
+from pynwb.file import NWBFile
+from pynwb.epoch import TimeIntervals
 from neuroconv.basedatainterface import BaseDataInterface
 from neuroconv.utils import DeepDict
-from pydantic import FilePath
-from typing import Optional
-from pynwb.epoch import TimeIntervals
 
 
 class Zaki2024SleepClassificationInterface(BaseDataInterface):
     """Adds intervals of sleeping behavior."""
 
     keywords = ["behavior", "sleep stages"]
 
-    def __init__(self, file_path: FilePath, video_sampling_frequency: float, verbose: bool = False):
+    def __init__(self, file_path: FilePath, sampling_frequency: float, verbose: bool = False):
         # This should load the data lazily and prepare variables you need
 
         self.file_path = file_path
         self.verbose = verbose
-        self.video_sampling_frequency = video_sampling_frequency
+        self.sampling_frequency = sampling_frequency
+        self._start_times = None
+        self._stop_times = None
 
     def get_metadata(self) -> DeepDict:
         # Automatically retrieve as much metadata as possible from the source files available
         metadata = super().get_metadata()
 
         return metadata
 
-    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
-
-        import pandas as pd
-
+    def get_sleep_states_times(self):
         sleep_behavior_df = pd.read_csv(self.file_path)
 
         import numpy as np
 
         # Note this will have the first row as None
-        shifted_sleep_state = sleep_behavior_df['SleepState'].shift()
-        start_indices = np.where(sleep_behavior_df['SleepState'] != shifted_sleep_state)[0]
+        shifted_sleep_state = sleep_behavior_df["SleepState"].shift()
+        start_indices = np.where(sleep_behavior_df["SleepState"] != shifted_sleep_state)[0]
         stop_indices = [i - 1 for i in start_indices[1:]]
         stop_indices.append(len(sleep_behavior_df) - 1)
         stop_indices = np.array(stop_indices)
 
-        start_frames = sleep_behavior_df['Frame'][start_indices].values
-        start_times = start_frames / self.video_sampling_frequency
-        stop_frames = sleep_behavior_df['Frame'][stop_indices].values
-        stop_times = stop_frames / self.video_sampling_frequency
-        sleep_state = sleep_behavior_df['SleepState'][start_indices].values
+        start_frames = sleep_behavior_df["Frame"][start_indices].values
+        start_times = self._start_times if self._start_times is not None else start_frames / self.sampling_frequency
+        stop_frames = sleep_behavior_df["Frame"][stop_indices].values
+        stop_times = self._stop_times if self._stop_times is not None else stop_frames / self.sampling_frequency
 
+        sleep_state = sleep_behavior_df["SleepState"][start_indices].values
+
+        return start_times, stop_times, sleep_state
+
+    def set_aligned_interval_times(self, start_times: List[float], stop_times: List[float]) -> None:
+        self._start_times = start_times
+        self._stop_times = stop_times
+
+    def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
+
+        start_times, stop_times, sleep_state = self.get_sleep_states_times()
 
         description = (
             "Sleep states classified with custom algorithm using the data "
             "from the HD-X02 sensor (EEG, EMG, temperature, etc.)."
-        )        
-        
+        )
+
         sleep_intervals = TimeIntervals(name="SleepIntervals", description=description)
         column_description = """
             Sleep State Classification, it can be one of the following: 
@@ -63,14 +73,13 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
             - 'wake': State of full consciousness when the animal is alert, responsive to the environment, and capable of voluntary movement
         """
         sleep_intervals.add_column(name="sleep_state", description=column_description)
-        
+
         for start_time, stop_time, state in zip(start_times, stop_times, sleep_state):
             sleep_intervals.add_interval(start_time=start_time, stop_time=stop_time, sleep_state=state)
 
         if "sleep" not in nwbfile.processing:
             sleep_module = nwbfile.create_processing_module(name="sleep", description="Sleep data")
         else:
             sleep_module = nwbfile.processing["sleep"]
-
-        sleep_module.add(sleep_intervals)
 
+        sleep_module.add(sleep_intervals)
diff --git a/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_all_sessions.py b/src/cai_lab_to_nwb/zaki_2024/zaki_2024_convert_all_sessions.py
@@ -122,8 +122,8 @@ def get_session_to_nwb_kwargs_per_session(
     data_dir_path = Path("D:/")
     output_dir_path = Path("D:/cai_lab_conversion_nwb/")
     max_workers = 1
-    verbose = True
-    stub_test = True
+    verbose = False
+    stub_test = False
     dataset_to_nwb(
         data_dir_path=data_dir_path,
         output_dir_path=output_dir_path,