Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Time alignment #23

Merged
merged 12 commits into from
Dec 10, 2024
58 changes: 40 additions & 18 deletions src/cai_lab_to_nwb/zaki_2024/interfaces/eztrack_interface.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,16 @@
"""Primary class for converting experiment-specific behavior."""

import numpy as np
import pandas as pd

from pynwb import TimeSeries
from pynwb.epoch import TimeIntervals
from pynwb.file import NWBFile

from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.utils import DeepDict
from pydantic import FilePath
from typing import Optional
from typing import Optional, List


class EzTrackFreezingBehaviorInterface(BaseDataInterface):
Expand All @@ -22,29 +24,62 @@ def __init__(self, file_path: FilePath, video_sampling_frequency: float, verbose
self.file_path = file_path
self.verbose = verbose
self.video_sampling_frequency = video_sampling_frequency
self._start_times = None
self._stop_times = None
self._starting_time = None

def get_metadata(self) -> DeepDict:
# Automatically retrieve as much metadata as possible from the source files available
metadata = super().get_metadata()

return metadata

def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
def get_interval_times(self):
# Extract start and stop times of the freezing events
# From the discussion with the author, the freezing events are the frames where the freezing behavior is 100
freezing_behavior_df = pd.read_csv(self.file_path)
freezing_values = freezing_behavior_df["Freezing"].values
changes_in_freezing = np.diff(freezing_values)
freezing_start = np.where(changes_in_freezing == 100)[0] + 1
freezing_stop = np.where(changes_in_freezing == -100)[0] + 1

import pandas as pd
start_frames = freezing_behavior_df["Frame"].values[freezing_start]
stop_frames = freezing_behavior_df["Frame"].values[freezing_stop]

start_times = (
self._start_times if self._start_times is not None else start_frames / self.video_sampling_frequency
)
stop_times = self._stop_times if self._stop_times is not None else stop_frames / self.video_sampling_frequency
return start_times, stop_times

def get_starting_time(self) -> float:
freezing_behavior_df = pd.read_csv(self.file_path)
return freezing_behavior_df["Frame"].values[0] / self.video_sampling_frequency

def set_aligned_interval_times(self, start_times: List[float], stop_times: List[float]) -> None:
self._start_times = start_times
self._stop_times = stop_times

def set_aligned_starting_time(self, aligned_start_time) -> None:
self._starting_time = aligned_start_time

def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None, stub_test: bool = False):

freezing_behavior_df = pd.read_csv(self.file_path)

start_times, stop_times = self.get_interval_times()

# Extract motion data
motion_data = freezing_behavior_df["Motion"].values
starting_time = self._starting_time if self._starting_time is not None else self.get_starting_time()

motion_series = TimeSeries(
name="MotionSeries",
description="ezTrack measures the motion of the animal by assessing the number of pixels of the behavioral "
"video whose grayscale change exceeds a particular threshold from one frame to the next.",
data=motion_data,
data=motion_data[:100] if stub_test else motion_data,
unit="n.a",
starting_time=freezing_behavior_df["Frame"][0] / self.video_sampling_frequency,
starting_time=starting_time,
rate=self.video_sampling_frequency,
)

Expand All @@ -54,19 +89,6 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
freeze_threshold = freezing_behavior_df["FreezeThresh"].unique()[0]
min_freeze_duration = freezing_behavior_df["MinFreezeDuration"].unique()[0]

# Extract start and stop times of the freezing events
# From the discussion with the author, the freezing events are the frames where the freezing behavior is 100
freezing_values = freezing_behavior_df["Freezing"].values
changes_in_freezing = np.diff(freezing_values)
freezing_start = np.where(changes_in_freezing == 100)[0] + 1
freezing_stop = np.where(changes_in_freezing == -100)[0] + 1

start_frames = freezing_behavior_df["Frame"].values[freezing_start]
stop_frames = freezing_behavior_df["Frame"].values[freezing_stop]

start_times = start_frames / self.video_sampling_frequency
stop_times = stop_frames / self.video_sampling_frequency

description = f"""
Freezing behavior intervals generated using EzTrack software for file {file}.
Parameters used include a motion cutoff of {motion_cutoff}, freeze threshold of {freeze_threshold},
Expand Down
30 changes: 24 additions & 6 deletions src/cai_lab_to_nwb/zaki_2024/interfaces/minian_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def get_accepted_list(self) -> list:
accepted_list: list
List of accepted ROI ids.
"""
return list(range(self.get_num_rois()))
return self.get_roi_ids()

def get_rejected_list(self) -> list:
"""Get a list of rejected ROI ids.
Expand All @@ -166,10 +166,6 @@ def get_rejected_list(self) -> list:
"""
return list()

def get_roi_ids(self) -> list:
dataset = self._read_zarr_group("/A.zarr")
return list(dataset["unit_id"])

def get_traces_dict(self) -> dict:
"""Get traces as a dictionary with key as the name of the RoiResponseSeries.

Expand Down Expand Up @@ -312,6 +308,28 @@ def get_dtype(self) -> DtypeType:
def get_channel_names(self) -> list[str]:
return ["OpticalChannel"]

def get_original_timestamps(self, stub_test: bool = False) -> list[np.ndarray]:
"""
Retrieve the original unaltered timestamps for the data in this interface.

This function should retrieve the data on-demand by re-initializing the IO.

Parameters
----------
stub_test : bool, default: False
This method scans through each video; a process which can take some time to complete.

To limit that scan to a small number of frames, set `stub_test=True`.

Returns
-------
timestamps : numpy.ndarray
The timestamps for the data stream.
"""
max_frames = 100 if stub_test else None
with self._video_capture(file_path=str(self.file_path)) as video:
# fps = video.get_video_fps() # There is some debate about whether the OpenCV timestamp
# method is simply returning range(length) / fps 100% of the time for any given format
timestamps = video.get_video_timestamps(max_frames=max_frames)
return timestamps

def get_video(
self, start_frame: Optional[int] = None, end_frame: Optional[int] = None, channel: int = 0
) -> np.ndarray:
Expand Down Expand Up @@ -440,7 +458,7 @@ def add_to_nwbfile(
xy_translation=xy_translation,
)

ophys = get_module(nwbfile, "ophys")
ophys = get_module(nwbfile, name="ophys", description="Data processed with MiniAn software")
if "MotionCorrection" not in ophys.data_interfaces:
motion_correction = MotionCorrection(name="MotionCorrection")
ophys.add(motion_correction)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -370,11 +370,6 @@ def get_original_timestamps(self) -> np.ndarray:
timestamps_file_path = self.miniscope_folder / "timeStamps.csv"
assert timestamps_file_path.exists(), f"Miniscope timestamps file not found in {self.miniscope_folder}"
timestamps_seconds = get_miniscope_timestamps(file_path=timestamps_file_path)
# Shift when the first timestamp is negative
# TODO: Figure out why; this was copied from Miniscope. Need to also shift session_start_time
if timestamps_seconds[0] < 0.0:
timestamps_seconds += abs(timestamps_seconds[0])

return timestamps_seconds

def add_to_nwbfile(
Expand All @@ -391,7 +386,7 @@ def add_to_nwbfile(

from neuroconv.tools.roiextractors import add_photon_series_to_nwbfile

miniscope_timestamps = self.get_original_timestamps()
miniscope_timestamps = self.get_timestamps()
imaging_extractor = self.imaging_extractor

if stub_test:
Expand Down
17 changes: 11 additions & 6 deletions src/cai_lab_to_nwb/zaki_2024/interfaces/zaki_2024_edf_interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from pynwb.device import Device

from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.utils import DeepDict

from mne.io import read_raw_edf
from datetime import datetime, timedelta
Expand All @@ -19,9 +18,13 @@ def __init__(
verbose: bool = False,
):
self.file_path = Path(file_path)
self._starting_time = None
self.verbose = verbose
super().__init__(file_path=file_path)

def set_aligned_starting_time(self, aligned_starting_time: float):
self._starting_time = aligned_starting_time

def add_to_nwbfile(
self,
nwbfile: NWBFile,
Expand Down Expand Up @@ -49,8 +52,6 @@ def add_to_nwbfile(
stop_datetime_timestamp : datetime, optional
The ending timestamp for slicing the data. If specified, data will be included
only up to this time. Default is None, which includes data until the end.
**conversion_options
Additional options for data conversion (not currently used directly in this function).

Returns
-------
Expand All @@ -73,8 +74,6 @@ def add_to_nwbfile(
"description": "EMG signal recorder with HD-X02 wireless telemetry probe",
"unit": "volts",
},
# TODO: Figure out if the units of activity are correct, the raw format marks Volts
# TODO: My own reading of the header indicates that the physical units is counts
"Activity": {
"name": "ActivitySignal",
"description": "Activity signal recorder with HD-X02 wireless telemetry probe. It refers to the motion of the probe relative to the receiver and it can be used as a proxy for locomotion.",
Expand Down Expand Up @@ -107,6 +106,8 @@ def add_to_nwbfile(
else:
data = data[:, start_idx:end_idx]

starting_time = self._starting_time if self._starting_time is not None else starting_time

for channel_index, channel_name in enumerate(channels_dict.keys()):
time_series_kwargs = channels_dict[channel_name].copy()
time_series_kwargs.update(
Expand All @@ -132,8 +133,12 @@ def __init__(
):
self.file_paths = file_paths
self.verbose = verbose
self._starting_time = 0.0
super().__init__(file_paths=file_paths)

def set_aligned_starting_time(self, aligned_starting_time: float):
self._starting_time = aligned_starting_time

def add_to_nwbfile(
self,
nwbfile: NWBFile,
Expand Down Expand Up @@ -205,7 +210,7 @@ def add_to_nwbfile(
for channel_index, channel_name in enumerate(channels_dict.keys()):
time_series_kwargs = channels_dict[channel_name].copy()
time_series_kwargs.update(
data=concatenated_data[channel_index], starting_time=0.0, rate=edf_reader.info["sfreq"]
data=concatenated_data[channel_index], starting_time=self._starting_time, rate=edf_reader.info["sfreq"]
)
time_series = TimeSeries(**time_series_kwargs)
nwbfile.add_acquisition(time_series)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,59 +1,69 @@
"""Primary class for converting experiment-specific behavior."""

from pynwb.file import NWBFile
import pandas as pd
from pydantic import FilePath
from typing import Optional, List

from pynwb.file import NWBFile
from pynwb.epoch import TimeIntervals
from neuroconv.basedatainterface import BaseDataInterface
from neuroconv.utils import DeepDict
from pydantic import FilePath
from typing import Optional
from pynwb.epoch import TimeIntervals


class Zaki2024SleepClassificationInterface(BaseDataInterface):
"""Adds intervals of sleeping behavior."""

keywords = ["behavior", "sleep stages"]

def __init__(self, file_path: FilePath, video_sampling_frequency: float, verbose: bool = False):
def __init__(self, file_path: FilePath, sampling_frequency: float, verbose: bool = False):
# This should load the data lazily and prepare variables you need

self.file_path = file_path
self.verbose = verbose
self.video_sampling_frequency = video_sampling_frequency
self.sampling_frequency = sampling_frequency
self._start_times = None
self._stop_times = None

def get_metadata(self) -> DeepDict:
# Automatically retrieve as much metadata as possible from the source files available
metadata = super().get_metadata()

return metadata

def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):

import pandas as pd

def get_sleep_states_times(self):
sleep_behavior_df = pd.read_csv(self.file_path)

import numpy as np

# Note this will have the first row as None
shifted_sleep_state = sleep_behavior_df['SleepState'].shift()
start_indices = np.where(sleep_behavior_df['SleepState'] != shifted_sleep_state)[0]
shifted_sleep_state = sleep_behavior_df["SleepState"].shift()
start_indices = np.where(sleep_behavior_df["SleepState"] != shifted_sleep_state)[0]
stop_indices = [i - 1 for i in start_indices[1:]]
stop_indices.append(len(sleep_behavior_df) - 1)
stop_indices = np.array(stop_indices)

start_frames = sleep_behavior_df['Frame'][start_indices].values
start_times = start_frames / self.video_sampling_frequency
stop_frames = sleep_behavior_df['Frame'][stop_indices].values
stop_times = stop_frames / self.video_sampling_frequency
sleep_state = sleep_behavior_df['SleepState'][start_indices].values
start_frames = sleep_behavior_df["Frame"][start_indices].values
start_times = self._start_times if self._start_times is not None else start_frames / self.sampling_frequency
stop_frames = sleep_behavior_df["Frame"][stop_indices].values
stop_times = self._stop_times if self._stop_times is not None else stop_frames / self.sampling_frequency

sleep_state = sleep_behavior_df["SleepState"][start_indices].values

return start_times, stop_times, sleep_state

def set_aligned_interval_times(self, start_times: List[float], stop_times: List[float]) -> None:
self._start_times = start_times
self._stop_times = stop_times

def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):

start_times, stop_times, sleep_state = self.get_sleep_states_times()

description = (
"Sleep states classified with custom algorithm using the data "
"from the HD-X02 sensor (EEG, EMG, temperature, etc.)."
)
)

sleep_intervals = TimeIntervals(name="SleepIntervals", description=description)
column_description = """
Sleep State Classification, it can be one of the following:
Expand All @@ -63,14 +73,13 @@ def add_to_nwbfile(self, nwbfile: NWBFile, metadata: Optional[dict] = None):
- 'wake': State of full consciousness when the animal is alert, responsive to the environment, and capable of voluntary movement
"""
sleep_intervals.add_column(name="sleep_state", description=column_description)

for start_time, stop_time, state in zip(start_times, stop_times, sleep_state):
sleep_intervals.add_interval(start_time=start_time, stop_time=stop_time, sleep_state=state)

if "sleep" not in nwbfile.processing:
sleep_module = nwbfile.create_processing_module(name="sleep", description="Sleep data")
else:
sleep_module = nwbfile.processing["sleep"]

sleep_module.add(sleep_intervals)

sleep_module.add(sleep_intervals)
Original file line number Diff line number Diff line change
Expand Up @@ -122,8 +122,8 @@ def get_session_to_nwb_kwargs_per_session(
data_dir_path = Path("D:/")
output_dir_path = Path("D:/cai_lab_conversion_nwb/")
max_workers = 1
verbose = True
stub_test = True
verbose = False
stub_test = False
dataset_to_nwb(
data_dir_path=data_dir_path,
output_dir_path=output_dir_path,
Expand Down
Loading