Skip to content

Commit

Permalink
Merge pull request #2280 from AllenInstitute/ticket/2275
Browse files Browse the repository at this point in the history
Loosens/updates required versions for several dependencies
  • Loading branch information
aamster authored Dec 15, 2021
2 parents cb29a83 + 4d1b72c commit ef0cbfd
Show file tree
Hide file tree
Showing 42 changed files with 3,147 additions and 2,157 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ All notable changes to this project will be documented in this file.
## [2.13.2] = TBD
- Fixes bug that caused file paths on windows machines to be incorrect in Visual behavior user-facing classes
- Updates to support MESO.2
- Loosens/updates required versions for several dependencies

## [2.13.1] = 2021-10-04
- Fixes bug that was preventing the BehaviorSession from properly instantiating passive sessions.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ def add_experience_level_to_experiment_table(
session_123 = experiments_table.session_number.isin([1, 2, 3])
familiar_indices = experiments_table[session_123].index.values

experiments_table.at[familiar_indices, 'experience_level'] = 'Familiar'
experiments_table.loc[familiar_indices, 'experience_level'] = 'Familiar'

session_4 = (experiments_table.session_number == 4)
zero_prior_exp = (experiments_table.prior_exposures_to_image_set == 0)
Expand All @@ -43,17 +43,15 @@ def add_experience_level_to_experiment_table(
session_4
& zero_prior_exp].index.values

experiments_table.at[novel_indices,
'experience_level'] = 'Novel 1'
experiments_table.loc[novel_indices, 'experience_level'] = 'Novel 1'

session_456 = experiments_table.session_number.isin([4, 5, 6])
nonzero_prior_exp = (experiments_table.prior_exposures_to_image_set != 0)
novel_gt_1_indices = experiments_table[
session_456
& nonzero_prior_exp].index.values

experiments_table.at[novel_gt_1_indices,
'experience_level'] = 'Novel >1'
experiments_table.loc[novel_gt_1_indices, 'experience_level'] = 'Novel >1'

return experiments_table

Expand Down Expand Up @@ -88,7 +86,7 @@ def add_passive_flag_to_ophys_experiment_table(

session_25 = experiments_table.session_number.isin([2, 5])
passive_indices = experiments_table[session_25].index.values
experiments_table.at[passive_indices, 'passive'] = True
experiments_table.loc[passive_indices, 'passive'] = True

return experiments_table

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,6 @@ def load_data(filepath: Union[str, Path]) -> pd.DataFrame:
with h5py.File(filepath, 'r') as in_file:
traces = in_file['data'][()]
roi_id = in_file['roi_names'][()]
idx = pd.Index(roi_id, name='cell_roi_id', dtype=int)
idx = pd.Index(roi_id, name='cell_roi_id').astype('int64')
return pd.DataFrame({'corrected_fluorescence': list(traces)},
index=idx)
2 changes: 1 addition & 1 deletion allensdk/brain_observatory/behavior/data_files/dff_file.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,5 +61,5 @@ def load_data(filepath: Union[str, Path]) -> pd.DataFrame:
with h5py.File(filepath, 'r') as raw_file:
traces = np.asarray(raw_file['data'], dtype=np.float64)
roi_names = np.asarray(raw_file['roi_names'])
idx = pd.Index(roi_names, name='cell_roi_id', dtype=int)
idx = pd.Index(roi_names, name='cell_roi_id').astype('int64')
return pd.DataFrame({'dff': [x for x in traces]}, index=idx)
3 changes: 2 additions & 1 deletion allensdk/brain_observatory/comparison_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,8 @@ def compare_fields(x1: Any, x2: Any, err_msg="",
if isinstance(x1, pd.DataFrame):
try:
assert_frame_equal(x1, x2, check_like=True)
except Exception:
except AssertionError as e:
print(e)
print(err_msg)
raise
elif isinstance(x1, np.ndarray):
Expand Down
7 changes: 3 additions & 4 deletions allensdk/brain_observatory/dff.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,13 +37,12 @@
import os
import argparse
import matplotlib.pyplot as plt
import warnings
import h5py
import numpy as np
from functools import partial
from scipy.ndimage.filters import median_filter

from allensdk.core.brain_observatory_nwb_data_set import BrainObservatoryNwbDataSet
from allensdk.core.brain_observatory_nwb_data_set import \
BrainObservatoryNwbDataSet

GAUSSIAN_MAD_STD_SCALE = 1.4826

Expand Down Expand Up @@ -408,7 +407,7 @@ def main():
args.input_h5).get_corrected_fluorescence_traces()
else:
input_h5 = h5py.File(args.input_h5, "r")
traces = input_h5["data"].value
traces = input_h5["data"][()]
input_h5.close()

dff = calculate_dff(traces, save_plot_dir=args.plot_dir)
Expand Down
336 changes: 210 additions & 126 deletions allensdk/brain_observatory/drifting_gratings.py

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,8 @@
import numpy as np
import h5py
import collections
import warnings

# from allensdk.brain_observatory.nwb.nwb_api import NwbApi
from .ecephys_session_api import EcephysSessionApi
from allensdk.brain_observatory.running_speed import RunningSpeed


class IDCreator(object):
Expand All @@ -34,19 +31,28 @@ def __contains__(self, key):
class EcephysNwb1Api(EcephysSessionApi):
"""An EcephySession adaptor for reading NWB1.0 files.
Was created by sight using an assortment of existing NWB1 files. It is possible that parts of the NWB1 standard (?!)
Was created by sight using an assortment of existing NWB1 files. It is
possible that parts of the NWB1 standard (?!)
are missing or not properly implemented.
NWB1 vs NWB2 issues:
* In NWB 1 there is no difference between global unit-ids and probe's local-index. A unit is unique to one channel
* Units are missing information about firing_rate, isi_violation, and quality.
- So that EcephysSession._build_units() actually return values I had to set quality=good for all units
* NWB Stimulus_presentations missing stimulus_block, stimulus_index and Image column
- To get EcephysSession.conditionwise_spikes() working had to make up a block number for every stimulus type
* NWB1 missing a 'valid_data' tag for channels. Had to set to True otherwise EcephysSession won't see any channels
* There were no 'channels' table/group in NWB1. Instead we had to iterate through all the units and pull out the
* In NWB 1 there is no difference between global unit-ids and probe's
local-index. A unit is unique to one channel
* Units are missing information about firing_rate, isi_violation,
and quality.
- So that EcephysSession._build_units() actually returns values I had
to set quality=good for all units
* NWB Stimulus_presentations missing stimulus_block, stimulus_index and
Image column
- To get EcephysSession.conditionwise_spikes() working had to make up
a block number for every stimulus type
* NWB1 missing a 'valid_data' tag for channels. Had to set to True
otherwise EcephysSession won't see any channels
* There were no 'channels' table/group in NWB1. Instead we had to
iterate through all the units and pull out the
distinct channel info.
* In NWB2 each unit has a mean-waveform for every channel on the probe. In NWB1 A unit only has a single waveform
* In NWB2 each unit has a mean-waveform for every channel on the probe.
In NWB1 A unit only has a single waveform
* The NWB1 identifier is a string
"""

Expand All @@ -56,19 +62,22 @@ def __init__(self, path, *args, **kwargs):
try:
# check file is a valid NWB 1 file
version_str = self._h5_root['nwb_version'][()]
if not (version_str.startswith('NWB-1.') or version_str.startswith('1.')):
raise Exception('{} is not a valid NWB 1 file path'.format(self._path))
if not (version_str.startswith(b'NWB-1.') or
version_str.startswith(b'1.')):
raise Exception(
'{} is not a valid NWB 1 file path'.format(self._path))
except Exception:
raise

# EcephysSession requires session wide ids for units/channels/etc but NWB 1 doesn't have such a thing (ids
# are relative to the probe). The following data-stuctures are used build and fetch session ids without having
# EcephysSession requires session wide ids for units/channels/etc
# but NWB 1 doesn't have such a thing (ids
# are relative to the probe). The following data-structures are used
# to build and fetch session ids without having
# to parse all the tables.
self._unit_ids = IDCreator()
self._channel_ids = IDCreator()
self._probe_ids = IDCreator()


@property
def processing_grp(self):
return self._h5_root['/processing']
Expand All @@ -79,18 +88,21 @@ def running_speed_grp(self):

def _probe_groups(self):
return [(pname, pgrp) for pname, pgrp in self.processing_grp.items()
if isinstance(pgrp, h5py.Group) and pname.lower().startswith('probe')]
if isinstance(pgrp, h5py.Group) and pname.lower().startswith(
'probe')]

def get_running_speed(self):
running_speed_grp = self.running_speed_grp

return pd.DataFrame({
"start_time": running_speed_grp['timestamps'][:],
"velocity": running_speed_grp['data'][:] # average velocities over a given interval
"velocity": running_speed_grp['data'][:]
# average velocities over a given interval
})

__stim_col_map = {
# Used for mapping column names from NWB 1.0 features ds to their appropiate NWB 2.0 name
# Used for mapping column names from NWB 1.0 features ds to their
# appropriate NWB 2.0 name
b'temporal_frequency': 'TF',
b'spatial_frequency': 'SF',
b'pos_x': 'Pos_x',
Expand All @@ -107,28 +119,35 @@ def get_stimulus_presentations(self) -> pd.DataFrame:
presentation_ids = 0 # make up a id for every stim-presentation
stim_pres_grp = self._h5_root['/stimulus/presentation']

# Stimulus-presentations are heirarchily grouped by presentation name. Iterate through all of them and build
# Stimulus-presentations are hierarchically grouped by presentation
# name. Iterate through all of them and build
# a single table.
for block_i, (stim_name, stim_grp) in enumerate(stim_pres_grp.items()):
timestamps = stim_grp['timestamps'][()]
start_times = timestamps[:, 0]
if timestamps.shape[1] == 2:
stop_times = timestamps[:, 1]
else:
# Some of the datasets have an optotagging stimulus with no stop time.
# Some of the datasets have an optotagging stimulus with no
# stop time.
continue
stop_times = np.nan

n_stims = stim_grp['num_samples'][()]
try:
# parse the features/data datasets, map old column names (temporal freq->TF, phase-> phase, etc).
stim_props = {self.__stim_col_map.get(ftr_name, ftr_name): stim_grp['data'][:, i]
for i, ftr_name in enumerate(stim_grp['features'][()])}
# parse the features/data datasets, map old column names (
# temporal freq->TF, phase-> phase, etc).
stim_props = {
self.__stim_col_map.get(
ftr_name, ftr_name): stim_grp['data'][:, i]
for i, ftr_name in enumerate(stim_grp['features'][()])}
except Exception:
stim_props = {}

stim_df = pd.DataFrame({
'stimulus_presentation_id': np.arange(presentation_ids, presentation_ids + n_stims),
'stimulus_presentation_id': np.arange(presentation_ids,
presentation_ids +
n_stims),
'start_time': start_times,
'stop_time': stop_times,
'stimulus_name': stim_name,
Expand All @@ -140,20 +159,24 @@ def get_stimulus_presentations(self) -> pd.DataFrame:
'Color': stim_props.get('Color', np.nan),
'Phase': stim_props.get('Phase', np.nan),
'Image': stim_props.get('Image', np.nan),
'stimulus_block': block_i # Required by conditionwise_spike_counts(), add made-up number
'stimulus_block': block_i
# Required by conditionwise_spike_counts(), add made-up number
})

presentation_ids += n_stims
if stimulus_presentations_df is None:
stimulus_presentations_df = stim_df
else:
stimulus_presentations_df = stimulus_presentations_df.append(stim_df)

stimulus_presentations_df['stimulus_index'] = 0 # I'm not sure what column is, but is droped by EcephysSession
stimulus_presentations_df.set_index('stimulus_presentation_id', inplace=True)
stimulus_presentations_df = stimulus_presentations_df.append(
stim_df)

stimulus_presentations_df[
'stimulus_index'] = 0 # I'm not sure what column is, but is
# dropped by EcephysSession
stimulus_presentations_df.set_index('stimulus_presentation_id',
inplace=True)
return stimulus_presentations_df


def get_probes(self) -> pd.DataFrame:
probe_ids = []
locations = []
Expand All @@ -167,15 +190,18 @@ def get_probes(self) -> pd.DataFrame:
'description': "" # TODO: Find description
})
probes_df.set_index('id', inplace=True)
probes_df['sampling_rate'] = 30000.0 # TODO: calculate real sampling rate for each probe.
return probes_df

# TODO: calculate real sampling rate for each probe.
probes_df['sampling_rate'] = 30000.0

return probes_df

def get_channels(self) -> pd.DataFrame:
# TODO: Missing: manual_structure_id
processing_grp = self.processing_grp

max_channels = sum(len(prb_grp['unit_list']) for prb_grp in processing_grp.values())
max_channels = sum(
len(prb_grp['unit_list']) for prb_grp in processing_grp.values())
channel_ids = np.zeros(max_channels, dtype=np.uint64)
local_channel_indices = np.zeros(max_channels, dtype=np.int64)
prb_ids = np.zeros(max_channels, dtype=np.uint64)
Expand All @@ -185,7 +211,8 @@ def get_channels(self) -> pd.DataFrame:

channel_indx = 0
existing_channels = set()
# In NWB 1.0 files I used I couldn't find a channel group/dataset. Instead we have to iterate through all units
# In NWB 1.0 files I used I couldn't find a channel group/dataset.
# Instead we have to iterate through all units
# to get information about all available channels
for prb_name, prb_grp in self._probe_groups():
prb_id = self._probe_ids[prb_name]
Expand All @@ -195,7 +222,8 @@ def get_channels(self) -> pd.DataFrame:
local_channel_index = unit_grp['channel'][()]
channel_id = self._channel_ids[(prb_name, local_channel_index)]
if channel_id in existing_channels:
# If a channel has already been processed (ie it's shared by another unit) skip it. I'm assuming
# If a channel has already been processed (ie it's
# shared by another unit) skip it. I'm assuming
# position/ccf info is the same for every probe/channel_id.
continue
else:
Expand All @@ -205,10 +233,11 @@ def get_channels(self) -> pd.DataFrame:
prb_hrz_pos[channel_indx] = unit_grp['xpos_probe'][()]
prb_vert_pos[channel_indx] = unit_grp['ypos_probe'][()]
try:
struct_acronyms[channel_indx] = str(unit_grp['ccf_structure'][()], encoding='ascii')
struct_acronyms[channel_indx] = str(
unit_grp['ccf_structure'][()], encoding='ascii')
except TypeError:
struct_acronyms[channel_indx] = unit_grp['ccf_structure'][()]

struct_acronyms[channel_indx] = \
unit_grp['ccf_structure'][()]

existing_channels.add(channel_id)
channel_indx += 1
Expand All @@ -229,12 +258,15 @@ def get_channels(self) -> pd.DataFrame:
def get_mean_waveforms(self) -> Dict[int, np.ndarray]:
waveforms = {}
for prb_name, prb_grp in self._probe_groups():
# There is one waveform for any given spike, but still calling it "mean" wavefor
# There is one waveform for any given spike, but still calling
# it "mean" waveform
for indx, uid in enumerate(prb_grp['unit_list']):
unit_grp = prb_grp['UnitTimes'][str(uid)]
unit_id = self._unit_ids[(prb_name, uid)]
waveforms[unit_id] = np.array([unit_grp['waveform'][()],]) # EcephysSession is expecting an array of waveforms

# EcephysSession is expecting an array of waveforms
waveforms[unit_id] = \
np.array([unit_grp['waveform'][()], ])
return waveforms

def get_spike_times(self) -> Dict[int, np.ndarray]:
Expand All @@ -256,15 +288,17 @@ def get_units(self) -> pd.DataFrame:

for prb_name, prb_grp in self._probe_groups():
# visit every /processing/probeN/UnitList/N/ group to build
# TODO: Since just visting the tree is so expensive, maybe build the channels and probes at the same time.
# TODO: Since just visiting the tree is so expensive, maybe build
# the channels and probes at the same time.
unit_list = prb_grp['unit_list'][()]
prb_uids = np.zeros(len(unit_list), dtype=np.uint64)
prb_channels = np.zeros(len(unit_list), dtype=np.int64)
prb_snr = np.zeros(len(unit_list), dtype=np.float64)
for indx, uid in enumerate(unit_list):
unit_grp = prb_grp['UnitTimes'][str(uid)]
prb_uids[indx] = self._unit_ids[(prb_name, uid)]
prb_channels[indx] = self._channel_ids[(prb_name, unit_grp['channel'][()])]
prb_channels[indx] = self._channel_ids[
(prb_name, unit_grp['channel'][()])]
prb_snr[indx] = unit_grp['snr'][()]

unit_ids = np.append(unit_ids, prb_uids)
Expand All @@ -277,7 +311,9 @@ def get_units(self) -> pd.DataFrame:
'local_index': local_indices,
'peak_channel_id': peak_channel_ids,
'snr': snrs,
'quality': "good" # TODO: NWB 1.0 is missing quality table, need to find an equivelent
'quality': "good"
# TODO: NWB 1.0 is missing quality table, need to find an
# equivalent
})

units_df.set_index('unit_id', inplace=True)
Expand Down
Loading

0 comments on commit ef0cbfd

Please sign in to comment.