Skip to content

Commit

Permalink
Add function to subdivide nwb files. (#97)
Browse files Browse the repository at this point in the history
* Add function to subdivide nwb files.

* Actually validate.
  • Loading branch information
DailyDreaming authored Aug 20, 2024
1 parent f578065 commit 1081ddc
Showing 1 changed file with 76 additions and 0 deletions.
76 changes: 76 additions & 0 deletions src/braingeneers/data/datasets_electrophysiology.py
Original file line number Diff line number Diff line change
Expand Up @@ -1378,6 +1378,82 @@ def generate_metadata_maxwell(batch_uuid: str, experiment_prefix: Optional[str]
return metadata_json


def validate_sections(sections: List[List[float]]):
    """
    Check that a list of [start, end] time sections is well formed.

    'sections' is a list of two-item [start_time, end_time] pairs, expected in ascending
    order.  A falsy end time (None or 0) means "until the end of the recording" and is
    treated as infinity for the purposes of these checks.

    Raises a RuntimeError if any section ends before it starts, or if a section starts
    before the section preceding it has ended.  A section may start exactly where the
    previous one ended.  Returns None when all sections are valid.
    """
    previous_start_time = 0
    previous_end_time = 0
    for start_time, end_time in sections:
        # An open-ended section runs to the end of the data.
        end_time = end_time or float('inf')
        if end_time < start_time:
            raise RuntimeError(f'Section start time cannot be greater than the end time: {start_time, end_time}')
        if previous_end_time > start_time:
            raise RuntimeError(f'Section preceding this overlaps: "{previous_start_time, previous_end_time}" "{start_time, end_time}"')
        # Remember this section so that the next one is compared against it; without
        # this the overlap check would only ever compare against the initial zeros.
        previous_start_time = start_time
        previous_end_time = end_time


def split_nwb_into_subfiles(base_filename: str, sections: str):
    """
    Assuming an NWB file contains an 'ElectricalSeries', separate "sections" of the timeseries into
    their own individual files.

    'base_filename' is a local path to the NWB file that we are splitting.  One subfile is created
    per section, named '<base_filename without .nwb>.<start>-<end>.nwb' (the end is left blank for
    an open-ended section).  The original file is left untouched.

    'sections' are the sections of the timeseries as a comma-separated string of float-dash-float
    values, each of which is put into its own file.  For example:

        '0-300,300-1200,1300-10000,9000000-'

    Would write 4 separate files, with the data in seconds for each of the comma-separated ranges
    described above.  NOTE: A dash at the end without a number signifies writing to completion
    (an explicit end of 0 is treated the same way).

    Raises a ValueError if a section is not of the form 'start-end' or 'start-', and a RuntimeError
    if the sections fail validation or if neither axis of the data matches the electrode count.
    """
    # Kept under its own name so the per-section loop variables cannot clobber the timer.
    began_at = time.time()
    maxwell_sample_rate = 20000

    parsed_sections = []
    for section in sections.split(','):
        start_text, dash, end_text = section.strip().partition('-')
        if not dash:
            raise ValueError(f'Section must be of the form "start-end" or "start-": {section!r}')
        end_text = end_text.strip()
        # A missing end ("300-") means the section runs to the end of the recording.
        parsed_sections.append([float(start_text), float(end_text) if end_text else None])
    validate_sections(parsed_sections)
    print(f'Creating {len(parsed_sections)} files from: {base_filename}')

    # Only the output names drop the extension; the copy source must stay the real path.
    if base_filename.endswith('.nwb'):
        output_stem = base_filename[:-len('.nwb')]
    else:
        output_stem = base_filename

    for start_time, end_time in parsed_sections:
        end_label = '' if end_time is None else end_time
        section_filename = f'{output_stem}.{start_time}-{end_label}.nwb'
        shutil.copyfile(base_filename, section_filename)

        with h5py.File(section_filename, "r+") as r:
            series = r["acquisition"]['ElectricalSeries']

            # set the new file's start time; an existing dataset has to be written in place,
            # since assigning to the group key tries to create a second link of the same name
            if 'starting_time' in series:
                series['starting_time'][()] = start_time
            else:
                series['starting_time'] = start_time

            data = series['data']
            num_electrodes = series['electrodes'].shape[0]
            # Work out which axis of the data is time by matching the electrode count.
            if num_electrodes == data.shape[0]:
                time_axis = 1
            elif num_electrodes == data.shape[1]:
                time_axis = 0
            else:
                raise RuntimeError(f'Something is wrong with the electrode count in the ElectricalSeries: '
                                   f'{num_electrodes} not in {data.shape}')

            # Convert seconds to whole sample indices and clamp them to the recording length,
            # so that a section reaching past the end simply stops at the last sample.
            timeseries_number = data.shape[time_axis]
            section_start = min(int(round(start_time * maxwell_sample_rate)), timeseries_number)
            if end_time:
                section_end = min(int(round(end_time * maxwell_sample_rate)), timeseries_number)
            else:
                section_end = timeseries_number
            section_length = section_end - section_start

            # actually trim the data: move the section to the front, then shrink the dataset
            # (resize assumes the dataset was created resizable -- TODO confirm for all inputs)
            if time_axis == 1:
                if section_length:
                    trimmed_data = data[:, section_start:section_end]
                    data[:, :section_length] = trimmed_data
                data.resize((data.shape[0], section_length))
            else:
                if section_length:
                    trimmed_data = data[section_start:section_end, :]
                    data[:section_length, :] = trimmed_data
                data.resize((section_length, data.shape[1]))

    print(f'--- Completed in {(time.time() - began_at) / 60.0} minutes ---')


def remove_portion_of_timeseries_from_maxwell_nwb(filename: str, seconds: float):
"""
Assuming an NWB file contains an 'ElectricalSeries', trim the first x seconds from that timeseries.
Expand Down

0 comments on commit 1081ddc

Please sign in to comment.