Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ENH: Improve convolution performance for Sparse variables #411

Merged
merged 18 commits into from
Mar 7, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ before_install:
- source venv/bin/activate
- python --version # just to check
- travis_retry pip install -U pip setuptools wheel # needed at one point
- travis_retry pip install pytest pytest-xdist pytest-cov codecov
- travis_retry pip install pytest pytest-xdist pytest-cov codecov mock
- travis_retry pip install pathlib

install:
Expand Down
54 changes: 49 additions & 5 deletions bids/analysis/tests/test_transformations.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,11 @@
import numpy as np
import pandas as pd

try:
from unittest import mock
except ImportError:
import mock


@pytest.fixture
def collection():
Expand All @@ -29,10 +34,48 @@ def sparse_run_variable_with_missing_values():
'amplitude': [1, 1, np.nan, 1]
})
run_info = [RunInfo({'subject': '01'}, 20, 2, 'dummy.nii.gz')]
var = SparseRunVariable(name='var', data=data, run_info=run_info, source='events')
var = SparseRunVariable(
name='var', data=data, run_info=run_info, source='events')
return BIDSRunVariableCollection([var])


def test_convolve(collection):
    """Convolution should yield a dense output spanning the run duration
    at the collection's sampling rate, for both sparse and dense inputs,
    and should choose the oversampling factor adaptively.
    """
    rt = collection.variables['RT']
    transform.Convolve(collection, 'RT', output='reaction_time')
    rt_conv = collection.variables['reaction_time']

    # Sparse input: output length == duration * sampling rate
    assert rt_conv.values.shape[0] == \
        rt.get_duration() * collection.sampling_rate

    transform.ToDense(collection, 'RT', output='rt_dense')
    transform.Convolve(collection, 'rt_dense', output='dense_convolved')

    # BUG FIX: previously re-read 'reaction_time' here, so the dense-input
    # path was never actually checked. Inspect the dense output instead.
    dense_conv = collection.variables['dense_convolved']

    # Dense input: output length should match the same expectation
    assert dense_conv.values.shape[0] == \
        rt.get_duration() * collection.sampling_rate

    # Test adaptive oversampling computation: the oversampling argument
    # passed through to hrf.compute_regressor should scale with the
    # minimum event interval and the sampling rate.
    with mock.patch('bids.analysis.transformations.compute.hrf') as mocked:
        transform.Convolve(collection, 'RT', output='rt_mock')
        mocked.compute_regressor.assert_called_with(
            mock.ANY, 'spm', mock.ANY, fir_delays=None, min_onset=0,
            oversampling=1.0)

    with mock.patch('bids.analysis.transformations.compute.hrf') as mocked:
        transform.Convolve(collection, 'rt_dense', output='rt_mock')
        mocked.compute_regressor.assert_called_with(
            mock.ANY, 'spm', mock.ANY, fir_delays=None, min_onset=0,
            oversampling=3.0)

    with mock.patch('bids.analysis.transformations.compute.hrf') as mocked:
        # Halving the sampling rate should double the oversampling factor.
        collection.sampling_rate = 0.5
        transform.Convolve(collection, 'RT', output='rt_mock')
        mocked.compute_regressor.assert_called_with(
            mock.ANY, 'spm', mock.ANY, fir_delays=None, min_onset=0,
            oversampling=2.0)


def test_rename(collection):
dense_rt = collection.variables['RT'].to_dense(10)
assert len(dense_rt.values) == 230400
Expand All @@ -54,13 +97,14 @@ def test_product(collection):

def test_sum(collection):
c = collection
transform.Sum(collection, variables=['parametric gain', 'gain'],
output='sum')
transform.Sum(
collection, variables=['parametric gain', 'gain'], output='sum')
res = c['sum'].values
target = c['parametric gain'].values + c['gain'].values
assert np.array_equal(res, target)
transform.Sum(collection, variables=['parametric gain', 'gain'],
output='sum', weights=[2, 2])
transform.Sum(
collection,
variables=['parametric gain', 'gain'], output='sum', weights=[2, 2])
assert np.array_equal(c['sum'].values, target * 2)
with pytest.raises(ValueError):
transform.Sum(collection, variables=['parametric gain', 'gain'],
Expand Down
39 changes: 29 additions & 10 deletions bids/analysis/transformations/compute.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
'''
Transformations that primarily involve numerical computation on variables.
'''

from __future__ import division
import math
import numpy as np
import pandas as pd
from bids.utils import listify
Expand Down Expand Up @@ -33,12 +34,18 @@ def _transform(self, var, model='spm', derivative=False, dispersion=False,

model = model.lower()

df = var.to_df(entities=False)

if isinstance(var, SparseRunVariable):
sr = self.collection.sampling_rate
var = var.to_dense(sr)
sampling_rate = self.collection.sampling_rate
dur = var.get_duration()
resample_frames = np.linspace(
0, dur, int(math.ceil(dur * sampling_rate)), endpoint=False)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm wondering if we should center the sampled frames within the timeseries... e.g., suppose we have a 2-second dense timeseries, and we want to downsample to 2 Hz. Currently we would sample at 0, 0.5, 1, and 1.5. Probably we should do 0.25, 0.75, 1.25, and 1.75. Let's not change anything here, because if we were going to do this, we'd need to do it throughout the codebase for consistency. Mostly just making a mental note.


else:
resample_frames = df['onset'].values
sampling_rate = var.sampling_rate

df = var.to_df(entities=False)
onsets = df['onset'].values
vals = df[['onset', 'duration', 'amplitude']].values.T

if model in ['spm', 'glover']:
Expand All @@ -49,11 +56,23 @@ def _transform(self, var, model='spm', derivative=False, dispersion=False,
elif model != 'fir':
raise ValueError("Model must be one of 'spm', 'glover', or 'fir'.")

convolved = hrf.compute_regressor(vals, model, onsets,
fir_delays=fir_delays, min_onset=0)

return DenseRunVariable(name=var.name, values=convolved[0], run_info=var.run_info,
source=var.source, sampling_rate=var.sampling_rate)
# Minimum interval between event onsets/duration
# Used to compute oversampling factor to prevent information loss
unique_onsets = np.unique(np.sort(df.onset))
if len(unique_onsets) > 1:
min_interval = min(np.ediff1d(unique_onsets).min(),
df.duration.min())
oversampling = np.ceil(2*(1 / (min_interval * sampling_rate)))
else:
oversampling = 2
convolved = hrf.compute_regressor(
vals, model, resample_frames, fir_delays=fir_delays, min_onset=0,
oversampling=oversampling
)

return DenseRunVariable(
name=var.name, values=convolved[0], run_info=var.run_info,
source=var.source, sampling_rate=sampling_rate)


class Demean(Transformation):
Expand Down