Skip to content
This repository has been archived by the owner on Apr 2, 2020. It is now read-only.

First level residuals #410

Merged
merged 39 commits into from
Jan 13, 2020
Merged
Show file tree
Hide file tree
Changes from 10 commits
Commits
Show all changes
39 commits
Select commit Hold shift + click to select a range
2b7df92
Allow for storing residuals in First Level Models
Gilles86 Dec 7, 2018
0663e7a
import missing modules and update calls to functions
jdkent Nov 20, 2019
1b58ebb
fix flake8 errors
jdkent Nov 20, 2019
4909feb
remove @set_attr_on_read
jdkent Nov 20, 2019
acac09b
modify tests to reflect updated code base
jdkent Nov 20, 2019
4e7c897
fix typo and simplifiy loop
jdkent Nov 20, 2019
0b15ec6
respond to review comments:
jdkent Dec 2, 2019
5f260c9
change rsq to r_square
jdkent Dec 2, 2019
adb504c
change rsq to r_square in tests
jdkent Dec 2, 2019
9b434bc
fix function calls
jdkent Dec 2, 2019
d416d0b
Example of how to use for
Gilles86 Dec 18, 2019
65010cf
Made ValueError for storing model attributes more verbose.
Gilles86 Dec 18, 2019
0b5a170
Also include R-squared
Gilles86 Dec 18, 2019
740f3bb
Merge pull request #1 from Gilles86/first_level_residuals
jdkent Dec 18, 2019
3d0ed81
Merge branch 'master' of https://github.com/nistats/nistats into firs…
jdkent Dec 18, 2019
5b61be4
fix heading underlines in example
jdkent Dec 18, 2019
bf319c1
Merge branch 'master' of https://github.com/nistats/nistats into firs…
jdkent Dec 19, 2019
08acd3a
Merge branch 'first_level_residuals' of https://github.com/jdkent/nis…
jdkent Dec 19, 2019
fcf8320
fix grammar
jdkent Dec 19, 2019
a864919
fix code formatting and do not standardize
jdkent Dec 19, 2019
953d15b
Merge branch 'first_level_residuals' of https://github.com/jdkent/nis…
jdkent Dec 19, 2019
806f11d
change parameter timeseries to result_as_time_series
jdkent Dec 19, 2019
51002ea
Merge branch 'master' of https://github.com/nistats/nistats into firs…
kchawla-pi Jan 8, 2020
20e2201
Merge pull request #2 from kchawla-pi/first_level_residuals
jdkent Jan 8, 2020
a47586f
attempt to address @bthirion comments
jdkent Jan 8, 2020
cdf2161
split imports statements
jdkent Jan 8, 2020
b8c483d
always return list get_voxelwise_model_attribute_
jdkent Jan 8, 2020
72aec1d
change docstrings for output to always be a list
jdkent Jan 9, 2020
a245635
modify tests to treat output as list
jdkent Jan 9, 2020
afef0a9
make _get_voxelwise_model_attribute private and improve documentation
jdkent Jan 9, 2020
1cbd072
fix formatting of function call
jdkent Jan 9, 2020
1a8e0c5
add empty line back in
jdkent Jan 9, 2020
e801f8e
revert regression.py to master
jdkent Jan 9, 2020
bf67066
make result_as_time_series mandatory
jdkent Jan 9, 2020
b55c13b
add newlines to docs
jdkent Jan 9, 2020
999caf9
add newline to end of file
jdkent Jan 9, 2020
4b5d485
fix missing newline
jdkent Jan 9, 2020
78995d0
add James Kent to .mailmap
jdkent Jan 11, 2020
44b0e85
add entry for the new attributes to FirstLevelModel
jdkent Jan 11, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 91 additions & 4 deletions nistats/first_level_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
import numpy as np
import pandas as pd
from nibabel import Nifti1Image
from nibabel.onetime import setattr_on_read

from sklearn.base import (BaseEstimator,
clone,
Expand Down Expand Up @@ -117,14 +118,14 @@ def run_glm(Y, X, noise_model='ar1', bins=100, n_jobs=1, verbose=0):
acceptable_noise_models = ['ar1', 'ols']
if noise_model not in acceptable_noise_models:
raise ValueError(
"Acceptable noise models are {0}. You provided 'noise_model={1}'".\
format(acceptable_noise_models, noise_model))
"Acceptable noise models are {0}. You provided 'noise_model={1}'".
format(acceptable_noise_models, noise_model))

if Y.shape[0] != X.shape[0]:
raise ValueError(
'The number of rows of Y should match the number of rows of X.'
' You provided X with shape {0} and Y with shape {1}'.\
format(X.shape, Y.shape))
' You provided X with shape {0} and Y with shape {1}'.
format(X.shape, Y.shape))

# Create the model
ols_result = OLSModel(X).fit(Y)
Expand Down Expand Up @@ -309,6 +310,7 @@ def __init__(self, t_r=None, slice_time_ref=0., hrf_model='glover',
else:
raise ValueError('signal_scaling must be "False", "0", "1"'
' or "(0, 1)"')

self.noise_model = noise_model
self.verbose = verbose
self.n_jobs = n_jobs
Expand Down Expand Up @@ -583,6 +585,91 @@ def compute_contrast(self, contrast_def, stat_type=None,

return outputs if output_type == 'all' else output

def get_voxelwise_model_attribute_(self, attribute, timeseries=True):
"""Transform RegressionResults instances within a dictionary
(whose keys represent the autoregressive coefficient under the 'ar1'
noise model or only 0.0 under 'ols' noise_model and values are the
RegressionResults instances) into input nifti space.

Parameters
----------
attribute : str
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

attribute is defined within a fixed dictionary of values I guess ? This should be provided if the function is meant to be used publicly.

Copy link
Contributor Author

@jdkent jdkent Jan 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe this should really be a private method (I've changed it to be a private method), but I added additional documentation and an internal check to be explicit anyway.

This comment was marked as outdated.

This comment was marked as resolved.

an attribute of a RegressionResults instance
timeseries : bool, optional
whether the RegressionResult attribute has a value
per timepoint of the input nifti image.

Returns
-------
output : list or Nifti1Image
a list of Nifti1Images if FirstLevelModel is fit with
a list of Nifti1Images, or a single Nifti1Image otherwise.
"""
if self.minimize_memory:
raise ValueError('minimize_memory should be set to False to make residuals or predictions.')

if self.labels_ is None or self.results_ is None:
raise ValueError('The model has not been fit yet')

output = []

for design_matrix, labels, results in zip(self.design_matrices_, self.labels_, self.results_):

if timeseries:
voxelwise_attribute = np.zeros((design_matrix.shape[0], len(labels)))
else:
voxelwise_attribute = np.zeros((1, len(labels)))

for label_ in results:
label_mask = labels == label_
voxelwise_attribute[:, label_mask] = getattr(results[label_], attribute)

output.append(self.masker_.inverse_transform(voxelwise_attribute))

if len(output) == 1:
return output[0]
else:
return output

@setattr_on_read
def residuals(self):
"""Transform voxelwise residuals to the same shape
as the input Nifti1Image(s)

Returns
-------
output : list or Nifti1Image
a list of Nifti1Images if FirstLevelModel is fit with
a list of Nifti1Images, or a single Nifti1Image otherwise.
"""
return self.get_voxelwise_model_attribute_('resid')

@setattr_on_read
def predicted(self):
"""Transform voxelwise predicted values to the same shape
as the input Nifti1Image(s)

Returns
-------
output : list or Nifti1Image
a list of Nifti1Images if FirstLevelModel is fit with
a list of Nifti1Images, or a single Nifti1Image otherwise.
"""
return self.get_voxelwise_model_attribute_('predicted')

@setattr_on_read
def r_square(self):
"""Transform voxelwise r-squared values to the same shape
as the input Nifti1Image(s)

Returns
-------
output : list or Nifti1Image
a list of Nifti1Images if FirstLevelModel is fit with
a list of Nifti1Images, or a single Nifti1Image otherwise.
"""
return self.get_voxelwise_model_attribute_('r_square', timeseries=False)


@replace_parameters({'mask': 'mask_img'}, end_version='next')
def first_level_models_from_bids(
Expand Down
49 changes: 17 additions & 32 deletions nistats/regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,6 @@ class OLSModel(object):
df_resid : scalar
Degrees of freedom of the residuals. Number of observations less the
rank of the design.

df_model : scalar
Degrees of freedome of the model. The rank of the design.
"""
Expand Down Expand Up @@ -276,6 +275,7 @@ def __init__(self, theta, Y, model, wY, wresid, cov=None, dispersion=1.,
dispersion, nuisance)
self.wY = wY
self.wresid = wresid
self.wdesign = model.wdesign

@setattr_on_read
def resid(self):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bthirion We should deprecate this and replace it with residuals in a separate PR, and others like it.
Or I should ask JD Kent to revert the changes I asked him to make and rename the resid mehotd in this PR back to resid so it stay consistent.
I like the former idea, renaming all uses of resid to residuals.
Does Nilearn have resid or residuals?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, let's do it in a sperate PR.
I'm not aware of resid in nilear? We should go for residuals

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Don't think Nilearn has any resid method.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

so we will keep the method as resid for this pull request, I will not change this

Expand Down Expand Up @@ -310,7 +310,7 @@ def predicted(self):
"""
beta = self.theta
# the LikelihoodModelResults has parameters named 'theta'
X = self.model.design
X = self.wdesign
return np.dot(X, beta)

@setattr_on_read
Expand All @@ -319,13 +319,20 @@ def SSE(self):
"""
return (self.wresid ** 2).sum(0)

@setattr_on_read
def r_square(self):
"""Proportion of explained variance.
If not from an OLS model this is "pseudo"-R2.
"""
return np.var(self.predicted, 0) / np.var(self.wY, 0)

@setattr_on_read
def MSE(self):
""" Mean square (error) """
return self.SSE / self.df_resid


class SimpleRegressionResults(LikelihoodModelResults):
class SimpleRegressionResults(RegressionResults):
"""This class contains only information of the model fit necessary
for contast computation.

Expand All @@ -347,41 +354,19 @@ def __init__(self, results):
# put this as a parameter of LikelihoodModel
self.df_resid = self.df_total - self.df_model

self.wdesign = results.model.wdesign

def logL(self, Y):
"""
The maximized log-likelihood
"""
raise ValueError('can not use this method for simple results')
raise ValueError('minimize_memory should be set to False to make residuals or predictions.')

def resid(self, Y):
def resid(self):
"""
Residuals from the fit.
"""
return Y - self.predicted

def norm_resid(self, Y):
"""
Residuals, normalized to have unit length.

Notes
-----
Is this supposed to return "stanardized residuals,"
residuals standardized
to have mean zero and approximately unit variance?
raise ValueError('minimize_memory should be set to False to make residuals or predictions.')
Copy link
Collaborator

@kchawla-pi kchawla-pi Jan 9, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@bthirion @jdkent @Gilles86
I'm still confused about the suitability of this, and other such methods. It feels like logL(), resid() and norm_resid() are in effect, traps; waiting for unsuspecting users to call them and watch their programs fail.

If these methods do not have anything to say, then we should simply remove all of them, or convert this into a message rather than an error.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Agreed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I believe the motivation for this was to provide a hint to the user on how to get residuals from their model instead of an attribute error, but I see this check is handled here. I'm fine with removing those methods.


d_i = e_i / sqrt(MS_E)

Where MS_E = SSE / (n - k)

See: Montgomery and Peck 3.2.1 p. 68
Davidson and MacKinnon 15.2 p 662
"""
return self.resid(Y) * positive_reciprocal(np.sqrt(self.dispersion))

def predicted(self):
""" Return linear predictor values from a design matrix.
"""
beta = self.theta
# the LikelihoodModelResults has parameters named 'theta'
X = self.model.design
return np.dot(X, beta)
def norm_resid(self):
raise ValueError('minimize_memory should be set to False to make residuals or predictions.')
44 changes: 44 additions & 0 deletions nistats/tests/test_first_level_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,9 @@
assert_true,
)
from numpy.testing import (assert_almost_equal,
assert_array_almost_equal,
assert_array_equal,
assert_array_less,
)
from nibabel.tmpdirs import InTemporaryDirectory

Expand Down Expand Up @@ -523,3 +525,45 @@ def test_param_mask_deprecation_first_level_models_from_bids():
for param_warning_ in raised_param_deprecation_warnings:
assert str(param_warning_.message) == deprecation_msg
assert param_warning_.category is DeprecationWarning


def test_first_level_model_residuals():
shapes, rk = [(10, 10, 10, 100)], 3
mask, fmri_data, design_matrices = _generate_fake_fmri_data(shapes, rk)

for i in range(len(design_matrices)):
design_matrices[i].iloc[:, 0] = 1

model = FirstLevelModel(mask=mask, minimize_memory=False,
noise_model='ols')
model.fit(fmri_data, design_matrices=design_matrices)

resid = model.residuals
mean_resids = model.masker_.transform(resid).mean(0)
assert_array_almost_equal(mean_resids, 0)


def test_first_level_model_predictions_r_square():
shapes, rk = [(10, 10, 10, 25)], 3
mask, fmri_data, design_matrices = _generate_fake_fmri_data(shapes, rk)

for i in range(len(design_matrices)):
design_matrices[i].iloc[:, 0] = 1

model = FirstLevelModel(mask=mask,
signal_scaling=False,
minimize_memory=False,
noise_model='ols')
model.fit(fmri_data, design_matrices=design_matrices)

pred = model.predicted
data = fmri_data[0]
r_square_3d = model.r_square

y_predicted = model.masker_.transform(pred)
y_measured = model.masker_.transform(data)

assert_almost_equal(np.mean(y_predicted - y_measured), 0)

r_square_2d = model.masker_.transform(r_square_3d)
assert_array_less(0., r_square_2d)
34 changes: 33 additions & 1 deletion nistats/tests/test_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,8 +6,10 @@

from nose.tools import assert_equal

from nistats.regression import OLSModel, ARModel
from nose.tools import assert_equal, assert_true, assert_almost_equal
from numpy.testing import assert_array_almost_equal, assert_array_equal

from nistats.regression import OLSModel, ARModel

RNG = np.random.RandomState(20110902)
X = RNG.standard_normal((40, 10))
Expand All @@ -18,12 +20,42 @@ def test_OLS():
model = OLSModel(design=X)
results = model.fit(Y)
assert_equal(results.df_resid, 30)
assert_equal(results.resid.shape[0], 40)
assert_equal(results.predicted.shape[0], 40)


def test_AR():
model = ARModel(design=X, rho=0.4)
results = model.fit(Y)
assert_equal(results.df_resid, 30)
assert_equal(results.resid.shape[0], 40)
assert_equal(results.predicted.shape[0], 40)


def test_residuals():
Xintercept = X.copy()

# If design matrix contains an intercept, the
# mean of the residuals should be 0 (short of
# some numerical rounding errors)
Xintercept[:, 0] = 1
model = OLSModel(design=Xintercept)
results = model.fit(Y)
assert_almost_equal(results.resid.mean(), 0)


def test_predicted_r_square():
Xshort = X.copy()[:10, :]
Yshort = Y.copy()[:10]

# Signal of 10 elements should be completely
# predicted by 10 predictors (short of some numerical
# rounding errors)
model = OLSModel(design=Xshort)
results = model.fit(Yshort)
assert_almost_equal(results.resid.sum(), 0)
assert_array_almost_equal(results.predicted, Yshort)
assert_almost_equal(results.r_square, 1.0)


def test_OLS_degenerate():
Expand Down