From f532707bbc6c5b8d1ccb0fe7cd2bcdc951719639 Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 16:40:01 -0400 Subject: [PATCH 01/20] Code base reformat with black. Added github workflow for ci. Added vscode settings and extensions. --- .gitignore | 4 +- .vscode/extensions.json | 6 + .vscode/settings.json | 11 + nltools/__init__.py | 32 +- nltools/analysis.py | 189 ++- nltools/cross_validation.py | 43 +- nltools/data/__init__.py | 12 +- nltools/data/adjacency.py | 1224 +++++++++------- nltools/data/brain_data.py | 1415 +++++++++++-------- nltools/data/design_matrix.py | 334 +++-- nltools/datasets.py | 139 +- nltools/external/__init__.py | 12 +- nltools/external/hrf.py | 104 +- nltools/external/srm.py | 162 ++- nltools/mask.py | 113 +- nltools/plotting.py | 203 +-- nltools/prefs.py | 69 +- nltools/simulator.py | 419 ++++-- nltools/stats.py | 1787 +++++++++++++----------- nltools/tests/conftest.py | 72 +- nltools/tests/test_adjacency.py | 299 ++-- nltools/tests/test_analysis.py | 23 +- nltools/tests/test_brain_data.py | 588 +++++--- nltools/tests/test_cross_validation.py | 14 +- nltools/tests/test_design_matrix.py | 110 +- nltools/tests/test_file_reader.py | 2 +- nltools/tests/test_groupby.py | 6 +- nltools/tests/test_mask.py | 18 +- nltools/tests/test_simulator.py | 37 +- nltools/tests/test_stats.py | 594 +++++--- nltools/tests/test_utils.py | 7 +- nltools/utils.py | 228 +-- nltools/version.py | 2 +- requirements-dev.txt | 6 + 34 files changed, 5057 insertions(+), 3227 deletions(-) create mode 100644 .vscode/extensions.json create mode 100644 .vscode/settings.json create mode 100644 requirements-dev.txt diff --git a/.gitignore b/.gitignore index 1f92f3c7..c4a868f0 100644 --- a/.gitignore +++ b/.gitignore @@ -11,7 +11,7 @@ dist/ .cache/ htmlcov .pytest_cache/* -dev/ +dev/ # Logs and databases # ###################### *.log @@ -46,3 +46,5 @@ htmlcov/ ##### .tox .tox/* + +.pytest_cache diff --git a/.vscode/extensions.json b/.vscode/extensions.json new file mode 100644 index 00000000..d987703f --- /dev/null +++ b/.vscode/extensions.json @@ -0,0 +1,6 @@ +{ + "recommendations": [ + "kevinrose.vsc-python-indent", + "njpwerner.autodocstring" + ] +} \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..5657220a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,11 @@ +{ + "editor.formatOnSave": true, + "python.testing.pytestEnabled": true, + "python.testing.unittestEnabled": false, + "python.testing.nosetestsEnabled": false, + "python.testing.pytestArgs": [ + "nltools" + ], + "python.testing.autoTestDiscoverOnSaveEnabled": true, + "editor.insertSpaces": true +} \ No newline at end of file diff --git a/nltools/__init__.py b/nltools/__init__.py index 5420b389..bc327eaa 100644 --- a/nltools/__init__.py +++ b/nltools/__init__.py @@ -1,25 +1,23 @@ from __future__ import absolute_import -__all__ = ['data', - 'datasets', - 'analysis', - 'cross_validation', - 'plotting', - 'stats', - 'utils', - 'file_reader', - 'mask', - 'prefs', - 'external', - '__version__'] +__all__ = [ + "data", + "datasets", + "analysis", + "cross_validation", + "plotting", + "stats", + "utils", + "file_reader", + "mask", + "prefs", + "external", + "__version__", +] from .analysis import Roc from .cross_validation import set_cv -from .data import (Brain_Data, - Adjacency, - Groupby, - Design_Matrix, - Design_Matrix_Series) +from .data import Brain_Data, Adjacency, Groupby, Design_Matrix, Design_Matrix_Series from .simulator import Simulator from .prefs 
import MNI_Template, resolve_mni_path from .version import __version__ diff --git a/nltools/analysis.py b/nltools/analysis.py index 55931e64..5adc8cb3 100644 --- a/nltools/analysis.py +++ b/nltools/analysis.py @@ -1,13 +1,13 @@ from __future__ import division -''' +""" NeuroLearn Analysis Tools ========================= These tools provide the ability to quickly run machine-learning analyses on imaging data -''' +""" -__all__ = ['Roc'] +__all__ = ["Roc"] __author__ = ["Luke Chang"] __license__ = "MIT" @@ -21,7 +21,7 @@ class Roc(object): - """ Roc Class + """Roc Class The Roc class is based on Tor Wager's Matlab roc_plot.m function and allows a user to easily run different types of receiver operator @@ -38,19 +38,28 @@ class Roc(object): """ - def __init__(self, input_values=None, binary_outcome=None, - threshold_type='optimal_overall', forced_choice=None, **kwargs): + def __init__( + self, + input_values=None, + binary_outcome=None, + threshold_type="optimal_overall", + forced_choice=None, + **kwargs + ): if len(input_values) != len(binary_outcome): - raise ValueError("Data Problem: input_value and binary_outcome" - "are different lengths.") + raise ValueError( + "Data Problem: input_value and binary_outcome" "are different lengths." + ) if not any(binary_outcome): raise ValueError("Data Problem: binary_outcome may not be boolean") - thr_type = ['optimal_overall', 'optimal_balanced', 'minimum_sdt_bias'] + thr_type = ["optimal_overall", "optimal_balanced", "minimum_sdt_bias"] if threshold_type not in thr_type: - raise ValueError("threshold_type must be ['optimal_overall', " - "'optimal_balanced','minimum_sdt_bias']") + raise ValueError( + "threshold_type must be ['optimal_overall', " + "'optimal_balanced','minimum_sdt_bias']" + ) self.input_values = deepcopy(input_values) self.binary_outcome = deepcopy(binary_outcome) @@ -61,10 +70,16 @@ def __init__(self, input_values=None, binary_outcome=None, else: self.binary_outcome = deepcopy(binary_outcome) - def calculate(self, input_values=None, binary_outcome=None, - criterion_values=None, threshold_type='optimal_overall', - forced_choice=None, balanced_acc=False): - """ Calculate Receiver Operating Characteristic plot (ROC) for + def calculate( + self, + input_values=None, + binary_outcome=None, + criterion_values=None, + threshold_type="optimal_overall", + forced_choice=None, + balanced_acc=False, + ): + """Calculate Receiver Operating Characteristic plot (ROC) for single-interval classification. Args: @@ -95,25 +110,58 @@ def calculate(self, input_values=None, binary_outcome=None, if criterion_values is not None: self.criterion_values = deepcopy(criterion_values) else: - self.criterion_values = np.linspace(np.min(self.input_values.squeeze()), - np.max(self.input_values.squeeze()), - num=50*len(self.binary_outcome)) + self.criterion_values = np.linspace( + np.min(self.input_values.squeeze()), + np.max(self.input_values.squeeze()), + num=50 * len(self.binary_outcome), + ) if forced_choice is not None: self.forced_choice = deepcopy(forced_choice) if self.forced_choice is not None: sub_idx = np.unique(self.forced_choice) - if len(sub_idx) != len(self.binary_outcome)/2: - raise ValueError("Make sure that subject ids are correct for 'forced_choice'.") - if len(set(sub_idx).union(set(np.array(self.forced_choice)[self.binary_outcome]))) != len(sub_idx): + if len(sub_idx) != len(self.binary_outcome) / 2: + raise ValueError( + "Make sure that subject ids are correct for 'forced_choice'." 
+ ) + if len( + set(sub_idx).union( + set(np.array(self.forced_choice)[self.binary_outcome]) + ) + ) != len(sub_idx): raise ValueError("Issue with forced_choice subject labels.") - if len(set(sub_idx).union(set(np.array(self.forced_choice)[~self.binary_outcome]))) != len(sub_idx): + if len( + set(sub_idx).union( + set(np.array(self.forced_choice)[~self.binary_outcome]) + ) + ) != len(sub_idx): raise ValueError("Issue with forced_choice subject labels.") for sub in sub_idx: - sub_mn = (self.input_values[(self.forced_choice == sub) & (self.binary_outcome)]+self.input_values[(self.forced_choice == sub) & (~self.binary_outcome)])[0]/2 - self.input_values[(self.forced_choice == sub) & (self.binary_outcome)] = self.input_values[(self.forced_choice == sub) & (self.binary_outcome)][0] - sub_mn - self.input_values[(self.forced_choice == sub) & (~self.binary_outcome)] = self.input_values[(self.forced_choice == sub) & (~self.binary_outcome)][0] - sub_mn + sub_mn = ( + self.input_values[ + (self.forced_choice == sub) & (self.binary_outcome) + ] + + self.input_values[ + (self.forced_choice == sub) & (~self.binary_outcome) + ] + )[0] / 2 + self.input_values[ + (self.forced_choice == sub) & (self.binary_outcome) + ] = ( + self.input_values[ + (self.forced_choice == sub) & (self.binary_outcome) + ][0] + - sub_mn + ) + self.input_values[ + (self.forced_choice == sub) & (~self.binary_outcome) + ] = ( + self.input_values[ + (self.forced_choice == sub) & (~self.binary_outcome) + ][0] + - sub_mn + ) self.class_thr = 0 # Calculate true positive and false positive rate @@ -121,8 +169,10 @@ def calculate(self, input_values=None, binary_outcome=None, self.fpr = np.zeros(self.criterion_values.shape) for i, x in enumerate(self.criterion_values): wh = self.input_values >= x - self.tpr[i] = np.sum(wh[self.binary_outcome])/np.sum(self.binary_outcome) - self.fpr[i] = np.sum(wh[~self.binary_outcome])/np.sum(~self.binary_outcome) + self.tpr[i] = np.sum(wh[self.binary_outcome]) / np.sum(self.binary_outcome) + self.fpr[i] = np.sum(wh[~self.binary_outcome]) / np.sum( + ~self.binary_outcome + ) self.n_true = np.sum(self.binary_outcome) self.n_false = np.sum(~self.binary_outcome) self.auc = auc(self.fpr, self.tpr) @@ -130,28 +180,44 @@ def calculate(self, input_values=None, binary_outcome=None, # Get criterion threshold if self.forced_choice is None: self.threshold_type = threshold_type - if threshold_type == 'optimal_balanced': - mn = (self.tpr+self.fpr)/2 + if threshold_type == "optimal_balanced": + mn = (self.tpr + self.fpr) / 2 self.class_thr = self.criterion_values[np.argmax(mn)] - elif threshold_type == 'optimal_overall': - n_corr_t = self.tpr*self.n_true - n_corr_f = (1 - self.fpr)*self.n_false - sm = (n_corr_t + n_corr_f) + elif threshold_type == "optimal_overall": + n_corr_t = self.tpr * self.n_true + n_corr_f = (1 - self.fpr) * self.n_false + sm = n_corr_t + n_corr_f self.class_thr = self.criterion_values[np.argmax(sm)] - elif threshold_type == 'minimum_sdt_bias': + elif threshold_type == "minimum_sdt_bias": # Calculate MacMillan and Creelman 2005 Response Bias (c_bias) - c_bias = (norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.tpr))) + norm.ppf(np.maximum(.0001, np.minimum(0.9999, self.fpr)))) / float(2) + c_bias = ( + norm.ppf(np.maximum(0.0001, np.minimum(0.9999, self.tpr))) + + norm.ppf(np.maximum(0.0001, np.minimum(0.9999, self.fpr))) + ) / float(2) self.class_thr = self.criterion_values[np.argmin(abs(c_bias))] # Calculate output - self.false_positive = (self.input_values >= self.class_thr) & 
(~self.binary_outcome) - self.false_negative = (self.input_values < self.class_thr) & (self.binary_outcome) + self.false_positive = (self.input_values >= self.class_thr) & ( + ~self.binary_outcome + ) + self.false_negative = (self.input_values < self.class_thr) & ( + self.binary_outcome + ) self.misclass = (self.false_negative) | (self.false_positive) self.true_positive = (self.binary_outcome) & (~self.misclass) self.true_negative = (~self.binary_outcome) & (~self.misclass) - self.sensitivity = np.sum(self.input_values[self.binary_outcome] >= self.class_thr)/self.n_true - self.specificity = 1 - np.sum(self.input_values[~self.binary_outcome] >= self.class_thr)/self.n_false - self.ppv = np.sum(self.true_positive)/(np.sum(self.true_positive) + np.sum(self.false_positive)) + self.sensitivity = ( + np.sum(self.input_values[self.binary_outcome] >= self.class_thr) + / self.n_true + ) + self.specificity = ( + 1 + - np.sum(self.input_values[~self.binary_outcome] >= self.class_thr) + / self.n_false + ) + self.ppv = np.sum(self.true_positive) / ( + np.sum(self.true_positive) + np.sum(self.false_positive) + ) if self.forced_choice is not None: self.true_positive = self.true_positive[self.binary_outcome] self.true_negative = self.true_negative[~self.binary_outcome] @@ -161,17 +227,21 @@ def calculate(self, input_values=None, binary_outcome=None, # Calculate Accuracy if balanced_acc: - self.accuracy = np.mean([self.sensitivity, self.specificity]) # See Brodersen, Ong, Stephan, Buhmann (2010) + self.accuracy = np.mean( + [self.sensitivity, self.specificity] + ) # See Brodersen, Ong, Stephan, Buhmann (2010) else: self.accuracy = 1 - np.mean(self.misclass) # Calculate p-Value using binomial test (can add hierarchical version of binomial test) self.n = len(self.misclass) - self.accuracy_p = binom_test(int(np.sum(~self.misclass)), self.n, p=.5) - self.accuracy_se = np.sqrt(np.mean(~self.misclass) * (np.mean(~self.misclass)) / self.n) + self.accuracy_p = binom_test(int(np.sum(~self.misclass)), self.n, p=0.5) + self.accuracy_se = np.sqrt( + np.mean(~self.misclass) * (np.mean(~self.misclass)) / self.n + ) - def plot(self, plot_method='gaussian', balanced_acc=False, **kwargs): - """ Create ROC Plot + def plot(self, plot_method="gaussian", balanced_acc=False, **kwargs): + """Create ROC Plot Create a specific kind of ROC curve plot, based on input values along a continuous distribution and a binary outcome variable (logical) @@ -189,12 +259,19 @@ def plot(self, plot_method='gaussian', balanced_acc=False, **kwargs): self.calculate(balanced_acc=balanced_acc) # Calculate ROC parameters - if plot_method == 'gaussian': + if plot_method == "gaussian": if self.forced_choice is not None: sub_idx = np.unique(self.forced_choice) diff_scores = [] for sub in sub_idx: - diff_scores.append(self.input_values[(self.forced_choice == sub) & (self.binary_outcome)][0] - self.input_values[(self.forced_choice == sub) & (~self.binary_outcome)][0]) + diff_scores.append( + self.input_values[ + (self.forced_choice == sub) & (self.binary_outcome) + ][0] + - self.input_values[ + (self.forced_choice == sub) & (~self.binary_outcome) + ][0] + ) diff_scores = np.array(diff_scores) mn_diff = np.mean(diff_scores) d = mn_diff / np.std(diff_scores) @@ -207,7 +284,7 @@ def plot(self, plot_method='gaussian', balanced_acc=False, **kwargs): self.ppv = self.sensitivity / (self.sensitivity + 1 - self.specificity) self.auc = norm.cdf(d_a_model / np.sqrt(2)) - x = np.arange(-3, 3, .1) + x = np.arange(-3, 3, 0.1) self.tpr_smooth = 1 - norm.cdf(x, d, 1) 
self.fpr_smooth = 1 - norm.cdf(x, -d, 1) else: @@ -215,19 +292,21 @@ def plot(self, plot_method='gaussian', balanced_acc=False, **kwargs): mn_false = np.mean(self.input_values[~self.binary_outcome]) var_true = np.var(self.input_values[self.binary_outcome]) var_false = np.var(self.input_values[~self.binary_outcome]) - pooled_sd = np.sqrt((var_true*(self.n_true - 1))/(self.n_true + self.n_false - 2)) - d = (mn_true - mn_false)/pooled_sd - z_true = mn_true/pooled_sd - z_false = mn_false/pooled_sd - - x = np.arange(z_false-3, z_true+3, .1) + pooled_sd = np.sqrt( + (var_true * (self.n_true - 1)) / (self.n_true + self.n_false - 2) + ) + d = (mn_true - mn_false) / pooled_sd + z_true = mn_true / pooled_sd + z_false = mn_false / pooled_sd + + x = np.arange(z_false - 3, z_true + 3, 0.1) self.tpr_smooth = 1 - (norm.cdf(x, z_true, 1)) self.fpr_smooth = 1 - (norm.cdf(x, z_false, 1)) self.aucn = auc(self.fpr_smooth, self.tpr_smooth) fig = roc_plot(self.fpr_smooth, self.tpr_smooth) - elif plot_method == 'observed': + elif plot_method == "observed": fig = roc_plot(self.fpr, self.tpr) else: raise ValueError("plot_method must be 'gaussian' or 'observed'") diff --git a/nltools/cross_validation.py b/nltools/cross_validation.py index b7197988..5952498d 100644 --- a/nltools/cross_validation.py +++ b/nltools/cross_validation.py @@ -1,16 +1,15 @@ from __future__ import division -''' +""" Cross-Validation Data Classes ============================= Scikit-learn compatible classes for performing various types of cross-validation -''' +""" -__all__ = ['KFoldStratified', - 'set_cv'] +__all__ = ["KFoldStratified", "set_cv"] __author__ = ["Luke Chang"] __license__ = "MIT" @@ -47,7 +46,7 @@ def __init__(self, n_splits=3, shuffle=False, random_state=None): def _make_test_folds(self, X, y=None, groups=None): y = pd.DataFrame(y) y_sort = y.sort_values(0) - test_folds = np.nan*np.ones(len(y_sort)) + test_folds = np.nan * np.ones(len(y_sort)) for k in range(self.n_splits): test_idx = y_sort.index[np.arange(k, len(y_sort), self.n_splits)] test_folds[y_sort.iloc[test_idx].index] = k @@ -83,7 +82,7 @@ def split(self, X, y, groups=None): def set_cv(Y=None, cv_dict=None, return_generator=True): - """ Helper function to create a sci-kit learn compatible cv object using + """Helper function to create a sci-kit learn compatible cv object using common parameters for prediction analyses. 
Args: @@ -97,35 +96,43 @@ def set_cv(Y=None, cv_dict=None, return_generator=True): Returns: cv: a scikit-learn model-selection generator - """ + """ if isinstance(cv_dict, dict): - if cv_dict['type'] == 'kfolds': - if 'subject_id' in cv_dict: # Hold out subjects within each fold + if cv_dict["type"] == "kfolds": + if "subject_id" in cv_dict: # Hold out subjects within each fold from sklearn.model_selection import GroupKFold - cv_inst = GroupKFold(n_splits=cv_dict['n_folds']) - cv = cv_inst.split(X=np.zeros(len(Y)), y=Y, groups=cv_dict['subject_id']) - elif 'stratified' in cv_dict: # Stratified K-Folds Continuous + + cv_inst = GroupKFold(n_splits=cv_dict["n_folds"]) + cv = cv_inst.split( + X=np.zeros(len(Y)), y=Y, groups=cv_dict["subject_id"] + ) + elif "stratified" in cv_dict: # Stratified K-Folds Continuous from nltools.cross_validation import KFoldStratified - cv_inst = KFoldStratified(n_splits=cv_dict['n_folds']) + + cv_inst = KFoldStratified(n_splits=cv_dict["n_folds"]) cv = cv_inst.split(X=np.zeros(len(Y)), y=Y) else: # Normal K-Folds from sklearn.model_selection import KFold - cv_inst = KFold(n_splits=cv_dict['n_folds']) + + cv_inst = KFold(n_splits=cv_dict["n_folds"]) cv = cv_inst.split(X=np.zeros(len(Y)), y=Y) - elif cv_dict['type'] == 'loso': # Leave One Subject Out + elif cv_dict["type"] == "loso": # Leave One Subject Out from sklearn.model_selection import LeaveOneGroupOut + cv_inst = LeaveOneGroupOut() - cv = cv_inst.split(X=np.zeros(len(Y)), y=Y, groups=cv_dict['subject_id']) + cv = cv_inst.split(X=np.zeros(len(Y)), y=Y, groups=cv_dict["subject_id"]) else: - raise ValueError("""Make sure you specify a dictionary of + raise ValueError( + """Make sure you specify a dictionary of {'type': 'kfolds', 'n_folds': n}, {'type': 'kfolds', 'n_folds': n, 'stratified': Y}, {'type': 'kfolds', 'n_folds': n, 'subject_id': holdout}, or {'type': 'loso', 'subject_id': holdout}, where n = number of folds, and subject = vector of subject ids that - corresponds to self.Y""") + corresponds to self.Y""" + ) else: raise ValueError("Make sure 'cv_dict' is a dictionary.") if return_generator: diff --git a/nltools/data/__init__.py b/nltools/data/__init__.py index cd9de10c..c77f22b5 100644 --- a/nltools/data/__init__.py +++ b/nltools/data/__init__.py @@ -6,8 +6,10 @@ from .adjacency import Adjacency from .design_matrix import Design_Matrix, Design_Matrix_Series -__all__ = ['Brain_Data', - 'Adjacency', - 'Groupby', - 'Design_Matrix', - 'Design_Matrix_Series'] +__all__ = [ + "Brain_Data", + "Adjacency", + "Groupby", + "Design_Matrix", + "Design_Matrix_Series", +] diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py index 310f3c9c..1f9c76f7 100644 --- a/nltools/data/adjacency.py +++ b/nltools/data/adjacency.py @@ -1,8 +1,8 @@ from __future__ import division -''' +""" This data class is for working with similarity/dissimilarity matrices -''' +""" __author__ = ["Luke Chang"] __license__ = "MIT" @@ -21,34 +21,37 @@ import scipy.stats as stats import seaborn as sns import matplotlib.pyplot as plt -from nltools.stats import (correlation_permutation, - one_sample_permutation, - two_sample_permutation, - summarize_bootstrap, - matrix_permutation, - fisher_r_to_z, - _calc_pvalue, - _bootstrap_isc) +from nltools.stats import ( + correlation_permutation, + one_sample_permutation, + two_sample_permutation, + summarize_bootstrap, + matrix_permutation, + fisher_r_to_z, + _calc_pvalue, + _bootstrap_isc, +) from nltools.stats import regress as regression -from nltools.plotting import (plot_stacked_adjacency, 
- plot_silhouette) -from nltools.utils import (all_same, - attempt_to_import, - concatenate, - _bootstrap_apply_func, - _df_meta_to_arr) +from nltools.plotting import plot_stacked_adjacency, plot_silhouette +from nltools.utils import ( + all_same, + attempt_to_import, + concatenate, + _bootstrap_apply_func, + _df_meta_to_arr, +) from .design_matrix import Design_Matrix from joblib import Parallel, delayed # Optional dependencies -nx = attempt_to_import('networkx', 'nx') +nx = attempt_to_import("networkx", "nx") MAX_INT = np.iinfo(np.int32).max class Adjacency(object): - ''' + """ Adjacency is a class to represent Adjacency matrices as a vector rather than a 2-dimensional matrix. This makes it easier to perform data manipulation and analyses. @@ -61,69 +64,94 @@ class Adjacency(object): Y: Pandas DataFrame of training labels **kwargs: Additional keyword arguments - ''' + """ def __init__(self, data=None, Y=None, matrix_type=None, labels=[], **kwargs): if matrix_type is not None and matrix_type.lower() not in [ - 'distance', - 'similarity', - 'directed', - 'distance_flat', - 'similarity_flat', - 'directed_flat', + "distance", + "similarity", + "directed", + "distance_flat", + "similarity_flat", + "directed_flat", ]: - raise ValueError("matrix_type must be [None,'distance', " - "'similarity','directed','distance_flat', " - "'similarity_flat','directed_flat']") + raise ValueError( + "matrix_type must be [None,'distance', " + "'similarity','directed','distance_flat', " + "'similarity_flat','directed_flat']" + ) if data is None: self.data = np.array([]) - self.matrix_type = 'empty' + self.matrix_type = "empty" self.is_single_matrix = np.nan self.issymmetric = np.nan elif isinstance(data, list): if isinstance(data[0], Adjacency): tmp = concatenate(data) - for item in ['data', 'matrix_type', 'Y', 'issymmetric']: + for item in ["data", "matrix_type", "Y", "issymmetric"]: setattr(self, item, getattr(tmp, item)) else: d_all = [] symmetric_all = [] matrix_type_all = [] for d in data: - data_tmp, issymmetric_tmp, matrix_type_tmp, _ = self._import_single_data(d, matrix_type=matrix_type) + ( + data_tmp, + issymmetric_tmp, + matrix_type_tmp, + _, + ) = self._import_single_data(d, matrix_type=matrix_type) d_all.append(data_tmp) symmetric_all.append(issymmetric_tmp) matrix_type_all.append(matrix_type_tmp) if not all_same(symmetric_all): - raise ValueError('Not all matrices are of the same ' - 'symmetric type.') + raise ValueError( + "Not all matrices are of the same " "symmetric type." 
+ ) if not all_same(matrix_type_all): - raise ValueError('Not all matrices are of the same matrix ' - 'type.') + raise ValueError("Not all matrices are of the same matrix " "type.") self.data = np.array(d_all) self.issymmetric = symmetric_all[0] self.matrix_type = matrix_type_all[0] self.is_single_matrix = False - elif isinstance(data, six.string_types) and (('.h5' in data) or ('.hdf5' in data)): + elif isinstance(data, six.string_types) and ( + (".h5" in data) or (".hdf5" in data) + ): f = dd.io.load(data) - self.data = f['data'] - self.Y = pd.DataFrame(f['Y'], columns=[e.decode('utf-8') if isinstance(e, bytes) else e for e in f['Y_columns']], index=[e.decode('utf-8') if isinstance(e, bytes) else e for e in f['Y_index']]) - self.matrix_type = f['matrix_type'] - self.is_single_matrix = f['is_single_matrix'] - self.issymmetric = f['issymmetric'] - self.labels = [e.decode('utf-8') if isinstance(e, bytes) else e for e in f['labels']] + self.data = f["data"] + self.Y = pd.DataFrame( + f["Y"], + columns=[ + e.decode("utf-8") if isinstance(e, bytes) else e + for e in f["Y_columns"] + ], + index=[ + e.decode("utf-8") if isinstance(e, bytes) else e + for e in f["Y_index"] + ], + ) + self.matrix_type = f["matrix_type"] + self.is_single_matrix = f["is_single_matrix"] + self.issymmetric = f["issymmetric"] + self.labels = [ + e.decode("utf-8") if isinstance(e, bytes) else e for e in f["labels"] + ] return else: - self.data, self.issymmetric, self.matrix_type, self.is_single_matrix = self._import_single_data(data, matrix_type=matrix_type) + ( + self.data, + self.issymmetric, + self.matrix_type, + self.is_single_matrix, + ) = self._import_single_data(data, matrix_type=matrix_type) if Y is not None: if isinstance(Y, six.string_types) and os.path.isfile(Y): Y = pd.read_csv(Y, header=None, index_col=None) if isinstance(Y, pd.DataFrame): if self.data.shape[0] != len(Y): - raise ValueError("Y does not match the correct size of " - "data") + raise ValueError("Y does not match the correct size of " "data") self.Y = Y else: raise ValueError("Make sure Y is a pandas data frame.") @@ -135,34 +163,44 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=[], **kwargs): raise ValueError("Make sure labels is a list or numpy array.") if self.is_single_matrix: if len(labels) != self.square_shape()[0]: - raise ValueError('Make sure the length of labels matches the shape of data.') + raise ValueError( + "Make sure the length of labels matches the shape of data." + ) self.labels = deepcopy(labels) else: if len(labels) != len(self): if len(labels) != self.square_shape()[0]: - raise ValueError('Make sure length of labels either ' - 'matches the number of Adjacency ' - 'matrices or the size of a single ' - 'matrix.') + raise ValueError( + "Make sure length of labels either " + "matches the number of Adjacency " + "matrices or the size of a single " + "matrix." + ) else: self.labels = list(labels) * len(self) else: - if np.all(np.array([len(x) for x in labels]) != self.square_shape()[0]): - raise ValueError("All lists of labels must be same length as shape of data.") + if np.all( + np.array([len(x) for x in labels]) != self.square_shape()[0] + ): + raise ValueError( + "All lists of labels must be same length as shape of data." 
+ ) self.labels = deepcopy(labels) else: self.labels = [] def __repr__(self): - return ("%s.%s(shape=%s, square_shape=%s, Y=%s, is_symmetric=%s," - "matrix_type=%s)") % ( - self.__class__.__module__, - self.__class__.__name__, - self.shape(), - self.square_shape(), - len(self.Y), - self.issymmetric, - self.matrix_type) + return ( + "%s.%s(shape=%s, square_shape=%s, Y=%s, is_symmetric=%s," "matrix_type=%s)" + ) % ( + self.__class__.__module__, + self.__class__.__name__, + self.shape(), + self.square_shape(), + len(self.Y), + self.issymmetric, + self.matrix_type, + ) def __getitem__(self, index): new = self.copy() @@ -192,11 +230,12 @@ def __add__(self, y): new.data = new.data + y elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." + ) new.data = new.data + y.data else: - raise ValueError('Can only add int, float, or Adjacency') + raise ValueError("Can only add int, float, or Adjacency") return new def __radd__(self, y): @@ -205,11 +244,12 @@ def __radd__(self, y): new.data = y + new.data elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." + ) new.data = y.data + new.data else: - raise ValueError('Can only add int, float, or Adjacency') + raise ValueError("Can only add int, float, or Adjacency") return new def __sub__(self, y): @@ -218,11 +258,12 @@ def __sub__(self, y): new.data = new.data - y elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." + ) new.data = new.data - y.data else: - raise ValueError('Can only subtract int, float, or Adjacency') + raise ValueError("Can only subtract int, float, or Adjacency") return new def __rsub__(self, y): @@ -231,11 +272,12 @@ def __rsub__(self, y): new.data = y - new.data elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') - new.data = y.data - new.data + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." + ) + new.data = y.data - new.data else: - raise ValueError('Can only subtract int, float, or Adjacency') + raise ValueError("Can only subtract int, float, or Adjacency") return new def __mul__(self, y): @@ -244,11 +286,12 @@ def __mul__(self, y): new.data = new.data * y elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." + ) new.data = np.multiply(new.data, y.data) else: - raise ValueError('Can only multiply int, float, or Adjacency') + raise ValueError("Can only multiply int, float, or Adjacency") return new def __rmul__(self, y): @@ -257,11 +300,12 @@ def __rmul__(self, y): new.data = y * new.data elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." 
+ ) new.data = np.multiply(y.data, new.data) else: - raise ValueError('Can only multiply int, float, or Adjacency') + raise ValueError("Can only multiply int, float, or Adjacency") return new def __truediv__(self, y): @@ -270,50 +314,50 @@ def __truediv__(self, y): new.data = new.data / y elif isinstance(y, Adjacency): if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Adjacency() instances need to be the " "same shape." + ) new.data = np.divide(new.data, y.data) else: - raise ValueError('Can only divide int, float, or Adjacency') + raise ValueError("Can only divide int, float, or Adjacency") return new @staticmethod def _test_is_single_matrix(data): """Static method because it belongs to the class, ie is only invoked via self.test_single_matrix or Adjacency.test_single_matrix and requires no self argument.""" return len(data.shape) == 1 - + def _import_single_data(self, data, matrix_type=None): - ''' Helper function to import single data matrix.''' + """ Helper function to import single data matrix.""" if isinstance(data, six.string_types): if os.path.isfile(data): data = pd.read_csv(data) else: - raise ValueError('Make sure you have specified a valid file ' - 'path.') + raise ValueError("Make sure you have specified a valid file " "path.") if matrix_type is not None: - if matrix_type.lower() == 'distance_flat': - matrix_type = 'distance' + if matrix_type.lower() == "distance_flat": + matrix_type = "distance" data = np.array(data) issymmetric = True is_single_matrix = self._test_is_single_matrix(data) - elif matrix_type.lower() == 'similarity_flat': - matrix_type = 'similarity' + elif matrix_type.lower() == "similarity_flat": + matrix_type = "similarity" data = np.array(data) issymmetric = True is_single_matrix = self._test_is_single_matrix(data) - elif matrix_type.lower() == 'directed_flat': - matrix_type = 'directed' + elif matrix_type.lower() == "directed_flat": + matrix_type = "directed" data = np.array(data).flatten() issymmetric = False is_single_matrix = self._test_is_single_matrix(data) - elif matrix_type.lower() in ['distance', 'similarity', 'directed']: + elif matrix_type.lower() in ["distance", "similarity", "directed"]: if data.shape[0] != data.shape[1]: - raise ValueError('Data matrix must be square') + raise ValueError("Data matrix must be square") data = np.array(data) matrix_type = matrix_type.lower() - if matrix_type in ['distance', 'similarity']: + if matrix_type in ["distance", "similarity"]: issymmetric = True data = data[np.triu_indices(data.shape[0], k=1)] else: @@ -328,9 +372,11 @@ def _import_single_data(self, data, matrix_type=None): try: data = squareform(data) except ValueError: - print('Data is not flattened upper triangle from ' - 'similarity/distance matrix or flattened directed ' - 'matrix.') + print( + "Data is not flattened upper triangle from " + "similarity/distance matrix or flattened directed " + "matrix." + ) is_single_matrix = True elif data.shape[0] == data.shape[1]: # Square Matrix is_single_matrix = True @@ -339,13 +385,18 @@ def _import_single_data(self, data, matrix_type=None): try: data = squareform(data_all[0, :]) except ValueError: - print('Data is not flattened upper triangle from multiple ' - 'similarity/distance matrices or flattened directed ' - 'matrices.') + print( + "Data is not flattened upper triangle from multiple " + "similarity/distance matrices or flattened directed " + "matrices." 
+ ) is_single_matrix = False # Test if matrix is symmetrical - if np.all(data[np.triu_indices(data.shape[0], k=1)] == data.T[np.triu_indices(data.shape[0], k=1)]): + if np.all( + data[np.triu_indices(data.shape[0], k=1)] + == data.T[np.triu_indices(data.shape[0], k=1)] + ): issymmetric = True else: issymmetric = False @@ -353,12 +404,12 @@ def _import_single_data(self, data, matrix_type=None): # Determine matrix type if issymmetric: if np.sum(np.diag(data)) == 0: - matrix_type = 'distance' + matrix_type = "distance" elif np.sum(np.diag(data)) == data.shape[0]: - matrix_type = 'similarity' + matrix_type = "similarity" data = data[np.triu_indices(data.shape[0], k=1)] else: - matrix_type = 'directed' + matrix_type = "directed" data = data.flatten() if not is_single_matrix: @@ -367,11 +418,11 @@ def _import_single_data(self, data, matrix_type=None): return (data, issymmetric, matrix_type, is_single_matrix) def isempty(self): - '''Check if Adjacency object is empty''' - return bool(self.matrix_type == 'empty') + """Check if Adjacency object is empty""" + return bool(self.matrix_type == "empty") def squareform(self): - '''Convert adjacency back to squareform''' + """Convert adjacency back to squareform""" if self.issymmetric: if self.is_single_matrix: return squareform(self.data) @@ -379,51 +430,66 @@ def squareform(self): return [squareform(x.data) for x in self] else: if self.is_single_matrix: - return self.data.reshape(int(np.sqrt(self.data.shape[0])), - int(np.sqrt(self.data.shape[0]))) + return self.data.reshape( + int(np.sqrt(self.data.shape[0])), int(np.sqrt(self.data.shape[0])) + ) else: - return [x.data.reshape(int(np.sqrt(x.data.shape[0])), - int(np.sqrt(x.data.shape[0]))) for x in self] + return [ + x.data.reshape( + int(np.sqrt(x.data.shape[0])), int(np.sqrt(x.data.shape[0])) + ) + for x in self + ] def plot(self, limit=3, axes=None, *args, **kwargs): - ''' Create Heatmap of Adjacency Matrix - - Can pass in any sns.heatmap argument + """Create Heatmap of Adjacency Matrix + + Can pass in any sns.heatmap argument - Args: - limit: (int) number of heatmaps to plot if object contains multiple adjacencies (default: 3) - axes: matplotlib axis handle - ''' + Args: + limit: (int) number of heatmaps to plot if object contains multiple adjacencies (default: 3) + axes: matplotlib axis handle + """ if self.is_single_matrix: if axes is None: _, axes = plt.subplots(nrows=1, figsize=(7, 5)) if self.labels: - sns.heatmap(self.squareform(), square=True, ax=axes, - xticklabels=self.labels, - yticklabels=self.labels, - *args, **kwargs) + sns.heatmap( + self.squareform(), + square=True, + ax=axes, + xticklabels=self.labels, + yticklabels=self.labels, + *args, + **kwargs, + ) else: - sns.heatmap(self.squareform(), square=True, ax=axes, - *args, **kwargs) + sns.heatmap(self.squareform(), square=True, ax=axes, *args, **kwargs) else: if axes is not None: print("axes is ignored when plotting multiple images") n_subs = np.minimum(len(self), limit) - _, a = plt.subplots(nrows=n_subs, figsize=(7, len(self)*5)) + _, a = plt.subplots(nrows=n_subs, figsize=(7, len(self) * 5)) for i in range(n_subs): if self.labels: - sns.heatmap(self[i].squareform(), square=True, - xticklabels=self.labels[i], - yticklabels=self.labels[i], - ax=a[i], *args, **kwargs) + sns.heatmap( + self[i].squareform(), + square=True, + xticklabels=self.labels[i], + yticklabels=self.labels[i], + ax=a[i], + *args, + **kwargs, + ) else: - sns.heatmap(self[i].squareform(), square=True, ax=a[i], - *args, **kwargs) + sns.heatmap( + 
self[i].squareform(), square=True, ax=a[i], *args, **kwargs + ) return def mean(self, axis=0): - ''' Calculate mean of Adjacency + """Calculate mean of Adjacency Args: axis: (int) calculate mean over features (0) or data (1). @@ -433,19 +499,21 @@ def mean(self, axis=0): mean: float if single, adjacency if axis=0, np.array if axis=1 and multiple - ''' + """ if self.is_single_matrix: return np.nanmean(self.data) else: if axis == 0: - return Adjacency(data=np.nanmean(self.data, axis=axis), - matrix_type=self.matrix_type + '_flat') + return Adjacency( + data=np.nanmean(self.data, axis=axis), + matrix_type=self.matrix_type + "_flat", + ) elif axis == 1: return np.nanmean(self.data, axis=axis) def std(self, axis=0): - ''' Calculate standard deviation of Adjacency + """Calculate standard deviation of Adjacency Args: axis: (int) calculate std over features (0) or data (1). @@ -455,19 +523,21 @@ def std(self, axis=0): std: float if single, adjacency if axis=0, np.array if axis=1 and multiple - ''' + """ if self.is_single_matrix: return np.nanstd(self.data) else: if axis == 0: - return Adjacency(data=np.nanstd(self.data, axis=axis), - matrix_type=self.matrix_type + '_flat') + return Adjacency( + data=np.nanstd(self.data, axis=axis), + matrix_type=self.matrix_type + "_flat", + ) elif axis == 1: return np.nanstd(self.data, axis=axis) def median(self, axis=0): - ''' Calculate median of Adjacency + """Calculate median of Adjacency Args: axis: (int) calculate median over features (0) or data (1). @@ -477,24 +547,26 @@ def median(self, axis=0): mean: float if single, adjacency if axis=0, np.array if axis=1 and multiple - ''' + """ if self.is_single_matrix: return np.nanmedian(self.data) else: if axis == 0: - return Adjacency(data=np.nanmedian(self.data, axis=axis), - matrix_type=self.matrix_type + '_flat') + return Adjacency( + data=np.nanmedian(self.data, axis=axis), + matrix_type=self.matrix_type + "_flat", + ) elif axis == 1: return np.nanmedian(self.data, axis=axis) def shape(self): - ''' Calculate shape of data. ''' + """ Calculate shape of data. """ return self.data.shape def square_shape(self): - ''' Calculate shape of squareform data. ''' - if self.matrix_type == 'empty': + """ Calculate shape of squareform data. """ + if self.matrix_type == "empty": return np.array([]) else: if self.is_single_matrix: @@ -503,11 +575,11 @@ def square_shape(self): return self[0].squareform().shape def copy(self): - ''' Create a copy of Adjacency object.''' + """ Create a copy of Adjacency object.""" return deepcopy(self) def append(self, data): - ''' Append data to Adjacency instance + """Append data to Adjacency instance Args: data: (Adjacency) Adjacency instance to append @@ -515,18 +587,17 @@ def append(self, data): Returns: out: (Adjacency) new appended Adjacency instance - ''' + """ if not isinstance(data, Adjacency): - raise ValueError('Make sure data is a Adjacency instance.') + raise ValueError("Make sure data is a Adjacency instance.") if self.isempty(): out = data.copy() else: out = self.copy() if self.square_shape() != data.square_shape(): - raise ValueError('Data is not the same shape as Adjacency ' - 'instance.') + raise ValueError("Data is not the same shape as Adjacency " "instance.") out.data = np.vstack([self.data, data.data]) out.is_single_matrix = False @@ -535,50 +606,66 @@ def append(self, data): return out - def write(self, file_name, method='long', **kwargs): - ''' Write out Adjacency object to csv file. 
+ def write(self, file_name, method="long", **kwargs): + """Write out Adjacency object to csv file. - Args: - file_name (str): name of file name to write - method (str): method to write out data ['long','square'] - kwargs: optional arguments to deepdish.io.save + Args: + file_name (str): name of file name to write + method (str): method to write out data ['long','square'] + kwargs: optional arguments to deepdish.io.save - ''' - if method not in ['long', 'square']: + """ + if method not in ["long", "square"]: raise ValueError('Make sure method is ["long","square"].') - if ('.h5' in file_name) or ('.hdf5' in file_name): - if method == 'square': - raise NotImplementedError('Saving as hdf5 does not support method="square"') + if (".h5" in file_name) or (".hdf5" in file_name): + if method == "square": + raise NotImplementedError( + 'Saving as hdf5 does not support method="square"' + ) y_columns, y_index = _df_meta_to_arr(self.Y) - dd.io.save(file_name, { - 'data': self.data, - 'Y': self.Y.values, - 'Y_columns': y_columns, - 'Y_index': y_index, - 'matrix_type': self.matrix_type, - 'labels': np.array(self.labels, dtype='S'), - 'is_single_matrix': self.is_single_matrix, - 'issymmetric': self.issymmetric - }, compression=kwargs.get('compression', 'blosc')) + dd.io.save( + file_name, + { + "data": self.data, + "Y": self.Y.values, + "Y_columns": y_columns, + "Y_index": y_index, + "matrix_type": self.matrix_type, + "labels": np.array(self.labels, dtype="S"), + "is_single_matrix": self.is_single_matrix, + "issymmetric": self.issymmetric, + }, + compression=kwargs.get("compression", "blosc"), + ) else: - if method == 'long': + if method == "long": pd.DataFrame(self.data).to_csv(file_name, index=None) - elif self.is_single_matrix and method == 'square': + elif self.is_single_matrix and method == "square": pd.DataFrame(self.squareform()).to_csv(file_name, index=None) - elif not self.is_single_matrix and method == 'square': - raise NotImplementedError('Need to decide how we should write out multiple matrices. As separate files?') - - def similarity(self, data, plot=False, perm_type='2d', n_permute=5000, - metric='spearman', ignore_diagonal=False, **kwargs): - ''' Calculate similarity between two Adjacency matrices. + elif not self.is_single_matrix and method == "square": + raise NotImplementedError( + "Need to decide how we should write out multiple matrices. As separate files?" + ) + + def similarity( + self, + data, + plot=False, + perm_type="2d", + n_permute=5000, + metric="spearman", + ignore_diagonal=False, + **kwargs, + ): + """Calculate similarity between two Adjacency matrices. Default is to use spearman correlation and permutation test. 
Args: data: Adjacency data, or 1-d array same size as self.data perm_type: (str) '1d','2d', or None metric: (str) 'spearman','pearson','kendall' ignore_diagonal: (bool) only applies to 'directed' Adjacency types using perm_type=None or perm_type='1d' - ''' + """ data1 = self.copy() if not isinstance(data, Adjacency): data2 = Adjacency(data) @@ -588,24 +675,28 @@ def similarity(self, data, plot=False, perm_type='2d', n_permute=5000, if perm_type is None: n_permute = 0 similarity_func = correlation_permutation - elif perm_type == '1d': + elif perm_type == "1d": similarity_func = correlation_permutation - elif perm_type == '2d': + elif perm_type == "2d": similarity_func = matrix_permutation else: raise ValueError("perm_type must be ['1d','2d', or None']") - def _convert_data_similarity(data, perm_type=None, ignore_diagonal=ignore_diagonal): - '''Helper function to convert data correctly''' - if (perm_type is None) or (perm_type == '1d'): + def _convert_data_similarity( + data, perm_type=None, ignore_diagonal=ignore_diagonal + ): + """Helper function to convert data correctly""" + if (perm_type is None) or (perm_type == "1d"): if ignore_diagonal and (not data.issymmetric): d = data.squareform() data = d[~np.eye(d.shape[0]).astype(bool)] else: data = data.data - elif perm_type == '2d': + elif perm_type == "2d": if not data.issymmetric: - raise TypeError(f"data must be symmetric to do {perm_type} permutation") + raise TypeError( + f"data must be symmetric to do {perm_type} permutation" + ) else: data = data.squareform() else: @@ -615,25 +706,31 @@ def _convert_data_similarity(data, perm_type=None, ignore_diagonal=ignore_diagon if self.is_single_matrix: if plot: plot_stacked_adjacency(self, data) - return similarity_func(_convert_data_similarity(data1, - perm_type=perm_type), - _convert_data_similarity(data2, - perm_type=perm_type), - metric=metric, n_permute=n_permute, **kwargs) + return similarity_func( + _convert_data_similarity(data1, perm_type=perm_type), + _convert_data_similarity(data2, perm_type=perm_type), + metric=metric, + n_permute=n_permute, + **kwargs, + ) else: if plot: _, a = plt.subplots(len(self)) for i in a: plot_stacked_adjacency(self, data, ax=i) - return [similarity_func(_convert_data_similarity(x, - perm_type=perm_type), - _convert_data_similarity(data2, - perm_type=perm_type), - metric=metric, n_permute=n_permute, - **kwargs) for x in self] - - def distance(self, metric='correlation', **kwargs): - ''' Calculate distance between images within an Adjacency() instance. + return [ + similarity_func( + _convert_data_similarity(x, perm_type=perm_type), + _convert_data_similarity(data2, perm_type=perm_type), + metric=metric, + n_permute=n_permute, + **kwargs, + ) + for x in self + ] + + def distance(self, metric="correlation", **kwargs): + """Calculate distance between images within an Adjacency() instance. Args: metric: (str) type of distance metric (can use any scikit learn or @@ -642,20 +739,22 @@ def distance(self, metric='correlation', **kwargs): Returns: dist: (Adjacency) Outputs a 2D distance matrix. 
- ''' - return Adjacency(pairwise_distances(self.data, metric=metric, **kwargs), - matrix_type='distance') + """ + return Adjacency( + pairwise_distances(self.data, metric=metric, **kwargs), + matrix_type="distance", + ) def r_to_z(self): - ''' Apply Fisher's r to z transformation to each element of the data - object.''' + """Apply Fisher's r to z transformation to each element of the data + object.""" out = self.copy() out.data = fisher_r_to_z(out.data) return out def threshold(self, upper=None, lower=None, binarize=False): - '''Threshold Adjacency instance. Provide upper and lower values or + """Threshold Adjacency instance. Provide upper and lower values or percentages to perform two-sided thresholding. Binarize will return a mask image respecting thresholds if provided, otherwise respecting every non-zero value. @@ -674,12 +773,12 @@ def threshold(self, upper=None, lower=None, binarize=False): Returns: Adjacency: thresholded Adjacency instance - ''' + """ b = self.copy() - if isinstance(upper, six.string_types) and upper[-1] == '%': + if isinstance(upper, six.string_types) and upper[-1] == "%": upper = np.percentile(b.data, float(upper[:-1])) - if isinstance(lower, six.string_types) and lower[-1] == '%': + if isinstance(lower, six.string_types) and lower[-1] == "%": lower = np.percentile(b.data, float(lower[:-1])) if upper and lower: @@ -693,11 +792,11 @@ def threshold(self, upper=None, lower=None, binarize=False): return b def to_graph(self): - ''' Convert Adjacency into networkx graph. only works on - single_matrix for now.''' + """Convert Adjacency into networkx graph. only works on + single_matrix for now.""" if self.is_single_matrix: - if self.matrix_type == 'directed': + if self.matrix_type == "directed": G = nx.DiGraph(self.squareform()) else: G = nx.Graph(self.squareform()) @@ -706,11 +805,12 @@ def to_graph(self): nx.relabel_nodes(G, labels, copy=False) return G else: - raise NotImplementedError('This function currently only works on ' - 'single matrices.') + raise NotImplementedError( + "This function currently only works on " "single matrices." + ) def ttest(self, permutation=False, **kwargs): - ''' Calculate ttest across samples. + """Calculate ttest across samples. Args: permutation: (bool) Run ttest as permutation. Note this can be very slow. @@ -719,17 +819,17 @@ def ttest(self, permutation=False, **kwargs): out: (dict) contains Adjacency instances of t values (or mean if running permutation) and Adjacency instance of p values. 
- ''' + """ if self.is_single_matrix: - raise ValueError('t-test cannot be run on single matrices.') + raise ValueError("t-test cannot be run on single matrices.") if permutation: t = [] p = [] for i in range(self.data.shape[1]): stats = one_sample_permutation(self.data[:, i], **kwargs) - t.append(stats['mean']) - p.append(stats['p']) + t.append(stats["mean"]) + p.append(stats["p"]) t = Adjacency(np.array(t)) p = Adjacency(np.array(p)) else: @@ -737,22 +837,23 @@ def ttest(self, permutation=False, **kwargs): p = deepcopy(t) t.data, p.data = ttest_1samp(self.data, 0, 0) - return {'t': t, 'p': p} + return {"t": t, "p": p} def plot_label_distance(self, labels=None, ax=None): - ''' Create a violin plot indicating within and between label distance + """Create a violin plot indicating within and between label distance - Args: - labels (np.array): numpy array of labels to plot + Args: + labels (np.array): numpy array of labels to plot - Returns: - f: violin plot handles + Returns: + f: violin plot handles - ''' + """ if not self.is_single_matrix: - raise ValueError('This function only works on single adjacency ' - 'matrices.') + raise ValueError( + "This function only works on single adjacency " "matrices." + ) distance = pd.DataFrame(self.squareform()) @@ -760,41 +861,54 @@ def plot_label_distance(self, labels=None, ax=None): labels = np.array(deepcopy(self.labels)) else: if len(labels) != distance.shape[0]: - raise ValueError('Labels must be same length as distance matrix') + raise ValueError("Labels must be same length as distance matrix") - out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None) + out = pd.DataFrame(columns=["Distance", "Group", "Type"], index=None) for i in np.unique(labels): tmp_w = pd.DataFrame(columns=out.columns, index=None) - tmp_w['Distance'] = distance.loc[labels == i, labels == i].values[np.triu_indices(sum(labels == i), k=1)] - tmp_w['Type'] = 'Within' - tmp_w['Group'] = i + tmp_w["Distance"] = distance.loc[labels == i, labels == i].values[ + np.triu_indices(sum(labels == i), k=1) + ] + tmp_w["Type"] = "Within" + tmp_w["Group"] = i tmp_b = pd.DataFrame(columns=out.columns, index=None) - tmp_b['Distance'] = distance.loc[labels != i, labels != i].values[np.triu_indices(sum(labels == i), k=1)] - tmp_b['Type'] = 'Between' - tmp_b['Group'] = i + tmp_b["Distance"] = distance.loc[labels != i, labels != i].values[ + np.triu_indices(sum(labels == i), k=1) + ] + tmp_b["Type"] = "Between" + tmp_b["Group"] = i out = out.append(tmp_w).append(tmp_b) - f = sns.violinplot(x="Group", y="Distance", hue="Type", data=out, split=True, inner='quartile', - palette={"Within": "lightskyblue", "Between": "red"}, ax=ax) - f.set_ylabel('Average Distance') - f.set_title('Average Group Distance') + f = sns.violinplot( + x="Group", + y="Distance", + hue="Type", + data=out, + split=True, + inner="quartile", + palette={"Within": "lightskyblue", "Between": "red"}, + ax=ax, + ) + f.set_ylabel("Average Distance") + f.set_title("Average Group Distance") return def stats_label_distance(self, labels=None, n_permute=5000, n_jobs=-1): - ''' Calculate permutation tests on within and between label distance. + """Calculate permutation tests on within and between label distance. 
- Args: - labels (np.array): numpy array of labels to plot - n_permute (int): number of permutations to run (default=5000) + Args: + labels (np.array): numpy array of labels to plot + n_permute (int): number of permutations to run (default=5000) - Returns: - dict: dictionary of within and between group differences - and p-values + Returns: + dict: dictionary of within and between group differences + and p-values - ''' + """ if not self.is_single_matrix: - raise ValueError('This function only works on single adjacency ' - 'matrices.') + raise ValueError( + "This function only works on single adjacency " "matrices." + ) distance = pd.DataFrame(self.squareform()) @@ -802,46 +916,63 @@ def stats_label_distance(self, labels=None, n_permute=5000, n_jobs=-1): labels = deepcopy(self.labels) else: if len(labels) != distance.shape[0]: - raise ValueError('Labels must be same length as distance matrix') + raise ValueError("Labels must be same length as distance matrix") - out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None) + out = pd.DataFrame(columns=["Distance", "Group", "Type"], index=None) for i in np.unique(labels): tmp_w = pd.DataFrame(columns=out.columns, index=None) - tmp_w['Distance'] = distance.loc[labels == i, labels == i].values[np.triu_indices(sum(labels == i), k=1)] - tmp_w['Type'] = 'Within' - tmp_w['Group'] = i + tmp_w["Distance"] = distance.loc[labels == i, labels == i].values[ + np.triu_indices(sum(labels == i), k=1) + ] + tmp_w["Type"] = "Within" + tmp_w["Group"] = i tmp_b = pd.DataFrame(columns=out.columns, index=None) - tmp_b['Distance'] = distance.loc[labels == i, labels != i].values.flatten() - tmp_b['Type'] = 'Between' - tmp_b['Group'] = i + tmp_b["Distance"] = distance.loc[labels == i, labels != i].values.flatten() + tmp_b["Type"] = "Between" + tmp_b["Group"] = i out = out.append(tmp_w).append(tmp_b) stats = {} for i in np.unique(labels): # Within group test - tmp1 = out.loc[(out['Group'] == i) & (out['Type'] == 'Within'), 'Distance'] - tmp2 = out.loc[(out['Group'] == i) & (out['Type'] == 'Between'), 'Distance'] - stats[str(i)] = two_sample_permutation(tmp1, tmp2, - n_permute=n_permute, n_jobs=n_jobs) + tmp1 = out.loc[(out["Group"] == i) & (out["Type"] == "Within"), "Distance"] + tmp2 = out.loc[(out["Group"] == i) & (out["Type"] == "Between"), "Distance"] + stats[str(i)] = two_sample_permutation( + tmp1, tmp2, n_permute=n_permute, n_jobs=n_jobs + ) return stats - def plot_silhouette(self, labels=None, ax=None, permutation_test=True, - n_permute=5000, **kwargs): - '''Create a silhouette plot''' + def plot_silhouette( + self, labels=None, ax=None, permutation_test=True, n_permute=5000, **kwargs + ): + """Create a silhouette plot""" distance = pd.DataFrame(self.squareform()) if labels is None: labels = np.array(deepcopy(self.labels)) else: if len(labels) != distance.shape[0]: - raise ValueError('Labels must be same length as distance matrix') - - return plot_silhouette(distance, pd.Series(labels), ax=None, - permutation_test=True, - n_permute=5000, **kwargs) - - def bootstrap(self, function, n_samples=5000, save_weights=False, - n_jobs=-1, random_state=None, *args, **kwargs): - '''Bootstrap an Adjacency method. 
+ raise ValueError("Labels must be same length as distance matrix") + + return plot_silhouette( + distance, + pd.Series(labels), + ax=None, + permutation_test=True, + n_permute=5000, + **kwargs, + ) + + def bootstrap( + self, + function, + n_samples=5000, + save_weights=False, + n_jobs=-1, + random_state=None, + *args, + **kwargs, + ): + """Bootstrap an Adjacency method. Example Useage: b = dat.bootstrap('mean', n_samples=5000) @@ -857,148 +988,206 @@ def bootstrap(self, function, n_samples=5000, save_weights=False, -1 means all CPUs.Returns: output: summarized studentized bootstrap output - ''' + """ random_state = check_random_state(random_state) seeds = random_state.randint(MAX_INT, size=n_samples) bootstrapped = Parallel(n_jobs=n_jobs)( - delayed(_bootstrap_apply_func)(self, - function, random_state=seeds[i], *args, **kwargs) - for i in range(n_samples)) + delayed(_bootstrap_apply_func)( + self, function, random_state=seeds[i], *args, **kwargs + ) + for i in range(n_samples) + ) bootstrapped = Adjacency(bootstrapped) return summarize_bootstrap(bootstrapped, save_weights=save_weights) - def isc(self, n_bootstraps=5000, metric='median', ci_percentile=95, exclude_self_corr=True, - return_bootstraps=False, tail=2, n_jobs=-1, random_state=None): - ''' Compute intersubject correlation. - - This implementation uses the subject-wise bootstrap method from Chen et al., 2016. - Instead of recomputing the pairwise ISC using circle_shift or phase_randomization methods, - this approach uses the computationally more efficient method of bootstrapping the subjects - and computing a new pairwise similarity matrix with randomly selected subjects with replacement. - If the same subject is selected multiple times, we set the perfect correlation to a nan with - (exclude_self_corr=True). As recommended by Chen et al., 2016, we compute the median pairwise ISC - by default. However, if the mean is preferred, we compute the mean correlation after performing - the fisher r-to-z transformation and then convert back to correlations to minimize artificially - inflating the correlation values. We compute the p-values using the percentile method using the same - method in Brainiak. - - Chen, G., Shin, Y. W., Taylor, P. A., Glen, D. R., Reynolds, R. C., Israel, R. B., - & Cox, R. W. (2016). Untangling the relatedness among correlations, part I: - nonparametric approaches to inter-subject correlation analysis at the group level. - NeuroImage, 142, 248-259. - - Hall, P., & Wilson, S. R. (1991). Two guidelines for bootstrap hypothesis testing. - Biometrics, 757-762. - - Args: - n_bootstraps: (int) number of bootstraps - metric: (str) type of association metric ['spearman','pearson','kendall'] - tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) - n_jobs: (int) The number of CPUs to use to do the computation. -1 means all CPUs. - return_parms: (bool) Return the permutation distribution along with the p-value; default False - - Returns: - stats: (dict) dictionary of permutation results ['correlation','p'] - - ''' - + def isc( + self, + n_bootstraps=5000, + metric="median", + ci_percentile=95, + exclude_self_corr=True, + return_bootstraps=False, + tail=2, + n_jobs=-1, + random_state=None, + ): + """Compute intersubject correlation. + + This implementation uses the subject-wise bootstrap method from Chen et al., 2016. 
+ Instead of recomputing the pairwise ISC using circle_shift or phase_randomization methods, + this approach uses the computationally more efficient method of bootstrapping the subjects + and computing a new pairwise similarity matrix with randomly selected subjects with replacement. + If the same subject is selected multiple times, we set the perfect correlation to a nan with + (exclude_self_corr=True). As recommended by Chen et al., 2016, we compute the median pairwise ISC + by default. However, if the mean is preferred, we compute the mean correlation after performing + the fisher r-to-z transformation and then convert back to correlations to minimize artificially + inflating the correlation values. We compute the p-values using the percentile method using the same + method in Brainiak. + + Chen, G., Shin, Y. W., Taylor, P. A., Glen, D. R., Reynolds, R. C., Israel, R. B., + & Cox, R. W. (2016). Untangling the relatedness among correlations, part I: + nonparametric approaches to inter-subject correlation analysis at the group level. + NeuroImage, 142, 248-259. + + Hall, P., & Wilson, S. R. (1991). Two guidelines for bootstrap hypothesis testing. + Biometrics, 757-762. + + Args: + n_bootstraps: (int) number of bootstraps + metric: (str) type of association metric ['spearman','pearson','kendall'] + tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) + n_jobs: (int) The number of CPUs to use to do the computation. -1 means all CPUs. + return_parms: (bool) Return the permutation distribution along with the p-value; default False + + Returns: + stats: (dict) dictionary of permutation results ['correlation','p'] + + """ + random_state = check_random_state(random_state) - if metric not in ['mean', 'median']: + if metric not in ["mean", "median"]: raise ValueError("metric must be ['mean', 'median']") if not self.is_single_matrix: - raise NotImplementedError('Currently we can only compute ISC values on single Adjacency matrices') - - if metric =='mean': + raise NotImplementedError( + "Currently we can only compute ISC values on single Adjacency matrices" + ) + + if metric == "mean": isc = np.tanh(self.r_to_z().mean()) - elif metric =='median': + elif metric == "median": isc = self.median() - stats = {'isc': isc} - - all_bootstraps = Parallel(n_jobs=n_jobs)(delayed(_bootstrap_isc)( - self, metric=metric, exclude_self_corr=exclude_self_corr, - random_state=random_state) for i in range(n_bootstraps)) - - stats['p'] = _calc_pvalue(all_bootstraps - stats['isc'], stats['isc'], tail) - - stats['ci'] = (np.percentile(np.array(all_bootstraps), (100 - ci_percentile)/2, axis=0), - np.percentile(np.array(all_bootstraps), ci_percentile + (100 - ci_percentile)/2, axis=0)) - + stats = {"isc": isc} + + all_bootstraps = Parallel(n_jobs=n_jobs)( + delayed(_bootstrap_isc)( + self, + metric=metric, + exclude_self_corr=exclude_self_corr, + random_state=random_state, + ) + for i in range(n_bootstraps) + ) + + stats["p"] = _calc_pvalue(all_bootstraps - stats["isc"], stats["isc"], tail) + + stats["ci"] = ( + np.percentile(np.array(all_bootstraps), (100 - ci_percentile) / 2, axis=0), + np.percentile( + np.array(all_bootstraps), + ci_percentile + (100 - ci_percentile) / 2, + axis=0, + ), + ) + if return_bootstraps: - stats['null_distribution'] = all_bootstraps - + stats["null_distribution"] = all_bootstraps + return stats - def plot_mds(self, n_components=2, metric=True, labels=None, labels_color=None, - cmap=plt.cm.hot_r, n_jobs=-1, view=(30, 20), - figsize=[12, 8], ax=None, *args, **kwargs): - ''' 
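To make the subject-wise bootstrap described above concrete, here is a standalone numpy sketch of one way to resample subjects with replacement, blank out repeated-subject (self) correlations, and form a percentile interval around the median pairwise ISC. The function name and toy similarity matrix are illustrative assumptions; the real method parallelizes the loop via _bootstrap_isc, supports a Fisher z-averaged mean, and computes p-values with _calc_pvalue.

import numpy as np


def isc_bootstrap_demo(similarity, n_bootstraps=1000, ci_percentile=95, seed=0):
    """Median pairwise ISC with a subject-wise bootstrap confidence interval."""
    rng = np.random.default_rng(seed)
    n = similarity.shape[0]
    triu = np.triu_indices(n, k=1)
    observed = np.median(similarity[triu])
    boot = np.empty(n_bootstraps)
    for b in range(n_bootstraps):
        idx = rng.choice(n, size=n, replace=True)       # resample subjects with replacement
        resampled = similarity[np.ix_(idx, idx)].astype(float)
        same = idx[:, None] == idx[None, :]             # same subject drawn more than once
        resampled[same] = np.nan                        # exclude the resulting self-correlations
        boot[b] = np.nanmedian(resampled[triu])
    lo = np.percentile(boot, (100 - ci_percentile) / 2)
    hi = np.percentile(boot, ci_percentile + (100 - ci_percentile) / 2)
    return {"isc": observed, "ci": (lo, hi)}


# Toy subject-by-subject correlation matrix with a shared signal
rng = np.random.default_rng(0)
ts = rng.normal(size=(10, 200)) + rng.normal(size=200) * 0.5
sim = np.corrcoef(ts)
print(isc_bootstrap_demo(sim))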
Plot Multidimensional Scaling + def plot_mds( + self, + n_components=2, + metric=True, + labels=None, + labels_color=None, + cmap=plt.cm.hot_r, + n_jobs=-1, + view=(30, 20), + figsize=[12, 8], + ax=None, + *args, + **kwargs, + ): + """Plot Multidimensional Scaling - Args: - n_components: (int) Number of dimensions to project (can be 2 or 3) - metric: (bool) Perform metric or non-metric dimensional scaling; default - labels: (list) Can override labels stored in Adjacency Class - labels_color: (str) list of colors for labels, if len(1) then make all same color - n_jobs: (int) Number of parallel jobs - view: (tuple) view for 3-Dimensional plot; default (30,20) + Args: + n_components: (int) Number of dimensions to project (can be 2 or 3) + metric: (bool) Perform metric or non-metric dimensional scaling; default + labels: (list) Can override labels stored in Adjacency Class + labels_color: (str) list of colors for labels, if len(1) then make all same color + n_jobs: (int) Number of parallel jobs + view: (tuple) view for 3-Dimensional plot; default (30,20) - ''' + """ - if self.matrix_type != 'distance': + if self.matrix_type != "distance": raise ValueError("MDS only works on distance matrices.") if not self.is_single_matrix: raise ValueError("MDS only works on single matrices.") if n_components not in [2, 3]: - raise ValueError('Cannot plot {0}-d image'.format(n_components)) + raise ValueError("Cannot plot {0}-d image".format(n_components)) if labels is not None: if len(labels) != self.square_shape()[0]: - raise ValueError("Make sure labels matches the same shape as Adjaency data") + raise ValueError( + "Make sure labels matches the same shape as Adjaency data" + ) else: labels = self.labels if labels_color is not None: if len(labels) == 0: - raise ValueError("Make sure that Adjacency object has labels specified.") + raise ValueError( + "Make sure that Adjacency object has labels specified." 
+ ) if len(labels) != len(labels_color): raise ValueError("Length of labels_color must match self.labels.") # Run MDS - mds = MDS(n_components=n_components, metric=metric, n_jobs=n_jobs, - dissimilarity="precomputed", *args, **kwargs) + mds = MDS( + n_components=n_components, + metric=metric, + n_jobs=n_jobs, + dissimilarity="precomputed", + *args, + **kwargs, + ) proj = mds.fit_transform(self.squareform()) # Create Plot if ax is None: # Create axis fig = plt.figure(figsize=figsize) if n_components == 3: - ax = fig.add_subplot(111, projection='3d') + ax = fig.add_subplot(111, projection="3d") ax.view_init(*view) elif n_components == 2: ax = fig.add_subplot(111) # Plot dots if n_components == 3: - ax.scatter(proj[:, 0], proj[:, 1], proj[:, 2], s=1, c='k') + ax.scatter(proj[:, 0], proj[:, 1], proj[:, 2], s=1, c="k") elif n_components == 2: - ax.scatter(proj[:, 0], proj[:, 1], s=1, c='k') + ax.scatter(proj[:, 0], proj[:, 1], s=1, c="k") # Plot labels if labels_color is None: - labels_color = ['black'] * len(labels) + labels_color = ["black"] * len(labels) if n_components == 3: for ((x, y, z), label, color) in zip(proj, labels, labels_color): - ax.text(x, y, z, label, color='white', bbox=dict(facecolor=color, alpha=1, boxstyle="round,pad=0.3")) + ax.text( + x, + y, + z, + label, + color="white", + bbox=dict(facecolor=color, alpha=1, boxstyle="round,pad=0.3"), + ) else: for ((x, y), label, color) in zip(proj, labels, labels_color): - ax.text(x, y, label, color='white', # color, - bbox=dict(facecolor=color, alpha=1, boxstyle="round,pad=0.3")) + ax.text( + x, + y, + label, + color="white", # color, + bbox=dict(facecolor=color, alpha=1, boxstyle="round,pad=0.3"), + ) ax.xaxis.set_visible(False) ax.yaxis.set_visible(False) def distance_to_similarity(self, beta=1): - '''Convert distance matrix to similarity matrix + """Convert distance matrix to similarity matrix Args: beta: (float) parameter to scale exponential function (default: 1) @@ -1006,79 +1195,93 @@ def distance_to_similarity(self, beta=1): Returns: out: (Adjacency) Adjacency object - ''' - if self.matrix_type == 'distance': - return Adjacency(np.exp(-beta*self.squareform()/self.squareform().std()), - labels=self.labels, matrix_type='similarity') + """ + if self.matrix_type == "distance": + return Adjacency( + np.exp(-beta * self.squareform() / self.squareform().std()), + labels=self.labels, + matrix_type="similarity", + ) else: - raise ValueError('Matrix is not a distance matrix.') + raise ValueError("Matrix is not a distance matrix.") def similarity_to_distance(self): - '''Convert similarity matrix to distance matrix''' - if self.matrix_type == 'similarity': - return Adjacency(1-self.squareform(), - labels=self.labels, matrix_type='distance') + """Convert similarity matrix to distance matrix""" + if self.matrix_type == "similarity": + return Adjacency( + 1 - self.squareform(), labels=self.labels, matrix_type="distance" + ) else: - raise ValueError('Matrix is not a similarity matrix.') + raise ValueError("Matrix is not a similarity matrix.") def within_cluster_mean(self, clusters=None): - ''' This function calculates mean within cluster labels + """This function calculates mean within cluster labels Args: clusters: (list) list of cluster labels Returns: dict: (dict) within cluster means - ''' + """ distance = pd.DataFrame(self.squareform()) clusters = np.array(clusters) if len(clusters) != distance.shape[0]: - raise ValueError('Cluster labels must be same length as distance matrix') + raise ValueError("Cluster labels must be same length as 
distance matrix") - out = pd.DataFrame(columns=['Mean', 'Label'], index=None) + out = pd.DataFrame(columns=["Mean", "Label"], index=None) out = {} for i in list(set(clusters)): - out[i] = np.mean(distance.loc[clusters == i, clusters == i].values[np.triu_indices(sum(clusters == i), k=1)]) + out[i] = np.mean( + distance.loc[clusters == i, clusters == i].values[ + np.triu_indices(sum(clusters == i), k=1) + ] + ) return out - def regress(self, X, mode='ols', **kwargs): - ''' Run a regression on an adjacency instance. - You can decompose an adjacency instance with another adjacency instance. - You can also decompose each pixel by passing a design_matrix instance. + def regress(self, X, mode="ols", **kwargs): + """Run a regression on an adjacency instance. + You can decompose an adjacency instance with another adjacency instance. + You can also decompose each pixel by passing a design_matrix instance. - Args: - X: Design matrix can be an Adjacency or Design_Matrix instance - method: type of regression (default: ols) + Args: + X: Design matrix can be an Adjacency or Design_Matrix instance + method: type of regression (default: ols) - Returns: - stats: (dict) dictionary of stats outputs. - ''' + Returns: + stats: (dict) dictionary of stats outputs. + """ stats = {} if isinstance(X, Adjacency): if X.square_shape()[0] != self.square_shape()[0]: - raise ValueError('Adjacency instances must be the same size.') + raise ValueError("Adjacency instances must be the same size.") b, t, p, _, res = regression(X.data.T, self.data, mode=mode, **kwargs) - stats['beta'], stats['t'], stats['p'], stats['residual'] = (b, t, p, res) + stats["beta"], stats["t"], stats["p"], stats["residual"] = (b, t, p, res) elif isinstance(X, Design_Matrix): if X.shape[0] != len(self): - raise ValueError('Design matrix must have same number of observations as Adjacency') + raise ValueError( + "Design matrix must have same number of observations as Adjacency" + ) b, t, p, df, res = regression(X, self.data, mode=mode, **kwargs) - mode = 'ols' - stats['beta'], stats['t'], stats['p'] = [x for x in self[:3]] - stats['beta'].data, stats['t'].data, stats['p'].data = b.squeeze(), t.squeeze(), p.squeeze() - stats['residual'] = self.copy() - stats['residual'].data = res - stats['df'] = df + mode = "ols" + stats["beta"], stats["t"], stats["p"] = [x for x in self[:3]] + stats["beta"].data, stats["t"].data, stats["p"].data = ( + b.squeeze(), + t.squeeze(), + p.squeeze(), + ) + stats["residual"] = self.copy() + stats["residual"].data = res + stats["df"] = df else: - raise ValueError('X must be a Design_Matrix or Adjacency Instance.') + raise ValueError("X must be a Design_Matrix or Adjacency Instance.") return stats def social_relations_model(self, summarize_results=True, nan_replace=True): - '''Estimate the social relations model from a matrix for a round-robin design. - + """Estimate the social relations model from a matrix for a round-robin design. 
+ X_{ij} = m + \alpha_i + \beta_j + g_{ij} + \episolon_{ijl} where X_{ij} is the score for person i rating person j, m is the group mean, @@ -1108,185 +1311,239 @@ def social_relations_model(self, summarize_results=True, nan_replace=True): Returns: estimated effects: (pd.Series/pd.DataFrame) All of the effects estimated using SRM - ''' + """ - def mean_square_between(x1, x2=None, df='standard'): - '''Calculate between dyad variance''' + def mean_square_between(x1, x2=None, df="standard"): + """Calculate between dyad variance""" - if df == 'standard': + if df == "standard": n = len(x1) df = n - 1 - elif df == 'relationship': + elif df == "relationship": n = len(squareform(x1)) - df = ((n-1)*(n-2)/2) - 1 + df = ((n - 1) * (n - 2) / 2) - 1 else: raise ValueError("df can only be ['standard', 'relationship']") if x2 is not None: - return 2*np.nansum((((x1 + x2)/2) - np.nanmean((x1 + x2)/2))**2)/df + return ( + 2 + * np.nansum((((x1 + x2) / 2) - np.nanmean((x1 + x2) / 2)) ** 2) + / df + ) else: - return np.nansum((x1 - np.nanmean(x1))**2)/df + return np.nansum((x1 - np.nanmean(x1)) ** 2) / df - def mean_square_within(x1, x2, df='standard'): - '''Calculate within dyad variance''' + def mean_square_within(x1, x2, df="standard"): + """Calculate within dyad variance""" - if df == 'standard': + if df == "standard": n = len(x1) df = n - elif df == 'relationship': + elif df == "relationship": n = len(squareform(x1)) - df = (n-1)*(n-2)/2 + df = (n - 1) * (n - 2) / 2 else: raise ValueError("df can only be ['standard', 'relationship']") - return np.nansum((x1 - x2)**2)/(2*df) + return np.nansum((x1 - x2) ** 2) / (2 * df) def estimate_person_effect(n, x1_mean, x2_mean, grand_mean): - '''Calculate effect for actor, partner, and relationship''' - return ((n-1)**2/(n*(n-2)))*x1_mean + ((n-1)/(n*(n-2)))*x2_mean - ((n-1)/(n-2))*grand_mean + """Calculate effect for actor, partner, and relationship""" + return ( + ((n - 1) ** 2 / (n * (n - 2))) * x1_mean + + ((n - 1) / (n * (n - 2))) * x2_mean + - ((n - 1) / (n - 2)) * grand_mean + ) def estimate_person_variance(x, ms_b, ms_w): - '''Calculate variance of a specific dyad member (e.g., actor, partner)''' + """Calculate variance of a specific dyad member (e.g., actor, partner)""" n = len(x) - return mean_square_between(x) - (ms_b/(2*(n-2))) - (ms_w/(2*n)) + return mean_square_between(x) - (ms_b / (2 * (n - 2))) - (ms_w / (2 * n)) def estimate_srm(data): - '''Estimate Social Relations Model from a Single Matrix''' + """Estimate Social Relations Model from a Single Matrix""" if not data.is_single_matrix: - raise ValueError("This function only operates on single matrix Adjacency instances.") + raise ValueError( + "This function only operates on single matrix Adjacency instances." + ) n = data.square_shape()[0] if n < 4: - raise ValueError('The Social Relations Model cannote be estimated when sample size is less than 4.') + raise ValueError( + "The Social Relations Model cannote be estimated when sample size is less than 4." 
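A small worked example may help make the actor/partner/relationship decomposition above concrete. This is a standalone numpy sketch with a made-up 4-person round-robin matrix (the smallest group size the estimator accepts); it reuses the same person-effect weighting as estimate_person_effect but is not nltools output.

import numpy as np

# Rows are raters (actors), columns are targets (partners); the diagonal is ignored.
X = np.array([[np.nan, 5.0, 6.0, 4.0],
              [3.0, np.nan, 5.0, 2.0],
              [6.0, 7.0, np.nan, 5.0],
              [2.0, 4.0, 3.0, np.nan]])
n = X.shape[0]
m = np.nanmean(X)                       # grand mean
actor_mean = np.nanmean(X, axis=1)      # mean rating each person gives
partner_mean = np.nanmean(X, axis=0)    # mean rating each person receives


def person_effect(x1_mean, x2_mean):
    # Same weighting as estimate_person_effect in the patch
    return (((n - 1) ** 2 / (n * (n - 2))) * x1_mean
            + ((n - 1) / (n * (n - 2))) * x2_mean
            - ((n - 1) / (n - 2)) * m)


a = person_effect(actor_mean, partner_mean)   # actor effects
b = person_effect(partner_mean, actor_mean)   # partner effects
g = X - a[:, None] - b[None, :] - m           # relationship effects (off-diagonal)
print("actor effects:", np.round(a, 2))
print("partner effects:", np.round(b, 2))
print("relationship effects:\n", np.round(g, 2))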
+ ) grand_mean = data.mean() dat = data.squareform().copy() np.fill_diagonal(dat, np.nan) actor_mean = np.nanmean(dat, axis=1) partner_mean = np.nanmean(dat, axis=0) - a = estimate_person_effect(n, actor_mean, partner_mean, grand_mean) # Actor effects - b = estimate_person_effect(n, partner_mean, actor_mean, grand_mean) # Partner effects + a = estimate_person_effect( + n, actor_mean, partner_mean, grand_mean + ) # Actor effects + b = estimate_person_effect( + n, partner_mean, actor_mean, grand_mean + ) # Partner effects # Relationship effects - g = np.ones(dat.shape)*np.nan + g = np.ones(dat.shape) * np.nan for i in range(n): for j in range(n): if i != j: - g[i,j] = dat[i, j] - a[i] - b[j] - grand_mean + g[i, j] = dat[i, j] - a[i] - b[j] - grand_mean # Estimate Variance x1 = g[np.tril_indices(n, k=-1)] x2 = g[np.triu_indices(n, k=1)] - ms_b = mean_square_between(x1, x2, df='relationship') - ms_w = mean_square_within(x1, x2, df='relationship') + ms_b = mean_square_between(x1, x2, df="relationship") + ms_w = mean_square_within(x1, x2, df="relationship") actor_variance = estimate_person_variance(a, ms_b, ms_w) partner_variance = estimate_person_variance(b, ms_b, ms_w) - relationship_variance = (ms_b + ms_w)/2 - dyadic_reciprocity_covariance = (ms_b - ms_w)/2 - dyadic_reciprocity_correlation = (ms_b - ms_w)/(ms_b + ms_w) - actor_partner_covariance = (np.sum(a*b)/(n-1)) - (ms_b/(2*(n-2))) + (ms_w/(2*n)) - actor_partner_correlation = actor_partner_covariance/(np.sqrt(actor_variance*partner_variance)) - actor_reliability = actor_variance/(actor_variance + (relationship_variance/(n-1)) - (dyadic_reciprocity_covariance/((n-1)**2))) - partner_reliability = partner_variance/(partner_variance + (relationship_variance/(n-1)) - (dyadic_reciprocity_covariance/((n-1)**2))) - adjusted_dyadic_reciprocity_correlation = actor_partner_correlation*np.sqrt(actor_reliability*partner_reliability) + relationship_variance = (ms_b + ms_w) / 2 + dyadic_reciprocity_covariance = (ms_b - ms_w) / 2 + dyadic_reciprocity_correlation = (ms_b - ms_w) / (ms_b + ms_w) + actor_partner_covariance = ( + (np.sum(a * b) / (n - 1)) - (ms_b / (2 * (n - 2))) + (ms_w / (2 * n)) + ) + actor_partner_correlation = actor_partner_covariance / ( + np.sqrt(actor_variance * partner_variance) + ) + actor_reliability = actor_variance / ( + actor_variance + + (relationship_variance / (n - 1)) + - (dyadic_reciprocity_covariance / ((n - 1) ** 2)) + ) + partner_reliability = partner_variance / ( + partner_variance + + (relationship_variance / (n - 1)) + - (dyadic_reciprocity_covariance / ((n - 1) ** 2)) + ) + adjusted_dyadic_reciprocity_correlation = ( + actor_partner_correlation + * np.sqrt(actor_reliability * partner_reliability) + ) total_variance = actor_variance + partner_variance + relationship_variance - return pd.Series({'grand_mean':grand_mean, - 'actor_effect':a, - 'partner_effect':b, - 'relationship_effect':g, - 'actor_variance':actor_variance, - 'partner_variance':partner_variance, - 'relationship_variance':relationship_variance, - 'actor_partner_covariance':actor_partner_covariance, - 'actor_partner_correlation':actor_partner_correlation, - 'dyadic_reciprocity_covariance':dyadic_reciprocity_covariance, - 'dyadic_reciprocity_correlation':dyadic_reciprocity_correlation, - 'adjusted_dyadic_reciprocity_correlation':adjusted_dyadic_reciprocity_correlation, - 'actor_reliability':actor_reliability, - 'partner_reliability':partner_reliability, - 'total_variance':total_variance}) + return pd.Series( + { + "grand_mean": grand_mean, + 
"actor_effect": a, + "partner_effect": b, + "relationship_effect": g, + "actor_variance": actor_variance, + "partner_variance": partner_variance, + "relationship_variance": relationship_variance, + "actor_partner_covariance": actor_partner_covariance, + "actor_partner_correlation": actor_partner_correlation, + "dyadic_reciprocity_covariance": dyadic_reciprocity_covariance, + "dyadic_reciprocity_correlation": dyadic_reciprocity_correlation, + "adjusted_dyadic_reciprocity_correlation": adjusted_dyadic_reciprocity_correlation, + "actor_reliability": actor_reliability, + "partner_reliability": partner_reliability, + "total_variance": total_variance, + } + ) def summarize_srm_results(results): - '''Summarize results of SRM''' + """Summarize results of SRM""" def estimate_srm_stats(results, var_name, tailed=1): estimate = results[var_name].mean() - standardized = (results[var_name]/results['total_variance']).mean() - se = results[var_name].std()/np.sqrt(len(results[var_name])) - t = estimate/se + standardized = (results[var_name] / results["total_variance"]).mean() + se = results[var_name].std() / np.sqrt(len(results[var_name])) + t = estimate / se if tailed == 1: p = 1 - stats.t.cdf(t, len(results[var_name]) - 1) elif tailed == 2: - p = 2*(1 - stats.t.cdf(t, len(results[var_name]) - 1)) + p = 2 * (1 - stats.t.cdf(t, len(results[var_name]) - 1)) else: raise ValueError("tailed can only be [1,2]") return (estimate, standardized, se, t, p) def print_srm_stats(results, var_name, tailed=1): - estimate, standardized, se, t, p = estimate_srm_stats(results, var_name, tailed) - print(f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {se:^10.2f} {t:^10.2f} {p:^10.4f}") + estimate, standardized, se, t, p = estimate_srm_stats( + results, var_name, tailed + ) + print( + f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {se:^10.2f} {t:^10.2f} {p:^10.4f}" + ) def print_single_group_srm_stats(results, var_name): estimate = results[var_name].mean() - standardized = (results[var_name]/results['total_variance']).mean() - print(f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {np.nan:^10.2f} {np.nan:^10.2f} {np.nan:^10.4f}") + standardized = (results[var_name] / results["total_variance"]).mean() + print( + f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {np.nan:^10.2f} {np.nan:^10.2f} {np.nan:^10.4f}" + ) def print_srm_covariances(results, var_name): - estimate, _, se, t, p = estimate_srm_stats(results, f"{var_name}_covariance", tailed=2) + estimate, _, se, t, p = estimate_srm_stats( + results, f"{var_name}_covariance", tailed=2 + ) standardized = results[f"{var_name}_correlation"].mean() - print(f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {se:^10.2f} {t:^10.2f} {p:^10.4f}") + print( + f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {se:^10.2f} {t:^10.2f} {p:^10.4f}" + ) def print_single_srm_covariances(results, var_name): estimate = results[f"{var_name}_covariance"].mean() standardized = results[f"{var_name}_correlation"].mean() - print(f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {np.nan:^10.2f} {np.nan:^10.2f} {np.nan:^10.4f}") + print( + f"{var_name:<40} {estimate:^10.2f}{standardized:^10.2f} {np.nan:^10.2f} {np.nan:^10.2f} {np.nan:^10.4f}" + ) if isinstance(results, pd.Series): n_groups = 1 - group_size = results['actor_effect'].shape[0] + group_size = results["actor_effect"].shape[0] elif isinstance(results, pd.DataFrame): n_groups = len(results) - group_size = np.mean([x.shape for x in results['actor_effect']]) + group_size = np.mean([x.shape for x in 
results["actor_effect"]]) print("Social Relations Model: Results") print("\n") print(f"Number of Groups: {n_groups:<20}") print(f"Average Group Size: {group_size:<20}") print("\n") - print(f"{'':<40} {'Estimate':<10} {'Standardized':<10} {'se':<10} {'t':<10} {'p':<10}") + print( + f"{'':<40} {'Estimate':<10} {'Standardized':<10} {'se':<10} {'t':<10} {'p':<10}" + ) if isinstance(results, pd.Series): - print_single_group_srm_stats(results, 'actor_variance') - print_single_group_srm_stats(results, 'partner_variance') - print_single_group_srm_stats(results, 'relationship_variance') - print_single_srm_covariances(results, 'actor_partner') - print_single_srm_covariances(results, 'dyadic_reciprocity') + print_single_group_srm_stats(results, "actor_variance") + print_single_group_srm_stats(results, "partner_variance") + print_single_group_srm_stats(results, "relationship_variance") + print_single_srm_covariances(results, "actor_partner") + print_single_srm_covariances(results, "dyadic_reciprocity") elif isinstance(results, pd.DataFrame): - print_srm_stats(results, 'actor_variance') - print_srm_stats(results, 'partner_variance') - print_srm_stats(results, 'relationship_variance') - print_srm_covariances(results, 'actor_partner') - print_srm_covariances(results, 'dyadic_reciprocity') + print_srm_stats(results, "actor_variance") + print_srm_stats(results, "partner_variance") + print_srm_stats(results, "relationship_variance") + print_srm_covariances(results, "actor_partner") + print_srm_covariances(results, "dyadic_reciprocity") print("\n") - print(f"{'Actor Reliability':<20} {results['actor_reliability'].mean():^20.2f}") - print(f"{'Partner Reliability':<20} {results['partner_reliability'].mean():^20.2f}") + print( + f"{'Actor Reliability':<20} {results['actor_reliability'].mean():^20.2f}" + ) + print( + f"{'Partner Reliability':<20} {results['partner_reliability'].mean():^20.2f}" + ) print("\n") def replace_missing(data): - '''Replace missing data with row/column means and return new data and missing coordinates''' + """Replace missing data with row/column means and return new data and missing coordinates""" def fix_missing(data): X = data.squareform().copy() - x,y = np.where(np.isnan(X)) - for i,j in zip(x,y): + x, y = np.where(np.isnan(X)) + for i, j in zip(x, y): if i != j: - X[i,j] = (np.nanmean(X[i,:]) + np.nanmean(X[:,j]))/2 + X[i, j] = (np.nanmean(X[i, :]) + np.nanmean(X[:, j])) / 2 X = Adjacency(X, matrix_type=data.matrix_type) - return (X, (x,y)) + return (X, (x, y)) if data.is_single_matrix: X, coord = fix_missing(data) else: - X = []; coord = [] + X = [] + coord = [] for d in data: m, c = fix_missing(d) X.append(m) @@ -1308,4 +1565,3 @@ def fix_missing(data): summarize_srm_results(results) return results - diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py index 8104c613..51bbbff9 100644 --- a/nltools/data/brain_data.py +++ b/nltools/data/brain_data.py @@ -1,12 +1,12 @@ from __future__ import division -''' +""" NeuroLearn Brain Data ===================== Classes to represent brain image data. 
-''' +""" # Notes: # Need to figure out how to speed up loading and resampling of data @@ -42,28 +42,32 @@ from nilearn.image import smooth_img, resample_to_img from nilearn.masking import intersect_masks from nilearn.regions import connected_regions, connected_label_regions -from nltools.utils import (set_algorithm, - attempt_to_import, - concatenate, - _bootstrap_apply_func, - set_decomposition_algorithm, - check_brain_data, - check_brain_data_is_single, - _roi_func, - get_mni_from_img_resolution, - _df_meta_to_arr) +from nltools.utils import ( + set_algorithm, + attempt_to_import, + concatenate, + _bootstrap_apply_func, + set_decomposition_algorithm, + check_brain_data, + check_brain_data_is_single, + _roi_func, + get_mni_from_img_resolution, + _df_meta_to_arr, +) from nltools.cross_validation import set_cv from nltools.plotting import scatterplot, plot_interactive_brain, plot_brain -from nltools.stats import (pearson, - fdr, - holm_bonf, - threshold, - fisher_r_to_z, - transform_pairwise, - summarize_bootstrap, - procrustes, - find_spikes, - regress_permutation) +from nltools.stats import ( + pearson, + fdr, + holm_bonf, + threshold, + fisher_r_to_z, + transform_pairwise, + summarize_bootstrap, + procrustes, + find_spikes, + regress_permutation, +) from nltools.stats import regress as regression from .adjacency import Adjacency from nltools.prefs import MNI_Template, resolve_mni_path @@ -72,8 +76,12 @@ # Optional dependencies -nx = attempt_to_import('networkx', 'nx') -mne_stats = attempt_to_import('mne.stats', name='mne_stats', fromlist=['spatio_temporal_cluster_1samp_test', 'ttest_1samp_no_p']) +nx = attempt_to_import("networkx", "nx") +mne_stats = attempt_to_import( + "mne.stats", + name="mne_stats", + fromlist=["spatio_temporal_cluster_1samp_test", "ttest_1samp_no_p"], +) MAX_INT = np.iinfo(np.int32).max @@ -95,38 +103,66 @@ class Brain_Data(object): """ - def __init__(self, data=None, Y=None, X=None, mask=None, output_file=None, - **kwargs): + def __init__( + self, data=None, Y=None, X=None, mask=None, output_file=None, **kwargs + ): if mask is not None: if not isinstance(mask, nib.Nifti1Image): if isinstance(mask, six.string_types): if os.path.isfile(mask): mask = nib.load(mask) else: - raise ValueError("mask is not a nibabel instance or a " - "valid file name") + raise ValueError( + "mask is not a nibabel instance or a " "valid file name" + ) self.mask = mask else: - self.mask = nib.load(resolve_mni_path(MNI_Template)['mask']) + self.mask = nib.load(resolve_mni_path(MNI_Template)["mask"]) self.nifti_masker = NiftiMasker(mask_img=self.mask) if data is not None: if isinstance(data, six.string_types): - if 'http://' in data or 'https://' in data: + if "http://" in data or "https://" in data: from nltools.datasets import download_nifti - tmp_dir = os.path.join(tempfile.gettempdir(), - str(os.times()[-1])) + + tmp_dir = os.path.join(tempfile.gettempdir(), str(os.times()[-1])) os.makedirs(tmp_dir) data = nib.load(download_nifti(data, data_dir=tmp_dir)) - elif ('.h5' in data) or ('.hdf5' in data): + elif (".h5" in data) or (".hdf5" in data): f = dd.io.load(data) - self.data = f['data'] - self.X = pd.DataFrame(f['X'], columns=[e.decode('utf-8') if isinstance(e, bytes) else e for e in f['X_columns']], index=[e.decode('utf-8') if isinstance(e, bytes) else e for e in f['X_index']]) - self.Y = pd.DataFrame(f['Y'], columns=[e.decode('utf-8') if isinstance(e, bytes) else e for e in f['Y_columns']], index=[e.decode('utf-8') if isinstance(e, bytes) else e for e in f['Y_index']]) - self.mask = 
nib.Nifti1Image(f['mask_data'], affine=f['mask_affine'], file_map={'image': nib.FileHolder(filename=f['mask_file_name'])}) + self.data = f["data"] + self.X = pd.DataFrame( + f["X"], + columns=[ + e.decode("utf-8") if isinstance(e, bytes) else e + for e in f["X_columns"] + ], + index=[ + e.decode("utf-8") if isinstance(e, bytes) else e + for e in f["X_index"] + ], + ) + self.Y = pd.DataFrame( + f["Y"], + columns=[ + e.decode("utf-8") if isinstance(e, bytes) else e + for e in f["Y_columns"] + ], + index=[ + e.decode("utf-8") if isinstance(e, bytes) else e + for e in f["Y_index"] + ], + ) + self.mask = nib.Nifti1Image( + f["mask_data"], + affine=f["mask_affine"], + file_map={ + "image": nib.FileHolder(filename=f["mask_file_name"]) + }, + ) nifti_masker = NiftiMasker(self.mask) self.nifti_masker = nifti_masker.fit(self.mask) - self.file_name = f['file_name'] + self.file_name = f["file_name"] return else: @@ -135,21 +171,23 @@ def __init__(self, data=None, Y=None, X=None, mask=None, output_file=None, elif isinstance(data, list): if isinstance(data[0], Brain_Data): tmp = concatenate(data) - for item in ['data', 'Y', 'X', 'mask', 'nifti_masker', - 'file_name']: + for item in ["data", "Y", "X", "mask", "nifti_masker", "file_name"]: setattr(self, item, getattr(tmp, item)) else: if all(isinstance(x, data[0].__class__) for x in data): self.data = [] for i in data: if isinstance(i, six.string_types): - self.data.append(self.nifti_masker.fit_transform( - nib.load(i))) + self.data.append( + self.nifti_masker.fit_transform(nib.load(i)) + ) elif isinstance(i, nib.Nifti1Image): self.data.append(self.nifti_masker.fit_transform(i)) self.data = np.concatenate(self.data) else: - raise ValueError('Make sure all objects in the list are the same type.') + raise ValueError( + "Make sure all objects in the list are the same type." 
+ ) elif isinstance(data, nib.Nifti1Image): self.data = np.array(self.nifti_masker.fit_transform(data)) else: @@ -166,8 +204,7 @@ def __init__(self, data=None, Y=None, X=None, mask=None, output_file=None, Y = pd.read_csv(Y, header=None, index_col=None) if isinstance(Y, pd.DataFrame): if self.data.shape[0] != len(Y): - raise ValueError("Y does not match the correct size " - "of data") + raise ValueError("Y does not match the correct size " "of data") self.Y = Y else: raise ValueError("Make sure Y is a pandas data frame.") @@ -179,8 +216,7 @@ def __init__(self, data=None, Y=None, X=None, mask=None, output_file=None, X = pd.read_csv(X, header=None, index_col=None) if isinstance(X, pd.DataFrame): if self.data.shape[0] != X.shape[0]: - raise ValueError("X does not match the correct size " - "of data") + raise ValueError("X does not match the correct size " "of data") self.X = X else: raise ValueError("Make sure X is a pandas data frame.") @@ -190,14 +226,14 @@ def __init__(self, data=None, Y=None, X=None, mask=None, output_file=None, self.file_name = output_file if output_file is not None else [] def __repr__(self): - return '%s.%s(data=%s, Y=%s, X=%s, mask=%s, output_file=%s)' % ( + return "%s.%s(data=%s, Y=%s, X=%s, mask=%s, output_file=%s)" % ( self.__class__.__module__, self.__class__.__name__, self.shape(), len(self.Y), self.X.shape, os.path.basename(self.mask.get_filename()), - self.file_name + self.file_name, ) def __getitem__(self, index): @@ -222,15 +258,16 @@ def __getitem__(self, index): def __setitem__(self, index, value): if not isinstance(value, Brain_Data): - raise ValueError("Make sure the value you are trying to set is a " - "Brain_Data() instance.") + raise ValueError( + "Make sure the value you are trying to set is a " + "Brain_Data() instance." + ) self.data[index, :] = value.data if not value.Y.empty: self.Y.values[index] = value.Y if not value.X.empty: if self.X.shape[1] != value.X.shape[1]: - raise ValueError("Make sure self.X is the same size as " - "value.X.") + raise ValueError("Make sure self.X is the same size as " "value.X.") self.X.values[index] = value.X def __len__(self): @@ -242,11 +279,12 @@ def __add__(self, y): new.data = new.data + y elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError("Both Brain_Data() instances need to be the " - "same shape.") + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." + ) new.data = new.data + y.data else: - raise ValueError('Can only add int, float, or Brain_Data') + raise ValueError("Can only add int, float, or Brain_Data") return new def __radd__(self, y): @@ -255,11 +293,12 @@ def __radd__(self, y): new.data = y + new.data elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError("Both Brain_Data() instances need to be the " - "same shape.") + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." + ) new.data = y.data + new.data else: - raise ValueError('Can only add int, float, or Brain_Data') + raise ValueError("Can only add int, float, or Brain_Data") return new def __sub__(self, y): @@ -268,11 +307,12 @@ def __sub__(self, y): new.data = new.data - y elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError('Both Brain_Data() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." 
+ ) new.data = new.data - y.data else: - raise ValueError('Can only add int, float, or Brain_Data') + raise ValueError("Can only add int, float, or Brain_Data") return new def __rsub__(self, y): @@ -281,11 +321,12 @@ def __rsub__(self, y): new.data = y - new.data elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError('Both Brain_Data() instances need to be the ' - 'same shape.') + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." + ) new.data = y.data - new.data else: - raise ValueError('Can only add int, float, or Brain_Data') + raise ValueError("Can only add int, float, or Brain_Data") return new def __mul__(self, y): @@ -294,18 +335,21 @@ def __mul__(self, y): new.data = new.data * y elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError("Both Brain_Data() instances need to be the " - "same shape.") + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." + ) new.data = np.multiply(new.data, y.data) elif isinstance(y, (list, np.ndarray)): if len(y) != len(self): - raise ValueError('Vector multiplication requires that the ' - 'length of the vector match the number of ' - 'images in Brain_Data instance.') + raise ValueError( + "Vector multiplication requires that the " + "length of the vector match the number of " + "images in Brain_Data instance." + ) else: new.data = np.dot(new.data.T, y).T else: - raise ValueError('Can only multiply int, float, list, or Brain_Data') + raise ValueError("Can only multiply int, float, list, or Brain_Data") return new def __rmul__(self, y): @@ -314,11 +358,12 @@ def __rmul__(self, y): new.data = y * new.data elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError("Both Brain_Data() instances need to be the " - "same shape.") + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." + ) new.data = np.multiply(y.data, new.data) else: - raise ValueError('Can only multiply int, float, or Brain_Data') + raise ValueError("Can only multiply int, float, or Brain_Data") return new def __truediv__(self, y): @@ -327,11 +372,12 @@ def __truediv__(self, y): new.data = new.data / y elif isinstance(y, Brain_Data): if self.shape() != y.shape(): - raise ValueError("Both Brain_Data() instances need to be the " - "same shape.") + raise ValueError( + "Both Brain_Data() instances need to be the " "same shape." 
+ ) new.data = np.divide(new.data, y.data) else: - raise ValueError('Can only divide int, float, list, or Brain_Data') + raise ValueError("Can only divide int, float, list, or Brain_Data") return new def __iter__(self): @@ -344,15 +390,15 @@ def shape(self): return self.data.shape def mean(self, axis=0): - ''' Get mean of each voxel or image - - Args: - axis: (int) across images=0 (default), within images=1 - - Returns: - out: (float/np.array/Brain_Data) + """Get mean of each voxel or image + + Args: + axis: (int) across images=0 (default), within images=1 + + Returns: + out: (float/np.array/Brain_Data) - ''' + """ out = deepcopy(self) if check_brain_data_is_single(self): @@ -365,19 +411,19 @@ def mean(self, axis=0): elif axis == 1: out = np.mean(self.data, axis=1) else: - raise ValueError('axis must be 0 or 1') + raise ValueError("axis must be 0 or 1") return out def median(self, axis=0): - ''' Get median of each voxel or image - - Args: - axis: (int) across images=0 (default), within images=1 - - Returns: - out: (float/np.array/Brain_Data) - - ''' + """Get median of each voxel or image + + Args: + axis: (int) across images=0 (default), within images=1 + + Returns: + out: (float/np.array/Brain_Data) + + """ out = deepcopy(self) if check_brain_data_is_single(self): @@ -390,18 +436,18 @@ def median(self, axis=0): elif axis == 1: out = np.median(self.data, axis=1) else: - raise ValueError('axis must be 0 or 1') + raise ValueError("axis must be 0 or 1") return out def std(self, axis=0): - ''' Get standard deviation of each voxel or image. - - Args: - axis: (int) across images=0 (default), within images=1 - - Returns: - out: (float/np.array/Brain_Data) - ''' + """Get standard deviation of each voxel or image. + + Args: + axis: (int) across images=0 (default), within images=1 + + Returns: + out: (float/np.array/Brain_Data) + """ out = deepcopy(self) if check_brain_data_is_single(self): @@ -414,7 +460,7 @@ def std(self, axis=0): elif axis == 1: out = np.std(self.data, axis=1) else: - raise ValueError('axis must be 0 or 1') + raise ValueError("axis must be 0 or 1") return out def sum(self): @@ -435,7 +481,7 @@ def to_nifti(self): return self.nifti_masker.inverse_transform(self.data) def write(self, file_name=None, **kwargs): - """ Write out Brain_Data object to Nifti or HDF5 File. + """Write out Brain_Data object to Nifti or HDF5 File. 
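As a quick illustration of the axis convention used by mean, median, and std above: Brain_Data stores its data as an (images x voxels) array, so axis=0 aggregates across images (one value per voxel, i.e. another brain map) and axis=1 aggregates within each image. The plain-numpy sketch below uses a random array that is purely illustrative.

import numpy as np

data = np.random.default_rng(0).normal(size=(5, 100))   # 5 images x 100 voxels
across_images = data.mean(axis=0)                       # shape (100,): voxel-wise mean map
within_images = data.mean(axis=1)                       # shape (5,): one mean per image
print(across_images.shape, within_images.shape)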
Args: file_name: (str) name of nifti file including path @@ -443,27 +489,31 @@ def write(self, file_name=None, **kwargs): """ - if ('.h5' in file_name) or ('.hdf5' in file_name): + if (".h5" in file_name) or (".hdf5" in file_name): x_columns, x_index = _df_meta_to_arr(self.X) y_columns, y_index = _df_meta_to_arr(self.Y) - dd.io.save(file_name, { - 'data': self.data, - 'X': self.X.values, - 'X_columns': x_columns, - 'X_index': x_index, - 'Y': self.Y.values, - 'Y_columns': y_columns, - 'Y_index': y_index, - 'mask_affine': self.mask.affine, - 'mask_data': self.mask.get_data(), - 'mask_file_name': self.mask.get_filename(), - 'file_name': self.file_name - }, compression=kwargs.get('compression', 'blosc')) + dd.io.save( + file_name, + { + "data": self.data, + "X": self.X.values, + "X_columns": x_columns, + "X_index": x_index, + "Y": self.Y.values, + "Y_columns": y_columns, + "Y_index": y_index, + "mask_affine": self.mask.affine, + "mask_data": self.mask.get_data(), + "mask_file_name": self.mask.get_filename(), + "file_name": self.file_name, + }, + compression=kwargs.get("compression", "blosc"), + ) else: self.to_nifti().to_filename(file_name) - def scale(self, scale_val=100.): - """ Scale all values such that they are on the range [0, scale_val], + def scale(self, scale_val=100.0): + """Scale all values such that they are on the range [0, scale_val], via grand-mean scaling. This is NOT global-scaling/intensity normalization. This is useful for ensuring that data is on a common scale (e.g. good for multiple runs, participants, etc) @@ -483,9 +533,21 @@ def scale(self, scale_val=100.): return out - def plot(self, limit=5, anatomical=None, view='axial', colorbar=False, black_bg=True, draw_cross=False, - threshold_upper=None, threshold_lower=None, figsize=(15, 2), axes=None, **kwargs): - """ Create a quick plot of self.data. Will plot each image separately + def plot( + self, + limit=5, + anatomical=None, + view="axial", + colorbar=False, + black_bg=True, + draw_cross=False, + threshold_upper=None, + threshold_lower=None, + figsize=(15, 2), + axes=None, + **kwargs + ): + """Create a quick plot of self.data. 
Will plot each image separately Args: limit: (int) max number of images to return @@ -503,7 +565,7 @@ def plot(self, limit=5, anatomical=None, view='axial', colorbar=False, black_bg= """ - if view == 'axial': + if view == "axial": if threshold_upper is not None or threshold_lower is not None: print("threshold is ignored for simple axial plots") if anatomical is not None: @@ -514,40 +576,60 @@ def plot(self, limit=5, anatomical=None, view='axial', colorbar=False, black_bg= raise ValueError("anatomical is not a nibabel instance") else: # anatomical = nib.load(resolve_mni_path(MNI_Template)['plot']) - anatomical = get_mni_from_img_resolution(self, img_type='plot') + anatomical = get_mni_from_img_resolution(self, img_type="plot") if self.data.ndim == 1: if axes is None: _, axes = plt.subplots(nrows=1, figsize=figsize) - plot_stat_map(self.to_nifti(), anatomical, - cut_coords=range(-40, 60, 10), display_mode='z', - black_bg=black_bg, colorbar=colorbar, draw_cross=draw_cross, - axes=axes, **kwargs) + plot_stat_map( + self.to_nifti(), + anatomical, + cut_coords=range(-40, 60, 10), + display_mode="z", + black_bg=black_bg, + colorbar=colorbar, + draw_cross=draw_cross, + axes=axes, + **kwargs + ) else: if axes is not None: print("axes is ignored when plotting multiple images") n_subs = np.minimum(self.data.shape[0], limit) - _, a = plt.subplots(nrows=n_subs, figsize=(figsize[0], len(self) * figsize[1])) + _, a = plt.subplots( + nrows=n_subs, figsize=(figsize[0], len(self) * figsize[1]) + ) for i in range(n_subs): - plot_stat_map(self[i].to_nifti(), anatomical, - cut_coords=range(-40, 60, 10), - display_mode='z', - black_bg=black_bg, - colorbar=colorbar, - draw_cross=draw_cross, - axes=a[i], - **kwargs) + plot_stat_map( + self[i].to_nifti(), + anatomical, + cut_coords=range(-40, 60, 10), + display_mode="z", + black_bg=black_bg, + colorbar=colorbar, + draw_cross=draw_cross, + axes=a[i], + **kwargs + ) return - elif view in ['glass', 'mni', 'full']: + elif view in ["glass", "mni", "full"]: if self.data.ndim == 1: - return plot_brain(self, how=view, thr_upper=threshold_upper, thr_lower=threshold_lower, **kwargs) + return plot_brain( + self, + how=view, + thr_upper=threshold_upper, + thr_lower=threshold_lower, + **kwargs + ) else: - raise ValueError("Plotting in 'glass', 'mni', or 'full' views only works with a 3D image") + raise ValueError( + "Plotting in 'glass', 'mni', or 'full' views only works with a 3D image" + ) else: raise ValueError("view must be one of: 'axial', 'glass', 'mni', 'full'.") def iplot(self, threshold=0, surface=False, anatomical=None, **kwargs): - """ Create an interactive brain viewer for the current brain data instance. + """Create an interactive brain viewer for the current brain data instance. Args: threshold: (float/str) two-sided threshold to initialize the @@ -569,11 +651,13 @@ def iplot(self, threshold=0, surface=False, anatomical=None, **kwargs): raise ValueError("anatomical is not a nibabel instance") else: # anatomical = nib.load(resolve_mni_path(MNI_Template)['brain']) - anatomical = get_mni_from_img_resolution(self, img_type='brain') - return plot_interactive_brain(self, threshold=threshold, surface=surface, anatomical=anatomical, **kwargs) + anatomical = get_mni_from_img_resolution(self, img_type="brain") + return plot_interactive_brain( + self, threshold=threshold, surface=surface, anatomical=anatomical, **kwargs + ) - def regress(self, mode='ols', **kwargs): - """ Run a mass-univariate regression across voxels. 
Three types of regressions can be run: + def regress(self, mode="ols", **kwargs): + """Run a mass-univariate regression across voxels. Three types of regressions can be run: 1) Standard OLS (default) 2) Robust OLS (heteroscedasticty and/or auto-correlation robust errors), i.e. OLS with "sandwich estimators" 3) ARMA (auto-regressive and moving-average lags = 1 by default; experimental) @@ -594,14 +678,13 @@ def regress(self, mode='ols', **kwargs): """ if not isinstance(self.X, pd.DataFrame): - raise ValueError('Make sure self.X is a pandas DataFrame.') + raise ValueError("Make sure self.X is a pandas DataFrame.") if self.X.empty: - raise ValueError('Make sure self.X is not empty.') + raise ValueError("Make sure self.X is not empty.") if self.data.shape[0] != self.X.shape[0]: - raise ValueError("self.X does not match the correct size of " - "self.data") + raise ValueError("self.X does not match the correct size of " "self.data") b, t, p, _, res = regression(self.X, self.data, mode=mode, **kwargs) @@ -615,12 +698,25 @@ def regress(self, mode='ols', **kwargs): p_out = b_out.copy() sigma_out = b_out.copy() res_out = b_out.copy() - b_out.data, t_out.data, p_out.data, sigma_out.data, res_out.data = (b, t, p, sigma_out, res) + b_out.data, t_out.data, p_out.data, sigma_out.data, res_out.data = ( + b, + t, + p, + sigma_out, + res, + ) - return {'beta': b_out, 't': t_out, 'p': p_out, - 'sigma': sigma_out, 'residual': res_out} + return { + "beta": b_out, + "t": t_out, + "p": p_out, + "sigma": sigma_out, + "residual": res_out, + } - def randomise(self, n_permute=5000, threshold_dict=None, return_mask=False, **kwargs): + def randomise( + self, n_permute=5000, threshold_dict=None, return_mask=False, **kwargs + ): """ Run mass-univariate regression at each voxel with inference performed via permutation testing ala randomise in FSL. 
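For readers unfamiliar with the underlying math, the following standalone numpy/scipy sketch shows the kind of mass-univariate OLS computation that regress(mode="ols") performs over an (images x voxels) array. The toy design and the degrees-of-freedom handling here are illustrative assumptions; the actual estimator (including the robust and ARMA modes) lives in nltools.stats.regress.

import numpy as np
from scipy.stats import t as t_dist

rng = np.random.default_rng(0)
n_images, n_voxels = 20, 50
X = np.column_stack([np.ones(n_images), rng.normal(size=n_images)])  # intercept + one regressor
Y = rng.normal(size=(n_images, n_voxels))                            # images x voxels

B = np.linalg.pinv(X) @ Y                           # (regressors x voxels) betas
resid = Y - X @ B
df = n_images - X.shape[1]
sigma = np.sqrt((resid ** 2).sum(axis=0) / df)      # residual std per voxel
stderr = np.sqrt(np.diag(np.linalg.inv(X.T @ X)))[:, None] * sigma[None, :]
t_stats = B / stderr
p_vals = 2 * (1 - t_dist.cdf(np.abs(t_stats), df))  # two-tailed p per regressor and voxel
print(B.shape, t_stats.shape, p_vals.shape)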
Operates just like @@ -636,14 +732,13 @@ def randomise(self, n_permute=5000, threshold_dict=None, return_mask=False, **kw """ if not isinstance(self.X, pd.DataFrame): - raise ValueError('Make sure self.X is a pandas DataFrame.') + raise ValueError("Make sure self.X is a pandas DataFrame.") if self.X.empty: - raise ValueError('Make sure self.X is not empty.') + raise ValueError("Make sure self.X is not empty.") if self.data.shape[0] != self.X.shape[0]: - raise ValueError("self.X does not match the correct size of " - "self.data") + raise ValueError("self.X does not match the correct size of " "self.data") b, t, p = regress_permutation(self.X, self.data, n_permute=n_permute, **kwargs) @@ -659,31 +754,39 @@ def randomise(self, n_permute=5000, threshold_dict=None, return_mask=False, **kw if threshold_dict is not None: if isinstance(threshold_dict, dict): - if 'unc' in threshold_dict: - thr = threshold_dict['unc'] - elif 'fdr' in threshold_dict: - thr = fdr(p_out.data, q=threshold_dict['fdr']) - elif 'holm-bof' in threshold_dict: - thr = holm_bonf(p.data, alpha=threshold_dict['holm-bonf']) - elif 'permutation' in threshold_dict: - thr = .05 + if "unc" in threshold_dict: + thr = threshold_dict["unc"] + elif "fdr" in threshold_dict: + thr = fdr(p_out.data, q=threshold_dict["fdr"]) + elif "holm-bof" in threshold_dict: + thr = holm_bonf(p.data, alpha=threshold_dict["holm-bonf"]) + elif "permutation" in threshold_dict: + thr = 0.05 if return_mask: thr_t_out, thr_mask = threshold(t_out, p_out, thr, True) - out = {'beta': b_out, 't': t_out, 'p': p_out, 'thr_t': thr_t_out, 'thr_mask': thr_mask} + out = { + "beta": b_out, + "t": t_out, + "p": p_out, + "thr_t": thr_t_out, + "thr_mask": thr_mask, + } else: thr_t_out = threshold(t_out, p_out, thr) - out = {'beta': b_out, 't': t_out, 'p': p_out, 'thr_t': thr_t_out} + out = {"beta": b_out, "t": t_out, "p": p_out, "thr_t": thr_t_out} else: - raise ValueError("threshold_dict is not a dictionary. " - "Make sure it is in the form of {'unc': .001} " - "or {'fdr': .05}") + raise ValueError( + "threshold_dict is not a dictionary. 
" + "Make sure it is in the form of {'unc': .001} " + "or {'fdr': .05}" + ) else: - out = {'beta': b_out, 't': t_out, 'p': p_out} + out = {"beta": b_out, "t": t_out, "p": p_out} return out def ttest(self, threshold_dict=None, return_mask=False): - """ Calculate one sample t-test across each voxel (two-sided) + """Calculate one sample t-test across each voxel (two-sided) Args: threshold_dict: (dict) a dictionary of threshold parameters @@ -700,36 +803,48 @@ def ttest(self, threshold_dict=None, return_mask=False): t = deepcopy(self) p = deepcopy(self) - if threshold_dict is not None and 'permutation' in threshold_dict: + if threshold_dict is not None and "permutation" in threshold_dict: # Convert data to correct shape (subjects, time, space) data_convert_shape = deepcopy(self.data) data_convert_shape = np.expand_dims(data_convert_shape, axis=1) - if 'n_permutations' in threshold_dict: - n_permutations = threshold_dict['n_permutations'] + if "n_permutations" in threshold_dict: + n_permutations = threshold_dict["n_permutations"] else: n_permutations = 1000 - warnings.warn("n_permutations not set: running with 1000 " - "permutations") + warnings.warn( + "n_permutations not set: running with 1000 " "permutations" + ) - if 'connectivity' in threshold_dict: - connectivity = threshold_dict['connectivity'] + if "connectivity" in threshold_dict: + connectivity = threshold_dict["connectivity"] else: connectivity = None - n_jobs = threshold_dict['n_jobs'] if 'n_jobs' in threshold_dict else 1 - if threshold_dict['permutation'] == 'tfce': + n_jobs = threshold_dict["n_jobs"] if "n_jobs" in threshold_dict else 1 + if threshold_dict["permutation"] == "tfce": perm_threshold = dict(start=0, step=0.2) else: perm_threshold = None - if 'stat_fun' in threshold_dict: - stat_fun = threshold_dict['stat_fun'] + if "stat_fun" in threshold_dict: + stat_fun = threshold_dict["stat_fun"] else: stat_fun = mne_stats.ttest_1samp_no_p - t.data, clusters, p_values, _ = mne_stats.spatio_temporal_cluster_1samp_test( - data_convert_shape, tail=0, threshold=perm_threshold, stat_fun=stat_fun, - connectivity=connectivity, n_permutations=n_permutations, n_jobs=n_jobs) + ( + t.data, + clusters, + p_values, + _, + ) = mne_stats.spatio_temporal_cluster_1samp_test( + data_convert_shape, + tail=0, + threshold=perm_threshold, + stat_fun=stat_fun, + connectivity=connectivity, + n_permutations=n_permutations, + n_jobs=n_jobs, + ) t.data = t.data.squeeze() @@ -740,31 +855,33 @@ def ttest(self, threshold_dict=None, return_mask=False): t.data, p.data = ttest_1samp(self.data, 0, 0) if threshold_dict is not None: if isinstance(threshold_dict, dict): - if 'unc' in threshold_dict: - thr = threshold_dict['unc'] - elif 'fdr' in threshold_dict: - thr = fdr(p.data, q=threshold_dict['fdr']) - elif 'holm-bonf' in threshold_dict: - thr = holm_bonf(p.data, alpha=threshold_dict['holm-bonf']) - elif 'permutation' in threshold_dict: - thr = .05 + if "unc" in threshold_dict: + thr = threshold_dict["unc"] + elif "fdr" in threshold_dict: + thr = fdr(p.data, q=threshold_dict["fdr"]) + elif "holm-bonf" in threshold_dict: + thr = holm_bonf(p.data, alpha=threshold_dict["holm-bonf"]) + elif "permutation" in threshold_dict: + thr = 0.05 if return_mask: thr_t, thr_mask = threshold(t, p, thr, True) - out = {'t': t, 'p': p, 'thr_t': thr_t, 'thr_mask': thr_mask} + out = {"t": t, "p": p, "thr_t": thr_t, "thr_mask": thr_mask} else: thr_t = threshold(t, p, thr) - out = {'t': t, 'p': p, 'thr_t': thr_t} + out = {"t": t, "p": p, "thr_t": thr_t} else: - raise 
ValueError("threshold_dict is not a dictionary. " - "Make sure it is in the form of {'unc': .001} " - "or {'fdr': .05}") + raise ValueError( + "threshold_dict is not a dictionary. " + "Make sure it is in the form of {'unc': .001} " + "or {'fdr': .05}" + ) else: - out = {'t': t, 'p': p} + out = {"t": t, "p": p} return out def append(self, data, **kwargs): - """ Append data to Brain_Data instance + """Append data to Brain_Data instance Args: data: (Brain_Data) Brain_Data instance to append @@ -779,8 +896,10 @@ def append(self, data, **kwargs): if self.isempty(): out = deepcopy(data) else: - error_string = ("Data to append has different number of voxels " - "then Brain_Data instance.") + error_string = ( + "Data to append has different number of voxels " + "then Brain_Data instance." + ) if len(self.shape()) == 1 & len(data.shape()) == 1: if self.shape()[0] != data.shape()[0]: raise ValueError(error_string) @@ -824,16 +943,16 @@ def isempty(self): boolean = True if not self.data else False return boolean - def similarity(self, image, method='correlation'): - """ Calculate similarity of Brain_Data() instance with single - Brain_Data or Nibabel image + def similarity(self, image, method="correlation"): + """Calculate similarity of Brain_Data() instance with single + Brain_Data or Nibabel image - Args: - image: (Brain_Data, nifti) image to evaluate similarity - method: (str) Type of similarity - ['correlation','dot_product','cosine'] - Returns: - pexp: (list) Outputs a vector of pattern expression values + Args: + image: (Brain_Data, nifti) image to evaluate similarity + method: (str) Type of similarity + ['correlation','dot_product','cosine'] + Returns: + pexp: (list) Outputs a vector of pattern expression values """ @@ -842,10 +961,14 @@ def similarity(self, image, method='correlation'): # Check to make sure masks are the same for each dataset and if not # create a union mask # This might be handy code for a new Brain_Data method - if np.sum(self.nifti_masker.mask_img.get_data() == 1) != np.sum(image.nifti_masker.mask_img.get_data() == 1): - new_mask = intersect_masks([self.nifti_masker.mask_img, - image.nifti_masker.mask_img], - threshold=1, connected=False) + if np.sum(self.nifti_masker.mask_img.get_data() == 1) != np.sum( + image.nifti_masker.mask_img.get_data() == 1 + ): + new_mask = intersect_masks( + [self.nifti_masker.mask_img, image.nifti_masker.mask_img], + threshold=1, + connected=False, + ) new_nifti_masker = NiftiMasker(mask_img=new_mask) data2 = new_nifti_masker.fit_transform(self.to_nifti()) image2 = new_nifti_masker.fit_transform(image.to_nifti()) @@ -867,7 +990,7 @@ def flatten_array(data): return data # Calculate pattern expression - if method == 'dot_product': + if method == "dot_product": if len(image2.shape) > 1: if image2.shape[0] > 1: pexp = [] @@ -878,7 +1001,7 @@ def flatten_array(data): pexp = np.dot(data2, image2) else: pexp = np.dot(data2, image2) - elif method == 'correlation': + elif method == "correlation": if len(image2.shape) > 1: if image2.shape[0] > 1: pexp = [] @@ -889,62 +1012,71 @@ def flatten_array(data): pexp = pearson(image2, data2) else: pexp = pearson(image2, data2) - elif method == 'cosine': + elif method == "cosine": image2 = vector2array(image2) data2 = vector2array(data2) if image2.shape[1] > 1: pexp = [] for i in range(image2.shape[0]): - pexp.append(cosine_similarity(image2[i, :].reshape(-1, 1).T, data2).flatten()) + pexp.append( + cosine_similarity( + image2[i, :].reshape(-1, 1).T, data2 + ).flatten() + ) pexp = np.array(pexp) else: pexp = 
cosine_similarity(image2, data2).flatten() else: - raise ValueError('Method must be one of: correlation, dot_product, cosine') + raise ValueError("Method must be one of: correlation, dot_product, cosine") return flatten_array(pexp) - def distance(self, metric='euclidean', **kwargs): - """ Calculate distance between images within a Brain_Data() instance. + def distance(self, metric="euclidean", **kwargs): + """Calculate distance between images within a Brain_Data() instance. - Args: - metric: (str) type of distance metric (can use any scikit learn or - sciypy metric) + Args: + metric: (str) type of distance metric (can use any scikit learn or + sciypy metric) - Returns: - dist: (Adjacency) Outputs a 2D distance matrix. + Returns: + dist: (Adjacency) Outputs a 2D distance matrix. """ - return Adjacency(pairwise_distances(self.data, metric=metric, **kwargs), - matrix_type='Distance') + return Adjacency( + pairwise_distances(self.data, metric=metric, **kwargs), + matrix_type="Distance", + ) - def multivariate_similarity(self, images, method='ols'): - """ Predict spatial distribution of Brain_Data() instance from linear - combination of other Brain_Data() instances or Nibabel images + def multivariate_similarity(self, images, method="ols"): + """Predict spatial distribution of Brain_Data() instance from linear + combination of other Brain_Data() instances or Nibabel images - Args: - self: Brain_Data instance of data to be applied - images: Brain_Data instance of weight map + Args: + self: Brain_Data instance of data to be applied + images: Brain_Data instance of weight map - Returns: - out: dictionary of regression statistics in Brain_Data - instances {'beta','t','p','df','residual'} + Returns: + out: dictionary of regression statistics in Brain_Data + instances {'beta','t','p','df','residual'} """ # Notes: Should add ridge, and lasso, elastic net options options if len(self.shape()) > 1: - raise ValueError("This method can only decompose a single brain " - "image.") + raise ValueError("This method can only decompose a single brain " "image.") images = check_brain_data(images) # Check to make sure masks are the same for each dataset and if not create a union mask # This might be handy code for a new Brain_Data method - if np.sum(self.nifti_masker.mask_img.get_data() == 1) != np.sum(images.nifti_masker.mask_img.get_data() == 1): - new_mask = intersect_masks([self.nifti_masker.mask_img, - images.nifti_masker.mask_img], - threshold=1, connected=False) + if np.sum(self.nifti_masker.mask_img.get_data() == 1) != np.sum( + images.nifti_masker.mask_img.get_data() == 1 + ): + new_mask = intersect_masks( + [self.nifti_masker.mask_img, images.nifti_masker.mask_img], + threshold=1, + connected=False, + ) new_nifti_masker = NiftiMasker(mask_img=new_mask) data2 = new_nifti_masker.fit_transform(self.to_nifti()) image2 = new_nifti_masker.fit_transform(images.to_nifti()) @@ -956,23 +1088,33 @@ def multivariate_similarity(self, images, method='ols'): image2 = np.vstack((np.ones(image2.shape[1]), image2)).T # Calculate pattern expression - if method == 'ols': + if method == "ols": b = np.dot(np.linalg.pinv(image2), data2) res = data2 - np.dot(image2, b) sigma = np.std(res, axis=0) - stderr = np.dot(np.matrix(np.diagonal(np.linalg.inv(np.dot(image2.T, - image2)))**.5).T, np.matrix(sigma)) + stderr = np.dot( + np.matrix( + np.diagonal(np.linalg.inv(np.dot(image2.T, image2))) ** 0.5 + ).T, + np.matrix(sigma), + ) t_out = b / stderr - df = image2.shape[0]-image2.shape[1] - p = 2*(1-t_dist.cdf(np.abs(t_out), df)) + df 
= image2.shape[0] - image2.shape[1] + p = 2 * (1 - t_dist.cdf(np.abs(t_out), df)) else: raise NotImplementedError - return {'beta': b, 't': t_out, 'p': p, 'df': df, 'sigma': sigma, - 'residual': res} + return { + "beta": b, + "t": t_out, + "p": p, + "df": df, + "sigma": sigma, + "residual": res, + } def predict(self, algorithm=None, cv_dict=None, plot=True, **kwargs): - """ Run prediction + """Run prediction Args: algorithm: Algorithm to use for prediction. Must be one of 'svm', @@ -999,61 +1141,77 @@ def predict(self, algorithm=None, cv_dict=None, plot=True, **kwargs): predictor_settings = set_algorithm(algorithm, **kwargs) else: # Use SVR as a default - predictor_settings = set_algorithm('svr', **{'kernel': "linear"}) + predictor_settings = set_algorithm("svr", **{"kernel": "linear"}) # Initialize output dictionary - output = {'Y': np.array(self.Y).flatten()} - predictor = predictor_settings['predictor'] + output = {"Y": np.array(self.Y).flatten()} + predictor = predictor_settings["predictor"] # Overall Fit for weight map - predictor.fit(self.data, np.ravel(output['Y'])) - output['yfit_all'] = predictor.predict(self.data) - if predictor_settings['prediction_type'] == 'classification': - if predictor_settings['algorithm'] not in ['svm', 'ridgeClassifier', - 'ridgeClassifierCV']: - output['prob_all'] = predictor.predict_proba(self.data) + predictor.fit(self.data, np.ravel(output["Y"])) + output["yfit_all"] = predictor.predict(self.data) + if predictor_settings["prediction_type"] == "classification": + if predictor_settings["algorithm"] not in [ + "svm", + "ridgeClassifier", + "ridgeClassifierCV", + ]: + output["prob_all"] = predictor.predict_proba(self.data) else: - output['dist_from_hyperplane_all'] = predictor.decision_function(self.data) - if predictor_settings['algorithm'] == 'svm' and predictor.probability: - output['prob_all'] = predictor.predict_proba(self.data) + output["dist_from_hyperplane_all"] = predictor.decision_function( + self.data + ) + if predictor_settings["algorithm"] == "svm" and predictor.probability: + output["prob_all"] = predictor.predict_proba(self.data) # Intercept - if predictor_settings['algorithm'] == 'pcr': - output['intercept'] = predictor_settings['_regress'].intercept_ - elif predictor_settings['algorithm'] == 'lassopcr': - output['intercept'] = predictor_settings['_lasso'].intercept_ + if predictor_settings["algorithm"] == "pcr": + output["intercept"] = predictor_settings["_regress"].intercept_ + elif predictor_settings["algorithm"] == "lassopcr": + output["intercept"] = predictor_settings["_lasso"].intercept_ else: - output['intercept'] = predictor.intercept_ + output["intercept"] = predictor.intercept_ # Weight map - output['weight_map'] = self.empty() - if predictor_settings['algorithm'] == 'lassopcr': - output['weight_map'].data = np.dot(predictor_settings['_pca'].components_.T, predictor_settings['_lasso'].coef_) - elif predictor_settings['algorithm'] == 'pcr': - output['weight_map'].data = np.dot(predictor_settings['_pca'].components_.T, predictor_settings['_regress'].coef_) + output["weight_map"] = self.empty() + if predictor_settings["algorithm"] == "lassopcr": + output["weight_map"].data = np.dot( + predictor_settings["_pca"].components_.T, + predictor_settings["_lasso"].coef_, + ) + elif predictor_settings["algorithm"] == "pcr": + output["weight_map"].data = np.dot( + predictor_settings["_pca"].components_.T, + predictor_settings["_regress"].coef_, + ) else: - output['weight_map'].data = predictor.coef_.squeeze() + output["weight_map"].data = 
predictor.coef_.squeeze() # Cross-Validation Fit from sklearn.base import clone + if cv_dict is not None: cv = set_cv(Y=self.Y, cv_dict=cv_dict) - predictor_cv = predictor_settings['predictor'] - output['yfit_xval'] = output['yfit_all'].copy() - output['intercept_xval'] = [] + predictor_cv = predictor_settings["predictor"] + output["yfit_xval"] = output["yfit_all"].copy() + output["intercept_xval"] = [] # Multi-class classification, init weightmaps as list - if ((predictor_settings['prediction_type'] == 'classification') and (len(np.unique(self.Y)) > 2)): - output['weight_map_xval'] = [] + if (predictor_settings["prediction_type"] == "classification") and ( + len(np.unique(self.Y)) > 2 + ): + output["weight_map_xval"] = [] else: # Otherwise we'll have a single weightmap - output['weight_map_xval'] = output['weight_map'].copy() - output['cv_idx'] = [] + output["weight_map_xval"] = output["weight_map"].copy() + output["cv_idx"] = [] wt_map_xval = [] # Initialize zero'd arrays that will be filled during cross-validation and fitting # These will need change shape if doing multi-class or probablistic predictions - if (predictor_settings['algorithm'] == 'logistic') or (predictor_settings['algorithm'] == 'svm' and predictor.probability): + if (predictor_settings["algorithm"] == "logistic") or ( + predictor_settings["algorithm"] == "svm" and predictor.probability + ): # If logistic or svm prob, probs == number of classes probs_init = np.zeros((len(self.Y), len(np.unique(self.Y)))) # however if num classes == 2 decision function == 1, but if num class > 2, decision function == num classes (sklearn weirdness) @@ -1068,91 +1226,169 @@ def predict(self, algorithm=None, cv_dict=None, plot=True, **kwargs): # else: # dec_init = np.zeros((len(self.Y), len(np.unique(self.Y)))) - if predictor_settings['prediction_type'] == 'classification': - if predictor_settings['algorithm'] not in ['svm', 'ridgeClassifier', 'ridgeClassifierCV']: - output['prob_xval'] = probs_init + if predictor_settings["prediction_type"] == "classification": + if predictor_settings["algorithm"] not in [ + "svm", + "ridgeClassifier", + "ridgeClassifierCV", + ]: + output["prob_xval"] = probs_init else: - output['dist_from_hyperplane_xval'] = dec_init - if predictor_settings['algorithm'] == 'svm' and predictor_cv.probability: - output['prob_xval'] = probs_init + output["dist_from_hyperplane_xval"] = dec_init + if ( + predictor_settings["algorithm"] == "svm" + and predictor_cv.probability + ): + output["prob_xval"] = probs_init for train, test in cv: # Ensure estimators are always indepedent across folds - predictor_cv = clone(predictor_settings['predictor']) + predictor_cv = clone(predictor_settings["predictor"]) predictor_cv.fit(self.data[train], np.ravel(self.Y.iloc[train])) - output['yfit_xval'][test] = predictor_cv.predict(self.data[test]).ravel() - if predictor_settings['prediction_type'] == 'classification': - if predictor_settings['algorithm'] not in ['svm', 'ridgeClassifier', 'ridgeClassifierCV']: - output['prob_xval'][test] = predictor_cv.predict_proba(self.data[test]) + output["yfit_xval"][test] = predictor_cv.predict( + self.data[test] + ).ravel() + if predictor_settings["prediction_type"] == "classification": + if predictor_settings["algorithm"] not in [ + "svm", + "ridgeClassifier", + "ridgeClassifierCV", + ]: + output["prob_xval"][test] = predictor_cv.predict_proba( + self.data[test] + ) else: - output['dist_from_hyperplane_xval'][test] = predictor_cv.decision_function(self.data[test]) - if predictor_settings['algorithm'] == 
'svm' and predictor_cv.probability: - output['prob_xval'][test] = predictor_cv.predict_proba(self.data[test]) + output["dist_from_hyperplane_xval"][ + test + ] = predictor_cv.decision_function(self.data[test]) + if ( + predictor_settings["algorithm"] == "svm" + and predictor_cv.probability + ): + output["prob_xval"][test] = predictor_cv.predict_proba( + self.data[test] + ) # Intercept - if predictor_settings['algorithm'] == 'pcr': - output['intercept_xval'].append(predictor_settings['_regress'].intercept_) - elif predictor_settings['algorithm'] == 'lassopcr': - output['intercept_xval'].append(predictor_settings['_lasso'].intercept_) + if predictor_settings["algorithm"] == "pcr": + output["intercept_xval"].append( + predictor_settings["_regress"].intercept_ + ) + elif predictor_settings["algorithm"] == "lassopcr": + output["intercept_xval"].append( + predictor_settings["_lasso"].intercept_ + ) else: - output['intercept_xval'].append(predictor_cv.intercept_) - output['cv_idx'].append((train, test)) + output["intercept_xval"].append(predictor_cv.intercept_) + output["cv_idx"].append((train, test)) # Weight map # Multi-class classification, weightmaps as list - if ((predictor_settings['prediction_type'] == 'classification') and (len(np.unique(self.Y)) > 2)): - tmp = output['weight_map'].empty() + if (predictor_settings["prediction_type"] == "classification") and ( + len(np.unique(self.Y)) > 2 + ): + tmp = output["weight_map"].empty() tmp.data = predictor_cv.coef_.squeeze() - output['weight_map_xval'].append(tmp) + output["weight_map_xval"].append(tmp) # Regression or binary classification else: - if predictor_settings['algorithm'] == 'lassopcr': - wt_map_xval.append(np.dot(predictor_settings['_pca'].components_.T, predictor_settings['_lasso'].coef_)) - elif predictor_settings['algorithm'] == 'pcr': - wt_map_xval.append(np.dot(predictor_settings['_pca'].components_.T, predictor_settings['_regress'].coef_)) + if predictor_settings["algorithm"] == "lassopcr": + wt_map_xval.append( + np.dot( + predictor_settings["_pca"].components_.T, + predictor_settings["_lasso"].coef_, + ) + ) + elif predictor_settings["algorithm"] == "pcr": + wt_map_xval.append( + np.dot( + predictor_settings["_pca"].components_.T, + predictor_settings["_regress"].coef_, + ) + ) else: wt_map_xval.append(predictor_cv.coef_.squeeze()) - output['weight_map_xval'].data = np.array(wt_map_xval) + output["weight_map_xval"].data = np.array(wt_map_xval) # Print Results - if predictor_settings['prediction_type'] == 'classification': - output['mcr_all'] = balanced_accuracy_score(self.Y.values, output['yfit_all']) - print('overall accuracy: %.2f' % output['mcr_all']) + if predictor_settings["prediction_type"] == "classification": + output["mcr_all"] = balanced_accuracy_score( + self.Y.values, output["yfit_all"] + ) + print("overall accuracy: %.2f" % output["mcr_all"]) if cv_dict is not None: - output['mcr_xval'] = np.mean(output['yfit_xval'] == np.array(self.Y).flatten()) - print('overall CV accuracy: %.2f' % output['mcr_xval']) - elif predictor_settings['prediction_type'] == 'prediction': - output['rmse_all'] = np.sqrt(np.mean((output['yfit_all']-output['Y'])**2)) - output['r_all'] = pearsonr(output['Y'], output['yfit_all'])[0] - print('overall Root Mean Squared Error: %.2f' % output['rmse_all']) - print('overall Correlation: %.2f' % output['r_all']) + output["mcr_xval"] = np.mean( + output["yfit_xval"] == np.array(self.Y).flatten() + ) + print("overall CV accuracy: %.2f" % output["mcr_xval"]) + elif predictor_settings["prediction_type"] 
== "prediction": + output["rmse_all"] = np.sqrt( + np.mean((output["yfit_all"] - output["Y"]) ** 2) + ) + output["r_all"] = pearsonr(output["Y"], output["yfit_all"])[0] + print("overall Root Mean Squared Error: %.2f" % output["rmse_all"]) + print("overall Correlation: %.2f" % output["r_all"]) if cv_dict is not None: - output['rmse_xval'] = np.sqrt(np.mean((output['yfit_xval']-output['Y'])**2)) - output['r_xval'] = pearsonr(output['Y'], output['yfit_xval'])[0] - print('overall CV Root Mean Squared Error: %.2f' % output['rmse_xval']) - print('overall CV Correlation: %.2f' % output['r_xval']) + output["rmse_xval"] = np.sqrt( + np.mean((output["yfit_xval"] - output["Y"]) ** 2) + ) + output["r_xval"] = pearsonr(output["Y"], output["yfit_xval"])[0] + print("overall CV Root Mean Squared Error: %.2f" % output["rmse_xval"]) + print("overall CV Correlation: %.2f" % output["r_xval"]) # Plot if plot: if cv_dict is not None: - if predictor_settings['prediction_type'] == 'prediction': - scatterplot(pd.DataFrame({'Y': output['Y'], 'yfit_xval': output['yfit_xval']})) - elif predictor_settings['prediction_type'] == 'classification': + if predictor_settings["prediction_type"] == "prediction": + scatterplot( + pd.DataFrame( + {"Y": output["Y"], "yfit_xval": output["yfit_xval"]} + ) + ) + elif predictor_settings["prediction_type"] == "classification": if len(np.unique(self.Y)) > 2: - print('Skipping ROC plot because num_classes > 2') + print("Skipping ROC plot because num_classes > 2") else: - if predictor_settings['algorithm'] not in ['svm', 'ridgeClassifier', 'ridgeClassifierCV']: - output['roc'] = Roc(input_values=output['prob_xval'][:,1], binary_outcome=output['Y'].astype('bool')) + if predictor_settings["algorithm"] not in [ + "svm", + "ridgeClassifier", + "ridgeClassifierCV", + ]: + output["roc"] = Roc( + input_values=output["prob_xval"][:, 1], + binary_outcome=output["Y"].astype("bool"), + ) else: - output['roc'] = Roc(input_values=output['dist_from_hyperplane_xval'], binary_outcome=output['Y'].astype('bool')) - if predictor_settings['algorithm'] == 'svm' and predictor_cv.probability: - output['roc'] = Roc(input_values=output['prob_xval'][:, 1], binary_outcome=output['Y'].astype('bool')) - output['roc'].plot() - output['weight_map'].plot() + output["roc"] = Roc( + input_values=output["dist_from_hyperplane_xval"], + binary_outcome=output["Y"].astype("bool"), + ) + if ( + predictor_settings["algorithm"] == "svm" + and predictor_cv.probability + ): + output["roc"] = Roc( + input_values=output["prob_xval"][:, 1], + binary_outcome=output["Y"].astype("bool"), + ) + output["roc"].plot() + output["weight_map"].plot() return output - def predict_multi(self, algorithm=None, cv_dict=None, method='searchlight', rois=None, process_mask=None, radius=2.0, scoring=None, n_jobs=1, verbose=0, **kwargs): - """ Perform multi-region prediction. This can be a searchlight analysis or multi-roi analysis if provided a Brain_Data instance with labeled non-overlapping rois. + def predict_multi( + self, + algorithm=None, + cv_dict=None, + method="searchlight", + rois=None, + process_mask=None, + radius=2.0, + scoring=None, + n_jobs=1, + verbose=0, + **kwargs + ): + """Perform multi-region prediction. This can be a searchlight analysis or multi-roi analysis if provided a Brain_Data instance with labeled non-overlapping rois. 
Args: algorithm (string): algorithm to use for prediction Must be one of 'svm', @@ -1178,27 +1414,29 @@ def predict_multi(self, algorithm=None, cv_dict=None, method='searchlight', rois """ - if method not in ['searchlight', 'rois']: + if method not in ["searchlight", "rois"]: raise ValueError("method must be one of 'searchlight' or 'roi'") - if method == 'roi' and rois is None: - raise ValueError("With method = 'roi' a file path, or nibabel/nltools instance with roi labels must be provided") + if method == "roi" and rois is None: + raise ValueError( + "With method = 'roi' a file path, or nibabel/nltools instance with roi labels must be provided" + ) # Set algorithm if algorithm is not None: predictor_settings = set_algorithm(algorithm, **kwargs) else: # Use SVR as a default - predictor_settings = set_algorithm('svr', **{'kernel': "linear"}) - estimator = predictor_settings['predictor'] + predictor_settings = set_algorithm("svr", **{"kernel": "linear"}) + estimator = predictor_settings["predictor"] if cv_dict is not None: cv = set_cv(Y=self.Y, cv_dict=cv_dict, return_generator=False) - groups = cv_dict['subject_id'] if cv_dict['type'] == 'loso' else None + groups = cv_dict["subject_id"] if cv_dict["type"] == "loso" else None else: cv = None groups = None - if method == 'rois': + if method == "rois": if isinstance(rois, six.string_types): if os.path.isfile(rois): rois_img = Brain_Data(rois, mask=self.mask) @@ -1209,11 +1447,16 @@ def predict_multi(self, algorithm=None, cv_dict=None, method='searchlight', rois if len(rois_img.shape()) == 1: rois_img = expand_mask(rois_img, custom_mask=self.mask) if len(rois_img.shape()) != 2: - raise ValueError("rois cannot be coerced into a mask. Make sure nifti file or Brain_Data is 3d with non-overlapping integer labels or 4d with non-overlapping boolean masks") + raise ValueError( + "rois cannot be coerced into a mask. 
Make sure nifti file or Brain_Data is 3d with non-overlapping integer labels or 4d with non-overlapping boolean masks" + ) - out = Parallel(n_jobs=n_jobs, verbose=verbose)(delayed(_roi_func)(self, r, algorithm, cv_dict, **kwargs) for r in rois_img) + out = Parallel(n_jobs=n_jobs, verbose=verbose)( + delayed(_roi_func)(self, r, algorithm, cv_dict, **kwargs) + for r in rois_img + ) - elif method == 'searchlight': + elif method == "searchlight": # Searchlight if process_mask is None: process_mask_img = None @@ -1225,11 +1468,24 @@ def predict_multi(self, algorithm=None, cv_dict=None, method='searchlight', rois if os.path.isfile(process_mask): process_mask_img = nib.load(process_mask) else: - raise ValueError("process mask file path specified but can't be found") + raise ValueError( + "process mask file path specified but can't be found" + ) else: - raise TypeError("process_mask is not a valid nibabel instance, Brain_Data instance or file path") - - sl = SearchLight(mask_img=self.mask, process_mask_img=process_mask_img, estimator=estimator, n_jobs=n_jobs, scoring=scoring, cv=cv, verbose=verbose, radius=radius) + raise TypeError( + "process_mask is not a valid nibabel instance, Brain_Data instance or file path" + ) + + sl = SearchLight( + mask_img=self.mask, + process_mask_img=process_mask_img, + estimator=estimator, + n_jobs=n_jobs, + scoring=scoring, + cv=cv, + verbose=verbose, + radius=radius, + ) in_image = self.to_nifti() sl.fit(in_image, self.Y, groups=groups) out = nib.Nifti1Image(sl.scores_, affine=self.nifti_masker.affine_) @@ -1237,7 +1493,7 @@ def predict_multi(self, algorithm=None, cv_dict=None, method='searchlight', rois return out def apply_mask(self, mask, resample_mask_to_brain=False): - """ Mask Brain_Data instance + """Mask Brain_Data instance Note target data will be resampled into the same space as the mask. If you would like the mask resampled into the Brain_Data space, then set resample_mask_to_brain=True. 
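As a rough illustration of the two predict_multi() modes handled above, the sketch below fits one model per ROI and then runs a whole-brain searchlight. The file paths and outcome are hypothetical, and the cv_dict keys are assumed from the nltools documentation.

import numpy as np
import pandas as pd
from nltools.data import Brain_Data

dat = Brain_Data("task_betas.nii.gz")            # hypothetical 4-D image
dat.Y = pd.DataFrame(np.random.randn(len(dat)))  # one outcome per volume
atlas = Brain_Data("parcellation.nii.gz")        # hypothetical integer-labeled ROI image

# Fit one model per ROI label; returns a list with one prediction result per ROI
roi_out = dat.predict_multi(
    algorithm="svr",
    method="rois",
    rois=atlas,
    cv_dict={"type": "kfolds", "n_folds": 5},
    n_jobs=-1,
    **{"kernel": "linear"}
)

# Whole-brain searchlight; returns a nibabel image of scores
sl_scores = dat.predict_multi(
    algorithm="svr",
    method="searchlight",
    radius=6,
    n_jobs=-1,
    **{"kernel": "linear"}
)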
@@ -1254,10 +1510,10 @@ def apply_mask(self, mask, resample_mask_to_brain=False): masked = deepcopy(self) mask = check_brain_data(mask) if not check_brain_data_is_single(mask): - raise ValueError('Mask must be a single image') + raise ValueError("Mask must be a single image") n_vox = len(self) if check_brain_data_is_single(self) else self.shape()[1] - if resample_mask_to_brain: + if resample_mask_to_brain: mask = resample_to_img(mask.to_nifti(), masked.to_nifti()) mask = check_brain_data(mask, masked.mask) @@ -1275,8 +1531,8 @@ def apply_mask(self, mask, resample_mask_to_brain=False): masked.data = masked.data.flatten() return masked - def extract_roi(self, mask, metric='mean', n_components=None): - """ Extract activity from mask + def extract_roi(self, mask, metric="mean", n_components=None): + """Extract activity from mask Args: mask: (nifti) nibabel mask can be binary or numbered for @@ -1290,7 +1546,7 @@ def extract_roi(self, mask, metric='mean', n_components=None): """ - metrics = ['mean','median','pca'] + metrics = ["mean", "median", "pca"] mask = check_brain_data(mask) ma = mask.copy() @@ -1301,48 +1557,54 @@ def extract_roi(self, mask, metric='mean', n_components=None): if len(np.unique(ma.data)) == 2: masked = self.apply_mask(ma) if check_brain_data_is_single(masked): - if metric == 'mean': + if metric == "mean": out = masked.mean() - elif metric == 'median': + elif metric == "median": out = masked.median() else: - raise ValueError('Not possible to run PCA on a single image') + raise ValueError("Not possible to run PCA on a single image") else: - if metric == 'mean': + if metric == "mean": out = masked.mean(axis=1) - elif metric == 'median': + elif metric == "median": out = masked.median(axis=1) else: - output = masked.decompose(algorithm='pca', n_components=n_components, axis='images') - out = output['weights'].T + output = masked.decompose( + algorithm="pca", n_components=n_components, axis="images" + ) + out = output["weights"].T elif len(np.unique(ma.data)) > 2: # make sure each ROI id is an integer ma.data = np.round(ma.data).astype(int) all_mask = expand_mask(ma) if check_brain_data_is_single(self): - if metric == 'mean': + if metric == "mean": out = np.array([self.apply_mask(m).mean() for m in all_mask]) - elif metric == 'median': + elif metric == "median": out = np.array([self.apply_mask(m).median() for m in all_mask]) else: - raise ValueError('Not possible to run PCA on a single image') + raise ValueError("Not possible to run PCA on a single image") else: - if metric == 'mean': + if metric == "mean": out = np.array([self.apply_mask(m).mean(axis=1) for m in all_mask]) - elif metric == 'median': - out = np.array([self.apply_mask(m).median(axis=1) for m in all_mask]) + elif metric == "median": + out = np.array( + [self.apply_mask(m).median(axis=1) for m in all_mask] + ) else: out = [] for m in all_mask: masked = self.apply_mask(m) - output = masked.decompose(algorithm='pca', n_components=n_components, axis='images') - out.append(output['weights'].T) + output = masked.decompose( + algorithm="pca", n_components=n_components, axis="images" + ) + out.append(output["weights"].T) else: - raise ValueError('Mask must be binary or integers') + raise ValueError("Mask must be binary or integers") return out - def icc(self, icc_type='icc2'): - ''' Calculate intraclass correlation coefficient for data within + def icc(self, icc_type="icc2"): + """Calculate intraclass correlation coefficient for data within Brain_Data class ICC Formulas are based on: @@ -1363,14 +1625,14 @@ def icc(self, 
icc_type='icc2'): Returns: ICC: (np.array) intraclass correlation coefficient - ''' + """ Y = self.data.T [n, k] = Y.shape # Degrees of Freedom dfc = k - 1 - dfe = (n - 1) * (k-1) + dfe = (n - 1) * (k - 1) dfr = n - 1 # Sum Square Total @@ -1383,9 +1645,10 @@ def icc(self, icc_type='icc2'): X = np.hstack([x, x0]) # Sum Square Error - predicted_Y = np.dot(np.dot(np.dot(X, np.linalg.pinv(np.dot(X.T, X))), - X.T), Y.flatten('F')) - residuals = Y.flatten('F') - predicted_Y + predicted_Y = np.dot( + np.dot(np.dot(X, np.linalg.pinv(np.dot(X.T, X))), X.T), Y.flatten("F") + ) + residuals = Y.flatten("F") - predicted_Y SSE = (residuals ** 2).sum() MSE = SSE / dfe @@ -1398,28 +1661,28 @@ def icc(self, icc_type='icc2'): SSR = SST - SSC - SSE MSR = SSR / dfr - if icc_type == 'icc1': + if icc_type == "icc1": # ICC(2,1) = (mean square subject - mean square error) / # (mean square subject + (k-1)*mean square error + # k*(mean square columns - mean square error)/n) # ICC = (MSR - MSRW) / (MSR + (k-1) * MSRW) NotImplementedError("This method isn't implemented yet.") - elif icc_type == 'icc2': + elif icc_type == "icc2": # ICC(2,1) = (mean square subject - mean square error) / # (mean square subject + (k-1)*mean square error + # k*(mean square columns - mean square error)/n) - ICC = (MSR - MSE) / (MSR + (k-1) * MSE + k * (MSC - MSE) / n) + ICC = (MSR - MSE) / (MSR + (k - 1) * MSE + k * (MSC - MSE) / n) - elif icc_type == 'icc3': + elif icc_type == "icc3": # ICC(3,1) = (mean square subject - mean square error) / # (mean square subject + (k-1)*mean square error) - ICC = (MSR - MSE) / (MSR + (k-1) * MSE) + ICC = (MSR - MSE) / (MSR + (k - 1) * MSE) return ICC - def detrend(self, method='linear'): - """ Remove linear trend from each voxel + def detrend(self, method="linear"): + """Remove linear trend from each voxel Args: type: ('linear','constant', optional) type of detrending @@ -1430,8 +1693,9 @@ def detrend(self, method='linear'): """ if len(self.shape()) == 1: - raise ValueError('Make sure there is more than one image in order ' - 'to detrend.') + raise ValueError( + "Make sure there is more than one image in order " "to detrend." + ) out = deepcopy(self) out.data = detrend(out.data, type=method, axis=0) @@ -1441,10 +1705,16 @@ def copy(self): """ Create a copy of a Brain_Data instance. """ return deepcopy(self) - def upload_neurovault(self, access_token=None, collection_name=None, - collection_id=None, img_type=None, img_modality=None, - **kwargs): - """ Upload Data to Neurovault. Will add any columns in self.X to image + def upload_neurovault( + self, + access_token=None, + collection_name=None, + collection_id=None, + img_type=None, + img_modality=None, + **kwargs + ): + """Upload Data to Neurovault. Will add any columns in self.X to image metadata. Index will be used as image name. Args: @@ -1461,7 +1731,7 @@ def upload_neurovault(self, access_token=None, collection_name=None, """ if access_token is None: - raise ValueError('You must supply a valid neurovault access token') + raise ValueError("You must supply a valid neurovault access token") api = Client(access_token=access_token) @@ -1472,60 +1742,67 @@ def upload_neurovault(self, access_token=None, collection_name=None, try: collection = api.create_collection(collection_name) except ValueError: - print('Collection Name already exists. Pick a ' - 'different name or specify an existing collection id') + print( + "Collection Name already exists. 
Pick a " + "different name or specify an existing collection id" + ) tmp_dir = os.path.join(tempfile.gettempdir(), str(os.times()[-1])) os.makedirs(tmp_dir) - def add_image_to_collection(api, collection, dat, tmp_dir, index_id=0, - **kwargs): - '''Upload image to collection + def add_image_to_collection( + api, collection, dat, tmp_dir, index_id=0, **kwargs + ): + """Upload image to collection Args: api: pynv Client instance collection: collection information dat: Brain_Data instance to upload tmp_dir: temporary directory index_id: (int) index for file naming - ''' + """ if (len(dat.shape()) > 1) & (dat.shape()[0] > 1): raise ValueError('"dat" must be a single image.') if not dat.X.empty and isinstance(dat.X.name, six.string_types): img_name = dat.X.name else: - img_name = collection['name'] + '_' + str(index_id) + '.nii.gz' + img_name = collection["name"] + "_" + str(index_id) + ".nii.gz" f_path = os.path.join(tmp_dir, img_name) dat.write(f_path) if not dat.X.empty: kwargs.update(dict([(k, dat.X.loc[k]) for k in dat.X.keys()])) - api.add_image(collection['id'], - f_path, - name=img_name, - modality=img_modality, - map_type=img_type, - **kwargs) + api.add_image( + collection["id"], + f_path, + name=img_name, + modality=img_modality, + map_type=img_type, + **kwargs + ) if len(self.shape()) == 1: - add_image_to_collection(api, collection, self, tmp_dir, index_id=0, - **kwargs) + add_image_to_collection( + api, collection, self, tmp_dir, index_id=0, **kwargs + ) else: for i, x in enumerate(self): - add_image_to_collection(api, collection, x, tmp_dir, - index_id=i, **kwargs) + add_image_to_collection( + api, collection, x, tmp_dir, index_id=i, **kwargs + ) shutil.rmtree(tmp_dir, ignore_errors=True) return collection def r_to_z(self): - ''' Apply Fisher's r to z transformation to each element of the data - object.''' + """Apply Fisher's r to z transformation to each element of the data + object.""" out = self.copy() out.data = fisher_r_to_z(out.data) return out def filter(self, sampling_freq=None, high_pass=None, low_pass=None, **kwargs): - ''' Apply 5th order butterworth filter to data. Wraps nilearn + """Apply 5th order butterworth filter to data. Wraps nilearn functionality. Does not default to detrending and standardizing like nilearn implementation, but this can be overridden using kwargs. @@ -1537,27 +1814,32 @@ def filter(self, sampling_freq=None, high_pass=None, low_pass=None, **kwargs): Returns: Brain_Data: Filtered Brain_Data instance - ''' + """ if sampling_freq is None: raise ValueError("Need to provide sampling rate (TR)!") if high_pass is None and low_pass is None: - raise ValueError("high_pass and/or low_pass cutoff must be" - "provided!") - standardize = kwargs.get('standardize', False) - detrend = kwargs.get('detrend', False) + raise ValueError("high_pass and/or low_pass cutoff must be" "provided!") + standardize = kwargs.get("standardize", False) + detrend = kwargs.get("detrend", False) out = self.copy() - out.data = clean(out.data, t_r=1. / sampling_freq, detrend=detrend, - standardize=standardize, high_pass=high_pass, - low_pass=low_pass, **kwargs) + out.data = clean( + out.data, + t_r=1.0 / sampling_freq, + detrend=detrend, + standardize=standardize, + high_pass=high_pass, + low_pass=low_pass, + **kwargs + ) return out def dtype(self): - ''' Get data type of Brain_Data.data.''' + """ Get data type of Brain_Data.data.""" return self.data.dtype def astype(self, dtype): - ''' Cast Brain_Data.data as type. + """Cast Brain_Data.data as type. 
Args: dtype: datatype to convert @@ -1565,14 +1847,14 @@ def astype(self, dtype): Returns: Brain_Data: Brain_Data instance with new datatype - ''' + """ out = self.copy() out.data = out.data.astype(dtype) return out - def standardize(self, axis=0, method='center'): - ''' Standardize Brain_Data() instance. + def standardize(self, axis=0, method="center"): + """Standardize Brain_Data() instance. Args: axis: 0 for observations 1 for voxels @@ -1581,14 +1863,16 @@ def standardize(self, axis=0, method='center'): Returns: Brain_Data Instance - ''' + """ if axis == 1 and len(self.shape()) == 1: - raise IndexError("Brain_Data is only 3d but standardization was requested over observations") + raise IndexError( + "Brain_Data is only 3d but standardization was requested over observations" + ) out = self.copy() - if method == 'zscore': + if method == "zscore": with_std = True - elif method == 'center': + elif method == "center": with_std = False else: raise ValueError('method must be ["center","zscore"') @@ -1596,17 +1880,17 @@ def standardize(self, axis=0, method='center'): return out def groupby(self, mask): - '''Create groupby instance''' + """Create groupby instance""" return Groupby(self, mask) def aggregate(self, mask, func): - '''Create new Brain_Data instance that aggregages func over mask''' + """Create new Brain_Data instance that aggregages func over mask""" dat = self.groupby(mask) values = dat.apply(func) return dat.combine(values) def threshold(self, upper=None, lower=None, binarize=False, coerce_nan=True): - '''Threshold Brain_Data instance. Provide upper and lower values or + """Threshold Brain_Data instance. Provide upper and lower values or percentages to perform two-sided thresholding. Binarize will return a mask image respecting thresholds if provided, otherwise respecting every non-zero value. @@ -1626,17 +1910,17 @@ def threshold(self, upper=None, lower=None, binarize=False, coerce_nan=True): Returns: Thresholded Brain_Data object. - ''' + """ b = self.copy() if coerce_nan: b.data = np.nan_to_num(b.data) - if isinstance(upper, six.string_types) and upper[-1] == '%': + if isinstance(upper, six.string_types) and upper[-1] == "%": upper = np.percentile(b.data, float(upper[:-1])) - if isinstance(lower, six.string_types) and lower[-1] == '%': + if isinstance(lower, six.string_types) and lower[-1] == "%": lower = np.percentile(b.data, float(lower[:-1])) if upper and lower: @@ -1650,9 +1934,14 @@ def threshold(self, upper=None, lower=None, binarize=False, coerce_nan=True): b.data[b.data != 0] = 1 return b - def regions(self, min_region_size=1350, extract_type='local_regions', - smoothing_fwhm=6, is_mask=False): - ''' Extract brain connected regions into separate regions. + def regions( + self, + min_region_size=1350, + extract_type="local_regions", + smoothing_fwhm=6, + is_mask=False, + ): + """Extract brain connected regions into separate regions. Args: min_region_size (int): Minimum volume in mm3 for a region to be @@ -1677,34 +1966,42 @@ def regions(self, min_region_size=1350, extract_type='local_regions', Returns: Brain_Data: Brain_Data instance with extracted ROIs as data. 
- ''' + """ if is_mask: regions, _ = connected_label_regions(self.to_nifti()) else: - regions, _ = connected_regions(self.to_nifti(), - min_region_size, extract_type, - smoothing_fwhm) + regions, _ = connected_regions( + self.to_nifti(), min_region_size, extract_type, smoothing_fwhm + ) return Brain_Data(regions, mask=self.mask) def transform_pairwise(self): - ''' Extract brain connected regions into separate regions. + """Extract brain connected regions into separate regions. Args: Returns: Brain_Data: Brain_Data instance tranformed into pairwise comparisons - ''' + """ out = self.copy() out.data, new_Y = transform_pairwise(self.data, self.Y) out.Y = pd.DataFrame(new_Y) out.Y.replace(-1, 0, inplace=True) return out - def bootstrap(self, function, n_samples=5000, save_weights=False, - n_jobs=-1, random_state=None, *args, **kwargs): - '''Bootstrap a Brain_Data method. + def bootstrap( + self, + function, + n_samples=5000, + save_weights=False, + n_jobs=-1, + random_state=None, + *args, + **kwargs + ): + """Bootstrap a Brain_Data method. Example Useage: b = dat.bootstrap('mean', n_samples=5000) @@ -1720,24 +2017,27 @@ def bootstrap(self, function, n_samples=5000, save_weights=False, -1 means all CPUs.Returns: output: summarized studentized bootstrap output - ''' + """ random_state = check_random_state(random_state) seeds = random_state.randint(MAX_INT, size=n_samples) bootstrapped = Parallel(n_jobs=n_jobs)( - delayed(_bootstrap_apply_func)(self, - function, random_state=seeds[i], *args, **kwargs) - for i in range(n_samples)) + delayed(_bootstrap_apply_func)( + self, function, random_state=seeds[i], *args, **kwargs + ) + for i in range(n_samples) + ) - if function == 'predict': - bootstrapped = [x['weight_map'] for x in bootstrapped] + if function == "predict": + bootstrapped = [x["weight_map"] for x in bootstrapped] bootstrapped = Brain_Data(bootstrapped, mask=self.mask) return summarize_bootstrap(bootstrapped, save_weights=save_weights) - def decompose(self, algorithm='pca', axis='voxels', n_components=None, - *args, **kwargs): - ''' Decompose Brain_Data object + def decompose( + self, algorithm="pca", axis="voxels", n_components=None, *args, **kwargs + ): + """Decompose Brain_Data object Args: algorithm: (str) Algorithm to perform decomposition @@ -1747,38 +2047,39 @@ def decompose(self, algorithm='pca', axis='voxels', n_components=None, as many as possible. 
Returns: output: a dictionary of decomposition parameters - ''' + """ out = { - 'decomposition_object': set_decomposition_algorithm( + "decomposition_object": set_decomposition_algorithm( *args, algorithm=algorithm, n_components=n_components, **kwargs ) } - if axis == 'images': - out['decomposition_object'].fit(self.data.T) - out['components'] = self.empty() - out['components'].data = out['decomposition_object'].transform( - self.data.T).T - out['weights'] = out['decomposition_object'].components_.T - elif axis == 'voxels': - out['decomposition_object'].fit(self.data) - out['weights'] = out['decomposition_object'].transform(self.data) - out['components'] = self.empty() - out['components'].data = out['decomposition_object'].components_ + if axis == "images": + out["decomposition_object"].fit(self.data.T) + out["components"] = self.empty() + out["components"].data = ( + out["decomposition_object"].transform(self.data.T).T + ) + out["weights"] = out["decomposition_object"].components_.T + elif axis == "voxels": + out["decomposition_object"].fit(self.data) + out["weights"] = out["decomposition_object"].transform(self.data) + out["components"] = self.empty() + out["components"].data = out["decomposition_object"].components_ return out - def align(self, target, method='procrustes', axis=0, *args, **kwargs): - ''' Align Brain_Data instance to target object using functional alignment + def align(self, target, method="procrustes", axis=0, *args, **kwargs): + """Align Brain_Data instance to target object using functional alignment Alignment type can be hyperalignment or Shared Response Model. When using hyperalignment, `target` image can be another subject or an already estimated common model. When using SRM, `target` must be a previously estimated common model stored as a numpy array. Transformed data can be back projected to original data using Tranformation matrix. 
- + See nltools.stats.align for aligning multiple Brain_Data instances - + Examples: Hyperalign using procrustes transform: out = data.align(target, method='procrustes') @@ -1799,18 +2100,20 @@ def align(self, target, method='procrustes', axis=0, *args, **kwargs): out: (dict) a dictionary containing transformed object, transformation matrix, and the shared response matrix - ''' + """ + + if method not in ["probabilistic_srm", "deterministic_srm", "procrustes"]: + raise ValueError( + "Method must be ['probabilistic_srm','deterministic_srm','procrustes']" + ) - if method not in ['probabilistic_srm', 'deterministic_srm', 'procrustes']: - raise ValueError("Method must be ['probabilistic_srm','deterministic_srm','procrustes']") - source = self.copy() data1 = self.data.copy() - if method == 'procrustes': + if method == "procrustes": target = check_brain_data(target) data2 = target.data.copy() - + # pad columns if different shapes sizes_1 = [x.shape[1] for x in [data1, data2]] C = max(sizes_1) @@ -1826,104 +2129,113 @@ def align(self, target, method='procrustes', axis=0, *args, **kwargs): data2 = data2.T out = {} - if method in ['deterministic_srm', 'probabilistic_srm']: + if method in ["deterministic_srm", "probabilistic_srm"]: if not isinstance(target, np.ndarray): - raise ValueError("Common Model must be a numpy array for ['deterministic_srm', 'probabilistic_srm']") + raise ValueError( + "Common Model must be a numpy array for ['deterministic_srm', 'probabilistic_srm']" + ) if data2.shape[0] != data1.shape[0]: - raise ValueError("The number of timepoints(TRs) does not match the model.") + raise ValueError( + "The number of timepoints(TRs) does not match the model." + ) A = data1.T.dot(data2) # # Solve the Procrustes problem U, _, V = np.linalg.svd(A, full_matrices=False) - out['transformation_matrix'] = source - out['transformation_matrix'].data = U.dot(V).T + out["transformation_matrix"] = source + out["transformation_matrix"].data = U.dot(V).T - out['transformed'] = data1.dot(out['transformation_matrix'].data.T) - out['common_model'] = target - elif method == 'procrustes': - _, transformed, out['disparity'], tf_mtx, out['scale'] = procrustes(data2, data1) + out["transformed"] = data1.dot(out["transformation_matrix"].data.T) + out["common_model"] = target + elif method == "procrustes": + _, transformed, out["disparity"], tf_mtx, out["scale"] = procrustes( + data2, data1 + ) source.data = transformed - out['transformed'] = source - out['common_model'] = target - out['transformation_matrix'] = source.copy() - out['transformation_matrix'].data = tf_mtx + out["transformed"] = source + out["common_model"] = target + out["transformation_matrix"] = source.copy() + out["transformation_matrix"].data = tf_mtx if axis == 1: - if method == 'procrustes': - out['transformed'].data = out['transformed'].data.T + if method == "procrustes": + out["transformed"].data = out["transformed"].data.T else: - out['transformed'] = out['transformed'].T + out["transformed"] = out["transformed"].T return out def smooth(self, fwhm): - '''Apply spatial smoothing using nilearn smooth_img() + """Apply spatial smoothing using nilearn smooth_img() - Args: - fwhm: (float) full width half maximum of gaussian spatial filter - Returns: - Brain_Data instance - ''' + Args: + fwhm: (float) full width half maximum of gaussian spatial filter + Returns: + Brain_Data instance + """ out = self.copy() out.data = out.nifti_masker.fit_transform(smooth_img(self.to_nifti(), fwhm)) - + if 1 in out.data.shape: out.data = out.data.squeeze() return 
out def find_spikes(self, global_spike_cutoff=3, diff_spike_cutoff=3): - '''Function to identify spikes from Time Series Data + """Function to identify spikes from Time Series Data - Args: - global_spike_cutoff: (int,None) cutoff to identify spikes in global signal - in standard deviations, None indicates do not calculate. - diff_spike_cutoff: (int,None) cutoff to identify spikes in average frame difference - in standard deviations, None indicates do not calculate. - Returns: - pandas dataframe with spikes as indicator variables - ''' - return find_spikes(self, - global_spike_cutoff=global_spike_cutoff, - diff_spike_cutoff=diff_spike_cutoff) - - def temporal_resample(self, sampling_freq=None, target=None, target_type='hz'): - ''' Resample Brain_Data timeseries to a new target frequency or number of samples + Args: + global_spike_cutoff: (int,None) cutoff to identify spikes in global signal + in standard deviations, None indicates do not calculate. + diff_spike_cutoff: (int,None) cutoff to identify spikes in average frame difference + in standard deviations, None indicates do not calculate. + Returns: + pandas dataframe with spikes as indicator variables + """ + return find_spikes( + self, + global_spike_cutoff=global_spike_cutoff, + diff_spike_cutoff=diff_spike_cutoff, + ) + + def temporal_resample(self, sampling_freq=None, target=None, target_type="hz"): + """Resample Brain_Data timeseries to a new target frequency or number of samples using Piecewise Cubic Hermite Interpolating Polynomial (PCHIP) interpolation. This function can up- or down-sample data. - + Note: this function can use quite a bit of RAM. Args: sampling_freq: (float) sampling frequency of data in hertz target: (float) upsampling target target_type: (str) type of target can be [samples,seconds,hz] - + Returns: upsampled Brain_Data instance - ''' - + """ + out = self.copy() - if target_type == 'samples': + if target_type == "samples": n_samples = target - elif target_type == 'seconds': - n_samples = target*sampling_freq - elif target_type == 'hz': - n_samples = float(sampling_freq)/float(target) + elif target_type == "seconds": + n_samples = target * sampling_freq + elif target_type == "hz": + n_samples = float(sampling_freq) / float(target) else: raise ValueError('Make sure target_type is "samples", "seconds", or "hz".') - + orig_spacing = np.arange(0, self.shape()[0], 1) new_spacing = np.arange(0, self.shape()[0], n_samples) - + out.data = np.zeros([len(new_spacing), self.shape()[1]]) for i in range(self.shape()[1]): interpolate = pchip(orig_spacing, self.data[:, i]) out.data[:, i] = interpolate(new_spacing) return out + class Groupby(object): def __init__(self, data, mask): @@ -1935,17 +2247,17 @@ def __init__(self, data, mask): if len(np.unique(mask.data)) > 2: mask = expand_mask(mask) else: - raise ValueError('mask does not have enough groups.') + raise ValueError("mask does not have enough groups.") self.mask = mask self.split(data, mask) def __repr__(self): - return '%s.%s(len=%s)' % ( + return "%s.%s(len=%s)" % ( self.__class__.__module__, self.__class__.__name__, len(self), - ) + ) def __len__(self): return len(self.data) @@ -1958,36 +2270,37 @@ def __getitem__(self, index): if isinstance(index, int): return self.data[index] else: - raise ValueError('Groupby currently only supports integer indexing') + raise ValueError("Groupby currently only supports integer indexing") def split(self, data, mask): - '''Split Brain_Data instance into separate masks and store as a - dictionary. 
- ''' + """Split Brain_Data instance into separate masks and store as a + dictionary. + """ self.data = {} for i, m in enumerate(mask): self.data[i] = data.apply_mask(m) def apply(self, method): - '''Apply Brain_Data instance methods to each element of Groupby - object. - ''' + """Apply Brain_Data instance methods to each element of Groupby + object. + """ return dict([(i, getattr(x, method)()) for i, x in self]) def combine(self, value_dict): - '''Combine value dictionary back into masks''' + """Combine value dictionary back into masks""" out = self.mask.copy().astype(float) for i in iter(value_dict.keys()): if isinstance(value_dict[i], Brain_Data): if value_dict[i].shape()[0] == np.sum(self.mask[i].data): out.data[i, out.data[i, :] == 1] = value_dict[i].data else: - raise ValueError('Brain_Data instances are different ' - 'shapes.') + raise ValueError("Brain_Data instances are different " "shapes.") elif isinstance(value_dict[i], (float, int, bool, np.number)): - out.data[i, :] = out.data[i, :]*value_dict[i] + out.data[i, :] = out.data[i, :] * value_dict[i] else: - raise ValueError('No method for aggregation implented for %s ' - 'yet.' % type(value_dict[i])) + raise ValueError( + "No method for aggregation implented for %s " + "yet." % type(value_dict[i]) + ) return out.sum() diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py index 0458b223..a9ecf5d0 100644 --- a/nltools/data/design_matrix.py +++ b/nltools/data/design_matrix.py @@ -1,12 +1,12 @@ from __future__ import division -''' +""" NeuroLearn Design Matrix ======================== Class for working with design matrices. -''' +""" __author__ = ["Eshin Jolly"] __license__ = "MIT" @@ -20,11 +20,7 @@ from scipy.special import legendre import six from ..external.hrf import glover_hrf -from nltools.stats import (downsample, - upsample, - zscore, - make_cosine_basis - ) +from nltools.stats import downsample, upsample, zscore, make_cosine_basis from nltools.utils import AmbiguityError @@ -57,13 +53,13 @@ class Design_Matrix(DataFrame): """ - _metadata = ['sampling_freq', 'convolved', 'polys', 'multi'] + _metadata = ["sampling_freq", "convolved", "polys", "multi"] def __init__(self, *args, **kwargs): - sampling_freq = kwargs.pop('sampling_freq', None) - convolved = kwargs.pop('convolved', []) - polys = kwargs.pop('polys', []) + sampling_freq = kwargs.pop("sampling_freq", None) + convolved = kwargs.pop("convolved", []) + polys = kwargs.pop("polys", []) self.sampling_freq = sampling_freq self.convolved = convolved self.polys = polys @@ -82,13 +78,9 @@ def _constructor(self): def _constructor_sliced(self): return Design_Matrix_Series - def _inherit_attributes(self, - dm_out, - atts=[ - 'sampling_freq', - 'convolved', - 'polys', - 'multi']): + def _inherit_attributes( + self, dm_out, atts=["sampling_freq", "convolved", "polys", "multi"] + ): """ This is helper function that simply ensures that attributes are copied over from the current Design_Matrix to a new Design_Matrix. @@ -109,25 +101,33 @@ def _sort_cols(self): """ This is a helper function that tries to ensure that columns of a Design Matrix are sorted according to: a) those not separated during append operations, b) those separated during append operations, c) polynomials. Called primarily during vertical concatentation and cleaning. 
""" - data_cols = [elem for elem in self.columns if not elem.split('_')[0].isdigit() and elem not in self.polys] - separated_cols = [elem for elem in self.columns if elem.split('_')[0].isdigit() and elem not in self.polys] + data_cols = [ + elem + for elem in self.columns + if not elem.split("_")[0].isdigit() and elem not in self.polys + ] + separated_cols = [ + elem + for elem in self.columns + if elem.split("_")[0].isdigit() and elem not in self.polys + ] return self[data_cols + separated_cols + self.polys] def details(self): - """Print class meta data. - - """ - return '%s.%s(sampling_freq=%s (hz), shape=%s, multi=%s, convolved=%s, polynomials=%s)' % ( - self.__class__.__module__, + return ( + "%s.%s(sampling_freq=%s (hz), shape=%s, multi=%s, convolved=%s, polynomials=%s)" + % self.__class__.__module__, self.__class__.__name__, self.sampling_freq, self.shape, self.multi, self.convolved, - self.polys - ) + self.polys, + ) - def append(self, dm, axis=0, keep_separate=True, unique_cols=None, fill_na=0, verbose=False): + def append( + self, dm, axis=0, keep_separate=True, unique_cols=None, fill_na=0, verbose=False + ): """Method for concatenating another design matrix row or column-wise. When concatenating row-wise, has the ability to keep certain columns separated if they exist in multiple design matrices (e.g. keeping separate intercepts for multiple runs). This is on by default and will automatically separate out polynomial columns (i.e. anything added with the `add_poly` or `add_dct_basis` methods). Additional columns can be separate by run using the `unique_cols` parameter. Can also add new polynomial terms during vertical concatentation (when axis == 0). This will by default create new polynomial terms separately for each design matrix Args: @@ -148,10 +148,18 @@ def append(self, dm, axis=0, keep_separate=True, unique_cols=None, fill_na=0, ve if not all(isinstance(elem, self.__class__) for elem in to_append): raise TypeError("Each object to be appended must be a Design_Matrix!") if not all(elem.sampling_freq == self.sampling_freq for elem in to_append): - raise ValueError("All Design Matrices must have the same sampling frequency!") + raise ValueError( + "All Design Matrices must have the same sampling frequency!" + ) if axis == 0: - return self._vertcat(to_append, keep_separate=keep_separate, unique_cols=unique_cols, fill_na=fill_na, verbose=verbose) + return self._vertcat( + to_append, + keep_separate=keep_separate, + unique_cols=unique_cols, + fill_na=fill_na, + verbose=verbose, + ) elif axis == 1: if any( @@ -165,7 +173,7 @@ def append(self, dm, axis=0, keep_separate=True, unique_cols=None, fill_na=0, ve def _horzcat(self, to_append, fill_na): """Used by .append(). Append another design matrix, column-wise - (horz cat). Always returns a new design_matrix. + (horz cat). Always returns a new design_matrix. """ @@ -182,8 +190,9 @@ def _horzcat(self, to_append, fill_na): return out def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): + """Used by .append(). Append another design matrix row-wise (vert cat). - Always returns a new design matrix. + Always returns a new design matrix. """ @@ -199,30 +208,36 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): if unique_cols is not None: if not keep_separate: - raise ValueError("unique_cols provided but keep_separate set to False. Set keep_separate to True to separate unique_cols") + raise ValueError( + "unique_cols provided but keep_separate set to False. 
Set keep_separate to True to separate unique_cols" + ) # 1) Make sure unique_cols are in original Design Matrix if not self.empty: to_rename = {} unique_count = [] for u in unique_cols: - if u.endswith('*'): - searchstr = u.split('*')[0] - elif u.startswith('*'): - searchstr = u.split('*')[1] + if u.endswith("*"): + searchstr = u.split("*")[0] + elif u.startswith("*"): + searchstr = u.split("*")[1] else: searchstr = u if not any([searchstr in elem for elem in self.columns]): - raise ValueError("'{}' not present in any column name of original Design Matrix".format(searchstr)) - # 2) Prepend them with a 0_ if this dm has never been appended to be for otherwise grab their current prepended index are and start a unique_cols counter + raise ValueError( + "'{}' not present in any column name of original Design Matrix".format( + searchstr + ) + ) + # 2) Prepend them with a 0_ if this dm has never been appended to be for otherwise grab their current prepended index are and start a unique_cols counter else: for c in self.columns: if searchstr in c: if self.multi and c[0].isdigit(): - count = c.split('_')[0] + count = c.split("_")[0] unique_count.append(int(count)) else: - new_name = '0_' + c + new_name = "0_" + c all_separated.append(new_name) to_rename[c] = new_name all_separated.append(new_name) @@ -246,7 +261,9 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): # Self no polys; append has polys. if any([len(elem.polys) for elem in to_append]): if verbose: - print("Keep separate requested but original Design Matrix has no polynomial terms but matrices to be appended do. Inherting appended Design Matrices' polynomials...") + print( + "Keep separate requested but original Design Matrix has no polynomial terms but matrices to be appended do. Inherting appended Design Matrices' polynomials..." + ) for i, dm in enumerate(to_append): for p in dm.polys: all_polys.append(p) @@ -254,7 +271,11 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): # Handle renaming additional unique cols to keep separate if cols_to_separate: if verbose: - print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + print( + "Unique cols requested. Trying to keep {} separated".format( + cols_to_separate + ) + ) to_rename = {} data_cols = dm.drop(dm.polys, axis=1).columns print(data_cols) @@ -262,13 +283,15 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): for c in data_cols: if u in c: if dm.multi: - count = int(c.split('_')[0]) - name = '_'.join(c.split('_')[1:]) + count = int(c.split("_")[0]) + name = "_".join(c.split("_")[1:]) count += max_unique_count + 1 - new_name = str(count) + '_' + name + new_name = str(count) + "_" + name to_rename[c] = new_name else: - new_name = str(max_unique_count + 1) + '_' + c + new_name = ( + str(max_unique_count + 1) + "_" + c + ) to_rename[c] = new_name all_separated.append(new_name) modify_to_append.append(dm.rename(columns=to_rename)) @@ -278,25 +301,33 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): else: # Self no polys; append no polys if verbose: - print("Keep separate requested but neither original Design Matrix nor matrices to be appended have any polynomial terms Ignoring...") + print( + "Keep separate requested but neither original Design Matrix nor matrices to be appended have any polynomial terms Ignoring..." 
+ ) # Handle renaming additional unique cols to keep separate for i, dm in enumerate(to_append): if cols_to_separate: if verbose: - print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + print( + "Unique cols requested. Trying to keep {} separated".format( + cols_to_separate + ) + ) to_rename = {} data_cols = dm.drop(dm.polys, axis=1).columns for u in cols_to_separate: for c in data_cols: if u in c: if dm.multi: - count = int(c.split('_')[0]) - name = '_'.join(c.split('_')[1:]) + count = int(c.split("_")[0]) + name = "_".join(c.split("_")[1:]) count += max_unique_count + 1 - new_name = str(count) + '_' + name + new_name = str(count) + "_" + name to_rename[c] = new_name else: - new_name = str(max_unique_count + 1) + '_' + c + new_name = ( + str(max_unique_count + 1) + "_" + c + ) to_rename[c] = new_name all_separated.append(new_name) modify_to_append.append(dm.rename(columns=to_rename)) @@ -307,15 +338,17 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): # Self has polys; append has polys if any([len(elem.polys) for elem in to_append]): if verbose: - print("Keep separate requested and both original Design Matrix and matrices to be appended have polynomial terms. Separating...") + print( + "Keep separate requested and both original Design Matrix and matrices to be appended have polynomial terms. Separating..." + ) # Get the unique polynomials that currently exist # [name, count/None, isRoot] current_polys = [] for p in self.polys: - if p.count('_') == 2: + if p.count("_") == 2: isRoot = False - pSplit = p.split('_') - pName = '_'.join(pSplit[1:]) + pSplit = p.split("_") + pName = "_".join(pSplit[1:]) pCount = int(pSplit[0]) else: isRoot = True @@ -330,7 +363,9 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): if any(current_polys[:, 2]): renamed_polys = {} for i in range(current_polys.shape[0]): - renamed_polys[current_polys[i, 0]] = str(current_polys[i, 1]) + '_' + current_polys[i, 0] + renamed_polys[current_polys[i, 0]] = ( + str(current_polys[i, 1]) + "_" + current_polys[i, 0] + ) orig = orig.rename(columns=renamed_polys) all_polys += list(renamed_polys.values()) else: @@ -341,14 +376,14 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): for i, dm in enumerate(to_append): to_rename = {} for p in dm.polys: - if p.count('_') == 2: - pSplit = p.split('_') - pName = '_'.join(pSplit[1:]) + if p.count("_") == 2: + pSplit = p.split("_") + pName = "_".join(pSplit[1:]) pCount = int(pSplit[0]) + current_poly_max + 1 else: pName = p pCount = current_poly_max + 1 - to_rename[p] = str(pCount) + '_' + pName + to_rename[p] = str(pCount) + "_" + pName temp_dm = dm.rename(columns=to_rename) current_poly_max += 1 all_polys += list(to_rename.values()) @@ -356,20 +391,26 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): # Handle renaming additional unique cols to keep separate if cols_to_separate: if verbose: - print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + print( + "Unique cols requested. 
Trying to keep {} separated".format( + cols_to_separate + ) + ) to_rename = {} data_cols = dm.drop(dm.polys, axis=1).columns for u in cols_to_separate: for c in data_cols: if u in c: if dm.multi: - count = int(c.split('_')[0]) - name = '_'.join(c.split('_')[1:]) + count = int(c.split("_")[0]) + name = "_".join(c.split("_")[1:]) count += max_unique_count + 1 - new_name = str(count) + '_' + name + new_name = str(count) + "_" + name to_rename[c] = new_name else: - new_name = str(max_unique_count + 1) + '_' + c + new_name = ( + str(max_unique_count + 1) + "_" + c + ) to_rename[c] = new_name all_separated.append(new_name) @@ -381,13 +422,19 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): else: # Self has polys; append no polys if verbose: - print("Keep separate requested but only original Design Matrix has polynomial terms. Retaining original Design Matrix's polynomials only...") + print( + "Keep separate requested but only original Design Matrix has polynomial terms. Retaining original Design Matrix's polynomials only..." + ) all_polys += self.polys # Handle renaming additional unique cols to keep separate if cols_to_separate: if verbose: - print("Unique cols requested. Trying to keep {} separated".format(cols_to_separate)) + print( + "Unique cols requested. Trying to keep {} separated".format( + cols_to_separate + ) + ) for i, dm in enumerate(to_append): to_rename = {} data_cols = dm.drop(dm.polys, axis=1).columns @@ -395,13 +442,15 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): for c in data_cols: if u in c: if dm.multi: - count = int(c.split('_')[0]) - name = '_'.join(c.split('_')[1:]) + count = int(c.split("_")[0]) + name = "_".join(c.split("_")[1:]) count += max_unique_count + 1 - new_name = str(count) + '_' + name + new_name = str(count) + "_" + name to_rename[c] = new_name else: - new_name = str(max_unique_count + 1) + '_' + c + new_name = ( + str(max_unique_count + 1) + "_" + c + ) to_rename[c] = new_name all_separated.append(new_name) modify_to_append.append(dm.rename(to_rename)) @@ -426,14 +475,12 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose): return out._sort_cols() def vif(self, exclude_polys=True): - """Compute variance inflation factor amongst columns of design matrix, - ignoring polynomial terms. Much faster that statsmodels and more - reliable too. Uses the same method as Matlab and R (diagonal - elements of the inverted correlation matrix). + """ + Compute variance inflation factor amongst columns of design matrix,ignoring polynomial terms. Much faster that statsmodels and more reliable too. Uses the same method as Matlab and R (diagonal elements of the inverted correlation matrix). Returns: vifs (list): list with length == number of columns - intercept - exclude_polys (bool): whether to skip checking of polynomial terms (i.e. intercept, trends, basis functions); default True + exclude_polys (bool): whether to skip checking of polynomial terms (i.e intercept, trends, basis functions); default True """ if self.shape[1] <= 1: @@ -442,20 +489,22 @@ def vif(self, exclude_polys=True): out = self.drop(self.polys, axis=1) else: # Always drop intercept before computing VIF - intercepts = [elem for elem in self.columns if 'poly_0' in str(elem)] + intercepts = [elem for elem in self.columns if "poly_0" in str(elem)] out = self.drop(intercepts, axis=1) try: return np.diag(np.linalg.inv(out.corr()), 0) except np.linalg.LinAlgError: - print("ERROR: Cannot compute vifs! 
Design Matrix is singular because it has some perfectly correlated or duplicated columns. Using .clean() method may help.") + print( + "ERROR: Cannot compute vifs! Design Matrix is singular because it has some perfectly correlated or duplicated columns. Using .clean() method may help." + ) def heatmap(self, figsize=(8, 6), **kwargs): """Visualize Design Matrix spm style. Use .plot() for typical pandas - plotting functionality. Can pass optional keyword args to seaborn - heatmap. + plotting functionality. Can pass optional keyword args to seaborn + heatmap. """ - cmap = kwargs.pop('cmap', 'gray') + cmap = kwargs.pop("cmap", "gray") fig, ax = plt.subplots(1, figsize=figsize) ax = sns.heatmap(self, cmap=cmap, cbar=False, ax=ax, **kwargs) for _, spine in ax.spines.items(): @@ -467,11 +516,11 @@ def heatmap(self, figsize=(8, 6), **kwargs): label.set_visible(False) ax.axhline(linewidth=4, color="k") ax.axvline(linewidth=4, color="k") - ax.axhline(y=self.shape[0], color='k', linewidth=4) - ax.axvline(x=self.shape[1], color='k', linewidth=4) + ax.axhline(y=self.shape[0], color="k", linewidth=4) + ax.axvline(x=self.shape[1], color="k", linewidth=4) plt.yticks(rotation=0) - def convolve(self, conv_func='hrf', columns=None): + def convolve(self, conv_func="hrf", columns=None): """Perform convolution using an arbitrary function. Args: @@ -491,23 +540,31 @@ def convolve(self, conv_func='hrf', columns=None): if len(conv_func.shape) > 2: raise ValueError("2d conv_func must be formatted as samplex X kernals!") elif isinstance(conv_func, six.string_types): - if conv_func != 'hrf': - raise ValueError("Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays.") - conv_func = glover_hrf(1. / self.sampling_freq, oversampling=1.) + if conv_func != "hrf": + raise ValueError( + "Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays." 
+ ) + conv_func = glover_hrf(1.0 / self.sampling_freq, oversampling=1.0) else: - raise TypeError("conv_func must be a 1d or 2d numpy array organized as samples x kernels, or the string 'hrf' for the canonical glover hrf") + raise TypeError( + "conv_func must be a 1d or 2d numpy array organized as samples x kernels, or the string 'hrf' for the canonical glover hrf" + ) if len(conv_func.shape) > 1: conv_mats = [] for i in range(conv_func.shape[1]): - c = self[columns].apply(lambda x: np.convolve(x, conv_func[:, i])[:self.shape[0]]) - c.columns = [str(col)+'_c'+str(i) for col in c.columns] + c = self[columns].apply( + lambda x: np.convolve(x, conv_func[:, i])[: self.shape[0]] + ) + c.columns = [str(col) + "_c" + str(i) for col in c.columns] conv_mats.append(c) out = pd.concat(conv_mats + [self[nonConvolved]], axis=1) else: - c = self[columns].apply(lambda x: np.convolve(x, conv_func)[:self.shape[0]]) - c.columns = [str(col)+'_c0' for col in c.columns] + c = self[columns].apply( + lambda x: np.convolve(x, conv_func)[: self.shape[0]] + ) + c.columns = [str(col) + "_c0" for col in c.columns] out = pd.concat([c, self[nonConvolved]], axis=1) out = self._inherit_attributes(out) @@ -527,7 +584,15 @@ def downsample(self, target, **kwargs): if target > self.sampling_freq: raise ValueError("Target must be longer than current sampling rate") - df = Design_Matrix(downsample(self, sampling_freq=self.sampling_freq, target=target, target_type='hz', **kwargs)) + df = Design_Matrix( + downsample( + self, + sampling_freq=self.sampling_freq, + target=target, + target_type="hz", + **kwargs + ) + ) # convert df to a design matrix newMat = self._inherit_attributes(df) @@ -547,7 +612,15 @@ def upsample(self, target, **kwargs): if target < self.sampling_freq: raise ValueError("Target must be shorter than current sampling rate") - df = Design_Matrix(upsample(self, sampling_freq=self.sampling_freq, target=target, target_type='hz', **kwargs)) + df = Design_Matrix( + upsample( + self, + sampling_freq=self.sampling_freq, + target=target, + target_type="hz", + **kwargs + ) + ) # convert df to a design matrix newMat = self._inherit_attributes(df) @@ -584,25 +657,35 @@ def add_poly(self, order=0, include_lower=True): if order < 0: raise ValueError("Order must be 0 or greater") - if self.polys and any(elem.count('_') == 2 for elem in self.polys): - raise AmbiguityError("It appears that this Design Matrix contains polynomial terms that were kept seperate from a previous append operation. This makes it ambiguous for adding polynomials terms. Try calling .add_poly() on each separate Design Matrix before appending them instead.") + if self.polys and any(elem.count("_") == 2 for elem in self.polys): + raise AmbiguityError( + "It appears that this Design Matrix contains polynomial terms that were kept seperate from a previous append operation. This makes it ambiguous for adding polynomials terms. Try calling .add_poly() on each separate Design Matrix before appending them instead." + ) polyDict = {} # Normal/canonical legendre polynomials on the range -1,1 but with size defined by number of observations; keeps all polynomials on similar scales (i.e. 
big polys don't blow up) and betas are better behaved norm_order = np.linspace(-1, 1, self.shape[0]) - if 'poly_'+str(order) in self.polys: - print("Design Matrix already has {}th order polynomial...skipping".format(order)) + if "poly_" + str(order) in self.polys: + print( + "Design Matrix already has {}th order polynomial...skipping".format( + order + ) + ) return self if include_lower: - for i in range(order+1): - if 'poly_'+str(i) in self.polys: - print("Design Matrix already has {}th order polynomial...skipping".format(i)) + for i in range(order + 1): + if "poly_" + str(i) in self.polys: + print( + "Design Matrix already has {}th order polynomial...skipping".format( + i + ) + ) else: - polyDict['poly_' + str(i)] = legendre(i)(norm_order) + polyDict["poly_" + str(i)] = legendre(i)(norm_order) else: - polyDict['poly_' + str(order)] = legendre(order)(norm_order) + polyDict["poly_" + str(order)] = legendre(order)(norm_order) toAdd = Design_Matrix(polyDict, sampling_freq=self.sampling_freq) out = self.append(toAdd, axis=1) @@ -627,16 +710,25 @@ def add_dct_basis(self, duration=180, drop=0): raise ValueError("Design_Matrix has no sampling_freq set!") if self.polys and any( - elem.count('_') == 2 and 'cosine' in elem for elem in self.polys + elem.count("_") == 2 and "cosine" in elem for elem in self.polys ): - raise AmbiguityError("It appears that this Design Matrix contains cosine bases that were kept seperate from a previous append operation. This makes it ambiguous for adding polynomials terms. Try calling .add_dct_basis() on each separate Design Matrix before appending them instead.") + raise AmbiguityError( + "It appears that this Design Matrix contains cosine bases that were kept seperate from a previous append operation. This makes it ambiguous for adding polynomials terms. Try calling .add_dct_basis() on each separate Design Matrix before appending them instead." 
+ ) - basis_mat = make_cosine_basis(self.shape[0], 1./self.sampling_freq, duration, drop=drop) + basis_mat = make_cosine_basis( + self.shape[0], 1.0 / self.sampling_freq, duration, drop=drop + ) - basis_frame = Design_Matrix(basis_mat, - sampling_freq=self.sampling_freq, columns=[str(elem) for elem in range(basis_mat.shape[1])]) + basis_frame = Design_Matrix( + basis_mat, + sampling_freq=self.sampling_freq, + columns=[str(elem) for elem in range(basis_mat.shape[1])], + ) - basis_frame.columns = ['cosine_'+str(i+1) for i in range(basis_frame.shape[1])] + basis_frame.columns = [ + "cosine_" + str(i + 1) for i in range(basis_frame.shape[1]) + ] if self.polys: # Only add those we don't already have @@ -663,7 +755,11 @@ def replace_data(self, data, column_names=None): """ - if isinstance(data, np.ndarray) or isinstance(data, pd.DataFrame) or isinstance(data, dict): + if ( + isinstance(data, np.ndarray) + or isinstance(data, pd.DataFrame) + or isinstance(data, dict) + ): if data.shape[0] == self.shape[0]: out = Design_Matrix(data, columns=column_names) polys = self[self.polys] @@ -673,9 +769,11 @@ def replace_data(self, data, column_names=None): else: raise ValueError("New data cannot change the number of rows") else: - raise TypeError("New data must be numpy array, pandas DataFrame or python dictionary type") + raise TypeError( + "New data must be numpy array, pandas DataFrame or python dictionary type" + ) - def clean(self, fill_na=0, exclude_polys=False, thresh=.95, verbose=True): + def clean(self, fill_na=0, exclude_polys=False, thresh=0.95, verbose=True): """ Method to fill NaNs in Design Matrix and remove duplicate columns based on data values, NOT names. Columns are dropped if they are correlated >= the requested threshold (default = .95). In this case, only the first instance of that column will be retained and all others will be dropped. @@ -688,7 +786,7 @@ def clean(self, fill_na=0, exclude_polys=False, thresh=.95, verbose=True): """ # Temporarily turn off warnings for correlations - old_settings = np.seterr(all='ignore') + old_settings = np.seterr(all="ignore") if fill_na is not None: out = self.fillna(fill_na) @@ -704,7 +802,11 @@ def clean(self, fill_na=0, exclude_polys=False, thresh=.95, verbose=True): r = np.abs(pearsonr(c, c2)[0]) if (r >= thresh) and (j not in keep) and (j not in remove): if verbose: - print("{} and {} correlated at {} which is >= threshold of {}. Dropping {}".format(i, j, np.round(r, 2), thresh, j)) + print( + "{} and {} correlated at {} which is >= threshold of {}. 
Dropping {}".format( + i, j, np.round(r, 2), thresh, j + ) + ) keep.append(i) remove.append(j) if remove: diff --git a/nltools/datasets.py b/nltools/datasets.py index 9049200a..41470dbb 100644 --- a/nltools/datasets.py +++ b/nltools/datasets.py @@ -1,27 +1,28 @@ -''' +""" NeuroLearn datasets =================== functions to help download datasets -''' +""" ## Notes: # Need to figure out how to speed up loading and resampling of data -__all__ = ['download_nifti', - 'get_collection_image_metadata', - 'download_collection', - 'fetch_emotion_ratings', - 'fetch_pain'] +__all__ = [ + "download_nifti", + "get_collection_image_metadata", + "download_collection", + "fetch_emotion_ratings", + "fetch_pain", +] __author__ = ["Luke Chang"] __license__ = "MIT" import os import pandas as pd from nltools.data import Brain_Data -from nilearn.datasets.utils import (_get_dataset_dir, - _fetch_file) +from nilearn.datasets.utils import _get_dataset_dir, _fetch_file from pynv import Client # Optional dependencies @@ -32,125 +33,123 @@ def download_nifti(url, data_dir=None): - ''' Download a image to a nifti file.''' - local_filename = url.split('/')[-1] + """ Download a image to a nifti file.""" + local_filename = url.split("/")[-1] if data_dir is not None: if not os.path.isdir(data_dir): os.makedirs(data_dir) local_filename = os.path.join(data_dir, local_filename) r = requests.get(url, stream=True) - with open(local_filename, 'wb') as f: + with open(local_filename, "wb") as f: for chunk in r.iter_content(chunk_size=1024): if chunk: # filter out keep-alive new chunks f.write(chunk) return local_filename -def get_collection_image_metadata(collection=None, data_dir=None, - limit=10): - ''' Get image metadata associated with collection +def get_collection_image_metadata(collection=None, data_dir=None, limit=10): + """ + Get image metadata associated with collection Args: - collection: (int) collection id - data_dir: (str) data directory - limit: (int) number of images to increment + collection (int, optional): collection id. Defaults to None. + data_dir (str, optional): data directory. Defaults to None. + limit (int, optional): number of images to increment. Defaults to 10. 
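Stepping back to the design_matrix.py changes above, a minimal usage sketch of how the reformatted methods (convolve, add_poly, add_dct_basis, vif, clean, heatmap) typically chain together; the column names, 100-row length, sampling_freq of 0.5 Hz, and 128 s basis duration are illustrative assumptions, not values from this changeset.

    import numpy as np
    import pandas as pd
    from nltools.data import Design_Matrix

    # Two task regressors sampled at 1/TR Hz (TR = 2 s here, purely illustrative)
    dm = Design_Matrix(
        pd.DataFrame(np.random.rand(100, 2), columns=["stim_a", "stim_b"]),
        sampling_freq=0.5,
    )
    dm = dm.convolve()           # convolve data columns with the canonical Glover HRF
    dm = dm.add_poly(2)          # intercept + linear + quadratic Legendre trends
    dm = dm.add_dct_basis(128)   # discrete cosine high-pass basis (duration in seconds)
    print(dm.vif())              # VIFs; polynomial/cosine terms excluded by default
    dm = dm.clean(verbose=True)  # drop columns correlated >= .95 with an earlier column
    dm.heatmap()                 # SPM-style visualization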
Returns: - metadata: (pd.DataFrame) Dataframe with full image metadata from - collection + pd.DataFrame: Dataframe with full image metadata from collection + """ - ''' - - if os.path.isfile(os.path.join(data_dir, 'metadata.csv')): - dat = pd.read_csv(os.path.join(data_dir, 'metadata.csv')) + if os.path.isfile(os.path.join(data_dir, "metadata.csv")): + dat = pd.read_csv(os.path.join(data_dir, "metadata.csv")) else: offset = 0 api = Client() - i = api.get_collection_images(collection_id=collection, limit=limit, offset=offset) - dat = pd.DataFrame(columns=i['results'][0].keys()) - while int(offset) < int(i['count']): - for x in i['results']: + i = api.get_collection_images( + collection_id=collection, limit=limit, offset=offset + ) + dat = pd.DataFrame(columns=i["results"][0].keys()) + while int(offset) < int(i["count"]): + for x in i["results"]: dat = dat.append(x, ignore_index=True) offset = offset + limit - i = api.get_collection_images(collection_id=collection, limit=limit, offset=offset) - dat.to_csv(os.path.join(data_dir, 'metadata.csv'), index=False) + i = api.get_collection_images( + collection_id=collection, limit=limit, offset=offset + ) + dat.to_csv(os.path.join(data_dir, "metadata.csv"), index=False) return dat -def download_collection(collection=None, data_dir=None, overwrite=False, - resume=True, verbose=1): - ''' Download images and metadata from Neurovault collection +def download_collection( + collection=None, data_dir=None, overwrite=False, resume=True, verbose=1 +): + """ + Download images and metadata from Neurovault collection Args: - collection: (int) collection id - data_dir: (str) data directory + collection (int, optional): collection id. Defaults to None. + data_dir (str, optional): data directory. Defaults to None. + overwrite (bool, optional): overwrite data directory. Defaults to False. + resume (bool, optional): resume download. Defaults to True. + verbose (int, optional): print diagnostic messages. Defaults to 1. Returns: - metadata: (pd.DataFrame) Dataframe with full image metadata from - collection - files: (list) list of files of downloaded collection - - ''' + (pd.DataFrame, list): (DataFrame of image metadata, list of files from downloaded collection) + """ if data_dir is None: - data_dir = _get_dataset_dir(str(collection), data_dir=data_dir, - verbose=verbose) + data_dir = _get_dataset_dir(str(collection), data_dir=data_dir, verbose=verbose) # Get collection Metadata - metadata = get_collection_image_metadata(collection=collection, - data_dir=data_dir) + metadata = get_collection_image_metadata(collection=collection, data_dir=data_dir) # Get images files = [] - for f in metadata['file']: - files.append(_fetch_file(f, data_dir, resume=resume, verbose=verbose, - overwrite=overwrite)) + for f in metadata["file"]: + files.append( + _fetch_file( + f, data_dir, resume=resume, verbose=verbose, overwrite=overwrite + ) + ) return (metadata, files) def fetch_pain(data_dir=None, resume=True, verbose=1): - '''Download and loads pain dataset from neurovault + """Download and loads pain dataset from neurovault Args: - data_dir: (string, optional) Path of the data directory. - Used to force data storage in a specified location. - Default: None + data_dir: (string, optional) Path of the data directory. Used to force data storage in a specified location. Default: None Returns: out: (Brain_Data) Brain_Data object with downloaded data. 
X=metadata - ''' + """ collection = 504 - dataset_name = 'chang2015_pain' - data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, - verbose=verbose) - metadata, files = download_collection(collection=collection, - data_dir=data_dir, resume=resume, - verbose=verbose) + dataset_name = "chang2015_pain" + data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose) + metadata, files = download_collection( + collection=collection, data_dir=data_dir, resume=resume, verbose=verbose + ) return Brain_Data(data=files, X=metadata) def fetch_emotion_ratings(data_dir=None, resume=True, verbose=1): - '''Download and loads emotion rating dataset from neurovault + """Download and loads emotion rating dataset from neurovault Args: - data_dir: (string, optional). Path of the data directory. - Used to force data storage in a specified location. - Default: None + data_dir: (string, optional). Path of the data directory. Used to force data storage in a specified location. Default: None Returns: out: (Brain_Data) Brain_Data object with downloaded data. X=metadata - ''' + """ collection = 1964 - dataset_name = 'chang2015_emotion_ratings' - data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, - verbose=verbose) - metadata, files = download_collection(collection=collection, - data_dir=data_dir, resume=resume, - verbose=verbose) + dataset_name = "chang2015_emotion_ratings" + data_dir = _get_dataset_dir(dataset_name, data_dir=data_dir, verbose=verbose) + metadata, files = download_collection( + collection=collection, data_dir=data_dir, resume=resume, verbose=verbose + ) return Brain_Data(data=files, X=metadata) - diff --git a/nltools/external/__init__.py b/nltools/external/__init__.py index ed192725..dd21bdee 100644 --- a/nltools/external/__init__.py +++ b/nltools/external/__init__.py @@ -3,8 +3,10 @@ """ from .srm import DetSRM, SRM -from .hrf import (spm_hrf, - glover_hrf, - spm_time_derivative, - glover_time_derivative, - spm_dispersion_derivative) +from .hrf import ( + spm_hrf, + glover_hrf, + spm_time_derivative, + glover_time_derivative, + spm_dispersion_derivative, +) diff --git a/nltools/external/hrf.py b/nltools/external/hrf.py index d263f4cd..2d56e181 100644 --- a/nltools/external/hrf.py +++ b/nltools/external/hrf.py @@ -1,4 +1,4 @@ -''' +""" HRF Functions ============= @@ -34,22 +34,32 @@ THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
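For the datasets helpers above (download_collection, fetch_pain, fetch_emotion_ratings), a short usage sketch; the local data_dir path is an assumed example, and network access to Neurovault is required.

    from nltools.datasets import fetch_pain, download_collection

    # Chang 2015 pain dataset (Neurovault collection 504); returns a Brain_Data
    # object with the collection metadata attached as a DataFrame on .X
    pain = fetch_pain(data_dir="/tmp/nltools_data")
    print(pain.shape())   # (n_images, n_voxels)
    print(pain.X.head())  # image metadata pulled from Neurovault

    # Lower-level call used by both fetch_* functions
    metadata, files = download_collection(collection=504, data_dir="/tmp/nltools_data/504")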
-''' +""" -__all__ = ['spm_hrf', - 'glover_hrf', - 'spm_time_derivative', - 'glover_time_derivative', - 'spm_dispersion_derivative'] +__all__ = [ + "spm_hrf", + "glover_hrf", + "spm_time_derivative", + "glover_time_derivative", + "spm_dispersion_derivative", +] from scipy.stats import gamma import numpy as np -def _gamma_difference_hrf(tr, oversampling=16, time_length=32, onset=0., - delay=6, undershoot=16., dispersion=1., - u_dispersion=1., ratio=0.167): - """ Compute an hrf as the difference of two gamma functions +def _gamma_difference_hrf( + tr, + oversampling=16, + time_length=32, + onset=0.0, + delay=6, + undershoot=16.0, + dispersion=1.0, + u_dispersion=1.0, + ratio=0.167, +): + """Compute an hrf as the difference of two gamma functions Parameters ---------- tr: float, scan repeat time, in seconds @@ -64,15 +74,15 @@ def _gamma_difference_hrf(tr, oversampling=16, time_length=32, onset=0., dt = tr / oversampling time_stamps = np.linspace(0, time_length, int(time_length / dt)) time_stamps -= onset / dt - hrf = gamma.pdf(time_stamps, delay / dispersion, dt / dispersion) - \ - ratio * gamma.pdf( - time_stamps, undershoot / u_dispersion, dt / u_dispersion) + hrf = gamma.pdf( + time_stamps, delay / dispersion, dt / dispersion + ) - ratio * gamma.pdf(time_stamps, undershoot / u_dispersion, dt / u_dispersion) hrf /= hrf.sum() return hrf -def spm_hrf(tr, oversampling=16, time_length=32., onset=0.): - """ Implementation of the SPM hrf model. +def spm_hrf(tr, oversampling=16, time_length=32.0, onset=0.0): + """Implementation of the SPM hrf model. Args: tr: float, scan repeat time, in seconds @@ -89,8 +99,8 @@ def spm_hrf(tr, oversampling=16, time_length=32., onset=0.): return _gamma_difference_hrf(tr, oversampling, time_length, onset) -def glover_hrf(tr, oversampling=16, time_length=32, onset=0.): - """ Implementation of the Glover hrf model. +def glover_hrf(tr, oversampling=16, time_length=32, onset=0.0): + """Implementation of the Glover hrf model. Args: tr: float, scan repeat time, in seconds @@ -104,13 +114,21 @@ def glover_hrf(tr, oversampling=16, time_length=32, onset=0.): """ - return _gamma_difference_hrf(tr, oversampling, time_length, onset, - delay=6, undershoot=12., dispersion=.9, - u_dispersion=.9, ratio=.35) + return _gamma_difference_hrf( + tr, + oversampling, + time_length, + onset, + delay=6, + undershoot=12.0, + dispersion=0.9, + u_dispersion=0.9, + ratio=0.35, + ) -def spm_time_derivative(tr, oversampling=16, time_length=32., onset=0.): - """ Implementation of the SPM time derivative hrf (dhrf) model. +def spm_time_derivative(tr, oversampling=16, time_length=32.0, onset=0.0): + """Implementation of the SPM time derivative hrf (dhrf) model. Args: tr: float, scan repeat time, in seconds @@ -124,12 +142,19 @@ def spm_time_derivative(tr, oversampling=16, time_length=32., onset=0.): """ - do = .1 - dhrf = 1. / do * (spm_hrf(tr, oversampling, time_length, onset + do) - spm_hrf(tr, oversampling, time_length, onset)) + do = 0.1 + dhrf = ( + 1.0 + / do + * ( + spm_hrf(tr, oversampling, time_length, onset + do) + - spm_hrf(tr, oversampling, time_length, onset) + ) + ) return dhrf -def glover_time_derivative(tr, oversampling=16, time_length=32., onset=0.): +def glover_time_derivative(tr, oversampling=16, time_length=32.0, onset=0.0): """Implementation of the flover time derivative hrf (dhrf) model. Args: @@ -144,12 +169,19 @@ def glover_time_derivative(tr, oversampling=16, time_length=32., onset=0.): """ - do = .1 - dhrf = 1. 
/ do * (glover_hrf(tr, oversampling, time_length, onset + do) - glover_hrf(tr, oversampling, time_length, onset)) + do = 0.1 + dhrf = ( + 1.0 + / do + * ( + glover_hrf(tr, oversampling, time_length, onset + do) + - glover_hrf(tr, oversampling, time_length, onset) + ) + ) return dhrf -def spm_dispersion_derivative(tr, oversampling=16, time_length=32., onset=0.): +def spm_dispersion_derivative(tr, oversampling=16, time_length=32.0, onset=0.0): """Implementation of the SPM dispersion derivative hrf model. Args: @@ -164,7 +196,15 @@ def spm_dispersion_derivative(tr, oversampling=16, time_length=32., onset=0.): """ - dd = .01 - dhrf = 1. / dd * (_gamma_difference_hrf(tr, oversampling, time_length, - onset, dispersion=1. + dd) - spm_hrf(tr, oversampling, time_length, onset)) + dd = 0.01 + dhrf = ( + 1.0 + / dd + * ( + _gamma_difference_hrf( + tr, oversampling, time_length, onset, dispersion=1.0 + dd + ) + - spm_hrf(tr, oversampling, time_length, onset) + ) + ) return dhrf diff --git a/nltools/external/srm.py b/nltools/external/srm.py index d33cb1ad..9f419f44 100644 --- a/nltools/external/srm.py +++ b/nltools/external/srm.py @@ -44,9 +44,7 @@ from sklearn.exceptions import NotFittedError import sys -__all__ = [ - "SRM", "DetSRM" -] +__all__ = ["SRM", "DetSRM"] logger = logging.getLogger(__name__) @@ -88,7 +86,9 @@ def _init_w_transforms(data, features, random_states): for subject in range(subjects): if data[subject] is not None: voxels[subject] = data[subject].shape[0] - rnd_matrix = random_states[subject].random_sample((voxels[subject], features)) + rnd_matrix = random_states[subject].random_sample( + (voxels[subject], features) + ) q, r = np.linalg.qr(rnd_matrix) w.append(q) else: @@ -135,7 +135,7 @@ class SRM(BaseEstimator, TransformerMixin): rho2_ : array, shape=[subjects] The estimated noise variance :math:`\\rho_i^2` for each subject - + random_state_: `RandomState` Random number generator initialized using rand_seed @@ -173,18 +173,21 @@ def fit(self, X, y=None): y : not used """ - logger.info('Starting Probabilistic SRM') + logger.info("Starting Probabilistic SRM") # Check the number of subjects if len(X) <= 1: - raise ValueError("There are not enough subjects " - "({0:d}) to train the model.".format(len(X))) + raise ValueError( + "There are not enough subjects " + "({0:d}) to train the model.".format(len(X)) + ) # Check for input data sizes if X[0].shape[1] < self.features: raise ValueError( "There are not enough samples to train the model with " - "{0:d} features.".format(self.features)) + "{0:d} features.".format(self.features) + ) # Check if all subjects have same number of TRs number_trs = X[0].shape[1] @@ -193,8 +196,7 @@ def fit(self, X, y=None): if X[subject] is not None: assert_all_finite(X[subject]) if X[subject].shape[1] != number_trs: - raise ValueError("Different number of samples between subjects" - ".") + raise ValueError("Different number of samples between subjects" ".") # Run SRM self.sigma_s_, self.w_, self.mu_, self.rho2_, self.s_ = self._srm(X) @@ -218,13 +220,14 @@ def transform(self, X, y=None): """ # Check if the model exist - if hasattr(self, 'w_') is False: + if hasattr(self, "w_") is False: raise NotFittedError("The model fit has not been run yet.") # Check the number of subjects if len(X) != len(self.w_): - raise ValueError("The number of subjects does not match the one" - " in the model.") + raise ValueError( + "The number of subjects does not match the one" " in the model." 
+ ) s = [None] * len(X) for subject in range(len(X)): @@ -277,9 +280,16 @@ def _init_structures(self, data, subjects): return x, mu, rho2, trace_xtx - def _likelihood(self, chol_sigma_s_rhos, log_det_psi, chol_sigma_s, - trace_xt_invsigma2_x, inv_sigma_s_rhos, wt_invpsi_x, - samples): + def _likelihood( + self, + chol_sigma_s_rhos, + log_det_psi, + chol_sigma_s, + trace_xt_invsigma2_x, + inv_sigma_s_rhos, + wt_invpsi_x, + samples, + ): """Calculate the log-likelihood function Parameters @@ -313,11 +323,15 @@ def _likelihood(self, chol_sigma_s_rhos, log_det_psi, chol_sigma_s, loglikehood : float The log-likelihood value. """ - log_det = (np.log(np.diag(chol_sigma_s_rhos) ** 2).sum() + log_det_psi - + np.log(np.diag(chol_sigma_s) ** 2).sum()) + log_det = ( + np.log(np.diag(chol_sigma_s_rhos) ** 2).sum() + + log_det_psi + + np.log(np.diag(chol_sigma_s) ** 2).sum() + ) loglikehood = -0.5 * samples * log_det - 0.5 * trace_xt_invsigma2_x loglikehood += 0.5 * np.trace( - wt_invpsi_x.T.dot(inv_sigma_s_rhos).dot(wt_invpsi_x)) + wt_invpsi_x.T.dot(inv_sigma_s_rhos).dot(wt_invpsi_x) + ) # + const --> -0.5*nTR*nvoxel*subjects*math.log(2*math.pi) return loglikehood @@ -364,13 +378,14 @@ def transform_subject(self, X): """ # Check if the model exist - if hasattr(self, 'w_') is False: + if hasattr(self, "w_") is False: raise NotFittedError("The model fit has not been run yet.") # Check the number of TRs in the subject if X.shape[1] != self.s_.shape[1]: - raise ValueError("The number of timepoints(TRs) does not match the" - "one in the model.") + raise ValueError( + "The number of timepoints(TRs) does not match the" "one in the model." + ) w = self._update_transform_subject(X, self.s_) @@ -381,7 +396,7 @@ def _srm(self, data): Parameters ---------- - + data : list of 2D arrays, element i has shape=[voxels_i, samples] Each element in the list contains the fMRI data of one subject. @@ -406,11 +421,13 @@ def _srm(self, data): The shared response. 
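As a usage sketch for the probabilistic SRM interface being reformatted here: the subject count, voxel counts, sample count, and feature dimension are arbitrary, and random arrays stand in for real fMRI data.

    import numpy as np
    from nltools.external import SRM

    # Three "subjects", each voxels x samples, with differing voxel counts
    train = [np.random.randn(v, 200) for v in (500, 650, 800)]
    srm = SRM(n_iter=10, features=20)
    srm.fit(train)                 # learns W_i (voxels_i x features) and shared S
    shared = srm.transform(train)  # list of features x samples projections
    print(shared[0].shape)         # (20, 200)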
""" - samples = min([d.shape[1] for d in data if d is not None], - default=sys.maxsize) + samples = min([d.shape[1] for d in data if d is not None], default=sys.maxsize) subjects = len(data) self.random_state_ = np.random.RandomState(self.rand_seed) - random_states = [np.random.RandomState(self.random_state_.randint(2 ** 32)) for i in range(len(data))] + random_states = [ + np.random.RandomState(self.random_state_.randint(2 ** 32)) + for i in range(len(data)) + ] # Initialization step: initialize the outputs with initial values, # voxels with the number of voxels in each subject, and trace_xtx with @@ -422,7 +439,7 @@ def _srm(self, data): # Main loop of the algorithm (run for iteration in range(self.n_iter): - logger.info('Iteration %d' % (iteration + 1)) + logger.info("Iteration %d" % (iteration + 1)) # E-step: @@ -431,19 +448,24 @@ def _srm(self, data): # Invert Sigma_s using Cholesky factorization (chol_sigma_s, lower_sigma_s) = scipy.linalg.cho_factor( - sigma_s, check_finite=False) + sigma_s, check_finite=False + ) inv_sigma_s = scipy.linalg.cho_solve( - (chol_sigma_s, lower_sigma_s), np.identity(self.features), - check_finite=False) + (chol_sigma_s, lower_sigma_s), + np.identity(self.features), + check_finite=False, + ) # Invert (Sigma_s + rho_0 * I) using Cholesky factorization sigma_s_rhos = inv_sigma_s + np.identity(self.features) * rho0 - chol_sigma_s_rhos, lower_sigma_s_rhos = \ - scipy.linalg.cho_factor(sigma_s_rhos, - check_finite=False) + chol_sigma_s_rhos, lower_sigma_s_rhos = scipy.linalg.cho_factor( + sigma_s_rhos, check_finite=False + ) inv_sigma_s_rhos = scipy.linalg.cho_solve( (chol_sigma_s_rhos, lower_sigma_s_rhos), - np.identity(self.features), check_finite=False) + np.identity(self.features), + check_finite=False, + ) # Compute the sum of W_i^T * rho_i^-2 * X_i, and the sum of traces # of X_i^T * rho_i^-2 * X_i @@ -458,14 +480,15 @@ def _srm(self, data): # Update the shared response shared_response = sigma_s.dot( - np.identity(self.features) - rho0 * inv_sigma_s_rhos).dot( - wt_invpsi_x) + np.identity(self.features) - rho0 * inv_sigma_s_rhos + ).dot(wt_invpsi_x) # M-step # Update Sigma_s and compute its trace - sigma_s = (inv_sigma_s_rhos - + shared_response.dot(shared_response.T) / samples) + sigma_s = ( + inv_sigma_s_rhos + shared_response.dot(shared_response.T) / samples + ) trace_sigma_s = samples * np.trace(sigma_s) # Update each subject's mapping transform W_i and error variance @@ -476,7 +499,8 @@ def _srm(self, data): perturbation = np.zeros(a_subject.shape) np.fill_diagonal(perturbation, 0.001) u_subject, s_subject, v_subject = np.linalg.svd( - a_subject + perturbation, full_matrices=False) + a_subject + perturbation, full_matrices=False + ) w[subject] = u_subject.dot(v_subject) rho2[subject] = trace_xtx[subject] rho2[subject] += -2 * np.sum(w[subject] * a_subject).sum() @@ -488,10 +512,15 @@ def _srm(self, data): # Calculate and log the current log-likelihood for checking # convergence loglike = self._likelihood( - chol_sigma_s_rhos, log_det_psi, chol_sigma_s, - trace_xt_invsigma2_x, inv_sigma_s_rhos, wt_invpsi_x, - samples) - logger.info('Objective function %f' % loglike) + chol_sigma_s_rhos, + log_det_psi, + chol_sigma_s, + trace_xt_invsigma2_x, + inv_sigma_s_rhos, + wt_invpsi_x, + samples, + ) + logger.info("Objective function %f" % loglike) return sigma_s, w, mu, rho2, shared_response @@ -562,18 +591,21 @@ def fit(self, X, y=None): y : not used """ - logger.info('Starting Deterministic SRM') + logger.info("Starting Deterministic SRM") # Check the 
number of subjects if len(X) <= 1: - raise ValueError("There are not enough subjects " - "({0:d}) to train the model.".format(len(X))) + raise ValueError( + "There are not enough subjects " + "({0:d}) to train the model.".format(len(X)) + ) # Check for input data sizes if X[0].shape[1] < self.features: raise ValueError( "There are not enough samples to train the model with " - "{0:d} features.".format(self.features)) + "{0:d} features.".format(self.features) + ) # Check if all subjects have same number of TRs number_trs = X[0].shape[1] @@ -581,8 +613,7 @@ def fit(self, X, y=None): for subject in range(number_subjects): assert_all_finite(X[subject]) if X[subject].shape[1] != number_trs: - raise ValueError("Different number of samples between subjects" - ".") + raise ValueError("Different number of samples between subjects" ".") # Run SRM self.w_, self.s_ = self._srm(X) @@ -607,13 +638,14 @@ def transform(self, X, y=None): """ # Check if the model exist - if hasattr(self, 'w_') is False: + if hasattr(self, "w_") is False: raise NotFittedError("The model fit has not been run yet.") # Check the number of subjects if len(X) != len(self.w_): - raise ValueError("The number of subjects does not match the one" - " in the model.") + raise ValueError( + "The number of subjects does not match the one" " in the model." + ) s = [None] * len(X) for subject in range(len(X)): @@ -645,13 +677,12 @@ def _objective_function(self, data, w, s): subjects = len(data) objective = 0.0 for m in range(subjects): - objective += \ - np.linalg.norm(data[m] - w[m].dot(s), 'fro')**2 + objective += np.linalg.norm(data[m] - w[m].dot(s), "fro") ** 2 return objective * 0.5 / data[0].shape[1] def _compute_shared_response(self, data, w): - """ Compute the shared response S + """Compute the shared response S Parameters ---------- @@ -716,13 +747,14 @@ def transform_subject(self, X): Orthogonal mapping `W_{new}` for new subject """ # Check if the model exist - if hasattr(self, 'w_') is False: + if hasattr(self, "w_") is False: raise NotFittedError("The model fit has not been run yet.") # Check the number of TRs in the subject if X.shape[1] != self.s_.shape[1]: - raise ValueError("The number of timepoints(TRs) does not match the" - "one in the model.") + raise ValueError( + "The number of timepoints(TRs) does not match the" "one in the model." + ) w = self._update_transform_subject(X, self.s_) @@ -752,7 +784,8 @@ def _srm(self, data): self.random_state_ = np.random.RandomState(self.rand_seed) random_states = [ np.random.RandomState(self.random_state_.randint(2 ** 32)) - for i in range(len(data))] + for i in range(len(data)) + ] # Initialization step: initialize the outputs with initial values, # voxels with the number of voxels in each subject. 
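Similarly, a sketch of the deterministic variant and of aligning a held-out subject with transform_subject; all arrays are synthetic and the sizes are illustrative.

    import numpy as np
    from nltools.external import DetSRM

    train = [np.random.randn(v, 150) for v in (400, 550)]
    detsrm = DetSRM(n_iter=10, features=10)
    detsrm.fit(train)
    print(detsrm.s_.shape)  # shared response, (10, 150)

    # Map a new subject (same timepoints, different voxel count) into the model
    w_new = detsrm.transform_subject(np.random.randn(700, 150))
    print(w_new.shape)      # (700, 10)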
@@ -761,11 +794,11 @@ def _srm(self, data): if logger.isEnabledFor(logging.INFO): # Calculate the current objective function value objective = self._objective_function(data, w, shared_response) - logger.info('Objective function %f' % objective) + logger.info("Objective function %f" % objective) # Main loop of the algorithm for iteration in range(self.n_iter): - logger.info('Iteration %d' % (iteration + 1)) + logger.info("Iteration %d" % (iteration + 1)) # Update each subject's mapping transform W_i: for subject in range(subjects): @@ -773,7 +806,8 @@ def _srm(self, data): perturbation = np.zeros(a_subject.shape) np.fill_diagonal(perturbation, 0.001) u_subject, _, v_subject = np.linalg.svd( - a_subject + perturbation, full_matrices=False) + a_subject + perturbation, full_matrices=False + ) w[subject] = u_subject.dot(v_subject) # Update the shared response: @@ -782,6 +816,6 @@ def _srm(self, data): if logger.isEnabledFor(logging.INFO): # Calculate the current objective function value objective = self._objective_function(data, w, shared_response) - logger.info('Objective function %f' % objective) + logger.info("Objective function %f" % objective) return w, shared_response diff --git a/nltools/mask.py b/nltools/mask.py index 437a644b..5b645412 100644 --- a/nltools/mask.py +++ b/nltools/mask.py @@ -1,15 +1,12 @@ -''' +""" NeuroLearn Mask Classes ======================= Classes to represent masks -''' +""" -__all__ = ['create_sphere', - 'expand_mask', - 'collapse_mask', - 'roi_to_brain'] +__all__ = ["create_sphere", "expand_mask", "collapse_mask", "roi_to_brain"] __author__ = ["Luke Chang", "Sam Greydanus"] __license__ = "MIT" @@ -24,7 +21,7 @@ def create_sphere(coordinates, radius=5, mask=None): - """ Generate a set of spheres in the brain mask space + """Generate a set of spheres in the brain mask space Args: radius: vector of radius. 
Will create multiple spheres if @@ -41,14 +38,15 @@ def create_sphere(coordinates, radius=5, mask=None): if os.path.isfile(mask): mask = nib.load(mask) else: - raise ValueError("mask is not a nibabel instance or a valid " - "file name") + raise ValueError( + "mask is not a nibabel instance or a valid " "file name" + ) else: - mask = nib.load(resolve_mni_path(MNI_Template)['mask']) + mask = nib.load(resolve_mni_path(MNI_Template)["mask"]) def sphere(r, p, mask): - """ create a sphere of given radius at some point p in the brain mask + """create a sphere of given radius at some point p in the brain mask Args: r: radius of the sphere @@ -57,30 +55,38 @@ def sphere(r, p, mask): """ dims = mask.shape - m = [dims[0]/2, dims[1]/2, dims[2]/2] - x, y, z = np.ogrid[-m[0]:dims[0]-m[0], - -m[1]:dims[1]-m[1], - -m[2]:dims[2]-m[2]] - mask_r = x*x + y*y + z*z <= r*r + m = [dims[0] / 2, dims[1] / 2, dims[2] / 2] + x, y, z = np.ogrid[ + -m[0] : dims[0] - m[0], -m[1] : dims[1] - m[1], -m[2] : dims[2] - m[2] + ] + mask_r = x * x + y * y + z * z <= r * r activation = np.zeros(dims) activation[mask_r] = 1 - translation_affine = np.array([[1, 0, 0, p[0]-m[0]], - [0, 1, 0, p[1]-m[1]], - [0, 0, 1, p[2]-m[2]], - [0, 0, 0, 1]]) + translation_affine = np.array( + [ + [1, 0, 0, p[0] - m[0]], + [0, 1, 0, p[1] - m[1]], + [0, 0, 1, p[2] - m[2]], + [0, 0, 0, 1], + ] + ) return nib.Nifti1Image(activation, affine=translation_affine) if any(isinstance(i, list) for i in coordinates): if isinstance(radius, list): if len(radius) != len(coordinates): - raise ValueError('Make sure length of radius list matches' - 'length of coordinate list.') + raise ValueError( + "Make sure length of radius list matches" + "length of coordinate list." + ) elif isinstance(radius, int): - radius = [radius]*len(coordinates) - out = Brain_Data(nib.Nifti1Image(np.zeros_like(mask.get_data()), - affine=mask.affine), mask=mask) + radius = [radius] * len(coordinates) + out = Brain_Data( + nib.Nifti1Image(np.zeros_like(mask.get_data()), affine=mask.affine), + mask=mask, + ) for r, c in zip(radius, coordinates): out = out + Brain_Data(sphere(r, c, mask), mask=mask) else: @@ -92,7 +98,7 @@ def sphere(r, p, mask): def expand_mask(mask, custom_mask=None): - """ expand a mask with multiple integers into separate binary masks + """expand a mask with multiple integers into separate binary masks Args: mask: nibabel or Brain_Data instance @@ -104,21 +110,22 @@ def expand_mask(mask, custom_mask=None): """ from nltools.data import Brain_Data + if isinstance(mask, nib.Nifti1Image): mask = Brain_Data(mask, mask=custom_mask) if not isinstance(mask, Brain_Data): - raise ValueError('Make sure mask is a nibabel or Brain_Data instance.') + raise ValueError("Make sure mask is a nibabel or Brain_Data instance.") mask.data = np.round(mask.data).astype(int) tmp = [] for i in np.nonzero(np.unique(mask.data))[0]: - tmp.append((mask.data == i)*1) + tmp.append((mask.data == i) * 1) out = mask.empty() out.data = np.array(tmp) return out def collapse_mask(mask, auto_label=True, custom_mask=None): - """ collapse separate masks into one mask with multiple integers + """collapse separate masks into one mask with multiple integers overlapping areas are ignored Args: @@ -132,12 +139,12 @@ def collapse_mask(mask, auto_label=True, custom_mask=None): """ from nltools.data import Brain_Data + if not isinstance(mask, Brain_Data): if isinstance(mask, nib.Nifti1Image): mask = Brain_Data(mask, mask=custom_mask) else: - raise ValueError('Make sure mask is a nibabel or Brain_Data ' - 'instance.') 
+ raise ValueError("Make sure mask is a nibabel or Brain_Data " "instance.") if len(mask.shape()) > 1: if len(mask) > 1: @@ -148,25 +155,33 @@ def collapse_mask(mask, auto_label=True, custom_mask=None): for x in range(len(mask)): m_list.append(mask[x].to_nifti()) intersect = intersect_masks(m_list, threshold=1, connected=False) - intersect = Brain_Data(nib.Nifti1Image( - np.abs(intersect.get_data()-1), - intersect.get_affine()), mask=custom_mask) + intersect = Brain_Data( + nib.Nifti1Image( + np.abs(intersect.get_data() - 1), intersect.get_affine() + ), + mask=custom_mask, + ) merge = [] if auto_label: # Combine all masks into sequential order # ignoring any areas of overlap for i in range(len(m_list)): - merge.append(np.multiply( - Brain_Data(m_list[i], mask=custom_mask).data, - intersect.data)*(i+1)) + merge.append( + np.multiply( + Brain_Data(m_list[i], mask=custom_mask).data, intersect.data + ) + * (i + 1) + ) out.data = np.sum(np.array(merge).T, 1).astype(int) else: # Collapse masks using value as label for i in range(len(m_list)): - merge.append(np.multiply( - Brain_Data(m_list[i], mask=custom_mask).data, - intersect.data)) + merge.append( + np.multiply( + Brain_Data(m_list[i], mask=custom_mask).data, intersect.data + ) + ) out.data = np.sum(np.array(merge).T, 1) return out else: @@ -174,7 +189,7 @@ def collapse_mask(mask, auto_label=True, custom_mask=None): def roi_to_brain(data, mask_x): - ''' This function will create convert an expanded binary mask of ROIs + """This function will create convert an expanded binary mask of ROIs (see expand_mask) based on a vector of of values. The dataframe of values must correspond to ROI numbers. @@ -187,7 +202,7 @@ def roi_to_brain(data, mask_x): Returns: out: (Brain_Data) Brain_Data instance where each ROI is now populated with a value - ''' + """ from nltools.data import Brain_Data if not isinstance(data, (pd.Series, pd.DataFrame)): @@ -202,7 +217,9 @@ def roi_to_brain(data, mask_x): if data.shape[1] == len(mask_x): data = data.T else: - raise ValueError('Data must have the same number of rows as rois in mask') + raise ValueError( + "Data must have the same number of rows as rois in mask" + ) else: raise NotImplementedError @@ -210,18 +227,20 @@ def roi_to_brain(data, mask_x): raise ValueError("Data must be a pandas series or data frame.") if len(mask_x) != data.shape[0]: - raise ValueError('Data must have the same number of rows as mask has ROIs.') + raise ValueError("Data must have the same number of rows as mask has ROIs.") if isinstance(data, pd.Series): out = mask_x[0].copy() out.data = np.zeros(out.data.shape) for roi in range(len(mask_x)): - out.data[np.where(mask_x.data[roi,:])] = data[roi] + out.data[np.where(mask_x.data[roi, :])] = data[roi] return out else: out = mask_x.copy() out.data = np.ones((data.shape[1], out.data.shape[1])) for roi in range(len(mask_x)): - roi_data = np.reshape(data.iloc[roi,:].values, (-1,1)) - out.data[:, mask_x[roi].data==1] = np.repeat(roi_data.T, np.sum(mask_x[roi].data==1), axis=0).T + roi_data = np.reshape(data.iloc[roi, :].values, (-1, 1)) + out.data[:, mask_x[roi].data == 1] = np.repeat( + roi_data.T, np.sum(mask_x[roi].data == 1), axis=0 + ).T return out diff --git a/nltools/plotting.py b/nltools/plotting.py index ee6d7e55..7cfc34f1 100644 --- a/nltools/plotting.py +++ b/nltools/plotting.py @@ -37,7 +37,9 @@ # Optional dependencies ipywidgets = attempt_to_import( - "ipywidgets", name="ipywidgets", fromlist=["interact", "fixed", "widgets", "BoundedFloatText", "BoundedIntText"] + "ipywidgets", + 
name="ipywidgets", + fromlist=["interact", "fixed", "widgets", "BoundedFloatText", "BoundedIntText"], ) @@ -47,7 +49,7 @@ def plot_interactive_brain( surface=False, percentile_threshold=False, anatomical=None, - **kwargs + **kwargs, ): """ This function leverages nilearn's new javascript based brain viewer functions to create interactive plotting functionality. @@ -72,7 +74,9 @@ def plot_interactive_brain( if threshold[-1] != "%": raise ValueError("Starting threshold provided as string must end in '%'") percentile_threshold = True - warnings.warn("Percentile thresholding ignores brain mask. Results are likely more liberal than you expect (e.g. with non-interactive plotting)!") + warnings.warn( + "Percentile thresholding ignores brain mask. Results are likely more liberal than you expect (e.g. with non-interactive plotting)!" + ) threshold = int(threshold[:-1]) if len(brain.shape()) == 2: @@ -108,7 +112,7 @@ def plot_interactive_brain( percentile_threshold=percentile_threshold, surface=surface, anatomical=ipywidgets.fixed(anatomical), - **kwargs + **kwargs, ) @@ -208,7 +212,7 @@ def plot_t_brain( colorbar=True, cmap=cmap, plot_abs=False, - **kwargs + **kwargs, ) for v, c in zip(views, cut_coords): plot_stat_map( @@ -217,7 +221,7 @@ def plot_t_brain( display_mode=v, cmap=cmap, bg_img=resolve_mni_path(MNI_Template)["brain"], - **kwargs + **kwargs, ) elif how == "glass": plot_glass_brain( @@ -226,7 +230,7 @@ def plot_t_brain( colorbar=True, cmap=cmap, plot_abs=False, - **kwargs + **kwargs, ) elif how == "mni": for v, c in zip(views, cut_coords): @@ -236,7 +240,7 @@ def plot_t_brain( display_mode=v, cmap=cmap, bg_img=resolve_mni_path(MNI_Template)["brain"], - **kwargs + **kwargs, ) del obj del out @@ -299,7 +303,7 @@ def plot_brain(objIn, how="full", thr_upper=None, thr_lower=None, save=False, ** colorbar=True, cmap=cmap, plot_abs=False, - **kwargs + **kwargs, ) if save: plt.savefig(glass_save, bbox_inches="tight") @@ -310,7 +314,7 @@ def plot_brain(objIn, how="full", thr_upper=None, thr_lower=None, save=False, ** display_mode=v, cmap=cmap, bg_img=resolve_mni_path(MNI_Template)["brain"], - **kwargs + **kwargs, ) if save: plt.savefig(savefile, bbox_inches="tight") @@ -321,7 +325,7 @@ def plot_brain(objIn, how="full", thr_upper=None, thr_lower=None, save=False, ** colorbar=True, cmap=cmap, plot_abs=False, - **kwargs + **kwargs, ) if save: plt.savefig(glass_save, bbox_inches="tight") @@ -333,7 +337,7 @@ def plot_brain(objIn, how="full", thr_upper=None, thr_lower=None, save=False, ** display_mode=v, cmap=cmap, bg_img=resolve_mni_path(MNI_Template)["brain"], - **kwargs + **kwargs, ) if save: plt.savefig(savefile, bbox_inches="tight") @@ -342,7 +346,7 @@ def plot_brain(objIn, how="full", thr_upper=None, thr_lower=None, save=False, ** def dist_from_hyperplane_plot(stats_output): - """ Plot SVM Classification Distance from Hyperplane + """Plot SVM Classification Distance from Hyperplane Args: stats_output: a pandas file with prediction output @@ -371,11 +375,11 @@ def dist_from_hyperplane_plot(stats_output): plt.xlabel("Subject", fontsize=16) plt.ylabel("Distance from Hyperplane", fontsize=16) plt.title("Classification", fontsize=18) - return + return def scatterplot(stats_output): - """ Plot Prediction Scatterplot + """Plot Prediction Scatterplot Args: stats_output: a pandas file with prediction output @@ -392,11 +396,11 @@ def scatterplot(stats_output): plt.xlabel("Y", fontsize=16) plt.ylabel("Predicted Value", fontsize=16) plt.title("Prediction", fontsize=18) - return + return def 
probability_plot(stats_output): - """ Plot Classification Probability + """Plot Classification Probability Args: stats_output: a pandas file with prediction output @@ -412,7 +416,7 @@ def probability_plot(stats_output): plt.xlabel("Y", fontsize=16) plt.ylabel("Predicted Probability", fontsize=16) plt.title("Prediction", fontsize=18) - return + return # # and plot the result # plt.figure(1, figsize=(4, 3)) @@ -427,7 +431,7 @@ def probability_plot(stats_output): def roc_plot(fpr, tpr): - """ Plot 1-Specificity by Sensitivity + """Plot 1-Specificity by Sensitivity Args: fpr: false positive rate from Roc.calculate @@ -448,7 +452,7 @@ def roc_plot(fpr, tpr): def plot_stacked_adjacency(adjacency1, adjacency2, normalize=True, **kwargs): - """ Create stacked adjacency to illustrate similarity. + """Create stacked adjacency to illustrate similarity. Args: matrix1: Adjacency instance 1 @@ -483,9 +487,9 @@ def plot_mean_label_distance( permutation_test=False, n_permute=5000, fontsize=18, - **kwargs + **kwargs, ): - """ Create a violin plot indicating within and between label distance. + """Create a violin plot indicating within and between label distance. Args: distance: pandas dataframe of distance @@ -531,7 +535,7 @@ def plot_mean_label_distance( inner="quartile", palette={"Within": "lightskyblue", "Between": "red"}, ax=ax, - **kwargs + **kwargs, ) f.set_ylabel("Average Distance", fontsize=fontsize) f.set_title("Average Group Distance", fontsize=fontsize) @@ -554,24 +558,24 @@ def plot_between_label_distance( permutation_test=True, n_permute=5000, fontsize=18, - **kwargs + **kwargs, ): - """ Create a heatmap indicating average between label distance - - - Args: - distance: (pandas dataframe) brain_distance matrix - labels: (pandas dataframe) group labels - ax: axis to plot (default=None) - permutation_test: (boolean) - n_permute: (int) number of samples for permuation test - fontsize: (int) size of font for plot - Returns: - f: heatmap - out: pandas dataframe of pairwise distance between conditions - within_dist_out: average pairwise distance matrix - mn_dist_out: (optional if permutation_test=True) average difference in distance between conditions - p_dist_out: (optional if permutation_test=True) p-value for difference in distance between conditions + """Create a heatmap indicating average between label distance + + + Args: + distance: (pandas dataframe) brain_distance matrix + labels: (pandas dataframe) group labels + ax: axis to plot (default=None) + permutation_test: (boolean) + n_permute: (int) number of samples for permuation test + fontsize: (int) size of font for plot + Returns: + f: heatmap + out: pandas dataframe of pairwise distance between conditions + within_dist_out: average pairwise distance matrix + mn_dist_out: (optional if permutation_test=True) average difference in distance between conditions + p_dist_out: (optional if permutation_test=True) p-value for difference in distance between conditions """ labels = np.unique(np.array(labels)) @@ -652,23 +656,23 @@ def plot_between_label_distance( def plot_silhouette( distance, labels, ax=None, permutation_test=True, n_permute=5000, **kwargs ): - """ Create a silhouette plot indicating between relative to within label distance - - Args: - distance: (pandas dataframe) brain_distance matrix - labels: (pandas dataframe) group labels - ax: axis to plot (default=None) - permutation_test: (boolean) - n_permute: (int) number of samples for permuation test - Optional keyword args: - figsize: (list) dimensions of silhouette plot - colors: (list) 
color triplets for silhouettes. Length must equal number of unique labels - Returns: - # f: heatmap - # out: pandas dataframe of pairwise distance between conditions - # within_dist_out: average pairwise distance matrix - # mn_dist_out: (optional if permutation_test=True) average difference in distance between conditions - # p_dist_out: (optional if permutation_test=True) p-value for difference in distance between conditions + """Create a silhouette plot indicating between relative to within label distance + + Args: + distance: (pandas dataframe) brain_distance matrix + labels: (pandas dataframe) group labels + ax: axis to plot (default=None) + permutation_test: (boolean) + n_permute: (int) number of samples for permuation test + Optional keyword args: + figsize: (list) dimensions of silhouette plot + colors: (list) color triplets for silhouettes. Length must equal number of unique labels + Returns: + # f: heatmap + # out: pandas dataframe of pairwise distance between conditions + # within_dist_out: average pairwise distance matrix + # mn_dist_out: (optional if permutation_test=True) average difference in distance between conditions + # p_dist_out: (optional if permutation_test=True) p-value for difference in distance between conditions """ # Define label set @@ -756,13 +760,14 @@ def plot_silhouette( else: return + def component_viewer(output, tr=2.0): - ''' This a function to interactively view the results of a decomposition analysis + """This a function to interactively view the results of a decomposition analysis Args: output: (dict) output dictionary from running Brain_data.decompose() tr: (float) repetition time of data - ''' + """ if ipywidgets is None: raise ImportError( @@ -770,43 +775,65 @@ def component_viewer(output, tr=2.0): ) def component_inspector(component, threshold): - '''This a function to be used with ipywidgets to interactively view a decomposition analysis + """This a function to be used with ipywidgets to interactively view a decomposition analysis - Make sure you have tr and output assigned to variables. + Make sure you have tr and output assigned to variables. 
- Example: + Example: - from ipywidgets import BoundedFloatText, BoundedIntText - from ipywidgets import interact + from ipywidgets import BoundedFloatText, BoundedIntText + from ipywidgets import interact - tr = 2.4 - output = data_filtered_smoothed.decompose(algorithm='ica', n_components=30, axis='images', whiten=True) + tr = 2.4 + output = data_filtered_smoothed.decompose(algorithm='ica', n_components=30, axis='images', whiten=True) - interact(component_inspector, component=BoundedIntText(description='Component', value=0, min=0, max=len(output['components'])-1), - threshold=BoundedFloatText(description='Threshold', value=2.0, min=0, max=4, step=.1)) + interact(component_inspector, component=BoundedIntText(description='Component', value=0, min=0, max=len(output['components'])-1), + threshold=BoundedFloatText(description='Threshold', value=2.0, min=0, max=4, step=.1)) - ''' - _, ax = plt.subplots(nrows=3, figsize=(12,8)) - thresholded = (output['components'][component] - output['components'][component].mean())*(1/output['components'][component].std()) + """ + _, ax = plt.subplots(nrows=3, figsize=(12, 8)) + thresholded = ( + output["components"][component] - output["components"][component].mean() + ) * (1 / output["components"][component].std()) thresholded.data[np.abs(thresholded.data) <= threshold] = 0 - plot_stat_map(thresholded.to_nifti(), cut_coords=range(-40, 70, 10), - display_mode='z', black_bg=True, colorbar=True, annotate=False, - draw_cross=False, axes=ax[0]) - if isinstance(output['decomposition_object'], (sklearn.decomposition.PCA)): - var_exp = output['decomposition_object'].explained_variance_ratio_[component] - ax[0].set_title(f"Component: {component}/{len(output['components'])}, Variance Explained: {var_exp:2.2}", fontsize=18) + plot_stat_map( + thresholded.to_nifti(), + cut_coords=range(-40, 70, 10), + display_mode="z", + black_bg=True, + colorbar=True, + annotate=False, + draw_cross=False, + axes=ax[0], + ) + if isinstance(output["decomposition_object"], (sklearn.decomposition.PCA)): + var_exp = output["decomposition_object"].explained_variance_ratio_[ + component + ] + ax[0].set_title( + f"Component: {component}/{len(output['components'])}, Variance Explained: {var_exp:2.2}", + fontsize=18, + ) else: - ax[0].set_title(f"Component: {component}/{len(output['components'])}", fontsize=18) + ax[0].set_title( + f"Component: {component}/{len(output['components'])}", fontsize=18 + ) - ax[1].plot(output['weights'][:, component], linewidth=2, color='red') - ax[1].set_ylabel('Intensity (AU)', fontsize=18) - ax[1].set_title(f'Timecourse (TR={tr})', fontsize=16) - y = fft(output['weights'][:, component]) + ax[1].plot(output["weights"][:, component], linewidth=2, color="red") + ax[1].set_ylabel("Intensity (AU)", fontsize=18) + ax[1].set_title(f"Timecourse (TR={tr})", fontsize=16) + y = fft(output["weights"][:, component]) f = fftfreq(len(y), d=tr) - ax[2].plot(f[f > 0], np.abs(y)[f > 0]**2) - ax[2].set_ylabel('Power', fontsize=18) - ax[2].set_xlabel('Frequency (Hz)', fontsize=16) - - ipywidgets.interact(component_inspector, component=ipywidgets.BoundedIntText(description='Component', value=0, min=0, max=len(output['components'])-1), - threshold=ipywidgets.BoundedFloatText(description='Threshold', value=2.0, min=0, max=4, step=.1)) + ax[2].plot(f[f > 0], np.abs(y)[f > 0] ** 2) + ax[2].set_ylabel("Power", fontsize=18) + ax[2].set_xlabel("Frequency (Hz)", fontsize=16) + ipywidgets.interact( + component_inspector, + component=ipywidgets.BoundedIntText( + description="Component", 
value=0, min=0, max=len(output["components"]) - 1 + ), + threshold=ipywidgets.BoundedFloatText( + description="Threshold", value=2.0, min=0, max=4, step=0.1 + ), + ) diff --git a/nltools/prefs.py b/nltools/prefs.py index 5a488028..68d6c3f8 100644 --- a/nltools/prefs.py +++ b/nltools/prefs.py @@ -1,10 +1,10 @@ -''' +""" NeuroLearn Preferences ====================== -''' -__all__ = ['MNI_Template', 'resolve_mni_path'] +""" +__all__ = ["MNI_Template", "resolve_mni_path"] __author__ = ["Luke Chang"] __license__ = "MIT" @@ -13,51 +13,68 @@ import six MNI_Template = dict( - resolution='2mm', - mask_type='with_ventricles', - mask=os.path.join(get_resource_path(), 'MNI152_T1_2mm_brain_mask.nii.gz'), - plot=os.path.join(get_resource_path(), 'MNI152_T1_2mm.nii.gz'), - brain=os.path.join(get_resource_path(), 'MNI152_T1_2mm_brain.nii.gz'), + resolution="2mm", + mask_type="with_ventricles", + mask=os.path.join(get_resource_path(), "MNI152_T1_2mm_brain_mask.nii.gz"), + plot=os.path.join(get_resource_path(), "MNI152_T1_2mm.nii.gz"), + brain=os.path.join(get_resource_path(), "MNI152_T1_2mm_brain.nii.gz"), ) def resolve_mni_path(MNI_Template): """ Helper function to resolve MNI path based on MNI_Template prefs setting.""" - res = MNI_Template['resolution'] - m = MNI_Template['mask_type'] + res = MNI_Template["resolution"] + m = MNI_Template["mask_type"] if not isinstance(res, six.string_types): raise ValueError("resolution must be provided as a string!") if not isinstance(m, six.string_types): raise ValueError("mask_type must be provided as a string!") - if res == '3mm': - if m == 'with_ventricles': - MNI_Template['mask'] = os.path.join(get_resource_path(), 'MNI152_T1_3mm_brain_mask.nii.gz') - elif m == 'no_ventricles': - MNI_Template['mask'] = os.path.join(get_resource_path(), 'MNI152_T1_3mm_brain_mask_no_ventricles.nii.gz') + if res == "3mm": + if m == "with_ventricles": + MNI_Template["mask"] = os.path.join( + get_resource_path(), "MNI152_T1_3mm_brain_mask.nii.gz" + ) + elif m == "no_ventricles": + MNI_Template["mask"] = os.path.join( + get_resource_path(), "MNI152_T1_3mm_brain_mask_no_ventricles.nii.gz" + ) else: - raise ValueError("Available mask_types are 'with_ventricles' or 'no_ventricles'") + raise ValueError( + "Available mask_types are 'with_ventricles' or 'no_ventricles'" + ) - MNI_Template['plot'] = os.path.join(get_resource_path(), 'MNI152_T1_3mm.nii.gz') + MNI_Template["plot"] = os.path.join(get_resource_path(), "MNI152_T1_3mm.nii.gz") - MNI_Template['brain'] = os.path.join(get_resource_path(), 'MNI152_T1_3mm_brain.nii.gz') + MNI_Template["brain"] = os.path.join( + get_resource_path(), "MNI152_T1_3mm_brain.nii.gz" + ) - elif res == '2mm': - if m == 'with_ventricles': - MNI_Template['mask'] = os.path.join(get_resource_path(), 'MNI152_T1_2mm_brain_mask.nii.gz') - elif m == 'no_ventricles': - MNI_Template['mask'] = os.path.join(get_resource_path(), 'MNI152_T1_2mm_brain_mask_no_ventricles.nii.gz') + elif res == "2mm": + if m == "with_ventricles": + MNI_Template["mask"] = os.path.join( + get_resource_path(), "MNI152_T1_2mm_brain_mask.nii.gz" + ) + elif m == "no_ventricles": + MNI_Template["mask"] = os.path.join( + get_resource_path(), "MNI152_T1_2mm_brain_mask_no_ventricles.nii.gz" + ) else: - raise ValueError("Available mask_types are 'with_ventricles' or 'no_ventricles'") + raise ValueError( + "Available mask_types are 'with_ventricles' or 'no_ventricles'" + ) - MNI_Template['plot'] = os.path.join(get_resource_path(), 'MNI152_T1_2mm.nii.gz') + MNI_Template["plot"] = 
os.path.join(get_resource_path(), "MNI152_T1_2mm.nii.gz") - MNI_Template['brain'] = os.path.join(get_resource_path(), 'MNI152_T1_2mm_brain.nii.gz') + MNI_Template["brain"] = os.path.join( + get_resource_path(), "MNI152_T1_2mm_brain.nii.gz" + ) else: raise ValueError("Available templates are '2mm' or '3mm'") return MNI_Template + # class Prefs(object): # # """ diff --git a/nltools/simulator.py b/nltools/simulator.py index ece58ed3..31806eaf 100755 --- a/nltools/simulator.py +++ b/nltools/simulator.py @@ -1,12 +1,12 @@ -''' +""" NeuroLearn Simulator Tools ========================== Tools to simulate multivariate data. -''' +""" -__all__ = ['Simulator', 'SimulateGrid'] +__all__ = ["Simulator", "SimulateGrid"] __author__ = ["Sam Greydanus", "Luke Chang"] __license__ = "MIT" @@ -25,6 +25,7 @@ import csv from copy import deepcopy + class Simulator: def __init__(self, brain_mask=None, output_dir=None): # no scoring param # self.resource_folder = os.path.join(os.getcwd(),'resources') @@ -36,26 +37,30 @@ def __init__(self, brain_mask=None, output_dir=None): # no scoring param if isinstance(brain_mask, str): brain_mask = nib.load(brain_mask) elif brain_mask is None: - brain_mask = nib.load(resolve_mni_path(MNI_Template)['mask']) + brain_mask = nib.load(resolve_mni_path(MNI_Template)["mask"]) elif ~isinstance(brain_mask, nib.nifti1.Nifti1Image): raise ValueError("brain_mask is not a string or a nibabel instance") self.brain_mask = brain_mask self.nifti_masker = NiftiMasker(mask_img=self.brain_mask) def gaussian(self, mu, sigma, i_tot): - """ create a 3D gaussian signal normalized to a given intensity + """create a 3D gaussian signal normalized to a given intensity Args: mu: average value of the gaussian signal (usually set to 0) sigma: standard deviation i_tot: sum total of activation (numerical integral over the gaussian returns this value) """ - x, y, z = np.mgrid[0:self.brain_mask.shape[0], 0:self.brain_mask.shape[1], 0:self.brain_mask.shape[2]] + x, y, z = np.mgrid[ + 0 : self.brain_mask.shape[0], + 0 : self.brain_mask.shape[1], + 0 : self.brain_mask.shape[2], + ] # Need an (N, 3) array of (x, y) pairs. xyz = np.column_stack([x.flat, y.flat, z.flat]) - covariance = np.diag(sigma**2) + covariance = np.diag(sigma ** 2) g = multivariate_normal.pdf(xyz, mean=mu, cov=covariance) # Reshape back to a 3D grid. 
@@ -64,12 +69,12 @@ def gaussian(self, mu, sigma, i_tot): # select only the regions within the brain mask g = np.multiply(self.brain_mask.get_data(), g) # adjust total intensity of gaussian - g = np.multiply(i_tot/np.sum(g), g) + g = np.multiply(i_tot / np.sum(g), g) return g def sphere(self, r, p): - """ create a sphere of given radius at some point p in the brain mask + """create a sphere of given radius at some point p in the brain mask Args: r: radius of the sphere @@ -77,8 +82,10 @@ def sphere(self, r, p): """ dims = self.brain_mask.shape - x, y, z = np.ogrid[-p[0]:dims[0]-p[0], -p[1]:dims[1]-p[1], -p[2]:dims[2]-p[2]] - mask = x*x + y*y + z*z <= r*r + x, y, z = np.ogrid[ + -p[0] : dims[0] - p[0], -p[1] : dims[1] - p[1], -p[2] : dims[2] - p[2] + ] + mask = x * x + y * y + z * z <= r * r activation = np.zeros(dims) activation[mask] = 1 @@ -89,7 +96,7 @@ def sphere(self, r, p): return activation.get_data() def normal_noise(self, mu, sigma): - """ produce a normal noise distribution for all all points in the brain mask + """produce a normal noise distribution for all all points in the brain mask Args: mu: average value of the gaussian signal (usually set to 0) @@ -101,27 +108,29 @@ def normal_noise(self, mu, sigma): if sigma != 0: n = np.random.normal(mu, sigma, vlength) else: - n = [mu]*vlength + n = [mu] * vlength m = self.nifti_masker.inverse_transform(n) # return the 3D numpy matrix of zeros containing the brain mask filled with noise produced over a normal distribution return m.get_data() def to_nifti(self, m): - """ convert a numpy matrix to the nifti format and assign to it the brain_mask's affine matrix + """convert a numpy matrix to the nifti format and assign to it the brain_mask's affine matrix Args: m: the 3D numpy matrix we wish to convert to .nii """ if not (type(m) == np.ndarray and len(m.shape) >= 3): # try 4D # if not (type(m) == np.ndarray and len(m.shape) == 3): - raise ValueError("ERROR: need 3D np.ndarray matrix to create the nifti file") + raise ValueError( + "ERROR: need 3D np.ndarray matrix to create the nifti file" + ) m = m.astype(np.float32) ni = nib.Nifti1Image(m, affine=self.brain_mask.affine) return ni def n_spheres(self, radius, center): - """ generate a set of spheres in the brain mask space + """generate a set of spheres in the brain mask space Args: radius: vector of radius. Will create multiple spheres if len(radius) > 1 @@ -134,19 +143,31 @@ def n_spheres(self, radius, center): if isinstance(radius, int): radius = [radius] if center is None: - center = [[dims[0]/2, dims[1]/2, dims[2]/2] * len(radius)] # default value for centers - elif isinstance(center, list) and isinstance(center[0], int) and len(radius) == 1: + center = [ + [dims[0] / 2, dims[1] / 2, dims[2] / 2] * len(radius) + ] # default value for centers + elif ( + isinstance(center, list) and isinstance(center[0], int) and len(radius) == 1 + ): centers = [center] - if (type(radius)) is list and (type(center) is list) and (len(radius) == len(center)): + if ( + (type(radius)) is list + and (type(center) is list) + and (len(radius) == len(center)) + ): A = np.zeros_like(self.brain_mask.get_data()) for i in range(len(radius)): A = np.add(A, self.sphere(radius[i], center[i])) return A else: - raise ValueError("Data type for sphere or radius(ii) or center(s) not recognized.") + raise ValueError( + "Data type for sphere or radius(ii) or center(s) not recognized." 
+ ) - def create_data(self, levels, sigma, radius=5, center=None, reps=1, output_dir=None): - """ create simulated data with integers + def create_data( + self, levels, sigma, radius=5, center=None, reps=1, output_dir=None + ): + """create simulated data with integers Args: levels: vector of intensities or class labels @@ -165,7 +186,7 @@ def create_data(self, levels, sigma, radius=5, center=None, reps=1, output_dir=N rep_id = [1] * len(levels) for i in range(reps - 1): y = y + levels - rep_id.extend([i+2] * nlevels) + rep_id.extend([i + 2] * nlevels) # Initialize Spheres with options for multiple radii and centers of the spheres (or just an int and a 3D list) A = self.n_spheres(radius, center) @@ -197,13 +218,17 @@ def create_data(self, levels, sigma, radius=5, center=None, reps=1, output_dir=N # Write Data to files if requested if output_dir is not None and isinstance(output_dir, six.string_types): - NF_list.write(os.path.join(output_dir, 'data.nii.gz')) - self.y.to_csv(os.path.join(output_dir, 'y.csv'), index=None, header=False) - self.rep_id.to_csv(os.path.join(output_dir, 'rep_id.csv'), index=None, header=False) + NF_list.write(os.path.join(output_dir, "data.nii.gz")) + self.y.to_csv(os.path.join(output_dir, "y.csv"), index=None, header=False) + self.rep_id.to_csv( + os.path.join(output_dir, "rep_id.csv"), index=None, header=False + ) return dat - def create_cov_data(self, cor, cov, sigma, mask=None, reps=1, n_sub=1, output_dir=None): - """ create continuous simulated data with covariance + def create_cov_data( + self, cor, cov, sigma, mask=None, reps=1, n_sub=1, output_dir=None + ): + """create continuous simulated data with covariance Args: cor: amount of covariance between each voxel and Y variable @@ -228,26 +253,41 @@ def create_cov_data(self, cor, cov, sigma, mask=None, reps=1, n_sub=1, output_di flat_sphere = self.nifti_masker.fit_transform(mask) n_vox = np.sum(flat_sphere == 1) - cov_matrix = np.ones([n_vox+1, n_vox+1]) * cov + cov_matrix = np.ones([n_vox + 1, n_vox + 1]) * cov cov_matrix[0, :] = cor # set covariance with y cov_matrix[:, 0] = cor # set covariance with all other voxels np.fill_diagonal(cov_matrix, 1) # set diagonal to 1 - mv_sim = np.random.multivariate_normal(np.zeros([n_vox+1]), cov_matrix, size=reps) + mv_sim = np.random.multivariate_normal( + np.zeros([n_vox + 1]), cov_matrix, size=reps + ) print(mv_sim) y = mv_sim[:, 0] self.y = y mv_sim = mv_sim[:, 1:] new_dat = np.ones([mv_sim.shape[0], flat_sphere.shape[1]]) new_dat[:, np.where(flat_sphere == 1)[1]] = mv_sim - self.data = self.nifti_masker.inverse_transform(np.add(new_dat, np.random.standard_normal(size=new_dat.shape)*sigma)) # add noise scaled by sigma + self.data = self.nifti_masker.inverse_transform( + np.add(new_dat, np.random.standard_normal(size=new_dat.shape) * sigma) + ) # add noise scaled by sigma self.rep_id = [1] * len(y) if n_sub > 1: self.y = list(self.y) for s in range(1, n_sub): - self.data = nib.concat_images([self.data, self.nifti_masker.inverse_transform(np.add(new_dat, np.random.standard_normal(size=new_dat.shape)*sigma))], axis=3) # add noise scaled by sigma - noise_y = list(y + np.random.randn(len(y))*sigma) + self.data = nib.concat_images( + [ + self.data, + self.nifti_masker.inverse_transform( + np.add( + new_dat, + np.random.standard_normal(size=new_dat.shape) * sigma, + ) + ), + ], + axis=3, + ) # add noise scaled by sigma + noise_y = list(y + np.random.randn(len(y)) * sigma) self.y = self.y + noise_y - self.rep_id = self.rep_id + [s+1] * len(mv_sim[:, 0]) + self.rep_id = 
self.rep_id + [s + 1] * len(mv_sim[:, 0]) self.y = np.array(self.y) # # Old method in 4 D space - much slower @@ -271,17 +311,30 @@ def create_cov_data(self, cor, cov, sigma, mask=None, reps=1, n_sub=1, output_di if isinstance(output_dir, six.string_types): if not os.path.isdir(output_dir): os.makedirs(output_dir) - self.data.to_filename(os.path.join(output_dir, 'maskdata_cor' + str(cor) + "_cov" + str(cov) + '_sigma' + str(sigma) + '.nii.gz')) - y_file = open(os.path.join(output_dir, 'y.csv'), 'wb') + self.data.to_filename( + os.path.join( + output_dir, + "maskdata_cor" + + str(cor) + + "_cov" + + str(cov) + + "_sigma" + + str(sigma) + + ".nii.gz", + ) + ) + y_file = open(os.path.join(output_dir, "y.csv"), "wb") wr = csv.writer(y_file, quoting=csv.QUOTE_ALL) wr.writerow(self.y) - rep_id_file = open(os.path.join(output_dir, 'rep_id.csv'), 'wb') + rep_id_file = open(os.path.join(output_dir, "rep_id.csv"), "wb") wr = csv.writer(rep_id_file, quoting=csv.QUOTE_ALL) wr.writerow(self.rep_id) - def create_ncov_data(self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_dir=None): - """ create continuous simulated data with covariance + def create_ncov_data( + self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_dir=None + ): + """create continuous simulated data with covariance Args: cor: amount of covariance between each voxel and Y variable (an int or a vector) @@ -307,20 +360,36 @@ def create_ncov_data(self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_ if type(cov) is float or type(cor) is int: cov = [cov] if not len(cor) == len(masks): - raise ValueError("cor matrix has incompatible dimensions for mask list of length " + str(len(masks))) - if not len(cov) == len(masks) or len(masks) == 0 or not len(cov[0]) == len(masks): - raise ValueError("cov matrix has incompatible dimensions for mask list of length " + str(len(masks))) + raise ValueError( + "cor matrix has incompatible dimensions for mask list of length " + + str(len(masks)) + ) + if ( + not len(cov) == len(masks) + or len(masks) == 0 + or not len(cov[0]) == len(masks) + ): + raise ValueError( + "cov matrix has incompatible dimensions for mask list of length " + + str(len(masks)) + ) # Create n_reps with cov for each voxel within sphere # Build covariance matrix with each variable correlated with y amount 'cor' and each other amount 'cov' flat_masks = self.nifti_masker.fit_transform(masks) print("Building correlation/covariation matrix...") - n_vox = np.sum(flat_masks == 1, axis=1) # this is a list, each entry contains number voxels for given mask + n_vox = np.sum( + flat_masks == 1, axis=1 + ) # this is a list, each entry contains number voxels for given mask if 0 in n_vox: - raise ValueError("one or more processing mask does not fit inside the brain mask") + raise ValueError( + "one or more processing mask does not fit inside the brain mask" + ) - cov_matrix = np.zeros([np.sum(n_vox)+1, np.sum(n_vox)+1]) # one big covariance matrix + cov_matrix = np.zeros( + [np.sum(n_vox) + 1, np.sum(n_vox) + 1] + ) # one big covariance matrix for i, nv in enumerate(n_vox): cstart = np.sum(n_vox[:i]) + 1 cstop = cstart + nv @@ -329,12 +398,16 @@ def create_ncov_data(self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_ for j in range(len(masks)): rstart = np.sum(n_vox[:j]) + 1 rstop = rstart + nv - cov_matrix[cstart:cstop, rstart:rstop] = cov[i][j] # set covariance of this mask's voxels with each of other masks + cov_matrix[cstart:cstop, rstart:rstop] = cov[i][ + j + ] # set covariance of this mask's voxels with each of other 
masks np.fill_diagonal(cov_matrix, 1) # set diagonal to 1 # these operations happen in one vector that we'll later split into the separate regions print("Generating multivariate normal distribution...") - mv_sim_l = np.random.multivariate_normal(np.zeros([np.sum(n_vox)+1]), cov_matrix, size=reps) + mv_sim_l = np.random.multivariate_normal( + np.zeros([np.sum(n_vox) + 1]), cov_matrix, size=reps + ) print(mv_sim_l) self.y = mv_sim_l[:, 0] @@ -346,10 +419,14 @@ def create_ncov_data(self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_ start = int(np.sum(n_vox[:mask_i])) stop = int(start + n_vox[mask_i]) print(rep, start, stop) - new_dats[rep, np.where(flat_masks[mask_i, :] == 1)] = mv_sim[rep, start:stop] - - noise = np.random.standard_normal(size=new_dats.shape[1])*sigma - self.data = self.nifti_masker.inverse_transform(np.add(new_dats, noise)) # append 3d simulated data to list + new_dats[rep, np.where(flat_masks[mask_i, :] == 1)] = mv_sim[ + rep, start:stop + ] + + noise = np.random.standard_normal(size=new_dats.shape[1]) * sigma + self.data = self.nifti_masker.inverse_transform( + np.add(new_dats, noise) + ) # append 3d simulated data to list self.rep_id = [1] * len(self.y) print("Generating subject-level noise...") @@ -359,13 +436,13 @@ def create_ncov_data(self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_ y = list(self.y) for s in range(1, n_sub): # ask Luke about this new version - noise = np.random.standard_normal(size=new_dats.shape[1])*sigma + noise = np.random.standard_normal(size=new_dats.shape[1]) * sigma next_subj = self.nifti_masker.inverse_transform(np.add(new_dats, noise)) self.data = nib.concat_images([self.data, next_subj], axis=3) - y += list(self.y + np.random.randn(len(self.y))*sigma) + y += list(self.y + np.random.randn(len(self.y)) * sigma) print("y == " + str(len(y))) - self.rep_id += [s+1] * len(mv_sim[:, 0]) + self.rep_id += [s + 1] * len(mv_sim[:, 0]) self.y = np.array(y) print("Saving to " + str(output_dir)) @@ -375,18 +452,34 @@ def create_ncov_data(self, cor, cov, sigma, masks=None, reps=1, n_sub=1, output_ if type(output_dir) is str: if not os.path.isdir(output_dir): os.makedirs(output_dir) - self.data.to_filename(os.path.join(output_dir, 'simulated_data_' + str(sigma) + 'sigma_' + str(n_sub) + 'subj.nii.gz')) - y_file = open(os.path.join(output_dir, 'y.csv'), 'wb') + self.data.to_filename( + os.path.join( + output_dir, + "simulated_data_" + + str(sigma) + + "sigma_" + + str(n_sub) + + "subj.nii.gz", + ) + ) + y_file = open(os.path.join(output_dir, "y.csv"), "wb") wr = csv.writer(y_file, quoting=csv.QUOTE_ALL) wr.writerow(self.y) - rep_id_file = open(os.path.join(output_dir, 'rep_id.csv'), 'wb') + rep_id_file = open(os.path.join(output_dir, "rep_id.csv"), "wb") wr = csv.writer(rep_id_file, quoting=csv.QUOTE_ALL) wr.writerow(self.rep_id) class SimulateGrid(object): - def __init__(self, grid_width=100, signal_width=20, n_subjects=20, sigma=1, signal_amplitude=None): + def __init__( + self, + grid_width=100, + signal_width=20, + n_subjects=20, + sigma=1, + signal_amplitude=None, + ): self.isfit = False self.thresholded = None @@ -401,70 +494,88 @@ def __init__(self, grid_width=100, signal_width=20, n_subjects=20, sigma=1, sign self.data = self._create_noise() if signal_amplitude is not None: - self.add_signal(signal_amplitude=signal_amplitude, signal_width=signal_width) + self.add_signal( + signal_amplitude=signal_amplitude, signal_width=signal_width + ) else: self.signal_amplitude = None self.signal_mask = None def _create_noise(self): - 
'''Generate simualted data using object parameters + """Generate simualted data using object parameters Returns: simulated_data (np.array): simulated noise using object parameters - ''' - return np.random.randn(self.grid_width, self.grid_width, self.n_subjects) * self.sigma + """ + return ( + np.random.randn(self.grid_width, self.grid_width, self.n_subjects) + * self.sigma + ) def add_signal(self, signal_width=20, signal_amplitude=1): - '''Add rectangular signal to self.data + """Add rectangular signal to self.data Args: signal_width (int): width of signal box signal_amplitude (int): intensity of signal - ''' + """ if signal_width >= self.grid_width: - raise ValueError('Signal width must be smaller than total grid.') + raise ValueError("Signal width must be smaller than total grid.") self.signal_amplitude = signal_amplitude self.create_mask(signal_width) - signal = np.repeat(np.expand_dims(self.signal_mask, axis=2), self.n_subjects, axis=2) + signal = np.repeat( + np.expand_dims(self.signal_mask, axis=2), self.n_subjects, axis=2 + ) self.data = deepcopy(self.data) + signal * self.signal_amplitude def create_mask(self, signal_width): - '''Create a mask for where the signal is located in grid.''' + """Create a mask for where the signal is located in grid.""" mask = np.zeros((self.grid_width, self.grid_width)) - mask[int((np.floor((self.grid_width/2)-(signal_width/2)))):int(np.ceil((self.grid_width/2)+(signal_width/2))), int((np.floor((self.grid_width/2)-(signal_width/2)))):int(np.ceil((self.grid_width/2)+(signal_width/2)))] = 1 + mask[ + int((np.floor((self.grid_width / 2) - (signal_width / 2)))) : int( + np.ceil((self.grid_width / 2) + (signal_width / 2)) + ), + int((np.floor((self.grid_width / 2) - (signal_width / 2)))) : int( + np.ceil((self.grid_width / 2) + (signal_width / 2)) + ), + ] = 1 self.signal_width = signal_width self.signal_mask = mask def _run_ttest(self, data): - '''Helper function to run ttest on data''' - flattened = data.reshape(self.grid_width*self.grid_width, self.n_subjects) + """Helper function to run ttest on data""" + flattened = data.reshape(self.grid_width * self.grid_width, self.n_subjects) t, p = ttest_1samp(flattened.T, 0) t = np.reshape(t, (self.grid_width, self.grid_width)) p = np.reshape(p, (self.grid_width, self.grid_width)) return (t, p) def _run_permutation(self, data): - '''Helper function to run a nonparametric one-sample permutation test''' - flattened = data.reshape(self.grid_width*self.grid_width, self.n_subjects) + """Helper function to run a nonparametric one-sample permutation test""" + flattened = data.reshape(self.grid_width * self.grid_width, self.n_subjects) stats_all = [] for i in range(flattened.shape[0]): - stats = one_sample_permutation(flattened[i,:]) + stats = one_sample_permutation(flattened[i, :]) stats_all.append(stats) - mean = np.reshape(np.array([x['mean'] for x in stats_all]), (self.grid_width, self.grid_width)) - p = np.reshape(np.array([x['p'] for x in stats_all]), (self.grid_width, self.grid_width)) + mean = np.reshape( + np.array([x["mean"] for x in stats_all]), (self.grid_width, self.grid_width) + ) + p = np.reshape( + np.array([x["p"] for x in stats_all]), (self.grid_width, self.grid_width) + ) return (mean, p) def fit(self): - '''Run ttest on self.data''' + """Run ttest on self.data""" if self.isfit: raise ValueError("Can't fit because ttest has already been run.") self.t_values, self.p_values = self._run_ttest(self.data) self.isfit = True def _threshold_simulation(self, t, p, threshold, threshold_type, correction=None): - 
'''Helper function to threshold simulation + """Helper function to threshold simulation Args: threshold (float): threshold to apply to simulation @@ -472,21 +583,21 @@ def _threshold_simulation(self, t, p, threshold, threshold_type, correction=None Returns: threshold_data (np.array): thresholded data - ''' - if correction == 'fdr': - if threshold_type != 'q': + """ + if correction == "fdr": + if threshold_type != "q": raise ValueError("Must specify a q value when using fdr") - if correction == 'permutation': - if threshold_type != 'p': + if correction == "permutation": + if threshold_type != "p": raise ValueError("Must specify a p value when using permutation") thresholded = deepcopy(t) - if threshold_type == 't': + if threshold_type == "t": thresholded[np.abs(t) < threshold] = 0 - elif threshold_type == 'p': + elif threshold_type == "p": thresholded[p > threshold] = 0 - elif threshold_type == 'q': + elif threshold_type == "q": fdr_threshold = fdr(p.flatten(), q=threshold) if fdr_threshold < 0: thresholded = np.zeros(thresholded.shape) @@ -497,21 +608,23 @@ def _threshold_simulation(self, t, p, threshold, threshold_type, correction=None return thresholded def threshold_simulation(self, threshold, threshold_type, correction=None): - '''Threshold simulation + """Threshold simulation Args: threshold (float): threshold to apply to simulation threshhold_type (str): type of threshold to use can be a specific t-value or p-value ['t', 'p', 'q'] - ''' + """ if not self.isfit: raise ValueError("Must fit model before thresholding.") - if correction == 'fdr': + if correction == "fdr": self.corrected_threshold = fdr(self.p_values.flatten()) self.correction = correction - self.thresholded = self._threshold_simulation(self.t_values, self.p_values, threshold, threshold_type, correction) + self.thresholded = self._threshold_simulation( + self.t_values, self.p_values, threshold, threshold_type, correction + ) self.threshold = threshold self.threshold_type = threshold_type @@ -520,90 +633,140 @@ def threshold_simulation(self, threshold, threshold_type, correction=None): self.tp_percent = self._calc_true_positives(self.thresholded) def _calc_false_positives(self, thresholded): - '''Calculate percent of grid containing false positives + """Calculate percent of grid containing false positives Args: thresholded (np.array): thresholded grid Returns: fp_percent (float): percentage of grid that contains false positives - ''' + """ if self.signal_mask is None: - fp_percent = np.sum(thresholded != 0)/(self.grid_width**2) + fp_percent = np.sum(thresholded != 0) / (self.grid_width ** 2) else: - fp_percent = np.sum(thresholded[self.signal_mask != 1] != 0)/(self.grid_width**2 - self.signal_width**2) + fp_percent = np.sum(thresholded[self.signal_mask != 1] != 0) / ( + self.grid_width ** 2 - self.signal_width ** 2 + ) return fp_percent def _calc_true_positives(self, thresholded): - '''Calculate percent of mask containing true positives + """Calculate percent of mask containing true positives Args: thresholded (np.array): thresholded grid Returns: tp_percent (float): percentage of grid that contains true positives - ''' + """ if self.signal_mask is None: - raise ValueError('No mask exists, run add_signal() first.') - tp_percent = np.sum(thresholded[self.signal_mask == 1] != 0)/(self.signal_width**2) + raise ValueError("No mask exists, run add_signal() first.") + tp_percent = np.sum(thresholded[self.signal_mask == 1] != 0) / ( + self.signal_width ** 2 + ) return tp_percent def _calc_false_discovery_rate(self, thresholded): - 
'''Calculate percent of activated voxels that are false positives + """Calculate percent of activated voxels that are false positives Args: thresholded (np.array): thresholded grid Returns: fp_percent (float): percentage of activated voxels that are false positives - ''' + """ if self.signal_mask is None: - raise ValueError('No mask exists, run add_signal() first.') - fp_percent = np.sum(thresholded[self.signal_mask == 0] > 0)/np.sum(thresholded > 0) + raise ValueError("No mask exists, run add_signal() first.") + fp_percent = np.sum(thresholded[self.signal_mask == 0] > 0) / np.sum( + thresholded > 0 + ) return fp_percent - def run_multiple_simulations(self, threshold, threshold_type, n_simulations=100, correction=None): - '''This method will run multiple simulations to calculate overall false positive rate''' + def run_multiple_simulations( + self, threshold, threshold_type, n_simulations=100, correction=None + ): + """This method will run multiple simulations to calculate overall false positive rate""" if self.signal_mask is None: - simulations = [self._run_ttest(self._create_noise()) for x in range(n_simulations)] + simulations = [ + self._run_ttest(self._create_noise()) for x in range(n_simulations) + ] else: - signal = np.repeat(np.expand_dims(self.signal_mask, axis=2), self.n_subjects, axis=2) * self.signal_amplitude - simulations = [self._run_ttest(self._create_noise() + signal) for x in range(n_simulations)] - - self.multiple_thresholded = [self._threshold_simulation(s[0], s[1], threshold, threshold_type, correction=correction) for s in simulations] - self.multiple_fp = np.array([self._calc_false_positives(x) for x in self.multiple_thresholded]) + signal = ( + np.repeat( + np.expand_dims(self.signal_mask, axis=2), self.n_subjects, axis=2 + ) + * self.signal_amplitude + ) + simulations = [ + self._run_ttest(self._create_noise() + signal) + for x in range(n_simulations) + ] + + self.multiple_thresholded = [ + self._threshold_simulation( + s[0], s[1], threshold, threshold_type, correction=correction + ) + for s in simulations + ] + self.multiple_fp = np.array( + [self._calc_false_positives(x) for x in self.multiple_thresholded] + ) self.fpr = np.mean(np.array([x for x in self.multiple_fp]) > 0) if self.signal_mask is not None: - self.multiple_tp = np.array([self._calc_true_positives(x) for x in self.multiple_thresholded]) - self.multiple_fdr = np.array([self._calc_false_discovery_rate(x) for x in self.multiple_thresholded]) - - def plot_grid_simulation(self, threshold, threshold_type, n_simulations=100, correction=None): - '''Create a plot of the simulations''' + self.multiple_tp = np.array( + [self._calc_true_positives(x) for x in self.multiple_thresholded] + ) + self.multiple_fdr = np.array( + [self._calc_false_discovery_rate(x) for x in self.multiple_thresholded] + ) + + def plot_grid_simulation( + self, threshold, threshold_type, n_simulations=100, correction=None + ): + """Create a plot of the simulations""" if not self.isfit: self.fit() - self.threshold_simulation(threshold=threshold, threshold_type=threshold_type, correction=correction) - self.run_multiple_simulations(threshold=threshold, threshold_type=threshold_type, n_simulations=n_simulations) + self.threshold_simulation( + threshold=threshold, + threshold_type=threshold_type, + correction=correction, + ) + self.run_multiple_simulations( + threshold=threshold, + threshold_type=threshold_type, + n_simulations=n_simulations, + ) if self.signal_mask is None: - f,a = plt.subplots(ncols=3, figsize=(15, 5)) + f, a = 
plt.subplots(ncols=3, figsize=(15, 5)) else: - f,a = plt.subplots(ncols=4, figsize=(18, 5)) + f, a = plt.subplots(ncols=4, figsize=(18, 5)) a[3].hist(self.multiple_tp) - a[3].set_ylabel('Frequency', fontsize=18) - a[3].set_xlabel('Percent Signal Recovery', fontsize=18) - a[3].set_title('Average Signal Recovery', fontsize=18) + a[3].set_ylabel("Frequency", fontsize=18) + a[3].set_xlabel("Percent Signal Recovery", fontsize=18) + a[3].set_title("Average Signal Recovery", fontsize=18) a[0].imshow(self.t_values) - a[0].set_title('Random Noise', fontsize=18) + a[0].set_title("Random Noise", fontsize=18) a[0].axes.get_xaxis().set_visible(False) a[0].axes.get_yaxis().set_visible(False) a[1].imshow(self.thresholded) - a[1].set_title(f'Threshold: {threshold_type} = {threshold}', fontsize=18) + a[1].set_title(f"Threshold: {threshold_type} = {threshold}", fontsize=18) a[1].axes.get_xaxis().set_visible(False) a[1].axes.get_yaxis().set_visible(False) - a[2].plot(binom.pmf(np.arange(0, n_simulations, 1), n_simulations, np.mean(self.multiple_fp>0))) - a[2].axvline(x=np.mean(self.fpr) * n_simulations, color='r', linestyle='dashed', linewidth=2) - a[2].set_title(f'False Positive Rate = {self.fpr:.2f}', fontsize=18) - a[2].set_ylabel('Probability', fontsize=18) - a[2].set_xlabel('False Positive Rate', fontsize=18) + a[2].plot( + binom.pmf( + np.arange(0, n_simulations, 1), + n_simulations, + np.mean(self.multiple_fp > 0), + ) + ) + a[2].axvline( + x=np.mean(self.fpr) * n_simulations, + color="r", + linestyle="dashed", + linewidth=2, + ) + a[2].set_title(f"False Positive Rate = {self.fpr:.2f}", fontsize=18) + a[2].set_ylabel("Probability", fontsize=18) + a[2].set_xlabel("False Positive Rate", fontsize=18) plt.tight_layout() diff --git a/nltools/stats.py b/nltools/stats.py index 3c17ff4c..d0b6655d 100644 --- a/nltools/stats.py +++ b/nltools/stats.py @@ -1,50 +1,52 @@ from __future__ import division -''' +""" NeuroLearn Statistics Tools =========================== Tools to help with statistical analyses. 
-''' - -__all__ = ['pearson', - 'zscore', - 'fdr', - 'holm_bonf', - 'threshold', - 'multi_threshold', - 'winsorize', - 'trim', - 'calc_bpm', - 'downsample', - 'upsample', - 'fisher_r_to_z', - 'one_sample_permutation', - 'two_sample_permutation', - 'correlation_permutation', - 'matrix_permutation', - 'make_cosine_basis', - 'summarize_bootstrap', - 'regress', - 'procrustes', - 'procrustes_distance', - 'align', - 'find_spikes', - 'correlation', - 'distance_correlation', - 'transform_pairwise', - 'double_center', - 'u_center', - '_bootstrap_isc', - 'isc', - 'isfc', - 'isps', - '_compute_matrix_correlation', - '_phase_mean_angle', - '_phase_vector_length', - '_butter_bandpass_filter', - '_phase_rayleigh_p'] +""" + +__all__ = [ + "pearson", + "zscore", + "fdr", + "holm_bonf", + "threshold", + "multi_threshold", + "winsorize", + "trim", + "calc_bpm", + "downsample", + "upsample", + "fisher_r_to_z", + "one_sample_permutation", + "two_sample_permutation", + "correlation_permutation", + "matrix_permutation", + "make_cosine_basis", + "summarize_bootstrap", + "regress", + "procrustes", + "procrustes_distance", + "align", + "find_spikes", + "correlation", + "distance_correlation", + "transform_pairwise", + "double_center", + "u_center", + "_bootstrap_isc", + "isc", + "isfc", + "isps", + "_compute_matrix_correlation", + "_phase_mean_angle", + "_phase_vector_length", + "_butter_bandpass_filter", + "_phase_rayleigh_p", +] import numpy as np from numpy.fft import fft, ifft @@ -70,46 +72,46 @@ MAX_INT = np.iinfo(np.int32).max # Optional dependencies -sm = attempt_to_import('statsmodels.tsa.arima_model', name='sm') +sm = attempt_to_import("statsmodels.tsa.arima_model", name="sm") def pearson(x, y): - """ Correlates row vector x with each row vector in 2D array y. + """Correlates row vector x with each row vector in 2D array y. From neurosynth.stats.py - author: Tal Yarkoni """ data = np.vstack((x, y)) ms = data.mean(axis=1)[(slice(None, None, None), None)] datam = data - ms - datass = np.sqrt(np.sum(datam*datam, axis=1)) + datass = np.sqrt(np.sum(datam * datam, axis=1)) # datass = np.sqrt(ss(datam, axis=1)) temp = np.dot(datam[1:], datam[0].T) return temp / (datass[1:] * datass[0]) def zscore(df): - """ zscore every column in a pandas dataframe or series. + """zscore every column in a pandas dataframe or series. - Args: - df: (pd.DataFrame) Pandas DataFrame instance + Args: + df: (pd.DataFrame) Pandas DataFrame instance - Returns: - z_data: (pd.DataFrame) z-scored pandas DataFrame or series instance + Returns: + z_data: (pd.DataFrame) z-scored pandas DataFrame or series instance """ if isinstance(df, pd.DataFrame): - return df.apply(lambda x: (x - x.mean())/x.std()) + return df.apply(lambda x: (x - x.mean()) / x.std()) elif isinstance(df, pd.Series): - return (df-np.mean(df))/np.std(df) + return (df - np.mean(df)) / np.std(df) else: raise ValueError("Data is not a Pandas DataFrame or Series instance") -def fdr(p, q=.05): - """ Determine FDR threshold given a p value array and desired false +def fdr(p, q=0.05): + """Determine FDR threshold given a p value array and desired false discovery rate q. 
Written by Tal Yarkoni Args: - p: (np.array) vector of p-values + p: (np.array) vector of p-values q: (float) false discovery rate level Returns: @@ -119,17 +121,17 @@ def fdr(p, q=.05): """ if not isinstance(p, np.ndarray): - raise ValueError('Make sure vector of p-values is a numpy array') + raise ValueError("Make sure vector of p-values is a numpy array") s = np.sort(p) nvox = p.shape[0] - null = np.array(range(1, nvox + 1), dtype='float') * q / nvox + null = np.array(range(1, nvox + 1), dtype="float") * q / nvox below = np.where(s <= null)[0] return s[max(below)] if len(below) else -1 -def holm_bonf(p, alpha=.05): - """ Compute corrected p-values based on the Holm-Bonferroni method, i.e. step-down procedure applying iteratively less correction to highest p-values. A bit more conservative than fdr, but much more powerful thanvanilla bonferroni. +def holm_bonf(p, alpha=0.05): + """Compute corrected p-values based on the Holm-Bonferroni method, i.e. step-down procedure applying iteratively less correction to highest p-values. A bit more conservative than fdr, but much more powerful thanvanilla bonferroni. Args: p: (np.array) vector of p-values @@ -142,17 +144,17 @@ def holm_bonf(p, alpha=.05): """ if not isinstance(p, np.ndarray): - raise ValueError('Make sure vector of p-values is a numpy array') + raise ValueError("Make sure vector of p-values is a numpy array") s = np.sort(p) nvox = p.shape[0] - null = .05 / (nvox - np.arange(1, nvox + 1) + 1) + null = 0.05 / (nvox - np.arange(1, nvox + 1) + 1) below = np.where(s <= null)[0] return s[max(below)] if len(below) else -1 -def threshold(stat, p, thr=.05, return_mask=False): - """ Threshold test image by p-value from p image +def threshold(stat, p, thr=0.05, return_mask=False): + """Threshold test image by p-value from p image Args: stat: (Brain_Data) Brain_Data instance of arbitrary statistic metric @@ -168,10 +170,10 @@ def threshold(stat, p, thr=.05, return_mask=False): from nltools.data import Brain_Data if not isinstance(stat, Brain_Data): - raise ValueError('Make sure stat is a Brain_Data instance') + raise ValueError("Make sure stat is a Brain_Data instance") if not isinstance(p, Brain_Data): - raise ValueError('Make sure p is a Brain_Data instance') + raise ValueError("Make sure p is a Brain_Data instance") # Create Mask mask = deepcopy(p) @@ -195,7 +197,7 @@ def threshold(stat, p, thr=.05, return_mask=False): def multi_threshold(t_map, p_map, thresh): - """ Threshold test image by multiple p-value from p image + """Threshold test image by multiple p-value from p image Args: stat: (Brain_Data) Brain_Data instance of arbitrary statistic metric @@ -210,13 +212,13 @@ def multi_threshold(t_map, p_map, thresh): from nltools.data import Brain_Data if not isinstance(t_map, Brain_Data): - raise ValueError('Make sure stat is a Brain_Data instance') + raise ValueError("Make sure stat is a Brain_Data instance") if not isinstance(p_map, Brain_Data): - raise ValueError('Make sure p is a Brain_Data instance') + raise ValueError("Make sure p is a Brain_Data instance") if not isinstance(thresh, list): - raise ValueError('Make sure thresh is a list of p-values') + raise ValueError("Make sure thresh is a list of p-values") affine = t_map.to_nifti().get_affine() pos_out = np.zeros(t_map.to_nifti().shape) @@ -228,79 +230,86 @@ def multi_threshold(t_map, p_map, thresh): t_neg = deepcopy(t_pos) t_pos.data[t.data > 0] = 1 t_neg.data[t.data < 0] = 1 - pos_out = pos_out+t_pos.to_nifti().get_data() - neg_out = neg_out+t_neg.to_nifti().get_data() - pos_out = 
pos_out + neg_out*-1 + pos_out = pos_out + t_pos.to_nifti().get_data() + neg_out = neg_out + t_neg.to_nifti().get_data() + pos_out = pos_out + neg_out * -1 return Brain_Data(nib.Nifti1Image(pos_out, affine)) def winsorize(data, cutoff=None, replace_with_cutoff=True): - ''' Winsorize a Pandas DataFrame or Series with the largest/lowest value not considered outlier + """Winsorize a Pandas DataFrame or Series with the largest/lowest value not considered outlier - Args: - data: (pd.DataFrame, pd.Series) data to winsorize - cutoff: (dict) a dictionary with keys {'std':[low,high]} or - {'quantile':[low,high]} - replace_with_cutoff: (bool) If True, replace outliers with cutoff. - If False, replaces outliers with closest - existing values; (default: False) - Returns: - out: (pd.DataFrame, pd.Series) winsorized data - ''' - return _transform_outliers(data, cutoff, replace_with_cutoff=replace_with_cutoff, method='winsorize') + Args: + data: (pd.DataFrame, pd.Series) data to winsorize + cutoff: (dict) a dictionary with keys {'std':[low,high]} or + {'quantile':[low,high]} + replace_with_cutoff: (bool) If True, replace outliers with cutoff. + If False, replaces outliers with closest + existing values; (default: False) + Returns: + out: (pd.DataFrame, pd.Series) winsorized data + """ + return _transform_outliers( + data, cutoff, replace_with_cutoff=replace_with_cutoff, method="winsorize" + ) def trim(data, cutoff=None): - ''' Trim a Pandas DataFrame or Series by replacing outlier values with NaNs + """Trim a Pandas DataFrame or Series by replacing outlier values with NaNs - Args: - data: (pd.DataFrame, pd.Series) data to trim - cutoff: (dict) a dictionary with keys {'std':[low,high]} or - {'quantile':[low,high]} - Returns: - out: (pd.DataFrame, pd.Series) trimmed data - ''' - return _transform_outliers(data, cutoff, replace_with_cutoff=None, method='trim') + Args: + data: (pd.DataFrame, pd.Series) data to trim + cutoff: (dict) a dictionary with keys {'std':[low,high]} or + {'quantile':[low,high]} + Returns: + out: (pd.DataFrame, pd.Series) trimmed data + """ + return _transform_outliers(data, cutoff, replace_with_cutoff=None, method="trim") def _transform_outliers(data, cutoff, replace_with_cutoff, method): - ''' This function is not exposed to user but is called by either trim - or winsorize. - - Args: - data: (pd.DataFrame, pd.Series) data to transform - cutoff: (dict) a dictionary with keys {'std':[low,high]} or - {'quantile':[low,high]} - replace_with_cutoff: (bool) If True, replace outliers with cutoff. - If False, replaces outliers with closest - existing values. (default: False) - method: 'winsorize' or 'trim' - - Returns: - out: (pd.DataFrame, pd.Series) transformed data - ''' + """This function is not exposed to user but is called by either trim + or winsorize. + + Args: + data: (pd.DataFrame, pd.Series) data to transform + cutoff: (dict) a dictionary with keys {'std':[low,high]} or + {'quantile':[low,high]} + replace_with_cutoff: (bool) If True, replace outliers with cutoff. + If False, replaces outliers with closest + existing values. 
(default: False) + method: 'winsorize' or 'trim' + + Returns: + out: (pd.DataFrame, pd.Series) transformed data + """ df = data.copy() # To not overwrite data make a copy - def _transform_outliers_sub(data, cutoff, replace_with_cutoff, method='trim'): + def _transform_outliers_sub(data, cutoff, replace_with_cutoff, method="trim"): if not isinstance(data, pd.Series): - raise ValueError('Make sure that you are applying winsorize to a pandas dataframe or series.') + raise ValueError( + "Make sure that you are applying winsorize to a pandas dataframe or series." + ) if isinstance(cutoff, dict): # calculate cutoff values - if 'quantile' in cutoff: - q = data.quantile(cutoff['quantile']) - elif 'std' in cutoff: - std = [data.mean()-data.std()*cutoff['std'][0], data.mean()+data.std()*cutoff['std'][1]] - q = pd.Series(index=cutoff['std'], data=std) + if "quantile" in cutoff: + q = data.quantile(cutoff["quantile"]) + elif "std" in cutoff: + std = [ + data.mean() - data.std() * cutoff["std"][0], + data.mean() + data.std() * cutoff["std"][1], + ] + q = pd.Series(index=cutoff["std"], data=std) # if replace_with_cutoff is false, replace with true existing values closest to cutoff - if method == 'winsorize' and not replace_with_cutoff: + if method == "winsorize" and not replace_with_cutoff: q.iloc[0] = data[data > q.iloc[0]].min() q.iloc[1] = data[data < q.iloc[1]].max() else: - raise ValueError('cutoff must be a dictionary with quantile or std keys.') - if method == 'trim': + raise ValueError("cutoff must be a dictionary with quantile or std keys.") + if method == "trim": data[data < q.iloc[0]] = np.nan data[data > q.iloc[1]] = np.nan - elif method == 'winsorize': + elif method == "winsorize": if isinstance(q, pd.Series) and len(q) == 2: data[data < q.iloc[0]] = q.iloc[0] data[data > q.iloc[1]] = q.iloc[1] @@ -309,104 +318,115 @@ def _transform_outliers_sub(data, cutoff, replace_with_cutoff, method='trim'): # transform each column if a dataframe, if series just transform data if isinstance(df, pd.DataFrame): for col in df.columns: - df.loc[:, col] = _transform_outliers_sub(df.loc[:, col], cutoff=cutoff, replace_with_cutoff=replace_with_cutoff, method=method) + df.loc[:, col] = _transform_outliers_sub( + df.loc[:, col], + cutoff=cutoff, + replace_with_cutoff=replace_with_cutoff, + method=method, + ) return df elif isinstance(df, pd.Series): - return _transform_outliers_sub(df, cutoff=cutoff, replace_with_cutoff=replace_with_cutoff, method=method) + return _transform_outliers_sub( + df, cutoff=cutoff, replace_with_cutoff=replace_with_cutoff, method=method + ) else: - raise ValueError('Data must be a pandas DataFrame or Series') + raise ValueError("Data must be a pandas DataFrame or Series") def calc_bpm(beat_interval, sampling_freq): - ''' Calculate instantaneous BPM from beat to beat interval + """Calculate instantaneous BPM from beat to beat interval - Args: - beat_interval: (int) number of samples in between each beat - (typically R-R Interval) - sampling_freq: (float) sampling frequency in Hz + Args: + beat_interval: (int) number of samples in between each beat + (typically R-R Interval) + sampling_freq: (float) sampling frequency in Hz - Returns: - bpm: (float) beats per minute for time interval - ''' - return 60*sampling_freq*(1/(beat_interval)) + Returns: + bpm: (float) beats per minute for time interval + """ + return 60 * sampling_freq * (1 / (beat_interval)) -def downsample(data, sampling_freq=None, target=None, target_type='samples', - method='mean'): - ''' Downsample pandas to a new target 
frequency or number of samples - using averaging. +def downsample( + data, sampling_freq=None, target=None, target_type="samples", method="mean" +): + """Downsample pandas to a new target frequency or number of samples + using averaging. - Args: - data: (pd.DataFrame, pd.Series) data to downsample - sampling_freq: (float) Sampling frequency of data in hertz - target: (float) downsampling target - target_type: type of target can be [samples,seconds,hz] - method: (str) type of downsample method ['mean','median'], - default: mean + Args: + data: (pd.DataFrame, pd.Series) data to downsample + sampling_freq: (float) Sampling frequency of data in hertz + target: (float) downsampling target + target_type: type of target can be [samples,seconds,hz] + method: (str) type of downsample method ['mean','median'], + default: mean - Returns: - out: (pd.DataFrame, pd.Series) downsmapled data + Returns: + out: (pd.DataFrame, pd.Series) downsmapled data - ''' + """ if not isinstance(data, (pd.DataFrame, pd.Series)): - raise ValueError('Data must by a pandas DataFrame or Series instance.') - if not (method == 'median') | (method == 'mean'): + raise ValueError("Data must by a pandas DataFrame or Series instance.") + if not (method == "median") | (method == "mean"): raise ValueError("Metric must be either 'mean' or 'median' ") - if target_type == 'samples': + if target_type == "samples": n_samples = target - elif target_type == 'seconds': - n_samples = target*sampling_freq - elif target_type == 'hz': - n_samples = sampling_freq/target + elif target_type == "seconds": + n_samples = target * sampling_freq + elif target_type == "hz": + n_samples = sampling_freq / target else: - raise ValueError('Make sure target_type is "samples", "seconds", ' - ' or "hz".') + raise ValueError('Make sure target_type is "samples", "seconds", ' ' or "hz".') - idx = np.sort(np.repeat(np.arange(1, data.shape[0]/n_samples, 1), n_samples)) + idx = np.sort(np.repeat(np.arange(1, data.shape[0] / n_samples, 1), n_samples)) # if data.shape[0] % n_samples: if data.shape[0] > len(idx): - idx = np.concatenate([idx, np.repeat(idx[-1]+1, data.shape[0]-len(idx))]) - if method == 'mean': + idx = np.concatenate([idx, np.repeat(idx[-1] + 1, data.shape[0] - len(idx))]) + if method == "mean": return data.groupby(idx).mean().reset_index(drop=True) - elif method == 'median': + elif method == "median": return data.groupby(idx).median().reset_index(drop=True) -def upsample(data, sampling_freq=None, target=None, target_type='samples', method='linear'): - ''' Upsample pandas to a new target frequency or number of samples using interpolation. +def upsample( + data, sampling_freq=None, target=None, target_type="samples", method="linear" +): + """Upsample pandas to a new target frequency or number of samples using interpolation. 
- Args: - data: (pd.DataFrame, pd.Series) data to upsample - (Note: will drop non-numeric columns from DataFrame) - sampling_freq: Sampling frequency of data in hertz - target: (float) upsampling target - target_type: (str) type of target can be [samples,seconds,hz] - method: (str) ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'] - where 'zero', 'slinear', 'quadratic' and 'cubic' - refer to a spline interpolation of zeroth, first, - second or third order (default: linear) - Returns: - upsampled pandas object + Args: + data: (pd.DataFrame, pd.Series) data to upsample + (Note: will drop non-numeric columns from DataFrame) + sampling_freq: Sampling frequency of data in hertz + target: (float) upsampling target + target_type: (str) type of target can be [samples,seconds,hz] + method: (str) ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'] + where 'zero', 'slinear', 'quadratic' and 'cubic' + refer to a spline interpolation of zeroth, first, + second or third order (default: linear) + Returns: + upsampled pandas object - ''' + """ - methods = ['linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'] + methods = ["linear", "nearest", "zero", "slinear", "quadratic", "cubic"] if method not in methods: - raise ValueError("Method must be 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'") + raise ValueError( + "Method must be 'linear', 'nearest', 'zero', 'slinear', 'quadratic', 'cubic'" + ) - if target_type == 'samples': + if target_type == "samples": n_samples = target - elif target_type == 'seconds': - n_samples = target*sampling_freq - elif target_type == 'hz': - n_samples = float(sampling_freq)/float(target) + elif target_type == "seconds": + n_samples = target * sampling_freq + elif target_type == "hz": + n_samples = float(sampling_freq) / float(target) else: raise ValueError('Make sure target_type is "samples", "seconds", or "hz".') orig_spacing = np.arange(0, data.shape[0], 1) - new_spacing = np.arange(0, data.shape[0]-1, n_samples) + new_spacing = np.arange(0, data.shape[0] - 1, n_samples) if isinstance(data, pd.Series): interpolate = interp1d(orig_spacing, data, kind=method) @@ -414,39 +434,43 @@ def upsample(data, sampling_freq=None, target=None, target_type='samples', metho elif isinstance(data, pd.DataFrame): numeric_data = data._get_numeric_data() if data.shape[1] != numeric_data.shape[1]: - warnings.warn('Dropping %s non-numeric columns' % (data.shape[1] - numeric_data.shape[1]), UserWarning) + warnings.warn( + "Dropping %s non-numeric columns" + % (data.shape[1] - numeric_data.shape[1]), + UserWarning, + ) out = pd.DataFrame(columns=numeric_data.columns, index=None) for i, x in numeric_data.iteritems(): interpolate = interp1d(orig_spacing, x, kind=method) out.loc[:, i] = interpolate(new_spacing) return out else: - raise ValueError('Data must by a pandas DataFrame or Series instance.') + raise ValueError("Data must by a pandas DataFrame or Series instance.") def fisher_r_to_z(r): - ''' Use Fisher transformation to convert correlation to z score ''' + """ Use Fisher transformation to convert correlation to z score """ - return .5*np.log((1+r)/(1-r)) + return 0.5 * np.log((1 + r) / (1 - r)) -def correlation(data1, data2, metric='pearson'): - ''' This function calculates the correlation between data1 and data2 +def correlation(data1, data2, metric="pearson"): + """This function calculates the correlation between data1 and data2 - Args: - data1: (np.array) x - data2: (np.array) y - metric: (str) type of correlation ["spearman" or "pearson" or 
"kendall"] - Returns: - r: (np.array) correlations - p: (float) p-value + Args: + data1: (np.array) x + data2: (np.array) y + metric: (str) type of correlation ["spearman" or "pearson" or "kendall"] + Returns: + r: (np.array) correlations + p: (float) p-value - ''' - if metric == 'spearman': + """ + if metric == "spearman": func = spearmanr - elif metric == 'pearson': + elif metric == "pearson": func = pearsonr - elif metric == 'kendall': + elif metric == "kendall": func = kendalltau else: raise ValueError('metric must be "spearman" or "pearson" or "kendall"') @@ -455,35 +479,37 @@ def correlation(data1, data2, metric='pearson'): def _permute_sign(data, random_state=None): random_state = check_random_state(random_state) - return np.mean(data*random_state.choice([1, -1], len(data))) + return np.mean(data * random_state.choice([1, -1], len(data))) def _permute_group(data, random_state=None): random_state = check_random_state(random_state) - perm_label = random_state.permutation(data['Group']) - return (np.mean(data.loc[perm_label == 1, 'Values']) - np.mean(data.loc[perm_label == 0, 'Values'])) + perm_label = random_state.permutation(data["Group"]) + return np.mean(data.loc[perm_label == 1, "Values"]) - np.mean( + data.loc[perm_label == 0, "Values"] + ) def _permute_func(data1, data2, metric, random_state=None): - """ Helper function for matrix_permutation. - Can take a functon, that would be repeated for calculation. - Args: - data1: (np.array) squareform matrix - data2: flattened np array (same size upper triangle of data1) - metric: similarity/distance function from scipy.stats (e.g., spearman, pearson, kendall etc) - random_state: random_state instance for permutation - Returns: - r: r value of function + """Helper function for matrix_permutation. + Can take a functon, that would be repeated for calculation. + Args: + data1: (np.array) squareform matrix + data2: flattened np array (same size upper triangle of data1) + metric: similarity/distance function from scipy.stats (e.g., spearman, pearson, kendall etc) + random_state: random_state instance for permutation + Returns: + r: r value of function """ random_state = check_random_state(random_state) data_row_id = range(data1.shape[0]) - permuted_ix = random_state.choice(data_row_id, - size=len(data_row_id), replace=False) + permuted_ix = random_state.choice(data_row_id, size=len(data_row_id), replace=False) new_fmri_dist = data1.iloc[permuted_ix, permuted_ix].values new_fmri_dist = new_fmri_dist[np.triu_indices(new_fmri_dist.shape[0], k=1)] return correlation(new_fmri_dist, data2, metric=metric)[0] + def _calc_pvalue(all_p, stat, tail): """Calculates p value based on distribution of correlations This function is called by the permutation functions @@ -491,164 +517,203 @@ def _calc_pvalue(all_p, stat, tail): stat: actual value being tested, i.e., stats['correlation'] or stats['mean'] tail: (int) either 2 or 1 for two-tailed p-value or one-tailed """ - + denom = float(len(all_p)) + 1 if tail == 1: numer = np.sum(all_p >= stat) + 1 if stat >= 0 else np.sum(all_p <= stat) + 1 elif tail == 2: numer = np.sum(np.abs(all_p) >= np.abs(stat)) + 1 else: - raise ValueError('tail must be either 1 or 2') + raise ValueError("tail must be either 1 or 2") return numer / denom -def one_sample_permutation(data, n_permute=5000, tail=2, n_jobs=-1, return_perms=False, random_state=None): - ''' One sample permutation test using randomization. 
+def one_sample_permutation( + data, n_permute=5000, tail=2, n_jobs=-1, return_perms=False, random_state=None +): + """One sample permutation test using randomization. - Args: - data: (pd.DataFrame, pd.Series, np.array) data to permute - n_permute: (int) number of permutations - tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) - n_jobs: (int) The number of CPUs to use to do the computation. - -1 means all CPUs. - return_parms: (bool) Return the permutation distribution along with the p-value; default False - random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) + Args: + data: (pd.DataFrame, pd.Series, np.array) data to permute + n_permute: (int) number of permutations + tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) + n_jobs: (int) The number of CPUs to use to do the computation. + -1 means all CPUs. + return_parms: (bool) Return the permutation distribution along with the p-value; default False + random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) - Returns: - stats: (dict) dictionary of permutation results ['mean','p'] + Returns: + stats: (dict) dictionary of permutation results ['mean','p'] - ''' + """ random_state = check_random_state(random_state) seeds = random_state.randint(MAX_INT, size=n_permute) data = np.array(data) - stats = {'mean': np.nanmean(data)} - all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_sign)(data, - random_state=seeds[i]) for i in range(n_permute)) - stats['p'] = _calc_pvalue(all_p, stats['mean'], tail) + stats = {"mean": np.nanmean(data)} + all_p = Parallel(n_jobs=n_jobs)( + delayed(_permute_sign)(data, random_state=seeds[i]) for i in range(n_permute) + ) + stats["p"] = _calc_pvalue(all_p, stats["mean"], tail) if return_perms: - stats['perm_dist'] = all_p + stats["perm_dist"] = all_p return stats -def two_sample_permutation(data1, data2, n_permute=5000, - tail=2, n_jobs=-1, return_perms=False, random_state=None): - ''' Independent sample permutation test. +def two_sample_permutation( + data1, + data2, + n_permute=5000, + tail=2, + n_jobs=-1, + return_perms=False, + random_state=None, +): + """Independent sample permutation test. - Args: - data1: (pd.DataFrame, pd.Series, np.array) dataset 1 to permute - data2: (pd.DataFrame, pd.Series, np.array) dataset 2 to permute - n_permute: (int) number of permutations - tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) - n_jobs: (int) The number of CPUs to use to do the computation. - -1 means all CPUs. - return_parms: (bool) Return the permutation distribution along with the p-value; default False - Returns: - stats: (dict) dictionary of permutation results ['mean','p'] + Args: + data1: (pd.DataFrame, pd.Series, np.array) dataset 1 to permute + data2: (pd.DataFrame, pd.Series, np.array) dataset 2 to permute + n_permute: (int) number of permutations + tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) + n_jobs: (int) The number of CPUs to use to do the computation. + -1 means all CPUs. 
+ return_parms: (bool) Return the permutation distribution along with the p-value; default False + Returns: + stats: (dict) dictionary of permutation results ['mean','p'] - ''' + """ random_state = check_random_state(random_state) seeds = random_state.randint(MAX_INT, size=n_permute) - stats = {'mean': np.nanmean(data1) - np.nanmean(data2)} - data = pd.DataFrame(data={'Values': data1, 'Group': np.ones(len(data1))}) - data = data.append(pd.DataFrame(data={ - 'Values': data2, - 'Group': np.zeros(len(data2))})) - all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_group)(data, - random_state=seeds[i]) for i in range(n_permute)) + stats = {"mean": np.nanmean(data1) - np.nanmean(data2)} + data = pd.DataFrame(data={"Values": data1, "Group": np.ones(len(data1))}) + data = data.append( + pd.DataFrame(data={"Values": data2, "Group": np.zeros(len(data2))}) + ) + all_p = Parallel(n_jobs=n_jobs)( + delayed(_permute_group)(data, random_state=seeds[i]) for i in range(n_permute) + ) - stats['p'] = _calc_pvalue(all_p, stats['mean'], tail) + stats["p"] = _calc_pvalue(all_p, stats["mean"], tail) if return_perms: - stats['perm_dist'] = all_p + stats["perm_dist"] = all_p return stats - - -def correlation_permutation(data1, data2, method='permute', n_permute=5000, metric='spearman', - tail=2, n_jobs=-1, return_perms=False, random_state=None): - ''' Compute correlation and calculate p-value using permutation methods. - - 'permute' method randomly shuffles one of the vectors. This method is recommended - for independent data. For timeseries data we recommend using 'circle_shift' or - 'phase_randomize' methods. - - Args: - - data1: (pd.DataFrame, pd.Series, np.array) dataset 1 to permute - data2: (pd.DataFrame, pd.Series, np.array) dataset 2 to permute - n_permute: (int) number of permutations - metric: (str) type of association metric ['spearman','pearson', - 'kendall'] - method: (str) type of permutation ['permute', 'circle_shift', 'phase_randomize'] - random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) - tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) - n_jobs: (int) The number of CPUs to use to do the computation. - -1 means all CPUs. - return_parms: (bool) Return the permutation distribution along with the p-value; default False - - Returns: - - stats: (dict) dictionary of permutation results ['correlation','p'] - - ''' + + +def correlation_permutation( + data1, + data2, + method="permute", + n_permute=5000, + metric="spearman", + tail=2, + n_jobs=-1, + return_perms=False, + random_state=None, +): + """Compute correlation and calculate p-value using permutation methods. + + 'permute' method randomly shuffles one of the vectors. This method is recommended + for independent data. For timeseries data we recommend using 'circle_shift' or + 'phase_randomize' methods. + + Args: + + data1: (pd.DataFrame, pd.Series, np.array) dataset 1 to permute + data2: (pd.DataFrame, pd.Series, np.array) dataset 2 to permute + n_permute: (int) number of permutations + metric: (str) type of association metric ['spearman','pearson', + 'kendall'] + method: (str) type of permutation ['permute', 'circle_shift', 'phase_randomize'] + random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) + tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) + n_jobs: (int) The number of CPUs to use to do the computation. + -1 means all CPUs. 
+ return_parms: (bool) Return the permutation distribution along with the p-value; default False + + Returns: + + stats: (dict) dictionary of permutation results ['correlation','p'] + + """ if len(data1) != len(data2): - raise ValueError('Make sure that data1 is the same length as data2') - - if method not in ['permute', 'circle_shift', 'phase_randomize']: - raise ValueError("Make sure that method is ['permute', 'circle_shift', 'phase_randomize']") + raise ValueError("Make sure that data1 is the same length as data2") + + if method not in ["permute", "circle_shift", "phase_randomize"]: + raise ValueError( + "Make sure that method is ['permute', 'circle_shift', 'phase_randomize']" + ) random_state = check_random_state(random_state) data1 = np.array(data1) data2 = np.array(data2) - stats = {'correlation':correlation(data1, data2, metric=metric)[0]} - - if method == 'permute': - all_p = Parallel(n_jobs=n_jobs)(delayed(correlation)( - random_state.permutation(data1), data2, metric=metric) - for i in range(n_permute)) - elif method == 'circle_shift': - all_p = Parallel(n_jobs=n_jobs)(delayed(correlation)( - circle_shift(data1, random_state=random_state), data2, metric=metric) - for i in range(n_permute)) - elif method == 'phase_randomize': - all_p = Parallel(n_jobs=n_jobs)(delayed(correlation)( - phase_randomize(data1, random_state=random_state), phase_randomize(data2), metric=metric) - for i in range(n_permute)) + stats = {"correlation": correlation(data1, data2, metric=metric)[0]} + + if method == "permute": + all_p = Parallel(n_jobs=n_jobs)( + delayed(correlation)(random_state.permutation(data1), data2, metric=metric) + for i in range(n_permute) + ) + elif method == "circle_shift": + all_p = Parallel(n_jobs=n_jobs)( + delayed(correlation)( + circle_shift(data1, random_state=random_state), data2, metric=metric + ) + for i in range(n_permute) + ) + elif method == "phase_randomize": + all_p = Parallel(n_jobs=n_jobs)( + delayed(correlation)( + phase_randomize(data1, random_state=random_state), + phase_randomize(data2), + metric=metric, + ) + for i in range(n_permute) + ) all_p = [x[0] for x in all_p] - stats['p'] = _calc_pvalue(all_p, stats['correlation'], tail) + stats["p"] = _calc_pvalue(all_p, stats["correlation"], tail) if return_perms: - stats['perm_dist'] = all_p + stats["perm_dist"] = all_p return stats -def matrix_permutation(data1, data2, n_permute=5000, metric='spearman', - tail=2, n_jobs=-1, return_perms=False, random_state=None): - """ Permute 2-dimensional matrix correlation (mantel test). - - Chen, G. et al. (2016). Untangling the relatedness among correlations, - part I: nonparametric approaches to inter-subject correlation analysis - at the group level. Neuroimage, 142, 248-259. - - Args: - data1: (pd.DataFrame, np.array) square matrix - data2: (pd.DataFrame, np.array) square matrix - n_permute: (int) number of permutations - metric: (str) type of association metric ['spearman','pearson', - 'kendall'] - tail: (int) either 1 for one-tail or 2 for two-tailed test - (default: 2) - n_jobs: (int) The number of CPUs to use to do the computation. - -1 means all CPUs. - return_parms: (bool) Return the permutation distribution along with the p-value; default False - - Returns: - stats: (dict) dictionary of permutation results ['correlation','p'] +def matrix_permutation( + data1, + data2, + n_permute=5000, + metric="spearman", + tail=2, + n_jobs=-1, + return_perms=False, + random_state=None, +): + """Permute 2-dimensional matrix correlation (mantel test). + + Chen, G. et al. (2016). 
Untangling the relatedness among correlations, + part I: nonparametric approaches to inter-subject correlation analysis + at the group level. Neuroimage, 142, 248-259. + + Args: + data1: (pd.DataFrame, np.array) square matrix + data2: (pd.DataFrame, np.array) square matrix + n_permute: (int) number of permutations + metric: (str) type of association metric ['spearman','pearson', + 'kendall'] + tail: (int) either 1 for one-tail or 2 for two-tailed test + (default: 2) + n_jobs: (int) The number of CPUs to use to do the computation. + -1 means all CPUs. + return_parms: (bool) Return the permutation distribution along with the p-value; default False + + Returns: + stats: (dict) dictionary of permutation results ['correlation','p'] """ random_state = check_random_state(random_state) seeds = random_state.randint(MAX_INT, size=n_permute) @@ -657,18 +722,22 @@ def matrix_permutation(data1, data2, n_permute=5000, metric='spearman', data1 = sq_data1[np.triu_indices(sq_data1.shape[0], k=1)] data2 = sq_data2[np.triu_indices(sq_data2.shape[0], k=1)] - stats = {'correlation': correlation(data1, data2, metric=metric)[0]} + stats = {"correlation": correlation(data1, data2, metric=metric)[0]} - all_p = Parallel(n_jobs=n_jobs)(delayed(_permute_func)( - pd.DataFrame(sq_data1), data2, metric=metric, random_state=seeds[i]) - for i in range(n_permute)) - stats['p'] = _calc_pvalue(all_p, stats['correlation'], tail) + all_p = Parallel(n_jobs=n_jobs)( + delayed(_permute_func)( + pd.DataFrame(sq_data1), data2, metric=metric, random_state=seeds[i] + ) + for i in range(n_permute) + ) + stats["p"] = _calc_pvalue(all_p, stats["correlation"], tail) if return_perms: - stats['perm_dist'] = all_p + stats["perm_dist"] = all_p return stats + def make_cosine_basis(nsamples, sampling_freq, filter_length, unit_scale=True, drop=0): - """ Create a series of cosine basis functions for a discrete cosine + """Create a series of cosine basis functions for a discrete cosine transform. Based off of implementation in spm_filter and spm_dctmtx because scipy dct can only apply transforms but not return the basis functions. Like SPM, does not add constant (i.e. intercept), but does @@ -690,7 +759,7 @@ def make_cosine_basis(nsamples, sampling_freq, filter_length, unit_scale=True, d """ # Figure out number of basis functions to create - order = int(np.fix(2 * (nsamples * sampling_freq)/filter_length + 1)) + order = int(np.fix(2 * (nsamples * sampling_freq) / filter_length + 1)) n = np.arange(nsamples) @@ -698,20 +767,24 @@ def make_cosine_basis(nsamples, sampling_freq, filter_length, unit_scale=True, d C = np.zeros((len(n), order)) # Add constant - C[:, 0] = np.ones((1, len(n)))/np.sqrt(nsamples) + C[:, 0] = np.ones((1, len(n))) / np.sqrt(nsamples) # Insert higher order cosine basis functions for i in range(1, order): - C[:, i] = np.sqrt(2./nsamples) * np.cos(np.pi*(2*n+1) * i/(2*nsamples)) + C[:, i] = np.sqrt(2.0 / nsamples) * np.cos( + np.pi * (2 * n + 1) * i / (2 * nsamples) + ) # Drop intercept ala SPM C = C[:, 1:] if C.size == 0: - raise ValueError('Basis function creation failed! nsamples is too small for requested filter_length.') + raise ValueError( + "Basis function creation failed! nsamples is too small for requested filter_length." + ) if unit_scale: - C *= 1. 
/ C[0, 0] + C *= 1.0 / C[0, 0] C = C[:, drop:] @@ -719,7 +792,7 @@ def make_cosine_basis(nsamples, sampling_freq, filter_length, unit_scale=True, d def transform_pairwise(X, y): - '''Transforms data into pairs with balanced labels for ranking + """Transforms data into pairs with balanced labels for ranking Transforms a n-class ranking problem into a two-class classification problem. Subclasses implementing particular strategies for choosing pairs should override this method. @@ -748,7 +821,7 @@ def transform_pairwise(X, y): Output class labels, where classes have values {-1, +1} If y was shape (n_samples, 2), then returns (k, 2) with groups on the second dimension. - ''' + """ X_new, y_new, y_group = [], [], [] y_ndim = y.ndim @@ -764,15 +837,15 @@ def transform_pairwise(X, y): y_group.append(y[i, 1]) # output balanced classes if y_new[-1] != (-1) ** k: - y_new[-1] = - y_new[-1] - X_new[-1] = - X_new[-1] + y_new[-1] = -y_new[-1] + X_new[-1] = -X_new[-1] if y_ndim == 1: return np.asarray(X_new), np.asarray(y_new).ravel() elif y_ndim == 2: return np.asarray(X_new), np.vstack((np.asarray(y_new), np.asarray(y_group))).T -def _robust_estimator(vals, X, robust_estimator='hc0', nlags=1): +def _robust_estimator(vals, X, robust_estimator="hc0", nlags=1): """ Computes robust sandwich estimators for standard errors used in OLS computation. Types include: 'hc0': Huber (1980) sandwich estimator to return robust standard error estimates. @@ -795,7 +868,7 @@ def _robust_estimator(vals, X, robust_estimator='hc0', nlags=1): """ - if robust_estimator not in ['hc0', 'hc3', 'hac']: + if robust_estimator not in ["hc0", "hc3", "hac"]: raise ValueError("robust_estimator must be one of hc0, hc3 or hac") # Make a sandwich! @@ -803,23 +876,23 @@ def _robust_estimator(vals, X, robust_estimator='hc0', nlags=1): bread = np.linalg.pinv(np.dot(X.T, X)) # Then we need meat - if robust_estimator == 'hc0': - V = np.diag(vals**2) + if robust_estimator == "hc0": + V = np.diag(vals ** 2) meat = np.dot(np.dot(X.T, V), X) - elif robust_estimator == 'hc3': - V = np.diag(vals**2)/(1-np.diag(np.dot(X, np.dot(bread, X.T))))**2 + elif robust_estimator == "hc3": + V = np.diag(vals ** 2) / (1 - np.diag(np.dot(X, np.dot(bread, X.T)))) ** 2 meat = np.dot(np.dot(X.T, V), X) - elif robust_estimator == 'hac': - weights = 1 - np.arange(nlags+1.)/(nlags+1.) + elif robust_estimator == "hac": + weights = 1 - np.arange(nlags + 1.0) / (nlags + 1.0) # First compute lag 0 - V = np.diag(vals**2) + V = np.diag(vals ** 2) meat = weights[0] * np.dot(np.dot(X.T, V), X) # Now loop over additional lags - for l in range(1, nlags+1): + for l in range(1, nlags + 1): V = np.diag(vals[l:] * vals[:-l]) meat_1 = np.dot(np.dot(X[l:].T, V), X[:-l]) @@ -834,7 +907,7 @@ def _robust_estimator(vals, X, robust_estimator='hc0', nlags=1): def summarize_bootstrap(data, save_weights=False): - """ Calculate summary of bootstrap samples + """Calculate summary of bootstrap samples Args: sample: (Brain_Data) Brain_Data instance of samples @@ -851,11 +924,11 @@ def summarize_bootstrap(data, save_weights=False): wz = deepcopy(wmean) wz.data = wmean.data / wstd.data wp = deepcopy(wmean) - wp.data = 2*(1-norm.cdf(np.abs(wz.data))) + wp.data = 2 * (1 - norm.cdf(np.abs(wz.data))) # Create outputs - output = {'Z': wz, 'p': wp, 'mean': wmean} + output = {"Z": wz, "p": wp, "mean": wmean} if save_weights: - output['samples'] = data + output["samples"] = data return output @@ -863,31 +936,51 @@ def _arma_func(X, Y, idx=None, **kwargs): """ Fit an ARMA(p,q) model. 
If Y is a matrix and not a vector, expects an idx argument that refers to columns of Y. Used by regress(). """ - method = kwargs.pop('method', 'css-mle') - order = kwargs.pop('order', (1, 1)) + method = kwargs.pop("method", "css-mle") + order = kwargs.pop("order", (1, 1)) - maxiter = kwargs.pop('maxiter', 50) - disp = kwargs.pop('disp', -1) - start_ar_lags = kwargs.pop('start_ar_lags', order[0]+1) - transparams = kwargs.pop('transparams', False) - trend = kwargs.pop('trend', 'nc') + maxiter = kwargs.pop("maxiter", 50) + disp = kwargs.pop("disp", -1) + start_ar_lags = kwargs.pop("start_ar_lags", order[0] + 1) + transparams = kwargs.pop("transparams", False) + trend = kwargs.pop("trend", "nc") if len(Y.shape) == 2: model = sm.tsa.arima_model.ARMA(endog=Y[:, idx], exog=X.values, order=order) else: model = sm.tsa.arima_model.ARMA(endog=Y, exog=X.values, order=order) try: - res = model.fit(trend=trend, method=method, transparams=transparams, - maxiter=maxiter, disp=disp, start_ar_lags=start_ar_lags, **kwargs) + res = model.fit( + trend=trend, + method=method, + transparams=transparams, + maxiter=maxiter, + disp=disp, + start_ar_lags=start_ar_lags, + **kwargs + ) except: - res = model.fit(trend=trend, method=method, transparams=transparams, - maxiter=maxiter, disp=disp, start_ar_lags=start_ar_lags, start_params=np.repeat(1., X.shape[1]+2)) - - return (res.params[:-2], res.tvalues[:-2], res.pvalues[:-2], res.df_resid, res.resid) - - -def regress(X, Y, mode='ols', stats='full', **kwargs): - """ This is a flexible function to run several types of regression models provided X and Y numpy arrays. Y can be a 1d numpy array or 2d numpy array. In the latter case, results will be output with shape 1 x Y.shape[1], in other words fitting a separate regression model to each column of Y. + res = model.fit( + trend=trend, + method=method, + transparams=transparams, + maxiter=maxiter, + disp=disp, + start_ar_lags=start_ar_lags, + start_params=np.repeat(1.0, X.shape[1] + 2), + ) + + return ( + res.params[:-2], + res.tvalues[:-2], + res.pvalues[:-2], + res.df_resid, + res.resid, + ) + + +def regress(X, Y, mode="ols", stats="full", **kwargs): + """This is a flexible function to run several types of regression models provided X and Y numpy arrays. Y can be a 1d numpy array or 2d numpy array. In the latter case, results will be output with shape 1 x Y.shape[1], in other words fitting a separate regression model to each column of Y. Does NOT add an intercept automatically to the X matrix before fitting like some other software packages. This is left up to the user. 
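A short sketch of calling regress with an explicitly added intercept column, since the docstring notes that no intercept is added automatically; the simulated design and coefficients are illustrative.

import numpy as np
from nltools.stats import regress

rng = np.random.RandomState(0)
n_obs = 100
predictor = rng.randn(n_obs)

# Build the design matrix by hand, including the intercept column
X = np.column_stack([np.ones(n_obs), predictor])
Y = 2.0 + 0.5 * predictor + rng.randn(n_obs)

betas, tstats, pvals, dfs, residuals = regress(X, Y, mode="ols", stats="full")
print(betas)  # roughly [2.0, 0.5]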
@@ -939,15 +1032,15 @@ def regress(X, Y, mode='ols', stats='full', **kwargs): """ if not isinstance(mode, six.string_types): - raise ValueError('mode must be a string') + raise ValueError("mode must be a string") if not isinstance(stats, six.string_types): - raise ValueError('stats must be a string') + raise ValueError("stats must be a string") - if mode not in ['ols', 'robust', 'arma']: + if mode not in ["ols", "robust", "arma"]: raise ValueError("Mode must be one of 'ols','robust' or 'arma'") - if stats not in ['full', 'betas', 'tstats']: + if stats not in ["full", "betas", "tstats"]: raise ValueError("stats must be one of 'full', 'betas', 'tstats'") # Make sure Y is a 2-D array @@ -955,52 +1048,61 @@ def regress(X, Y, mode='ols', stats='full', **kwargs): Y = Y[:, np.newaxis] # Compute standard errors based on regression mode - if mode == 'ols' or mode == 'robust': + if mode == "ols" or mode == "robust": b = np.dot(np.linalg.pinv(X), Y) - + # Return betas and stop other computations if that's all that's requested - if stats == 'betas': + if stats == "betas": return b.squeeze() res = Y - np.dot(X, b) # Vanilla OLS - if mode == 'ols': + if mode == "ols": sigma = np.std(res, axis=0, ddof=X.shape[1]) - stderr = np.sqrt(np.diag(np.linalg.pinv(np.dot(X.T, X))))[:, np.newaxis] * sigma[np.newaxis, :] + stderr = ( + np.sqrt(np.diag(np.linalg.pinv(np.dot(X.T, X))))[:, np.newaxis] + * sigma[np.newaxis, :] + ) # OLS with robust sandwich estimator based standard-errors - elif mode == 'robust': - robust_estimator = kwargs.pop('robust_estimator', 'hc0') - nlags = kwargs.pop('nlags', 1) + elif mode == "robust": + robust_estimator = kwargs.pop("robust_estimator", "hc0") + nlags = kwargs.pop("nlags", 1) axis_func = [_robust_estimator, 0, res, X, robust_estimator, nlags] stderr = np.apply_along_axis(*axis_func) # Then only compute t-stats at voxels where the standard error is at least .000001 t = np.zeros_like(b) - t[stderr > 1.e-6] = b[stderr > 1.e-6] / stderr[stderr > 1.e-6] + t[stderr > 1.0e-6] = b[stderr > 1.0e-6] / stderr[stderr > 1.0e-6] # Return betas and ts and stop other computations if that's all that's requested - if stats == 'tstats': + if stats == "tstats": return b.squeeze(), t.squeeze() - df = np.array([X.shape[0]-X.shape[1]] * t.shape[1]) - p = 2*(1-t_dist.cdf(np.abs(t), df)) + df = np.array([X.shape[0] - X.shape[1]] * t.shape[1]) + p = 2 * (1 - t_dist.cdf(np.abs(t), df)) # ARMA regression - elif mode == 'arma': + elif mode == "arma": if sm is None: - raise ImportError("statsmodels>=0.9.0 is required for ARMA regression. Please install this package manually or install nltools with optional arguments: pip install 'nltools[arma]'") - n_jobs = kwargs.pop('n_jobs', -1) - backend = kwargs.pop('backend', 'threading') - max_nbytes = kwargs.pop('max_nbytes', 1e8) - verbose = kwargs.pop('verbose', 0) + raise ImportError( + "statsmodels>=0.9.0 is required for ARMA regression. 
Please install this package manually or install nltools with optional arguments: pip install 'nltools[arma]'" + ) + n_jobs = kwargs.pop("n_jobs", -1) + backend = kwargs.pop("backend", "threading") + max_nbytes = kwargs.pop("max_nbytes", 1e8) + verbose = kwargs.pop("verbose", 0) # Parallelize if Y vector contains more than 1 column if len(Y.shape) == 2: - if backend == 'threading' and n_jobs == -1: + if backend == "threading" and n_jobs == -1: n_jobs = 10 - par_for = Parallel(n_jobs=n_jobs, verbose=verbose, backend=backend, max_nbytes=max_nbytes) - out_arma = par_for(delayed(_arma_func)(X, Y, idx=i, **kwargs) for i in range(Y.shape[-1])) + par_for = Parallel( + n_jobs=n_jobs, verbose=verbose, backend=backend, max_nbytes=max_nbytes + ) + out_arma = par_for( + delayed(_arma_func)(X, Y, idx=i, **kwargs) for i in range(Y.shape[-1]) + ) b = np.column_stack([elem[0] for elem in out_arma]) t = np.column_stack([elem[1] for elem in out_arma]) @@ -1014,7 +1116,9 @@ def regress(X, Y, mode='ols', stats='full', **kwargs): return b.squeeze(), t.squeeze(), p.squeeze(), df.squeeze(), res.squeeze() -def regress_permutation(X, Y, n_permute=5000, tail=2, random_state=None, verbose=False, **kwargs): +def regress_permutation( + X, Y, n_permute=5000, tail=2, random_state=None, verbose=False, **kwargs +): """ Permuted regression. Permute the design matrix each time by shuffling rows before running the estimation. @@ -1029,7 +1133,7 @@ def regress_permutation(X, Y, n_permute=5000, tail=2, random_state=None, verbose """ random_state = check_random_state(random_state) - b, t = regress(X, Y, stats='tstats', **kwargs) + b, t = regress(X, Y, stats="tstats", **kwargs) p = np.zeros_like(t) if tail == 1: pos_mask = np.where(t >= 0) @@ -1037,10 +1141,12 @@ def regress_permutation(X, Y, n_permute=5000, tail=2, random_state=None, verbose elif tail != 2: raise ValueError("tail must be 1 or 2") - if (X.shape[1] == 1) and (all(X[:].values == 1.)): + if (X.shape[1] == 1) and (all(X[:].values == 1.0)): if verbose: print("Running 1-sample sign flip test") - func = lambda x: (x.squeeze() * random_state.choice([1, -1], x.shape[0]))[:, np.newaxis] + func = lambda x: (x.squeeze() * random_state.choice([1, -1], x.shape[0]))[ + :, np.newaxis + ] else: if verbose: print("Running permuted OLS") @@ -1050,7 +1156,7 @@ def regress_permutation(X, Y, n_permute=5000, tail=2, random_state=None, verbose # inv = np.linalg.pinv(X) for _ in range(n_permute): - _, _t = regress(func(X.values), Y, stats='tstats', **kwargs) + _, _t = regress(func(X.values), Y, stats="tstats", **kwargs) if tail == 2: p += np.abs(_t) >= np.abs(t) elif tail == 1: @@ -1063,90 +1169,93 @@ def regress_permutation(X, Y, n_permute=5000, tail=2, random_state=None, verbose return b, t, p -def align(data, method='deterministic_srm', n_features=None, axis=0, - *args, **kwargs): - ''' Align subject data into a common response model. +def align(data, method="deterministic_srm", n_features=None, axis=0, *args, **kwargs): + """Align subject data into a common response model. - Can be used to hyperalign source data to target data using - Hyperalignment from Dartmouth (i.e., procrustes transformation; see - nltools.stats.procrustes) or Shared Response Model from Princeton (see - nltools.external.srm). (see nltools.data.Brain_Data.align for aligning - a single Brain object to another). Common Model is shared response - model or centered target data. Transformed data can be back projected to - original data using Tranformation matrix. 
Inputs must be a list of Brain_Data - instances or numpy arrays (observations by features). + Can be used to hyperalign source data to target data using + Hyperalignment from Dartmouth (i.e., procrustes transformation; see + nltools.stats.procrustes) or Shared Response Model from Princeton (see + nltools.external.srm). (see nltools.data.Brain_Data.align for aligning + a single Brain object to another). Common Model is shared response + model or centered target data. Transformed data can be back projected to + original data using Tranformation matrix. Inputs must be a list of Brain_Data + instances or numpy arrays (observations by features). - Examples: - Hyperalign using procrustes transform: - out = align(data, method='procrustes') + Examples: + Hyperalign using procrustes transform: + out = align(data, method='procrustes') - Align using shared response model: - out = align(data, method='probabilistic_srm', n_features=None) + Align using shared response model: + out = align(data, method='probabilistic_srm', n_features=None) - Project aligned data into original data: - original_data = [np.dot(t.data,tm.T) for t,tm in zip(out['transformed'], out['transformation_matrix'])] + Project aligned data into original data: + original_data = [np.dot(t.data,tm.T) for t,tm in zip(out['transformed'], out['transformation_matrix'])] - Args: - data: (list) A list of Brain_Data objects - method: (str) alignment method to use - ['probabilistic_srm','deterministic_srm','procrustes'] - n_features: (int) number of features to align to common space. - If None then will select number of voxels - axis: (int) axis to align on + Args: + data: (list) A list of Brain_Data objects + method: (str) alignment method to use + ['probabilistic_srm','deterministic_srm','procrustes'] + n_features: (int) number of features to align to common space. + If None then will select number of voxels + axis: (int) axis to align on - Returns: - out: (dict) a dictionary containing a list of transformed subject - matrices, a list of transformation matrices, the shared - response matrix, and the intersubject correlation of the shared resposnes + Returns: + out: (dict) a dictionary containing a list of transformed subject + matrices, a list of transformation matrices, the shared + response matrix, and the intersubject correlation of the shared resposnes - ''' + """ from nltools.data import Brain_Data, Adjacency if not isinstance(data, list): - raise ValueError('Make sure you are inputting data is a list.') + raise ValueError("Make sure you are inputting data is a list.") if not all(type(x) for x in data): - raise ValueError('Make sure all objects in the list are the same type.') - if method not in ['probabilistic_srm', 'deterministic_srm', 'procrustes']: - raise ValueError("Method must be ['probabilistic_srm','deterministic_srm','procrustes']") + raise ValueError("Make sure all objects in the list are the same type.") + if method not in ["probabilistic_srm", "deterministic_srm", "procrustes"]: + raise ValueError( + "Method must be ['probabilistic_srm','deterministic_srm','procrustes']" + ) data = deepcopy(data) if isinstance(data[0], Brain_Data): - data_type = 'Brain_Data' + data_type = "Brain_Data" data_out = [x.copy() for x in data] transformation_out = [x.copy() for x in data] data = [x.data.T for x in data] elif isinstance(data[0], np.ndarray): - data_type = 'numpy' + data_type = "numpy" data = [x.T for x in data] else: - raise ValueError('Type %s is not implemented yet.' 
% type(data[0])) + raise ValueError("Type %s is not implemented yet." % type(data[0])) # Align over time or voxels if axis == 1: data = [x.T for x in data] elif axis != 0: - raise ValueError('axis must be 0 or 1.') + raise ValueError("axis must be 0 or 1.") out = {} - if method in ['deterministic_srm', 'probabilistic_srm']: + if method in ["deterministic_srm", "probabilistic_srm"]: if n_features is None: n_features = int(data[0].shape[0]) - if method == 'deterministic_srm': + if method == "deterministic_srm": srm = DetSRM(features=n_features, *args, **kwargs) - elif method == 'probabilistic_srm': + elif method == "probabilistic_srm": srm = SRM(features=n_features, *args, **kwargs) srm.fit(data) - out['transformed'] = [x for x in srm.transform(data)] - out['common_model'] = srm.s_.T - out['transformation_matrix'] = srm.w_ + out["transformed"] = [x for x in srm.transform(data)] + out["common_model"] = srm.s_.T + out["transformation_matrix"] = srm.w_ - elif method == 'procrustes': + elif method == "procrustes": if n_features is not None: - raise NotImplementedError('Currently must use all voxels.' - 'Eventually will add a PCA reduction,' - 'must do this manually for now.') + raise NotImplementedError( + "Currently must use all voxels." + "Eventually will add a PCA reduction," + "must do this manually for now." + ) ## STEP 0: STANDARDIZE SIZE AND SHAPE## sizes_0 = [x.shape[0] for x in data] sizes_1 = [x.shape[1] for x in data] @@ -1171,7 +1280,7 @@ def align(data, method='deterministic_srm', n_features=None, axis=0, # use first data as template template = np.copy(x.T) else: - _, trans, _, _, _ = procrustes(template/i, x.T) + _, trans, _, _, _ = procrustes(template / i, x.T) template += trans template /= len(m) @@ -1195,49 +1304,58 @@ def align(data, method='deterministic_srm', n_features=None, axis=0, transformation_matrix.append(t) disparity.append(d) scale.append(s) - out['transformed'] = aligned - out['common_model'] = common - out['transformation_matrix'] = transformation_matrix - out['disparity'] = disparity - out['scale'] = scale + out["transformed"] = aligned + out["common_model"] = common + out["transformation_matrix"] = transformation_matrix + out["disparity"] = disparity + out["scale"] = scale if axis == 1: - out['transformed'] = [x.T for x in out['transformed']] - out['common_model'] = out['common_model'].T + out["transformed"] = [x.T for x in out["transformed"]] + out["common_model"] = out["common_model"].T - if data_type == 'Brain_Data': - out['transformation_matrix'] = [x.T for x in out['transformation_matrix']] + if data_type == "Brain_Data": + out["transformation_matrix"] = [x.T for x in out["transformation_matrix"]] # Calculate Intersubject correlation on aligned components if n_features is None: - n_features = out['common_model'].shape[1] + n_features = out["common_model"].shape[1] a = Adjacency() for f in range(n_features): - a = a.append(Adjacency(1-pairwise_distances(np.array([x[f,:] for x in out['transformed']]), metric='correlation'), metric='similarity')) - out['isc'] = dict(zip(np.arange(n_features), a.mean(axis=1))) - - if data_type == 'Brain_Data': - if method == 'procrustes': - for i, x in enumerate(out['transformed']): + a = a.append( + Adjacency( + 1 + - pairwise_distances( + np.array([x[f, :] for x in out["transformed"]]), + metric="correlation", + ), + metric="similarity", + ) + ) + out["isc"] = dict(zip(np.arange(n_features), a.mean(axis=1))) + + if data_type == "Brain_Data": + if method == "procrustes": + for i, x in enumerate(out["transformed"]): 
data_out[i].data = x.T - out['transformed'] = data_out + out["transformed"] = data_out common = data_out[0].copy() - common.data = out['common_model'] - out['common_model'] = common + common.data = out["common_model"] + out["common_model"] = common else: - out['transformed'] = [x.T for x in out['transformed']] + out["transformed"] = [x.T for x in out["transformed"]] - for i,x in enumerate(out['transformation_matrix']): + for i, x in enumerate(out["transformation_matrix"]): transformation_out[i].data = x.T - out['transformation_matrix'] = transformation_out + out["transformation_matrix"] = transformation_out return out def procrustes(data1, data2): - '''Procrustes analysis, a similarity test for two data sets. - + """Procrustes analysis, a similarity test for two data sets. + Each input matrix is a set of points or vectors (the rows of the matrix). The dimension of the space is the number of columns of each matrix. Given two identically sized matrices, procrustes standardizes both such that: @@ -1276,7 +1394,7 @@ def procrustes(data1, data2): dot(R.T, R) == I. scale : float Sum of the singular values of ``dot(data1.T, data2)``. - ''' + """ mtx1 = np.array(data1, dtype=np.double, copy=True) mtx2 = np.array(data2, dtype=np.double, copy=True) @@ -1290,9 +1408,13 @@ def procrustes(data1, data2): if mtx1.shape[1] != mtx2.shape[1]: # Pad with zeros if mtx1.shape[1] > mtx2.shape[1]: - mtx2 = np.append(mtx2, np.zeros((mtx1.shape[0], mtx1.shape[1] - mtx2.shape[1])), axis=1) + mtx2 = np.append( + mtx2, np.zeros((mtx1.shape[0], mtx1.shape[1] - mtx2.shape[1])), axis=1 + ) else: - mtx1 = np.append(mtx1, np.zeros((mtx1.shape[0], mtx2.shape[1] - mtx1.shape[1])), axis=1) + mtx1 = np.append( + mtx1, np.zeros((mtx1.shape[0], mtx2.shape[1] - mtx1.shape[1])), axis=1 + ) # translate all the data to the origin mtx1 -= np.mean(mtx1, 0) @@ -1319,7 +1441,7 @@ def procrustes(data1, data2): def double_center(mat): - '''Double center a 2d array. + """Double center a 2d array. Args: mat (ndarray): 2d numpy array @@ -1327,10 +1449,10 @@ def double_center(mat): Returns: mat (ndarray): double-centered version of input - ''' + """ if len(mat.shape) != 2: - raise ValueError('Array should be 2d') + raise ValueError("Array should be 2d") # keepdims ensures that row/column means are not incorrectly broadcast during subtraction row_mean = mat.mean(axis=0, keepdims=True) @@ -1340,17 +1462,17 @@ def double_center(mat): def u_center(mat): - '''U-center a 2d array. U-centering is a bias-corrected form of double-centering + """U-center a 2d array. U-centering is a bias-corrected form of double-centering Args: mat (ndarray): 2d numpy array Returns: mat (narray): u-centered version of input - ''' + """ if len(mat.shape) != 2: - raise ValueError('Array should be 2d') + raise ValueError("Array should be 2d") dim = mat.shape[0] u_mu = mat.sum() / ((dim - 1) * (dim - 2)) @@ -1369,7 +1491,7 @@ def u_center(mat): def distance_correlation(x, y, bias_corrected=True, ttest=False): - ''' + """ Compute the distance correlation betwen 2 arrays to test for multivariate dependence (linear or non-linear). Arrays must match on their first dimension. It's almost always preferable to compute the bias_corrected version which can also optionally perform a ttest. This ttest operates on a statistic thats ~dcorr^2 and will be also returned. Explanation: @@ -1385,7 +1507,7 @@ def distance_correlation(x, y, bias_corrected=True, ttest=False): Returns: results (dict): dictionary of results (correlation, t, p, and df.) 
Optionally, covariance, x variance, and y variance - ''' + """ if len(x.shape) > 2 or len(y.shape) > 2: raise ValueError("Both arrays must be 1d or 2d") @@ -1432,24 +1554,26 @@ def distance_correlation(x, y, bias_corrected=True, ttest=False): if dcor < 0: # This will only apply in the bias_corrected case as values can be < 0 - out['dcorr'] = 0 + out["dcorr"] = 0 else: - out['dcorr'] = np.sqrt(dcor) + out["dcorr"] = np.sqrt(dcor) if bias_corrected: - out['dcorr_squared'] = dcor + out["dcorr_squared"] = dcor if ttest: dof = (adjusted_n / 2) - 1 - t = np.sqrt(dof) * (dcor / np.sqrt(1 - dcor**2)) + t = np.sqrt(dof) * (dcor / np.sqrt(1 - dcor ** 2)) p = 1 - t_dist.cdf(t, dof) - out['t'] = t - out['p'] = p - out['df'] = dof + out["t"] = t + out["p"] = p + out["df"] = dof return out -def procrustes_distance(mat1, mat2, n_permute=5000, tail=2, n_jobs=-1, random_state=None): - """ Use procrustes super-position to perform a similarity test between 2 matrices. Matrices need to match in size on their first dimension only, as the smaller matrix on the second dimension will be padded with zeros. After aligning two matrices using the procrustes transformation, use the computed disparity between them (sum of squared error of elements) as a similarity metric. Shuffle the rows of one of the matrices and recompute the disparity to perform inference (Peres-Neto & Jackson, 2001). +def procrustes_distance( + mat1, mat2, n_permute=5000, tail=2, n_jobs=-1, random_state=None +): + """Use procrustes super-position to perform a similarity test between 2 matrices. Matrices need to match in size on their first dimension only, as the smaller matrix on the second dimension will be padded with zeros. After aligning two matrices using the procrustes transformation, use the computed disparity between them (sum of squared error of elements) as a similarity metric. Shuffle the rows of one of the matrices and recompute the disparity to perform inference (Peres-Neto & Jackson, 2001). 
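A usage sketch for procrustes_distance as defined here, comparing two noisy views of the same configuration; the matrices, reduced permutation count, and seed are illustrative assumptions.

import numpy as np
from nltools.stats import procrustes_distance

rng = np.random.RandomState(0)
shared = rng.randn(40, 3)

# Two noisy observations of the same underlying 40 x 3 configuration
mat1 = shared + 0.1 * rng.randn(40, 3)
mat2 = shared + 0.1 * rng.randn(40, 3)

stats = procrustes_distance(mat1, mat2, n_permute=500, tail=2, n_jobs=1, random_state=0)
print(stats["similarity"], stats["p"])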
Args: mat1 (ndarray): 2d numpy array; must have same number of rows as mat2 @@ -1464,9 +1588,9 @@ def procrustes_distance(mat1, mat2, n_permute=5000, tail=2, n_jobs=-1, random_st """ - #raise NotImplementedError("procrustes distance is not currently implemented") + # raise NotImplementedError("procrustes distance is not currently implemented") if mat1.shape[0] != mat2.shape[0]: - raise ValueError('Both arrays must match on their first dimension') + raise ValueError("Both arrays must match on their first dimension") random_state = check_random_state(random_state) @@ -1476,37 +1600,41 @@ def procrustes_distance(mat1, mat2, n_permute=5000, tail=2, n_jobs=-1, random_st if len(mat2.shape) < 2: mat2 = mat2[:, np.newaxis] if mat1.shape[1] > mat2.shape[1]: - mat2 = np.pad(mat2, ((0, 0), (0, mat1.shape[1] - mat2.shape[1])), 'constant') + mat2 = np.pad(mat2, ((0, 0), (0, mat1.shape[1] - mat2.shape[1])), "constant") elif mat2.shape[1] > mat1.shape[1]: - mat1 = np.pad(mat1, ((0, 0), (0, mat2.shape[1] - mat1.shape[1])), 'constant') + mat1 = np.pad(mat1, ((0, 0), (0, mat2.shape[1] - mat1.shape[1])), "constant") _, _, sse = procrust(mat1, mat2) - stats = {'similarity': sse} - all_p = Parallel(n_jobs=n_jobs)(delayed(procrust)(random_state.permutation(mat1), mat2) for i in range(n_permute)) + stats = {"similarity": sse} + all_p = Parallel(n_jobs=n_jobs)( + delayed(procrust)(random_state.permutation(mat1), mat2) + for i in range(n_permute) + ) all_p = [1 - x[2] for x in all_p] - stats['p'] = _calc_pvalue(all_p, sse, tail) + stats["p"] = _calc_pvalue(all_p, sse, tail) return stats + def find_spikes(data, global_spike_cutoff=3, diff_spike_cutoff=3): - '''Function to identify spikes from fMRI Time Series Data - - Args: - data: Brain_Data or nibabel instance - global_spike_cutoff: (int,None) cutoff to identify spikes in global signal - in standard deviations, None indicates do not calculate. - diff_spike_cutoff: (int,None) cutoff to identify spikes in average frame difference - in standard deviations, None indicates do not calculate. - Returns: - pandas dataframe with spikes as indicator variables - ''' + """Function to identify spikes from fMRI Time Series Data + + Args: + data: Brain_Data or nibabel instance + global_spike_cutoff: (int,None) cutoff to identify spikes in global signal + in standard deviations, None indicates do not calculate. + diff_spike_cutoff: (int,None) cutoff to identify spikes in average frame difference + in standard deviations, None indicates do not calculate. 
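A sketch of running find_spikes on a small synthetic 4D nibabel image; the injected spikes and image dimensions are made up, and it is assumed a Nifti1Image is accepted directly, as the docstring states.

import numpy as np
import nibabel as nib
from nltools.stats import find_spikes

# Synthetic 4D volume (10 x 10 x 10 voxels, 100 time points) with two injected global spikes
rng = np.random.RandomState(0)
volume = rng.randn(10, 10, 10, 100) + 100.0
volume[..., 25] += 15.0
volume[..., 60] += 15.0
img = nib.Nifti1Image(volume, affine=np.eye(4))

spikes = find_spikes(img, global_spike_cutoff=3, diff_spike_cutoff=3)
print([c for c in spikes.columns if "spike" in c])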
+ Returns: + pandas dataframe with spikes as indicator variables + """ from nltools.data import Brain_Data if (global_spike_cutoff is None) & (diff_spike_cutoff is None): - raise ValueError('Did not input any cutoffs to identify spikes in this data.') + raise ValueError("Did not input any cutoffs to identify spikes in this data.") if isinstance(data, Brain_Data): data = deepcopy(data.data) @@ -1517,67 +1645,83 @@ def find_spikes(data, global_spike_cutoff=3, diff_spike_cutoff=3): if len(data.shape) > 3: data = np.squeeze(data) elif len(data.shape) < 3: - raise ValueError('nibabel instance does not appear to be 4D data.') - global_mn = np.mean(data, axis=(0,1,2)) - frame_diff = np.mean(np.abs(np.diff(data, axis=3)), axis=(0,1,2)) + raise ValueError("nibabel instance does not appear to be 4D data.") + global_mn = np.mean(data, axis=(0, 1, 2)) + frame_diff = np.mean(np.abs(np.diff(data, axis=3)), axis=(0, 1, 2)) else: - raise ValueError('Currently this function can only accomodate Brain_Data and nibabel instances') + raise ValueError( + "Currently this function can only accomodate Brain_Data and nibabel instances" + ) if global_spike_cutoff is not None: - global_outliers = np.append(np.where(global_mn > np.mean(global_mn) + np.std(global_mn) * global_spike_cutoff), - np.where(global_mn < np.mean(global_mn) - np.std(global_mn) * global_spike_cutoff)) + global_outliers = np.append( + np.where( + global_mn > np.mean(global_mn) + np.std(global_mn) * global_spike_cutoff + ), + np.where( + global_mn < np.mean(global_mn) - np.std(global_mn) * global_spike_cutoff + ), + ) if diff_spike_cutoff is not None: - frame_outliers = np.append(np.where(frame_diff > np.mean(frame_diff) + np.std(frame_diff) * diff_spike_cutoff), - np.where(frame_diff < np.mean(frame_diff) - np.std(frame_diff) * diff_spike_cutoff)) - # build spike regressors - outlier = pd.DataFrame([x+1 for x in range(len(global_mn))],columns=['TR']) - if (global_spike_cutoff is not None): + frame_outliers = np.append( + np.where( + frame_diff + > np.mean(frame_diff) + np.std(frame_diff) * diff_spike_cutoff + ), + np.where( + frame_diff + < np.mean(frame_diff) - np.std(frame_diff) * diff_spike_cutoff + ), + ) + # build spike regressors + outlier = pd.DataFrame([x + 1 for x in range(len(global_mn))], columns=["TR"]) + if global_spike_cutoff is not None: for i, loc in enumerate(global_outliers): - outlier['global_spike' + str(i + 1)] = 0 - outlier['global_spike' + str(i + 1)].iloc[int(loc)] = 1 + outlier["global_spike" + str(i + 1)] = 0 + outlier["global_spike" + str(i + 1)].iloc[int(loc)] = 1 # build FD regressors - if (diff_spike_cutoff is not None): + if diff_spike_cutoff is not None: for i, loc in enumerate(frame_outliers): - outlier['diff_spike' + str(i + 1)] = 0 - outlier['diff_spike' + str(i + 1)].iloc[int(loc)] = 1 + outlier["diff_spike" + str(i + 1)] = 0 + outlier["diff_spike" + str(i + 1)].iloc[int(loc)] = 1 return outlier def phase_randomize(data, random_state=None): - '''Perform phase randomization on time-series signal - - This procedure preserves the power spectrum/autocorrelation, - but destroys any nonlinear behavior. Based on the algorithm - described in: - - Theiler, J., Galdrikian, B., Longtin, A., Eubank, S., & Farmer, J. D. (1991). - Testing for nonlinearity in time series: the method of surrogate data - (No. LA-UR-91-3343; CONF-9108181-1). Los Alamos National Lab., NM (United States). - - Lancaster, G., Iatsenko, D., Pidde, A., Ticcinelli, V., & Stefanovska, A. (2018). 
- Surrogate data for hypothesis testing of physical systems. Physics Reports, 748, 1-60. - - 1. Calculate the Fourier transform ftx of the original signal xn. - 2. Generate a vector of random phases in the range[0, 2π]) with - length L/2,where L is the length of the time series. - 3. As the Fourier transform is symmetrical, to create the new phase - randomized vector ftr , multiply the first half of ftx (i.e.the half - corresponding to the positive frequencies) by exp(iφr) to create the - first half of ftr.The remainder of ftr is then the horizontally flipped - complex conjugate of the first half. - 4. Finally, the inverse Fourier transform of ftr gives the FT surrogate. - - Args: - - data: (np.array) data (can be 1d or 2d, time by features) - random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) - - Returns: - - shifted_data: (np.array) phase randomized data - ''' + """Perform phase randomization on time-series signal + + This procedure preserves the power spectrum/autocorrelation, + but destroys any nonlinear behavior. Based on the algorithm + described in: + + Theiler, J., Galdrikian, B., Longtin, A., Eubank, S., & Farmer, J. D. (1991). + Testing for nonlinearity in time series: the method of surrogate data + (No. LA-UR-91-3343; CONF-9108181-1). Los Alamos National Lab., NM (United States). + + Lancaster, G., Iatsenko, D., Pidde, A., Ticcinelli, V., & Stefanovska, A. (2018). + Surrogate data for hypothesis testing of physical systems. Physics Reports, 748, 1-60. + + 1. Calculate the Fourier transform ftx of the original signal xn. + 2. Generate a vector of random phases in the range[0, 2π]) with + length L/2,where L is the length of the time series. + 3. As the Fourier transform is symmetrical, to create the new phase + randomized vector ftr , multiply the first half of ftx (i.e.the half + corresponding to the positive frequencies) by exp(iφr) to create the + first half of ftr.The remainder of ftr is then the horizontally flipped + complex conjugate of the first half. + 4. Finally, the inverse Fourier transform of ftr gives the FT surrogate. + + Args: + + data: (np.array) data (can be 1d or 2d, time by features) + random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) + + Returns: + + shifted_data: (np.array) phase randomized data + """ random_state = check_random_state(random_state) data = np.array(data) @@ -1589,150 +1733,181 @@ def phase_randomize(data, random_state=None): else: pos_freq = np.arange(1, (data.shape[0] - 1) // 2 + 1) neg_freq = np.arange(data.shape[0] - 1, (data.shape[0] - 1) // 2, -1) - + if len(data.shape) == 1: - phase_shifts = random_state.uniform(0, 2*np.pi, size=(len(pos_freq))) + phase_shifts = random_state.uniform(0, 2 * np.pi, size=(len(pos_freq))) fft_data[pos_freq] *= np.exp(1j * phase_shifts) fft_data[neg_freq] *= np.exp(-1j * phase_shifts) else: - phase_shifts = random_state.uniform(0, 2*np.pi, size=(len(pos_freq), data.shape[1])) + phase_shifts = random_state.uniform( + 0, 2 * np.pi, size=(len(pos_freq), data.shape[1]) + ) fft_data[pos_freq, :] *= np.exp(1j * phase_shifts) fft_data[neg_freq, :] *= np.exp(-1j * phase_shifts) return np.real(ifft(fft_data, axis=0)) + def circle_shift(data, random_state=None): - '''Circle shift data for each feature - + """Circle shift data for each feature + Args: - + data: time series (1D or 2D). 
If 2D, then must be observations by features random_state: (int, None, or np.random.RandomState) Initial random seed (default: None) - + Returns: - + shifted data - - ''' + + """ random_state = check_random_state(random_state) data = np.array(data) if len(data.shape) == 1: shift = random_state.choice(np.arange(len(data)), replace=False) shifted = np.concatenate((data[-shift:], data[:-shift])) else: - shift = random_state.choice(np.arange(data.shape[0]), size=data.shape[1], replace=False) - shifted = np.array([np.concatenate([data[-int(s):, int(d)], data[:-int(s), int(d)]]) for d,s in zip(range(data.shape[1]), shift)]).T + shift = random_state.choice( + np.arange(data.shape[0]), size=data.shape[1], replace=False + ) + shifted = np.array( + [ + np.concatenate([data[-int(s) :, int(d)], data[: -int(s), int(d)]]) + for d, s in zip(range(data.shape[1]), shift) + ] + ).T return shifted - -def _bootstrap_isc(similarity_matrix, metric='median', exclude_self_corr=True, random_state=None): - '''Helper function to compute bootstrapped ISC from Adjacency Instance - - This function implements the subject-wise bootstrap method discussed in Chen et al., 2016. - - Chen, G., Shin, Y. W., Taylor, P. A., Glen, D. R., Reynolds, R. C., Israel, R. B., - & Cox, R. W. (2016). Untangling the relatedness among correlations, part I: - nonparametric approaches to inter-subject correlation analysis at the group level. - NeuroImage, 142, 248-259. - - Args: - - similarity_matrix: (Adjacency) Adjacency matrix of pairwise correlation values - metric: (str) type of summary statistic (Default: median) - exclude_self_corr: (bool) set correlations with random draws of same subject to NaN (Default: True) - random_state: random_state instance for permutation - - Returns: - - isc: summary statistic of bootstrapped similarity matrix - - ''' + + +def _bootstrap_isc( + similarity_matrix, metric="median", exclude_self_corr=True, random_state=None +): + """Helper function to compute bootstrapped ISC from Adjacency Instance + + This function implements the subject-wise bootstrap method discussed in Chen et al., 2016. + + Chen, G., Shin, Y. W., Taylor, P. A., Glen, D. R., Reynolds, R. C., Israel, R. B., + & Cox, R. W. (2016). Untangling the relatedness among correlations, part I: + nonparametric approaches to inter-subject correlation analysis at the group level. + NeuroImage, 142, 248-259. 
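A brief sketch contrasting the two surrogate generators defined above (circle_shift and phase_randomize) on a single noisy oscillation; the signal itself is illustrative.

import numpy as np
from nltools.stats import circle_shift, phase_randomize

# A noisy 200-sample sine wave treated as one time series
t = np.arange(200)
rng = np.random.RandomState(0)
signal = np.sin(2 * np.pi * t / 40) + 0.3 * rng.randn(200)

shifted = circle_shift(signal, random_state=1)       # same values, rotated in time
surrogate = phase_randomize(signal, random_state=1)  # same power spectrum, randomized phases
print(signal.shape, shifted.shape, surrogate.shape)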
+ + Args: + + similarity_matrix: (Adjacency) Adjacency matrix of pairwise correlation values + metric: (str) type of summary statistic (Default: median) + exclude_self_corr: (bool) set correlations with random draws of same subject to NaN (Default: True) + random_state: random_state instance for permutation + + Returns: + + isc: summary statistic of bootstrapped similarity matrix + + """ from nltools.data import Adjacency if not isinstance(similarity_matrix, Adjacency): - raise ValueError('similarity_matrix must be an Adjacency instance.') - + raise ValueError("similarity_matrix must be an Adjacency instance.") + random_state = check_random_state(random_state) square = similarity_matrix.squareform() n_sub = square.shape[0] np.fill_diagonal(square, 1) - - bootstrap_subject = sorted(random_state.choice(np.arange(n_sub), size=n_sub, replace=True)) - bootstrap_sample = Adjacency(square[bootstrap_subject, :][:, bootstrap_subject], matrix_type='similarity') - + + bootstrap_subject = sorted( + random_state.choice(np.arange(n_sub), size=n_sub, replace=True) + ) + bootstrap_sample = Adjacency( + square[bootstrap_subject, :][:, bootstrap_subject], matrix_type="similarity" + ) + if exclude_self_corr: bootstrap_sample.data[bootstrap_sample.data == 1] = np.nan - if metric == 'mean': + if metric == "mean": return np.tanh(bootstrap_sample.r_to_z().mean()) - elif metric == 'median': + elif metric == "median": return bootstrap_sample.median() -def _compute_isc(data, metric='median'): - ''' Helper function to compute intersubject correlation from observations by subjects array. - - Args: - data: (pd.DataFrame, np.array) observations by subjects where isc is computed across subjects - metric: (str) type of association metric ['spearman','pearson','kendall'] - - Returns: - isc: (float) intersubject correlation coefficient - - ''' + +def _compute_isc(data, metric="median"): + """Helper function to compute intersubject correlation from observations by subjects array. + + Args: + data: (pd.DataFrame, np.array) observations by subjects where isc is computed across subjects + metric: (str) type of association metric ['spearman','pearson','kendall'] + + Returns: + isc: (float) intersubject correlation coefficient + + """ from nltools.data import Adjacency - similarity = Adjacency(1 - pairwise_distances(data.T, metric='correlation'), matrix_type='similarity') - if metric =='mean': + similarity = Adjacency( + 1 - pairwise_distances(data.T, metric="correlation"), matrix_type="similarity" + ) + if metric == "mean": isc = np.tanh(similarity.r_to_z().mean()) - elif metric =='median': + elif metric == "median": isc = similarity.median() return isc -def isc(data, n_bootstraps=5000, metric='median', method='bootstrap', ci_percentile=95, exclude_self_corr=True, - return_bootstraps=False, tail=2, n_jobs=-1, random_state=None): - ''' Compute pairwise intersubject correlation from observations by subjects array. - - This function computes pairwise intersubject correlations (ISC) using the median as recommended by Chen - et al., 2016). However, if the mean is preferred, we compute the mean correlation after performing - the fisher r-to-z transformation and then convert back to correlations to minimize artificially - inflating the correlation values. - - There are currently three different methods to compute p-values. These include the classic methods for - computing permuted time-series by either circle-shifting the data or phase-randomizing the data - (see Lancaster et al., 2018). 
These methods create random surrogate data while preserving the temporal - autocorrelation inherent to the signal. By default, we use the subject-wise bootstrap method from - Chen et al., 2016. Instead of recomputing the pairwise ISC using circle_shift or phase_randomization methods, - this approach uses the computationally more efficient method of bootstrapping the subjects - and computing a new pairwise similarity matrix with randomly selected subjects with replacement. - If the same subject is selected multiple times, we set the perfect correlation to a nan with - (exclude_self_corr=True). We compute the p-values using the percentile method using the same - method in Brainiak. - - Chen, G., Shin, Y. W., Taylor, P. A., Glen, D. R., Reynolds, R. C., Israel, R. B., - & Cox, R. W. (2016). Untangling the relatedness among correlations, part I: - nonparametric approaches to inter-subject correlation analysis at the group level. - NeuroImage, 142, 248-259. - - Hall, P., & Wilson, S. R. (1991). Two guidelines for bootstrap hypothesis testing. - Biometrics, 757-762. - - Lancaster, G., Iatsenko, D., Pidde, A., Ticcinelli, V., & Stefanovska, A. (2018). - Surrogate data for hypothesis testing of physical systems. Physics Reports, 748, 1-60. - - Args: - data: (pd.DataFrame, np.array) observations by subjects where isc is computed across subjects - n_bootstraps: (int) number of bootstraps - metric: (str) type of association metric ['spearman','pearson','kendall'] - method: (str) method to compute p-values ['bootstrap', 'circle_shift','phase_randomize'] (default: bootstrap) - tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) - n_jobs: (int) The number of CPUs to use to do the computation. -1 means all CPUs. - return_parms: (bool) Return the permutation distribution along with the p-value; default False - - Returns: - stats: (dict) dictionary of permutation results ['correlation','p'] - - ''' - + +def isc( + data, + n_bootstraps=5000, + metric="median", + method="bootstrap", + ci_percentile=95, + exclude_self_corr=True, + return_bootstraps=False, + tail=2, + n_jobs=-1, + random_state=None, +): + """Compute pairwise intersubject correlation from observations by subjects array. + + This function computes pairwise intersubject correlations (ISC) using the median as recommended by Chen + et al., 2016). However, if the mean is preferred, we compute the mean correlation after performing + the fisher r-to-z transformation and then convert back to correlations to minimize artificially + inflating the correlation values. + + There are currently three different methods to compute p-values. These include the classic methods for + computing permuted time-series by either circle-shifting the data or phase-randomizing the data + (see Lancaster et al., 2018). These methods create random surrogate data while preserving the temporal + autocorrelation inherent to the signal. By default, we use the subject-wise bootstrap method from + Chen et al., 2016. Instead of recomputing the pairwise ISC using circle_shift or phase_randomization methods, + this approach uses the computationally more efficient method of bootstrapping the subjects + and computing a new pairwise similarity matrix with randomly selected subjects with replacement. + If the same subject is selected multiple times, we set the perfect correlation to a nan with + (exclude_self_corr=True). We compute the p-values using the percentile method using the same + method in Brainiak. + + Chen, G., Shin, Y. W., Taylor, P. A., Glen, D. 
R., Reynolds, R. C., Israel, R. B., + & Cox, R. W. (2016). Untangling the relatedness among correlations, part I: + nonparametric approaches to inter-subject correlation analysis at the group level. + NeuroImage, 142, 248-259. + + Hall, P., & Wilson, S. R. (1991). Two guidelines for bootstrap hypothesis testing. + Biometrics, 757-762. + + Lancaster, G., Iatsenko, D., Pidde, A., Ticcinelli, V., & Stefanovska, A. (2018). + Surrogate data for hypothesis testing of physical systems. Physics Reports, 748, 1-60. + + Args: + data: (pd.DataFrame, np.array) observations by subjects where isc is computed across subjects + n_bootstraps: (int) number of bootstraps + metric: (str) type of association metric ['spearman','pearson','kendall'] + method: (str) method to compute p-values ['bootstrap', 'circle_shift','phase_randomize'] (default: bootstrap) + tail: (int) either 1 for one-tail or 2 for two-tailed test (default: 2) + n_jobs: (int) The number of CPUs to use to do the computation. -1 means all CPUs. + return_parms: (bool) Return the permutation distribution along with the p-value; default False + + Returns: + stats: (dict) dictionary of permutation results ['correlation','p'] + + """ + from nltools.data import Adjacency random_state = check_random_state(random_state) @@ -1740,91 +1915,117 @@ def isc(data, n_bootstraps=5000, metric='median', method='bootstrap', ci_percent if not isinstance(data, (pd.DataFrame, np.ndarray)): raise ValueError("data must be a pandas dataframe or numpy array") - if metric not in ['mean', 'median']: + if metric not in ["mean", "median"]: raise ValueError("metric must be ['mean', 'median']") - - stats = {'isc': _compute_isc(data, metric=metric)} - - similarity = Adjacency(1 - pairwise_distances(data.T, metric='correlation'), matrix_type='similarity') - - if method == 'bootstrap': - all_bootstraps = Parallel(n_jobs=n_jobs)(delayed(_bootstrap_isc)( - similarity, metric=metric, exclude_self_corr=exclude_self_corr, - random_state=random_state) for i in range(n_bootstraps)) - stats['p'] = _calc_pvalue(all_bootstraps - stats['isc'], stats['isc'], tail) - - elif method == 'circle_shift': - all_bootstraps = Parallel(n_jobs=n_jobs)(delayed(_compute_isc)( - circle_shift(data, random_state=random_state), metric=metric) - for i in range(n_bootstraps)) - stats['p'] = _calc_pvalue(all_bootstraps, stats['isc'], tail) - elif method == 'phase_randomize': - all_bootstraps = Parallel(n_jobs=n_jobs)(delayed(_compute_isc)( - phase_randomize(data, random_state=random_state), metric=metric) - for i in range(n_bootstraps)) - stats['p'] = _calc_pvalue(all_bootstraps, stats['isc'], tail) + stats = {"isc": _compute_isc(data, metric=metric)} + + similarity = Adjacency( + 1 - pairwise_distances(data.T, metric="correlation"), matrix_type="similarity" + ) + + if method == "bootstrap": + all_bootstraps = Parallel(n_jobs=n_jobs)( + delayed(_bootstrap_isc)( + similarity, + metric=metric, + exclude_self_corr=exclude_self_corr, + random_state=random_state, + ) + for i in range(n_bootstraps) + ) + stats["p"] = _calc_pvalue(all_bootstraps - stats["isc"], stats["isc"], tail) + + elif method == "circle_shift": + all_bootstraps = Parallel(n_jobs=n_jobs)( + delayed(_compute_isc)( + circle_shift(data, random_state=random_state), metric=metric + ) + for i in range(n_bootstraps) + ) + stats["p"] = _calc_pvalue(all_bootstraps, stats["isc"], tail) + elif method == "phase_randomize": + all_bootstraps = Parallel(n_jobs=n_jobs)( + delayed(_compute_isc)( + phase_randomize(data, random_state=random_state), metric=metric + 
) + for i in range(n_bootstraps) + ) + stats["p"] = _calc_pvalue(all_bootstraps, stats["isc"], tail) else: - raise ValueError("method can only be ['bootstrap', 'circle_shift','phase_randomize']") - - stats['ci'] = (np.percentile(np.array(all_bootstraps), (100 - ci_percentile)/2, axis=0), - np.percentile(np.array(all_bootstraps), ci_percentile + (100 - ci_percentile)/2, axis=0)) + raise ValueError( + "method can only be ['bootstrap', 'circle_shift','phase_randomize']" + ) + + stats["ci"] = ( + np.percentile(np.array(all_bootstraps), (100 - ci_percentile) / 2, axis=0), + np.percentile( + np.array(all_bootstraps), ci_percentile + (100 - ci_percentile) / 2, axis=0 + ), + ) if return_bootstraps: - stats['null_distribution'] = all_bootstraps - + stats["null_distribution"] = all_bootstraps + return stats + def _compute_matrix_correlation(matrix1, matrix2): - '''Computes the intersubject functional correlation between 2 matrices (observation x feature)''' - return np.corrcoef(matrix1.T, matrix2.T)[matrix1.shape[1]:,:matrix2.shape[1]] + """Computes the intersubject functional correlation between 2 matrices (observation x feature)""" + return np.corrcoef(matrix1.T, matrix2.T)[matrix1.shape[1] :, : matrix2.shape[1]] + + +def isfc(data, method="average"): + """Compute intersubject functional connectivity (ISFC) from a list of observation x feature matrices -def isfc(data, method='average'): - '''Compute intersubject functional connectivity (ISFC) from a list of observation x feature matrices - This function uses the leave one out approach to compute ISFC (Simony et al., 2016). For each subject, compute the cross-correlation between each voxel/roi with the average of the rest of the subjects data. In other words, compute the mean voxel/ROI response for all participants except the target subject. Then compute the correlation between each ROI within the target subject with the mean ROI response in the group average. - + Simony, E., Honey, C. J., Chen, J., Lositsky, O., Yeshurun, Y., Wiesel, A., & Hasson, U. (2016). Dynamic reconfiguration of the default mode network during narrative comprehension. Nature communications, 7, 12141. - + Args: data: list of subject matrices (observations x voxels/rois) method: approach to computing ISFC. 'average' uses leave one - + Returns: list of subject ISFC matrices - - ''' + + """ subjects = np.arange(len(data)) - - if method == 'average': + + if method == "average": sub_isfc = [] for target in subjects: m1 = data[target] sub_mean = np.zeros(m1.shape) for y in (y for y in subjects if y != target): sub_mean += data[y] - sub_isfc.append(_compute_matrix_correlation(m1, sub_mean/(len(subjects)-1))) + sub_isfc.append( + _compute_matrix_correlation(m1, sub_mean / (len(subjects) - 1)) + ) else: - raise NotImplementedError('Only average method is implemented. Pairwise will be added at some point.') + raise NotImplementedError( + "Only average method is implemented. Pairwise will be added at some point." + ) return sub_isfc -def isps(data, sampling_freq=.5, low_cut=.04, high_cut=.07, order=5): - '''Compute Dynamic Intersubject Phase Synchrony (ISPS from a observation by subject array) - + +def isps(data, sampling_freq=0.5, low_cut=0.04, high_cut=0.07, order=5): + """Compute Dynamic Intersubject Phase Synchrony (ISPS from a observation by subject array) + This function computes the instantaneous intersubject phase synchrony for a single voxel/roi timeseries. Requires multiple subjects. 
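A sketch of the isc and isfc calls defined above on simulated subjects sharing a common time course; the data, reduced bootstrap count, and choice of the circle_shift null are illustrative assumptions.

import numpy as np
from nltools.stats import isc, isfc

rng = np.random.RandomState(0)
shared = rng.randn(200)

# Observations x subjects array: 10 subjects sharing a common time course plus noise
subject_data = np.column_stack([shared + rng.randn(200) for _ in range(10)])
isc_stats = isc(
    subject_data, n_bootstraps=500, metric="median", method="circle_shift",
    n_jobs=1, random_state=0,
)
print(isc_stats["isc"], isc_stats["p"], isc_stats["ci"])

# isfc expects a list of (observations x rois) matrices, one per subject
roi_data = [np.column_stack([shared + rng.randn(200) for _ in range(5)]) for _ in range(10)]
print(len(isfc(roi_data, method="average")))  # one correlation matrix per subject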
This method is largely based on that described by Glerean et al., 2012 and performs a hilbert transform on narrow bandpass filtered timeseries (butterworth) data to get the instantaneous phase angle. The function returns a dictionary containing the average phase angle, the average vector length, and parametric p-values computed using the rayleigh test using circular statistics (Fisher, 1993). - + This function requires narrow band filtering your data. As a default we use the recommendations by (Glerean et al., 2012) of .04-.07Hz. This is similar to the "slow-4" band (0.025–0.067 Hz) described by (Zuo et al., 2010; Penttonen & Buzsáki, 2003), but excludes the .03 band, which has been @@ -1832,40 +2033,49 @@ def isps(data, sampling_freq=.5, low_cut=.04, high_cut=.07, order=5): Birn RM, Smith MA, Bandettini PA, Diamond JB. 2006. Separating respiratory-variation-related fluctuations from neuronal-activity- related fluctuations in fMRI. Neuroimage 31:1536–1548. - + Buzsáki, G., & Draguhn, A. (2004). Neuronal oscillations in cortical networks. Science, 304(5679), 1926-1929. - + Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. Glerean, E., Salmi, J., Lahnakoski, J. M., Jääskeläinen, I. P., & Sams, M. (2012). Functional magnetic resonance imaging phase synchronization as a measure of dynamic functional connectivity. Brain connectivity, 2(2), 91-101. - + Args: data: (pd.DataFrame, np.ndarray) observations x subjects data sampling_freq: (float) sampling freqency of data in Hz low_cut: (float) lower bound cutoff for high pass filter high_cut: (float) upper bound cutoff for low pass filter order: (int) filter order for butterworth bandpass - + Returns: dictionary with mean phase angle, vector length, and rayleigh statistic - - ''' - + + """ + if not isinstance(data, (pd.DataFrame, np.ndarray)): - raise ValueError('data must be a pandas dataframe or numpy array (observations by subjects)') - - phase = np.angle(hilbert(_butter_bandpass_filter(pd.DataFrame(data), low_cut, high_cut, sampling_freq, order=order))) - - out = {'average_angle':_phase_mean_angle(phase)} - out['vector_length'] = _phase_vector_length(phase) - out['p'] = _phase_rayleigh_p(phase) + raise ValueError( + "data must be a pandas dataframe or numpy array (observations by subjects)" + ) + + phase = np.angle( + hilbert( + _butter_bandpass_filter( + pd.DataFrame(data), low_cut, high_cut, sampling_freq, order=order + ) + ) + ) + + out = {"average_angle": _phase_mean_angle(phase)} + out["vector_length"] = _phase_vector_length(phase) + out["p"] = _phase_rayleigh_p(phase) return out -def _butter_bandpass_filter(data, low_cut, high_cut, fs, axis = 0, order=5): - '''Apply a bandpass butterworth filter with zero-phase filtering + +def _butter_bandpass_filter(data, low_cut, high_cut, fs, axis=0, order=5): + """Apply a bandpass butterworth filter with zero-phase filtering Args: data: (np.array) @@ -1874,75 +2084,90 @@ def _butter_bandpass_filter(data, low_cut, high_cut, fs, axis = 0, order=5): fs: (float) sampling frequency in Hz axis: (int) axis to perform filtering. order: (int) filter order for butterworth bandpass - + Returns: bandpass filtered data. 
- ''' + """ nyq = 0.5 * fs - b, a = butter(order, [low_cut/nyq, high_cut/nyq], btype='band') + b, a = butter(order, [low_cut / nyq, high_cut / nyq], btype="band") return filtfilt(b, a, data, axis=axis) + def _phase_mean_angle(phase_angles): - '''Compute mean phase angle using circular statistics - - Can take 1D (observation for a single feature) or 2D (observation x feature) signals - - Implementation from: - - Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. - - Args: - phase_angles: (np.array) 1D or 2D array of phase angles - - Returns: - mean phase angle: (np.array) - - ''' - + """Compute mean phase angle using circular statistics + + Can take 1D (observation for a single feature) or 2D (observation x feature) signals + + Implementation from: + + Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. + + Args: + phase_angles: (np.array) 1D or 2D array of phase angles + + Returns: + mean phase angle: (np.array) + + """ + axis = 0 if len(phase_angles.shape) == 1 else 1 - return np.arctan2(np.mean(np.sin(phase_angles), axis=axis), np.mean(np.cos(phase_angles), axis=axis)) + return np.arctan2( + np.mean(np.sin(phase_angles), axis=axis), + np.mean(np.cos(phase_angles), axis=axis), + ) + def _phase_vector_length(phase_angles): - '''Compute vector length of phase angles using circular statistics - - Can take 1D (observation for a single feature) or 2D (observation x feature) signals - - Implementation from: - - Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. - - Args: - phase_angles: (np.array) 1D or 2D array of phase angles - - Returns: - phase angle vector length: (np.array) - - ''' - + """Compute vector length of phase angles using circular statistics + + Can take 1D (observation for a single feature) or 2D (observation x feature) signals + + Implementation from: + + Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. + + Args: + phase_angles: (np.array) 1D or 2D array of phase angles + + Returns: + phase angle vector length: (np.array) + + """ + axis = 0 if len(phase_angles.shape) == 1 else 1 - return np.float32(np.sqrt(np.mean(np.cos(phase_angles), axis=axis)**2 + np.mean(np.sin(phase_angles), axis=axis)**2)) + return np.float32( + np.sqrt( + np.mean(np.cos(phase_angles), axis=axis) ** 2 + + np.mean(np.sin(phase_angles), axis=axis) ** 2 + ) + ) + def _phase_rayleigh_p(phase_angles): - '''Compute the p-value of the phase_angles using the Rayleigh statistic - - Note: this test assumes every time point is independent, which is unlikely to be true in a timeseries with autocorrelation - - Implementation from: - - Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. - - Args: - phase_angles: (np.array) 1D or 2D array of phase angles - - Returns: - p-values: (np.array) - - ''' - + """Compute the p-value of the phase_angles using the Rayleigh statistic + + Note: this test assumes every time point is independent, which is unlikely to be true in a timeseries with autocorrelation + + Implementation from: + + Fisher, N. I. (1995). Statistical analysis of circular data. cambridge university press. 
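As an aside, the snippet below recomputes the circular statistics implemented by _phase_mean_angle, _phase_vector_length, and _phase_rayleigh_p with plain numpy, to make the formulas concrete. The sample size and phase distribution are arbitrary choices for illustration only.

import numpy as np

rng = np.random.default_rng(0)
n = 40
# Phase angles loosely clustered around 0 radians (wrapped-normal-like sample)
phases = rng.normal(loc=0.0, scale=0.5, size=n)

# Circular mean angle: arctan2 of the mean sine and mean cosine
mean_angle = np.arctan2(np.mean(np.sin(phases)), np.mean(np.cos(phases)))

# Resultant vector length R in [0, 1]; values near 1 indicate tight phase clustering
R = np.sqrt(np.mean(np.cos(phases)) ** 2 + np.mean(np.sin(phases)) ** 2)

# Rayleigh statistic and the small-sample (n <= 50) p-value approximation (Fisher, 1995)
Z = n * R ** 2
p = np.exp(-Z) * (
    1
    + (2 * Z - Z ** 2) / (4 * n)
    - (24 * Z - 132 * Z ** 2 + 76 * Z ** 3 - 9 * Z ** 4) / (288 * n ** 2)
)
print(mean_angle, R, p)  # clustered phases -> R well above 0 and a very small p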
+ + Args: + phase_angles: (np.array) 1D or 2D array of phase angles + + Returns: + p-values: (np.array) + + """ + n = len(phase_angles) if len(phase_angles.shape) == 1 else phase_angles.shape[1] - Z = n*_phase_vector_length(phase_angles)**2 + Z = n * _phase_vector_length(phase_angles) ** 2 if n <= 50: - return np.exp(-1*Z)*(1 + (2*Z - Z**2)/(4*n) - (24*Z - 132*Z**2 +76*Z**3 - 9*Z**4)/(288*n**2)) + return np.exp(-1 * Z) * ( + 1 + + (2 * Z - Z ** 2) / (4 * n) + - (24 * Z - 132 * Z ** 2 + 76 * Z ** 3 - 9 * Z ** 4) / (288 * n ** 2) + ) else: - return np.exp(-1*Z) \ No newline at end of file + return np.exp(-1 * Z) diff --git a/nltools/tests/conftest.py b/nltools/tests/conftest.py index 846a267e..de96ede7 100644 --- a/nltools/tests/conftest.py +++ b/nltools/tests/conftest.py @@ -3,10 +3,7 @@ import pandas as pd from sklearn.metrics import pairwise_distances from nltools.simulator import Simulator -from nltools.data import (Brain_Data, - Adjacency, - Groupby, - Design_Matrix) +from nltools.data import Brain_Data, Adjacency, Groupby, Design_Matrix from nltools.mask import create_sphere @@ -19,8 +16,9 @@ def sim_brain_data(): y = [0, 1] n_reps = 3 dat = sim.create_data(y, sigma, reps=n_reps) - dat.X = pd.DataFrame({'Intercept': np.ones(len(dat.Y)), - 'X1': np.array(dat.Y).flatten()}, index=None) + dat.X = pd.DataFrame( + {"Intercept": np.ones(len(dat.Y)), "X1": np.array(dat.Y).flatten()}, index=None + ) return dat @@ -28,47 +26,65 @@ def sim_brain_data(): def sim_design_matrix(): # Design matrices are specified in terms of sampling frequency TR = 2.0 - sampling_freq = 1. / TR - return Design_Matrix(np.random.randint(2, size=(500, 4)), - columns=['face_A', 'face_B', 'house_A', 'house_B'], - sampling_freq=sampling_freq) + sampling_freq = 1.0 / TR + return Design_Matrix( + np.random.randint(2, size=(500, 4)), + columns=["face_A", "face_B", "house_A", "house_B"], + sampling_freq=sampling_freq, + ) @pytest.fixture(scope="module") def sim_adjacency_single(): - sim = np.random.multivariate_normal([0, 0, 0, 0], [[1, 0.8, 0.1, 0.4], - [0.8, 1, 0.6, 0.1], - [0.1, 0.6, 1, 0.3], - [0.4, 0.1, 0.3, 1]], 100) - data = pairwise_distances(sim.T, metric='correlation') - labels = ['v_%s' % (x+1) for x in range(sim.shape[1])] + sim = np.random.multivariate_normal( + [0, 0, 0, 0], + [ + [1, 0.8, 0.1, 0.4], + [0.8, 1, 0.6, 0.1], + [0.1, 0.6, 1, 0.3], + [0.4, 0.1, 0.3, 1], + ], + 100, + ) + data = pairwise_distances(sim.T, metric="correlation") + labels = ["v_%s" % (x + 1) for x in range(sim.shape[1])] return Adjacency(data, labels=labels) @pytest.fixture(scope="module") def sim_adjacency_multiple(): n = 10 - sim = np.random.multivariate_normal([0, 0, 0, 0], [[1, 0.8, 0.1, 0.4], - [0.8, 1, 0.6, 0.1], - [0.1, 0.6, 1, 0.3], - [0.4, 0.1, 0.3, 1]], 100) - data = pairwise_distances(sim.T, metric='correlation') + sim = np.random.multivariate_normal( + [0, 0, 0, 0], + [ + [1, 0.8, 0.1, 0.4], + [0.8, 1, 0.6, 0.1], + [0.1, 0.6, 1, 0.3], + [0.4, 0.1, 0.3, 1], + ], + 100, + ) + data = pairwise_distances(sim.T, metric="correlation") dat_all = [] for t in range(n): tmp = data dat_all.append(tmp) - labels = ['v_%s' % (x+1) for x in range(sim.shape[1])] + labels = ["v_%s" % (x + 1) for x in range(sim.shape[1])] return Adjacency(dat_all, labels=labels) @pytest.fixture(scope="module") def sim_adjacency_directed(): - sim_directed = np.array([[1, 0.5, 0.3, 0.4], - [0.8, 1, 0.2, 0.1], - [0.7, 0.6, 1, 0.5], - [0.85, 0.4, 0.3, 1]]) - labels = ['v_%s' % (x+1) for x in range(sim_directed.shape[1])] - return Adjacency(sim_directed, 
matrix_type='directed', labels=labels) + sim_directed = np.array( + [ + [1, 0.5, 0.3, 0.4], + [0.8, 1, 0.2, 0.1], + [0.7, 0.6, 1, 0.5], + [0.85, 0.4, 0.3, 1], + ] + ) + labels = ["v_%s" % (x + 1) for x in range(sim_directed.shape[1])] + return Adjacency(sim_directed, matrix_type="directed", labels=labels) @pytest.fixture(scope="module") diff --git a/nltools/tests/test_adjacency.py b/nltools/tests/test_adjacency.py index 47ff2afb..bf730115 100644 --- a/nltools/tests/test_adjacency.py +++ b/nltools/tests/test_adjacency.py @@ -8,146 +8,212 @@ def test_type_single(sim_adjacency_single): - assert sim_adjacency_single.matrix_type == 'distance' - dat_single2 = Adjacency(1-sim_adjacency_single.squareform()) - assert dat_single2.matrix_type == 'similarity' + assert sim_adjacency_single.matrix_type == "distance" + dat_single2 = Adjacency(1 - sim_adjacency_single.squareform()) + assert dat_single2.matrix_type == "similarity" assert sim_adjacency_single.issymmetric + def test_type_directed(sim_adjacency_directed): assert not sim_adjacency_directed.issymmetric + def test_length(sim_adjacency_multiple): assert len(sim_adjacency_multiple) == sim_adjacency_multiple.data.shape[0] assert len(sim_adjacency_multiple[0]) == 1 + def test_indexing(sim_adjacency_multiple): assert len(sim_adjacency_multiple[0]) == 1 assert len(sim_adjacency_multiple[0:4]) == 4 assert len(sim_adjacency_multiple[0, 2, 3]) == 3 + def test_arithmetic(sim_adjacency_directed): - assert(sim_adjacency_directed+5).data[0] == sim_adjacency_directed.data[0]+5 - assert(sim_adjacency_directed-.5).data[0] == sim_adjacency_directed.data[0]-.5 - assert(sim_adjacency_directed*5).data[0] == sim_adjacency_directed.data[0]*5 - assert np.all(np.isclose((sim_adjacency_directed + sim_adjacency_directed).data, - (sim_adjacency_directed*2).data)) - assert np.all(np.isclose((sim_adjacency_directed*2 - sim_adjacency_directed).data, - sim_adjacency_directed.data)) - np.testing.assert_almost_equal(((2*sim_adjacency_directed/2) / sim_adjacency_directed).mean(), 1, decimal=4) + assert (sim_adjacency_directed + 5).data[0] == sim_adjacency_directed.data[0] + 5 + assert (sim_adjacency_directed - 0.5).data[0] == sim_adjacency_directed.data[ + 0 + ] - 0.5 + assert (sim_adjacency_directed * 5).data[0] == sim_adjacency_directed.data[0] * 5 + assert np.all( + np.isclose( + (sim_adjacency_directed + sim_adjacency_directed).data, + (sim_adjacency_directed * 2).data, + ) + ) + assert np.all( + np.isclose( + (sim_adjacency_directed * 2 - sim_adjacency_directed).data, + sim_adjacency_directed.data, + ) + ) + np.testing.assert_almost_equal( + ((2 * sim_adjacency_directed / 2) / sim_adjacency_directed).mean(), 1, decimal=4 + ) + def test_copy(sim_adjacency_multiple): assert np.all(sim_adjacency_multiple.data == sim_adjacency_multiple.copy().data) + def test_squareform(sim_adjacency_multiple): assert len(sim_adjacency_multiple.squareform()) == len(sim_adjacency_multiple) - assert sim_adjacency_multiple[0].squareform().shape == sim_adjacency_multiple[0].square_shape() + assert ( + sim_adjacency_multiple[0].squareform().shape + == sim_adjacency_multiple[0].square_shape() + ) + def test_write_multiple(sim_adjacency_multiple, tmpdir): - sim_adjacency_multiple.write(os.path.join(str(tmpdir.join('Test.csv'))), - method='long') - dat_multiple2 = Adjacency(os.path.join(str(tmpdir.join('Test.csv'))), - matrix_type='distance_flat') + sim_adjacency_multiple.write( + os.path.join(str(tmpdir.join("Test.csv"))), method="long" + ) + dat_multiple2 = Adjacency( + 
os.path.join(str(tmpdir.join("Test.csv"))), matrix_type="distance_flat" + ) assert np.all(np.isclose(sim_adjacency_multiple.data, dat_multiple2.data)) # Test i/o for hdf5 - sim_adjacency_multiple.write(os.path.join(str(tmpdir.join('test_write.h5')))) - b = Adjacency(os.path.join(tmpdir.join('test_write.h5'))) - for k in ['Y', 'matrix_type', 'is_single_matrix', 'issymmetric', 'data']: - if k == 'data': + sim_adjacency_multiple.write(os.path.join(str(tmpdir.join("test_write.h5")))) + b = Adjacency(os.path.join(tmpdir.join("test_write.h5"))) + for k in ["Y", "matrix_type", "is_single_matrix", "issymmetric", "data"]: + if k == "data": assert np.allclose(b.__dict__[k], sim_adjacency_multiple.__dict__[k]) - elif k == 'Y': + elif k == "Y": assert all(b.__dict__[k].eq(sim_adjacency_multiple.__dict__[k]).values) else: assert b.__dict__[k] == sim_adjacency_multiple.__dict__[k] def test_write_directed(sim_adjacency_directed, tmpdir): - sim_adjacency_directed.write(os.path.join(str(tmpdir.join('Test.csv'))), - method='long') - dat_directed2 = Adjacency(os.path.join(str(tmpdir.join('Test.csv'))), - matrix_type='directed_flat') + sim_adjacency_directed.write( + os.path.join(str(tmpdir.join("Test.csv"))), method="long" + ) + dat_directed2 = Adjacency( + os.path.join(str(tmpdir.join("Test.csv"))), matrix_type="directed_flat" + ) assert np.all(np.isclose(sim_adjacency_directed.data, dat_directed2.data)) def test_mean(sim_adjacency_multiple): assert isinstance(sim_adjacency_multiple.mean(axis=0), Adjacency) assert len(sim_adjacency_multiple.mean(axis=0)) == 1 - assert len(sim_adjacency_multiple.mean(axis=1)) == len(np.mean(sim_adjacency_multiple.data, axis=1)) + assert len(sim_adjacency_multiple.mean(axis=1)) == len( + np.mean(sim_adjacency_multiple.data, axis=1) + ) def test_std(sim_adjacency_multiple): assert isinstance(sim_adjacency_multiple.std(axis=0), Adjacency) assert len(sim_adjacency_multiple.std(axis=0)) == 1 - assert len(sim_adjacency_multiple.std(axis=1)) == len(np.std(sim_adjacency_multiple.data, axis=1)) + assert len(sim_adjacency_multiple.std(axis=1)) == len( + np.std(sim_adjacency_multiple.data, axis=1) + ) def test_similarity(sim_adjacency_multiple): n_permute = 1000 - assert len(sim_adjacency_multiple.similarity( - sim_adjacency_multiple[0].squareform(), perm_type='1d', - n_permute=n_permute)) == len(sim_adjacency_multiple) - assert len(sim_adjacency_multiple.similarity(sim_adjacency_multiple[0].squareform(), perm_type='1d', - metric='pearson', n_permute=n_permute)) == len(sim_adjacency_multiple) - assert len(sim_adjacency_multiple.similarity(sim_adjacency_multiple[0].squareform(), perm_type='1d', - metric='kendall', n_permute=n_permute)) == len(sim_adjacency_multiple) + squaremat = sim_adjacency_multiple[0].squareform() + + res = sim_adjacency_multiple.similarity( + squaremat, perm_type="1d", n_permute=n_permute + ) + assert len(res) == len(sim_adjacency_multiple) + + res = sim_adjacency_multiple.similarity( + squaremat, perm_type="1d", metric="pearson", n_permute=n_permute + ) + assert len(res) == len(sim_adjacency_multiple) + + res = sim_adjacency_multiple.similarity( + squaremat, perm_type="1d", metric="kendall", n_permute=n_permute + ) + assert len(res) == len(sim_adjacency_multiple) data2 = sim_adjacency_multiple[0].copy() - data2.data = data2.data + np.random.randn(len(data2.data))*.1 - assert sim_adjacency_multiple[0].similarity(data2.squareform(), perm_type=None, n_permute=n_permute)['correlation'] > .5 - assert sim_adjacency_multiple[0].similarity(data2.squareform(), 
perm_type='1d', n_permute=n_permute)['correlation'] > .5 - assert sim_adjacency_multiple[0].similarity(data2.squareform(), perm_type='2d', n_permute=n_permute)['correlation'] > .5 + data2.data = data2.data + np.random.randn(len(data2.data)) * 0.1 + assert ( + sim_adjacency_multiple[0].similarity( + data2.squareform(), perm_type=None, n_permute=n_permute + )["correlation"] + > 0.5 + ) + assert ( + sim_adjacency_multiple[0].similarity( + data2.squareform(), perm_type="1d", n_permute=n_permute + )["correlation"] + > 0.5 + ) + assert ( + sim_adjacency_multiple[0].similarity( + data2.squareform(), perm_type="2d", n_permute=n_permute + )["correlation"] + > 0.5 + ) def test_similarity_matrix_permutation(): - dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 190) + dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 190) x = Adjacency(dat[:, 0]) y = Adjacency(dat[:, 1]) - stats = x.similarity(y, perm_type='2d', n_permute=1000) - assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001) + stats = x.similarity(y, perm_type="2d", n_permute=1000) + assert ( + (stats["correlation"] > 0.4) + & (stats["correlation"] < 0.85) + & (stats["p"] < 0.001) + ) stats = x.similarity(y, perm_type=None) - assert (stats['correlation'] > .4) & (stats['correlation'] < .85) + assert (stats["correlation"] > 0.4) & (stats["correlation"] < 0.85) def test_directed_similarity(): - dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 400) - x = Adjacency(dat[:, 0].reshape(20, 20), matrix_type='directed') - y = Adjacency(dat[:, 1].reshape(20, 20), matrix_type='directed') + dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 400) + x = Adjacency(dat[:, 0].reshape(20, 20), matrix_type="directed") + y = Adjacency(dat[:, 1].reshape(20, 20), matrix_type="directed") # Ignore diagonal - stats = x.similarity(y, perm_type='1d', ignore_diagonal=True, n_permute=1000) - assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001) + stats = x.similarity(y, perm_type="1d", ignore_diagonal=True, n_permute=1000) + assert ( + (stats["correlation"] > 0.4) + & (stats["correlation"] < 0.85) + & (stats["p"] < 0.001) + ) # Use diagonal stats = x.similarity(y, perm_type=None, ignore_diagonal=False) - assert (stats['correlation'] > .4) & (stats['correlation'] < .85) + assert (stats["correlation"] > 0.4) & (stats["correlation"] < 0.85) # Error out but make usre TypeError is the reason why try: - x.similarity(y, perm_type='2d') - except TypeError as e: - pass + x.similarity(y, perm_type="2d") + except TypeError as _: # noqa + pass def test_distance(sim_adjacency_multiple): assert isinstance(sim_adjacency_multiple.distance(), Adjacency) - assert sim_adjacency_multiple.distance().square_shape()[0] == len(sim_adjacency_multiple) + assert sim_adjacency_multiple.distance().square_shape()[0] == len( + sim_adjacency_multiple + ) def test_ttest(sim_adjacency_multiple): out = sim_adjacency_multiple.ttest() - assert len(out['t']) == 1 - assert len(out['p']) == 1 - assert out['t'].shape()[0] == sim_adjacency_multiple.shape()[1] - assert out['p'].shape()[0] == sim_adjacency_multiple.shape()[1] + assert len(out["t"]) == 1 + assert len(out["p"]) == 1 + assert out["t"].shape()[0] == sim_adjacency_multiple.shape()[1] + assert out["p"].shape()[0] == sim_adjacency_multiple.shape()[1] out = sim_adjacency_multiple.ttest(permutation=True, n_permute=1000) - assert len(out['t']) == 1 - assert len(out['p']) == 1 - assert out['t'].shape()[0] == 
sim_adjacency_multiple.shape()[1] - assert out['p'].shape()[0] == sim_adjacency_multiple.shape()[1] + assert len(out["t"]) == 1 + assert len(out["p"]) == 1 + assert out["t"].shape()[0] == sim_adjacency_multiple.shape()[1] + assert out["p"].shape()[0] == sim_adjacency_multiple.shape()[1] def test_threshold(sim_adjacency_directed): - assert np.sum(sim_adjacency_directed.threshold(upper=.8).data == 0) == 10 - assert sim_adjacency_directed.threshold(upper=.8, binarize=True).data[0] - assert np.sum(sim_adjacency_directed.threshold(upper='70%', binarize=True).data) == 5 - assert np.sum(sim_adjacency_directed.threshold(lower=.4, binarize=True).data) == 6 + assert np.sum(sim_adjacency_directed.threshold(upper=0.8).data == 0) == 10 + assert sim_adjacency_directed.threshold(upper=0.8, binarize=True).data[0] + assert ( + np.sum(sim_adjacency_directed.threshold(upper="70%", binarize=True).data) == 5 + ) + assert np.sum(sim_adjacency_directed.threshold(lower=0.4, binarize=True).data) == 6 def test_graph_directed(sim_adjacency_directed): @@ -168,10 +234,10 @@ def test_append(sim_adjacency_single): def test_bootstrap(sim_adjacency_multiple): n_samples = 3 - b = sim_adjacency_multiple.bootstrap('mean', n_samples=n_samples) - assert isinstance(b['Z'], Adjacency) - b = sim_adjacency_multiple.bootstrap('std', n_samples=n_samples) - assert isinstance(b['Z'], Adjacency) + b = sim_adjacency_multiple.bootstrap("mean", n_samples=n_samples) + assert isinstance(b["Z"], Adjacency) + b = sim_adjacency_multiple.bootstrap("std", n_samples=n_samples) + assert isinstance(b["Z"], Adjacency) def test_plot(sim_adjacency_multiple): @@ -184,15 +250,32 @@ def test_plot_mds(sim_adjacency_single): def test_similarity_conversion(sim_adjacency_single): - np.testing.assert_approx_equal(-1, pearsonr(sim_adjacency_single.data, sim_adjacency_single.distance_to_similarity().data)[0], significant=1) - np.testing.assert_approx_equal(-1, pearsonr(sim_adjacency_single.distance_to_similarity().data, sim_adjacency_single.distance_to_similarity().similarity_to_distance().data)[0], significant=1) + np.testing.assert_approx_equal( + -1, + pearsonr( + sim_adjacency_single.data, + sim_adjacency_single.distance_to_similarity().data, + )[0], + significant=1, + ) + np.testing.assert_approx_equal( + -1, + pearsonr( + sim_adjacency_single.distance_to_similarity().data, + sim_adjacency_single.distance_to_similarity().similarity_to_distance().data, + )[0], + significant=1, + ) def test_cluster_mean(): - test_dat = Adjacency(block_diag(np.ones((4, 4)), np.ones((4, 4))*2, np.ones((4, 4))*3), matrix_type='similarity') - test_labels = np.concatenate([np.ones(4)*x for x in range(1, 4)]) + test_dat = Adjacency( + block_diag(np.ones((4, 4)), np.ones((4, 4)) * 2, np.ones((4, 4)) * 3), + matrix_type="similarity", + ) + test_labels = np.concatenate([np.ones(4) * x for x in range(1, 4)]) out = test_dat.within_cluster_mean(clusters=test_labels) - assert np.sum(np.array([1, 2, 3])-np.array([out[x] for x in out])) == 0 + assert np.sum(np.array([1, 2, 3]) - np.array([out[x] for x in out])) == 0 def test_regression(): @@ -200,48 +283,72 @@ def test_regression(): m1 = block_diag(np.ones((4, 4)), np.zeros((4, 4)), np.zeros((4, 4))) m2 = block_diag(np.zeros((4, 4)), np.ones((4, 4)), np.zeros((4, 4))) m3 = block_diag(np.zeros((4, 4)), np.zeros((4, 4)), np.ones((4, 4))) - Y = Adjacency(m1*1+m2*2+m3*3, matrix_type='similarity') - X = Adjacency([m1, m2, m3], matrix_type='similarity') + Y = Adjacency(m1 * 1 + m2 * 2 + m3 * 3, matrix_type="similarity") + X = Adjacency([m1, m2, 
m3], matrix_type="similarity") stats = Y.regress(X) - assert np.allclose(stats['beta'], np.array([1, 2, 3])) + assert np.allclose(stats["beta"], np.array([1, 2, 3])) # Test Design_Matrix Regression n = 10 - d = Adjacency([block_diag(np.ones((4, 4))+np.random.randn(4, 4)*.1, np.zeros((8, 8))) for x in range(n)], - matrix_type='similarity') + d = Adjacency( + [ + block_diag(np.ones((4, 4)) + np.random.randn(4, 4) * 0.1, np.zeros((8, 8))) + for _ in range(n) + ], + matrix_type="similarity", + ) X = Design_Matrix(np.ones(n)) stats = d.regress(X) - out = stats['beta'].within_cluster_mean(clusters=['Group1']*4 + ['Group2']*8) - assert np.allclose(np.array([out['Group1'], out['Group2']]), np.array([1, 0]), rtol=1e-01) # np.allclose(np.sum(stats['beta']-np.array([1,2,3])),0) + out = stats["beta"].within_cluster_mean(clusters=["Group1"] * 4 + ["Group2"] * 8) + assert np.allclose( + np.array([out["Group1"], out["Group2"]]), np.array([1, 0]), rtol=1e-01 + ) + # np.allclose(np.sum(stats['beta']-np.array([1,2,3])),0) def test_social_relations_model(): - data = Adjacency(np.array([[np.nan, 8, 5, 10], - [7, np.nan, 7, 6], - [8, 7, np.nan, 5], - [4, 5, 0, np.nan]]), matrix_type='directed') + data = Adjacency( + np.array( + [ + [np.nan, 8, 5, 10], + [7, np.nan, 7, 6], + [8, 7, np.nan, 5], + [4, 5, 0, np.nan], + ] + ), + matrix_type="directed", + ) data2 = data.append(data) results1 = data.social_relations_model() assert isinstance(data.social_relations_model(), pd.Series) assert isinstance(data2.social_relations_model(), pd.DataFrame) - assert len(results1['actor_effect']) == data.square_shape()[0] - assert results1['relationship_effect'].shape == data.square_shape() - np.testing.assert_approx_equal(results1['actor_variance'], 3.33, significant=2) - np.testing.assert_approx_equal(results1['partner_variance'], 0.66, significant=2) - np.testing.assert_approx_equal(results1['relationship_variance'], 3.33, significant=2) - np.testing.assert_approx_equal(results1['actor_partner_correlation'], 0.22, significant=2) - np.testing.assert_approx_equal(results1['dyadic_reciprocity_correlation'], 0.2, significant=2) + assert len(results1["actor_effect"]) == data.square_shape()[0] + assert results1["relationship_effect"].shape == data.square_shape() + np.testing.assert_approx_equal(results1["actor_variance"], 3.33, significant=2) + np.testing.assert_approx_equal(results1["partner_variance"], 0.66, significant=2) + np.testing.assert_approx_equal( + results1["relationship_variance"], 3.33, significant=2 + ) + np.testing.assert_approx_equal( + results1["actor_partner_correlation"], 0.22, significant=2 + ) + np.testing.assert_approx_equal( + results1["dyadic_reciprocity_correlation"], 0.2, significant=2 + ) # # Test stats_label_distance - FAILED - Need to sort this out # labels = np.array(['group1','group1','group2','group2']) # stats = dat_multiple[0].stats_label_distance(labels) # assert np.isclose(stats['group1']['mean'],-1*stats['group2']['mean']) + def test_isc(sim_adjacency_single): n_boot = 100 - for metric in ['median', 'mean']: - stats = sim_adjacency_single.isc(metric=metric, n_bootstraps=n_boot, return_bootstraps=True) - assert (stats['isc'] > -1) & (stats['isc'] < 1) - assert (stats['p'] > 0) & (stats['p'] < 1) - assert len(stats['null_distribution']) == n_boot + for metric in ["median", "mean"]: + stats = sim_adjacency_single.isc( + metric=metric, n_bootstraps=n_boot, return_bootstraps=True + ) + assert (stats["isc"] > -1) & (stats["isc"] < 1) + assert (stats["p"] > 0) & (stats["p"] < 1) + assert 
len(stats["null_distribution"]) == n_boot diff --git a/nltools/tests/test_analysis.py b/nltools/tests/test_analysis.py index acc944bc..8074913b 100644 --- a/nltools/tests/test_analysis.py +++ b/nltools/tests/test_analysis.py @@ -2,36 +2,43 @@ from nltools.simulator import Simulator from nltools.analysis import Roc import matplotlib -matplotlib.use('TkAgg') + +matplotlib.use("TkAgg") def test_roc(tmpdir): sim = Simulator() - sigma = .1 + sigma = 0.1 y = [0, 1] n_reps = 10 # output_dir = str(tmpdir) dat = sim.create_data(y, sigma, reps=n_reps, output_dir=None) # dat = Brain_Data(data=sim.data, Y=sim.y) - algorithm = 'svm' + algorithm = "svm" # output_dir = str(tmpdir) # cv = {'type': 'kfolds', 'n_folds': 5, 'subject_id': sim.rep_id} - extra = {'kernel': 'linear'} + extra = {"kernel": "linear"} output = dat.predict(algorithm=algorithm, plot=False, **extra) # Single-Interval - roc = Roc(input_values=output['yfit_all'], binary_outcome=output['Y'] == 1) + roc = Roc(input_values=output["yfit_all"], binary_outcome=output["Y"] == 1) roc.calculate() roc.summary() assert roc.accuracy == 1 # Forced Choice - binary_outcome = output['Y'] == 1 - forced_choice = list(range(int(len(binary_outcome)/2))) + list(range(int(len(binary_outcome)/2))) + binary_outcome = output["Y"] == 1 + forced_choice = list(range(int(len(binary_outcome) / 2))) + list( + range(int(len(binary_outcome) / 2)) + ) forced_choice = forced_choice.sort() - roc_fc = Roc(input_values=output['yfit_all'], binary_outcome=binary_outcome, forced_choice=forced_choice) + roc_fc = Roc( + input_values=output["yfit_all"], + binary_outcome=binary_outcome, + forced_choice=forced_choice, + ) roc_fc.calculate() assert roc_fc.accuracy == 1 assert roc_fc.accuracy == roc_fc.auc == roc_fc.sensitivity == roc_fc.specificity diff --git a/nltools/tests/test_brain_data.py b/nltools/tests/test_brain_data.py index bd3afbce..ee5c7dce 100644 --- a/nltools/tests/test_brain_data.py +++ b/nltools/tests/test_brain_data.py @@ -4,11 +4,10 @@ import nibabel as nb import pandas as pd from nltools.simulator import Simulator -from nltools.data import (Brain_Data, - Adjacency, - Groupby) +from nltools.data import Brain_Data, Adjacency, Groupby from nltools.stats import threshold, align from nltools.mask import create_sphere, roi_to_brain + # from nltools.prefs import MNI_Template @@ -31,96 +30,116 @@ def test_load(tmpdir): # shape_3d = (60, 72, 60) # shape_2d = (6, 71020) - y = pd.read_csv(os.path.join(str(tmpdir.join('y.csv'))), header=None, index_col=None) + y = pd.read_csv( + os.path.join(str(tmpdir.join("y.csv"))), header=None, index_col=None + ) # holdout = pd.read_csv(os.path.join(str(tmpdir.join('rep_id.csv'))), header=None, index_col=None) # Test load list of 4D images - file_list = [str(tmpdir.join('data.nii.gz')), str(tmpdir.join('data.nii.gz'))] + file_list = [str(tmpdir.join("data.nii.gz")), str(tmpdir.join("data.nii.gz"))] dat = Brain_Data(file_list) dat = Brain_Data([nb.load(x) for x in file_list]) # Test load list - dat = Brain_Data(data=str(tmpdir.join('data.nii.gz')), Y=y) + dat = Brain_Data(data=str(tmpdir.join("data.nii.gz")), Y=y) # Test Write - dat.write(os.path.join(str(tmpdir.join('test_write.nii')))) - assert Brain_Data(os.path.join(str(tmpdir.join('test_write.nii')))) + dat.write(os.path.join(str(tmpdir.join("test_write.nii")))) + assert Brain_Data(os.path.join(str(tmpdir.join("test_write.nii")))) # Test i/o for hdf5 - dat.write(os.path.join(str(tmpdir.join('test_write.h5')))) - b = Brain_Data(os.path.join(tmpdir.join('test_write.h5'))) - for k in 
['X', 'Y', 'mask', 'nifti_masker', 'file_name', 'data']: - if k == 'data': + dat.write(os.path.join(str(tmpdir.join("test_write.h5")))) + b = Brain_Data(os.path.join(tmpdir.join("test_write.h5"))) + for k in ["X", "Y", "mask", "nifti_masker", "file_name", "data"]: + if k == "data": assert np.allclose(b.__dict__[k], dat.__dict__[k]) - elif k in ['X', 'Y']: + elif k in ["X", "Y"]: assert all(b.__dict__[k].eq(dat.__dict__[k]).values) - elif k == 'mask': + elif k == "mask": assert np.allclose(b.__dict__[k].affine, dat.__dict__[k].affine) assert np.allclose(b.__dict__[k].get_data(), dat.__dict__[k].get_data()) assert b.__dict__[k].get_filename() == dat.__dict__[k].get_filename() - elif k == 'nifti_masker': + elif k == "nifti_masker": assert np.allclose(b.__dict__[k].affine_, dat.__dict__[k].affine_) - assert np.allclose(b.__dict__[k].mask_img.get_data(), dat.__dict__[k].mask_img.get_data()) + assert np.allclose( + b.__dict__[k].mask_img.get_data(), dat.__dict__[k].mask_img.get_data() + ) else: assert b.__dict__[k] == dat.__dict__[k] - + def test_shape(sim_brain_data): assert sim_brain_data.shape() == shape_2d + def test_mean(sim_brain_data): assert sim_brain_data.mean().shape()[0] == shape_2d[1] assert sim_brain_data.mean().shape()[0] == shape_2d[1] assert len(sim_brain_data.mean(axis=1)) == shape_2d[0] with pytest.raises(ValueError): - sim_brain_data.mean(axis='1') + sim_brain_data.mean(axis="1") assert isinstance(sim_brain_data[0].mean(), (float, np.floating)) + def test_median(sim_brain_data): assert sim_brain_data.median().shape()[0] == shape_2d[1] assert sim_brain_data.median().shape()[0] == shape_2d[1] assert len(sim_brain_data.median(axis=1)) == shape_2d[0] with pytest.raises(ValueError): - sim_brain_data.median(axis='1') + sim_brain_data.median(axis="1") assert isinstance(sim_brain_data[0].median(), (float, np.floating)) + def test_std(sim_brain_data): assert sim_brain_data.std().shape()[0] == shape_2d[1] + def test_sum(sim_brain_data): s = sim_brain_data.sum() assert s.shape() == sim_brain_data[1].shape() + def test_add(sim_brain_data): new = sim_brain_data + sim_brain_data assert new.shape() == shape_2d value = 10 - assert(value + sim_brain_data[0]).mean() == (sim_brain_data[0] + value).mean() + assert (value + sim_brain_data[0]).mean() == (sim_brain_data[0] + value).mean() + def test_subtract(sim_brain_data): new = sim_brain_data - sim_brain_data assert new.shape() == shape_2d value = 10 - assert (-value-(-1)*sim_brain_data[0]).mean() == (sim_brain_data[0]-value).mean() + assert (-value - (-1) * sim_brain_data[0]).mean() == ( + sim_brain_data[0] - value + ).mean() + def test_multiply(sim_brain_data): new = sim_brain_data * sim_brain_data assert new.shape() == shape_2d value = 10 - assert(value * sim_brain_data[0]).mean() == (sim_brain_data[0] * value).mean() - c1 = [.5, .5, -.5, -.5] - new = sim_brain_data[0:4]*c1 - new2 = sim_brain_data[0]*.5 + sim_brain_data[1]*.5 - sim_brain_data[2]*.5 - sim_brain_data[3]*.5 - np.testing.assert_almost_equal((new-new2).sum(), 0, decimal=4) + assert (value * sim_brain_data[0]).mean() == (sim_brain_data[0] * value).mean() + c1 = [0.5, 0.5, -0.5, -0.5] + new = sim_brain_data[0:4] * c1 + new2 = ( + sim_brain_data[0] * 0.5 + + sim_brain_data[1] * 0.5 + - sim_brain_data[2] * 0.5 + - sim_brain_data[3] * 0.5 + ) + np.testing.assert_almost_equal((new - new2).sum(), 0, decimal=4) + def test_divide(sim_brain_data): new = sim_brain_data / sim_brain_data assert new.shape() == shape_2d np.testing.assert_almost_equal(new.mean(axis=0).mean(), 1, decimal=6) value = 
10 - new2 = sim_brain_data/value - np.testing.assert_almost_equal(((new2*value) - new2).mean().mean(), 0, decimal=2) - + new2 = sim_brain_data / value + np.testing.assert_almost_equal(((new2 * value) - new2).mean().mean(), 0, decimal=2) + + def test_indexing(sim_brain_data): index = [0, 3, 1] assert len(sim_brain_data[index]) == len(index) @@ -134,70 +153,89 @@ def test_indexing(sim_brain_data): assert d.shape[0:3] == shape_3d assert Brain_Data(d) + def test_concatenate(sim_brain_data): out = Brain_Data([x for x in sim_brain_data]) assert isinstance(out, Brain_Data) assert len(out) == len(sim_brain_data) + def test_append(sim_brain_data): - assert sim_brain_data.append(sim_brain_data).shape()[0] == shape_2d[0]*2 + assert sim_brain_data.append(sim_brain_data).shape()[0] == shape_2d[0] * 2 + def test_ttest(sim_brain_data): out = sim_brain_data.ttest() - assert out['t'].shape()[0] == shape_2d[1] + assert out["t"].shape()[0] == shape_2d[1] + def test_distance(sim_brain_data): - distance = sim_brain_data.distance(metric='correlation') + distance = sim_brain_data.distance(metric="correlation") assert isinstance(distance, Adjacency) assert distance.square_shape()[0] == shape_2d[0] + def test_regress(sim_brain_data): - sim_brain_data.X = pd.DataFrame({'Intercept': np.ones(len(sim_brain_data.Y)), - 'X1': np.array(sim_brain_data.Y).flatten()}, index=None) + sim_brain_data.X = pd.DataFrame( + { + "Intercept": np.ones(len(sim_brain_data.Y)), + "X1": np.array(sim_brain_data.Y).flatten(), + }, + index=None, + ) # OLS out = sim_brain_data.regress() - assert type(out['beta'].data) == np.ndarray - assert type(out['t'].data) == np.ndarray - assert type(out['p'].data) == np.ndarray - assert type(out['residual'].data) == np.ndarray - assert out['beta'].shape() == (2, shape_2d[1]) - assert out['t'][1].shape()[0] == shape_2d[1] + assert type(out["beta"].data) == np.ndarray + assert type(out["t"].data) == np.ndarray + assert type(out["p"].data) == np.ndarray + assert type(out["residual"].data) == np.ndarray + assert out["beta"].shape() == (2, shape_2d[1]) + assert out["t"][1].shape()[0] == shape_2d[1] # Robust OLS - out = sim_brain_data.regress(mode='robust') - assert type(out['beta'].data) == np.ndarray - assert type(out['t'].data) == np.ndarray - assert type(out['p'].data) == np.ndarray - assert type(out['residual'].data) == np.ndarray - assert out['beta'].shape() == (2, shape_2d[1]) - assert out['t'][1].shape()[0] == shape_2d[1] + out = sim_brain_data.regress(mode="robust") + assert type(out["beta"].data) == np.ndarray + assert type(out["t"].data) == np.ndarray + assert type(out["p"].data) == np.ndarray + assert type(out["residual"].data) == np.ndarray + assert out["beta"].shape() == (2, shape_2d[1]) + assert out["t"][1].shape()[0] == shape_2d[1] # Test threshold i = 1 - tt = threshold(out['t'][i], out['p'][i], .05) + tt = threshold(out["t"][i], out["p"][i], 0.05) assert isinstance(tt, Brain_Data) + def test_randomise(sim_brain_data): - sim_brain_data.X = pd.DataFrame({'Intercept': np.ones(len(sim_brain_data.Y))}) + sim_brain_data.X = pd.DataFrame({"Intercept": np.ones(len(sim_brain_data.Y))}) out = sim_brain_data.randomise(n_permute=10) - assert type(out['beta'].data) == np.ndarray - assert type(out['t'].data) == np.ndarray - assert type(out['p'].data) == np.ndarray - assert out['beta'].shape() == (shape_2d[1],) - assert out['t'].shape() == (shape_2d[1],) - - sim_brain_data.X = pd.DataFrame({ - 'Intercept': np.ones(len(sim_brain_data.Y)), - 'X1': np.random.randn(len(sim_brain_data.Y)) - }) + assert 
type(out["beta"].data) == np.ndarray + assert type(out["t"].data) == np.ndarray + assert type(out["p"].data) == np.ndarray + assert out["beta"].shape() == (shape_2d[1],) + assert out["t"].shape() == (shape_2d[1],) + + sim_brain_data.X = pd.DataFrame( + { + "Intercept": np.ones(len(sim_brain_data.Y)), + "X1": np.random.randn(len(sim_brain_data.Y)), + } + ) out = sim_brain_data.randomise(n_permute=10) - assert type(out['beta'].data) == np.ndarray - assert type(out['t'].data) == np.ndarray - assert type(out['p'].data) == np.ndarray - assert out['beta'].shape() == (2, shape_2d[1],) - assert out['t'].shape() == (2, shape_2d[1],) + assert type(out["beta"].data) == np.ndarray + assert type(out["t"].data) == np.ndarray + assert type(out["p"].data) == np.ndarray + assert out["beta"].shape() == ( + 2, + shape_2d[1], + ) + assert out["t"].shape() == ( + 2, + shape_2d[1], + ) def test_apply_mask(sim_brain_data): @@ -211,50 +249,69 @@ def test_apply_mask(sim_brain_data): def test_extract_roi(sim_brain_data): mask = create_sphere([12, 10, -8], radius=10) - assert len(sim_brain_data.extract_roi(mask, metric='mean')) == shape_2d[0] - assert len(sim_brain_data.extract_roi(mask, metric='median')) == shape_2d[0] + assert len(sim_brain_data.extract_roi(mask, metric="mean")) == shape_2d[0] + assert len(sim_brain_data.extract_roi(mask, metric="median")) == shape_2d[0] n_components = 2 - assert sim_brain_data.extract_roi(mask, metric='pca', n_components=n_components).shape == (n_components, shape_2d[0]) + assert sim_brain_data.extract_roi( + mask, metric="pca", n_components=n_components + ).shape == (n_components, shape_2d[0]) with pytest.raises(NotImplementedError): - sim_brain_data.extract_roi(mask, metric='p') - - assert isinstance(sim_brain_data[0].extract_roi(mask, metric='mean'), (float, np.floating)) - assert isinstance(sim_brain_data[0].extract_roi(mask, metric='median'), (float, np.floating)) + sim_brain_data.extract_roi(mask, metric="p") + + assert isinstance( + sim_brain_data[0].extract_roi(mask, metric="mean"), (float, np.floating) + ) + assert isinstance( + sim_brain_data[0].extract_roi(mask, metric="median"), (float, np.floating) + ) with pytest.raises(ValueError): - sim_brain_data[0].extract_roi(mask, metric='pca') + sim_brain_data[0].extract_roi(mask, metric="pca") with pytest.raises(NotImplementedError): - sim_brain_data[0].extract_roi(mask, metric='p') + sim_brain_data[0].extract_roi(mask, metric="p") s1 = create_sphere([15, 10, -8], radius=10) s2 = create_sphere([-15, 10, -8], radius=10) s3 = create_sphere([0, -15, -8], radius=10) masks = Brain_Data([s1, s2, s3]) - mask = roi_to_brain([1,2,3], masks) - assert len(sim_brain_data[0].extract_roi(mask, metric='mean')) == len(masks) - assert len(sim_brain_data[0].extract_roi(mask, metric='median')) == len(masks) - assert sim_brain_data.extract_roi(mask, metric='mean').shape == (len(masks), shape_2d[0]) - assert sim_brain_data.extract_roi(mask, metric='median').shape == (len(masks), shape_2d[0]) - assert len(sim_brain_data.extract_roi(mask, metric='pca', n_components=n_components)) == len(masks) + mask = roi_to_brain([1, 2, 3], masks) + assert len(sim_brain_data[0].extract_roi(mask, metric="mean")) == len(masks) + assert len(sim_brain_data[0].extract_roi(mask, metric="median")) == len(masks) + assert sim_brain_data.extract_roi(mask, metric="mean").shape == ( + len(masks), + shape_2d[0], + ) + assert sim_brain_data.extract_roi(mask, metric="median").shape == ( + len(masks), + shape_2d[0], + ) + assert len( + sim_brain_data.extract_roi(mask, 
metric="pca", n_components=n_components) + ) == len(masks) + def test_r_to_z(sim_brain_data): z = sim_brain_data.r_to_z() assert z.shape() == sim_brain_data.shape() + def test_copy(sim_brain_data): d_copy = sim_brain_data.copy() assert d_copy.shape() == sim_brain_data.shape() + def test_detrend(sim_brain_data): detrend = sim_brain_data.detrend() assert detrend.shape() == sim_brain_data.shape() + def test_standardize(sim_brain_data): s = sim_brain_data.standardize() assert s.shape() == sim_brain_data.shape() - assert np.isclose(np.sum(s.mean().data), 0, atol=.1) - s = sim_brain_data.standardize(method='zscore') + assert np.isclose(np.sum(s.mean().data), 0, atol=0.1) + s = sim_brain_data.standardize(method="zscore") assert s.shape() == sim_brain_data.shape() - assert np.isclose(np.sum(s.mean().data), 0, atol=.1) + assert np.isclose(np.sum(s.mean().data), 0, atol=0.1) + def test_smooth(sim_brain_data): smoothed = sim_brain_data.smooth(5.0) @@ -263,95 +320,112 @@ def test_smooth(sim_brain_data): smoothed = sim_brain_data[0].smooth(5.0) assert len(smoothed.shape()) == 1 + def test_groupby_aggregate(sim_brain_data): s1 = create_sphere([12, 10, -8], radius=10) s2 = create_sphere([22, -2, -22], radius=10) mask = Brain_Data([s1, s2]) d = sim_brain_data.groupby(mask) assert isinstance(d, Groupby) - mn = sim_brain_data.aggregate(mask, 'mean') + mn = sim_brain_data.aggregate(mask, "mean") assert isinstance(mn, Brain_Data) assert len(mn.shape()) == 1 + def test_threshold(): s1 = create_sphere([12, 10, -8], radius=10) s2 = create_sphere([22, -2, -22], radius=10) - mask = Brain_Data(s1)*5 + mask = Brain_Data(s1) * 5 mask = mask + Brain_Data(s2) - m1 = mask.threshold(upper=.5) + m1 = mask.threshold(upper=0.5) m2 = mask.threshold(upper=3) - m3 = mask.threshold(upper='98%') - m4 = Brain_Data(s1)*5 + Brain_Data(s2)*-.5 - m4 = mask.threshold(upper=.5, lower=-.3) + m3 = mask.threshold(upper="98%") + m4 = Brain_Data(s1) * 5 + Brain_Data(s2) * -0.5 + m4 = mask.threshold(upper=0.5, lower=-0.3) assert np.sum(m1.data > 0) > np.sum(m2.data > 0) assert np.sum(m1.data > 0) == np.sum(m3.data > 0) - assert np.sum(m4.data[(m4.data > -.3) & (m4.data < .5)]) == 0 - assert np.sum(m4.data[(m4.data < -.3) | (m4.data > .5)]) > 0 + assert np.sum(m4.data[(m4.data > -0.3) & (m4.data < 0.5)]) == 0 + assert np.sum(m4.data[(m4.data < -0.3) | (m4.data > 0.5)]) > 0 # Test Regions r = mask.regions(min_region_size=10) m1 = Brain_Data(s1) m2 = r.threshold(1, binarize=True) assert len(np.unique(r.to_nifti().get_data())) == 2 - diff = m2-m1 + diff = m2 - m1 assert np.sum(diff.data) == 0 def test_bootstrap(sim_brain_data): masked = sim_brain_data.apply_mask(create_sphere(radius=10, coordinates=[0, 0, 0])) n_samples = 3 - b = masked.bootstrap('mean', n_samples=n_samples) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('std', n_samples=n_samples) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('predict', n_samples=n_samples, plot=False) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('predict', n_samples=n_samples, - plot=False, cv_dict={'type': 'kfolds', 'n_folds': 3}) - assert isinstance(b['Z'], Brain_Data) - b = masked.bootstrap('predict', n_samples=n_samples, - save_weights=True, plot=False) - assert len(b['samples']) == n_samples + b = masked.bootstrap("mean", n_samples=n_samples) + assert isinstance(b["Z"], Brain_Data) + b = masked.bootstrap("std", n_samples=n_samples) + assert isinstance(b["Z"], Brain_Data) + b = masked.bootstrap("predict", n_samples=n_samples, plot=False) + assert 
isinstance(b["Z"], Brain_Data) + b = masked.bootstrap( + "predict", + n_samples=n_samples, + plot=False, + cv_dict={"type": "kfolds", "n_folds": 3}, + ) + assert isinstance(b["Z"], Brain_Data) + b = masked.bootstrap("predict", n_samples=n_samples, save_weights=True, plot=False) + assert len(b["samples"]) == n_samples def test_predict(sim_brain_data): - holdout = np.array([[x]*2 for x in range(3)]).flatten() - stats = sim_brain_data.predict(algorithm='svm', - cv_dict={'type': 'kfolds', 'n_folds': 2}, - plot=False, **{'kernel': "linear"}) + holdout = np.array([[x] * 2 for x in range(3)]).flatten() + stats = sim_brain_data.predict( + algorithm="svm", + cv_dict={"type": "kfolds", "n_folds": 2}, + plot=False, + **{"kernel": "linear"} + ) # Support Vector Regression, with 5 fold cross-validation with Platt Scaling # This will output probabilities of each class - stats = sim_brain_data.predict(algorithm='svm', - cv_dict=None, plot=False, - **{'kernel': 'linear', 'probability': True}) - assert isinstance(stats['weight_map'], Brain_Data) + stats = sim_brain_data.predict( + algorithm="svm", + cv_dict=None, + plot=False, + **{"kernel": "linear", "probability": True} + ) + assert isinstance(stats["weight_map"], Brain_Data) # Logistic classificiation, with 2 fold cross-validation. - stats = sim_brain_data.predict(algorithm='logistic', - cv_dict={'type': 'kfolds', 'n_folds': 2}, - plot=False) - assert isinstance(stats['weight_map'], Brain_Data) + stats = sim_brain_data.predict( + algorithm="logistic", cv_dict={"type": "kfolds", "n_folds": 2}, plot=False + ) + assert isinstance(stats["weight_map"], Brain_Data) # Ridge classificiation, - stats = sim_brain_data.predict(algorithm='ridgeClassifier', - cv_dict=None, plot=False) - assert isinstance(stats['weight_map'], Brain_Data) + stats = sim_brain_data.predict( + algorithm="ridgeClassifier", cv_dict=None, plot=False + ) + assert isinstance(stats["weight_map"], Brain_Data) # Ridge - stats = sim_brain_data.predict(algorithm='ridge', - cv_dict={'type': 'kfolds', 'n_folds': 2, - 'subject_id': holdout}, plot=False, **{'alpha': .1}) + stats = sim_brain_data.predict( + algorithm="ridge", + cv_dict={"type": "kfolds", "n_folds": 2, "subject_id": holdout}, + plot=False, + **{"alpha": 0.1} + ) # Lasso - stats = sim_brain_data.predict(algorithm='lasso', - cv_dict={'type': 'kfolds', 'n_folds': 2, - 'stratified': sim_brain_data.Y}, - plot=False, **{'alpha': .1}) + stats = sim_brain_data.predict( + algorithm="lasso", + cv_dict={"type": "kfolds", "n_folds": 2, "stratified": sim_brain_data.Y}, + plot=False, + **{"alpha": 0.1} + ) # PCR - stats = sim_brain_data.predict(algorithm='pcr', cv_dict=None, plot=False) + stats = sim_brain_data.predict(algorithm="pcr", cv_dict=None, plot=False) def test_predict_multi(): @@ -360,10 +434,10 @@ def test_predict_multi(): sigma = 1 y = [0, 1] n_reps = 50 - output_dir = '.' + output_dir = "." 
dat = sim.create_data(y, sigma, reps=n_reps, output_dir=output_dir) - y = pd.read_csv('y.csv', header=None, index_col=None) - dat = Brain_Data('data.nii.gz', Y=y) + y = pd.read_csv("y.csv", header=None, index_col=None) + dat = Brain_Data("data.nii.gz", Y=y) # Predict within given ROIs # Generate some "rois" (in reality non-contiguous, but also not overlapping) @@ -382,84 +456,115 @@ def test_predict_multi(): # roi = rois[0] from sklearn.datasets import make_classification - X, Y = make_classification(n_samples=100, n_features=rois[0].data.sum(), n_informative=500, n_redundant=5, n_classes=2) + + X, Y = make_classification( + n_samples=100, + n_features=rois[0].data.sum(), + n_informative=500, + n_redundant=5, + n_classes=2, + ) dat.data[:, rois[0].data.astype(bool)] = X dat.Y = pd.Series(Y) - out = dat.predict_multi(algorithm='svm', cv_dict={'type': 'kfolds', 'n_folds': 3}, method='rois', n_jobs=-1, rois=rois[:3], kernel='linear') + out = dat.predict_multi( + algorithm="svm", + cv_dict={"type": "kfolds", "n_folds": 3}, + method="rois", + n_jobs=-1, + rois=rois[:3], + kernel="linear", + ) assert len(out) == 3 - assert np.sum([elem['weight_map'].data.shape for elem in out]) == rois.data.sum() + assert np.sum([elem["weight_map"].data.shape for elem in out]) == rois.data.sum() # Searchlight roi_mask = rois[:2].sum() - out = dat.predict_multi(algorithm='svm', cv_dict={'type': 'kfolds', 'n_folds': 3}, method='searchlight', radius=4, verbose=50, n_jobs=-1, process_mask=roi_mask) + out = dat.predict_multi( + algorithm="svm", + cv_dict={"type": "kfolds", "n_folds": 3}, + method="searchlight", + radius=4, + verbose=50, + n_jobs=-1, + process_mask=roi_mask, + ) assert len(np.nonzero(out.data)[0]) == len(np.nonzero(roi_mask.data)[0]) def test_similarity(sim_brain_data): - stats = sim_brain_data.predict(algorithm='svm', - cv_dict=None, plot=False, **{'kernel': 'linear'}) - r = sim_brain_data.similarity(stats['weight_map']) + stats = sim_brain_data.predict( + algorithm="svm", cv_dict=None, plot=False, **{"kernel": "linear"} + ) + r = sim_brain_data.similarity(stats["weight_map"]) assert len(r) == shape_2d[0] - r2 = sim_brain_data.similarity(stats['weight_map'].to_nifti()) + r2 = sim_brain_data.similarity(stats["weight_map"].to_nifti()) assert len(r2) == shape_2d[0] - r = sim_brain_data.similarity(stats['weight_map'], method='dot_product') + r = sim_brain_data.similarity(stats["weight_map"], method="dot_product") assert len(r) == shape_2d[0] - r = sim_brain_data.similarity(stats['weight_map'], method='cosine') + r = sim_brain_data.similarity(stats["weight_map"], method="cosine") assert len(r) == shape_2d[0] - r = sim_brain_data.similarity(sim_brain_data, method='correlation') + r = sim_brain_data.similarity(sim_brain_data, method="correlation") assert r.shape == (sim_brain_data.shape()[0], sim_brain_data.shape()[0]) - r = sim_brain_data.similarity(sim_brain_data, method='dot_product') + r = sim_brain_data.similarity(sim_brain_data, method="dot_product") assert r.shape == (sim_brain_data.shape()[0], sim_brain_data.shape()[0]) - r = sim_brain_data.similarity(sim_brain_data, method='cosine') + r = sim_brain_data.similarity(sim_brain_data, method="cosine") assert r.shape == (sim_brain_data.shape()[0], sim_brain_data.shape()[0]) def test_decompose(sim_brain_data): n_components = 3 - stats = sim_brain_data.decompose(algorithm='pca', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) - - stats = 
sim_brain_data.decompose(algorithm='ica', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) + stats = sim_brain_data.decompose( + algorithm="pca", axis="voxels", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) + + stats = sim_brain_data.decompose( + algorithm="ica", axis="voxels", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) sim_brain_data.data = sim_brain_data.data + 2 sim_brain_data.data[sim_brain_data.data < 0] = 0 - stats = sim_brain_data.decompose(algorithm='nnmf', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) - - stats = sim_brain_data.decompose(algorithm='fa', axis='voxels', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) - - stats = sim_brain_data.decompose(algorithm='pca', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) - - stats = sim_brain_data.decompose(algorithm='ica', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) + stats = sim_brain_data.decompose( + algorithm="nnmf", axis="voxels", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) + + stats = sim_brain_data.decompose( + algorithm="fa", axis="voxels", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) + + stats = sim_brain_data.decompose( + algorithm="pca", axis="images", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) + + stats = sim_brain_data.decompose( + algorithm="ica", axis="images", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) sim_brain_data.data = sim_brain_data.data + 2 sim_brain_data.data[sim_brain_data.data < 0] = 0 - stats = sim_brain_data.decompose(algorithm='nnmf', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) + stats = sim_brain_data.decompose( + algorithm="nnmf", axis="images", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) - stats = sim_brain_data.decompose(algorithm='fa', axis='images', - n_components=n_components) - assert n_components == len(stats['components']) - assert stats['weights'].shape == (len(sim_brain_data), n_components) + stats = sim_brain_data.decompose( + algorithm="fa", axis="images", n_components=n_components + ) + assert n_components == len(stats["components"]) + assert stats["weights"].shape == (len(sim_brain_data), n_components) def test_hyperalignment(): @@ -473,37 +578,48 @@ def 
test_hyperalignment(): data = [d1, d2, d3] # Test deterministic brain_data - out = align(data, method='deterministic_srm') + out = align(data, method="deterministic_srm") - bout = d1.align(out['common_model'], method='deterministic_srm') - assert d1.shape() == bout['transformed'].shape - assert d1.shape() == bout['common_model'].shape - assert d1.shape()[1] == bout['transformation_matrix'].shape()[0] - btransformed = np.dot(d1.data, bout['transformation_matrix'].data.T) - np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data - btransformed)) + bout = d1.align(out["common_model"], method="deterministic_srm") + assert d1.shape() == bout["transformed"].shape + assert d1.shape() == bout["common_model"].shape + assert d1.shape()[1] == bout["transformation_matrix"].shape()[0] + btransformed = np.dot(d1.data, bout["transformation_matrix"].data.T) + np.testing.assert_almost_equal(0, np.sum(bout["transformed"].data - btransformed)) # Test probabilistic brain_data - bout = d1.align(out['common_model'], method='probabilistic_srm') - assert d1.shape() == bout['transformed'].shape - assert d1.shape() == bout['common_model'].shape - assert d1.shape()[1] == bout['transformation_matrix'].shape()[0] - btransformed = np.dot(d1.data, bout['transformation_matrix'].data.T) - np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed)) + bout = d1.align(out["common_model"], method="probabilistic_srm") + assert d1.shape() == bout["transformed"].shape + assert d1.shape() == bout["common_model"].shape + assert d1.shape()[1] == bout["transformation_matrix"].shape()[0] + btransformed = np.dot(d1.data, bout["transformation_matrix"].data.T) + np.testing.assert_almost_equal(0, np.sum(bout["transformed"].data - btransformed)) # Test procrustes brain_data - out = align(data, method='procrustes') - centered = data[0].data-np.mean(data[0].data, 0) - transformed = (np.dot(centered/np.linalg.norm(centered), out['transformation_matrix'][0].data)*out['scale'][0]) - - bout = d1.align(out['common_model'], method='procrustes') - assert d1.shape() == bout['transformed'].shape() - assert d1.shape() == bout['common_model'].shape() - assert d1.shape()[1] == bout['transformation_matrix'].shape()[0] + out = align(data, method="procrustes") + centered = data[0].data - np.mean(data[0].data, 0) + transformed = ( + np.dot( + centered / np.linalg.norm(centered), out["transformation_matrix"][0].data + ) + * out["scale"][0] + ) + + bout = d1.align(out["common_model"], method="procrustes") + assert d1.shape() == bout["transformed"].shape() + assert d1.shape() == bout["common_model"].shape() + assert d1.shape()[1] == bout["transformation_matrix"].shape()[0] centered = d1.data - np.mean(d1.data, 0) - btransformed = (np.dot(centered/np.linalg.norm(centered), bout['transformation_matrix'].data)*bout['scale']) - np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed), decimal=5) - np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data - bout['transformed'].data)) - + btransformed = ( + np.dot(centered / np.linalg.norm(centered), bout["transformation_matrix"].data) + * bout["scale"] + ) + np.testing.assert_almost_equal( + 0, np.sum(bout["transformed"].data - btransformed), decimal=5 + ) + np.testing.assert_almost_equal( + 0, np.sum(out["transformed"][0].data - bout["transformed"].data) + ) # Test over time sim = Simulator() @@ -515,35 +631,45 @@ def test_hyperalignment(): d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) data = [d1, d2, d3] - out = align(data, 
method='deterministic_srm', axis=1) - bout = d1.align(out['common_model'], method='deterministic_srm', axis=1) - assert d1.shape() == bout['transformed'].shape - assert d1.shape() == bout['common_model'].shape - assert d1.shape()[0] == bout['transformation_matrix'].shape()[0] - btransformed = np.dot(d1.data.T, bout['transformation_matrix'].data.T) - np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T)) - - out = align(data, method='probabilistic_srm', axis=1) - bout = d1.align(out['common_model'], method='probabilistic_srm', axis=1) - assert d1.shape() == bout['transformed'].shape - assert d1.shape() == bout['common_model'].shape - assert d1.shape()[0] == bout['transformation_matrix'].shape()[0] - btransformed = np.dot(d1.data.T, bout['transformation_matrix'].data.T) - np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T)) - - out = align(data, method='procrustes', axis=1) - bout = d1.align(out['common_model'], method='procrustes', axis=1) - assert d1.shape() == bout['transformed'].shape() - assert d1.shape() == bout['common_model'].shape() - assert d1.shape()[0] == bout['transformation_matrix'].shape()[0] - centered = d1.data.T-np.mean(d1.data.T, 0) - btransformed = (np.dot(centered/np.linalg.norm(centered), bout['transformation_matrix'].data)*bout['scale']) - np.testing.assert_almost_equal(0, np.sum(bout['transformed'].data-btransformed.T), decimal=5) - np.testing.assert_almost_equal(0, np.sum(out['transformed'][0].data-bout['transformed'].data)) + out = align(data, method="deterministic_srm", axis=1) + bout = d1.align(out["common_model"], method="deterministic_srm", axis=1) + assert d1.shape() == bout["transformed"].shape + assert d1.shape() == bout["common_model"].shape + assert d1.shape()[0] == bout["transformation_matrix"].shape()[0] + btransformed = np.dot(d1.data.T, bout["transformation_matrix"].data.T) + np.testing.assert_almost_equal(0, np.sum(bout["transformed"].data - btransformed.T)) + + out = align(data, method="probabilistic_srm", axis=1) + bout = d1.align(out["common_model"], method="probabilistic_srm", axis=1) + assert d1.shape() == bout["transformed"].shape + assert d1.shape() == bout["common_model"].shape + assert d1.shape()[0] == bout["transformation_matrix"].shape()[0] + btransformed = np.dot(d1.data.T, bout["transformation_matrix"].data.T) + np.testing.assert_almost_equal(0, np.sum(bout["transformed"].data - btransformed.T)) + + out = align(data, method="procrustes", axis=1) + bout = d1.align(out["common_model"], method="procrustes", axis=1) + assert d1.shape() == bout["transformed"].shape() + assert d1.shape() == bout["common_model"].shape() + assert d1.shape()[0] == bout["transformation_matrix"].shape()[0] + centered = d1.data.T - np.mean(d1.data.T, 0) + btransformed = ( + np.dot(centered / np.linalg.norm(centered), bout["transformation_matrix"].data) + * bout["scale"] + ) + np.testing.assert_almost_equal( + 0, np.sum(bout["transformed"].data - btransformed.T), decimal=5 + ) + np.testing.assert_almost_equal( + 0, np.sum(out["transformed"][0].data - bout["transformed"].data) + ) + def test_temporal_resample(sim_brain_data): - up = sim_brain_data.temporal_resample(sampling_freq=1/2, target=2, target_type='hz') + up = sim_brain_data.temporal_resample( + sampling_freq=1 / 2, target=2, target_type="hz" + ) assert len(sim_brain_data) * 4 == len(up) - down = up.temporal_resample(sampling_freq=2, target=1/2, target_type='hz') + down = up.temporal_resample(sampling_freq=2, target=1 / 2, target_type="hz") assert 
len(sim_brain_data) == len(down) - assert len(up)/4 == len(down) \ No newline at end of file + assert len(up) / 4 == len(down) diff --git a/nltools/tests/test_cross_validation.py b/nltools/tests/test_cross_validation.py index 7be83bd1..99db219a 100644 --- a/nltools/tests/test_cross_validation.py +++ b/nltools/tests/test_cross_validation.py @@ -37,7 +37,7 @@ def check_cv_coverage(cv, X, y, groups, expected_n_splits=None): def test_stratified_kfold_ratios(): - y = pd.DataFrame(np.random.randn(1000))*20+50 + y = pd.DataFrame(np.random.randn(1000)) * 20 + 50 n_folds = 5 cv = KFoldStratified(n_splits=n_folds) for train, test in cv.split(np.zeros(len(y)), y): @@ -45,12 +45,16 @@ def test_stratified_kfold_ratios(): def test_kfoldstratified(): - y = pd.DataFrame(np.random.randn(50))*20+50 + y = pd.DataFrame(np.random.randn(50)) * 20 + 50 n_folds = 5 cv = KFoldStratified(n_splits=n_folds) - check_cv_coverage(cv, X=np.zeros(len(y)), y=y, groups=None, expected_n_splits=n_folds) + check_cv_coverage( + cv, X=np.zeros(len(y)), y=y, groups=None, expected_n_splits=n_folds + ) - y = pd.DataFrame(np.random.randn(51))*20+50 + y = pd.DataFrame(np.random.randn(51)) * 20 + 50 n_folds = 5 cv = KFoldStratified(n_splits=n_folds) - check_cv_coverage(cv, X=np.zeros(len(y)), y=y, groups=None, expected_n_splits=n_folds) + check_cv_coverage( + cv, X=np.zeros(len(y)), y=y, groups=None, expected_n_splits=n_folds + ) diff --git a/nltools/tests/test_design_matrix.py b/nltools/tests/test_design_matrix.py index 00801cee..ce6cd7f2 100644 --- a/nltools/tests/test_design_matrix.py +++ b/nltools/tests/test_design_matrix.py @@ -25,29 +25,44 @@ def test_vif(sim_design_matrix): def test_convolve(sim_design_matrix): TR = 2.0 assert sim_design_matrix.convolve().shape == sim_design_matrix.shape - hrf = glover_hrf(TR, oversampling=1.) - assert sim_design_matrix.convolve(conv_func=np.column_stack([hrf, hrf])).shape[1] == sim_design_matrix.shape[1] + 4 + hrf = glover_hrf(TR, oversampling=1.0) + assert ( + sim_design_matrix.convolve(conv_func=np.column_stack([hrf, hrf])).shape[1] + == sim_design_matrix.shape[1] + 4 + ) def test_zscore(sim_design_matrix): - matz = sim_design_matrix.zscore(columns=['face_A', 'face_B']) - assert (matz[['house_A', 'house_B']] == sim_design_matrix[['house_A', 'house_B']]).all().all() + matz = sim_design_matrix.zscore(columns=["face_A", "face_B"]) + assert ( + (matz[["house_A", "house_B"]] == sim_design_matrix[["house_A", "house_B"]]) + .all() + .all() + ) def test_replace(sim_design_matrix): - assert sim_design_matrix.replace_data(np.zeros((500, 4))).shape == sim_design_matrix.shape + assert ( + sim_design_matrix.replace_data(np.zeros((500, 4))).shape + == sim_design_matrix.shape + ) def test_upsample(sim_design_matrix): - newTR = 1. - target = 1./newTR - assert sim_design_matrix.upsample(target).shape[0] == sim_design_matrix.shape[0]*2 - target*2 + newTR = 1.0 + target = 1.0 / newTR + assert ( + sim_design_matrix.upsample(target).shape[0] + == sim_design_matrix.shape[0] * 2 - target * 2 + ) def test_downsample(sim_design_matrix): - newTR = 4. 
- target = 1./newTR - assert sim_design_matrix.downsample(target).shape[0] == sim_design_matrix.shape[0]/2 + newTR = 4.0 + target = 1.0 / newTR + assert ( + sim_design_matrix.downsample(target).shape[0] == sim_design_matrix.shape[0] / 2 + ) def test_append(sim_design_matrix): @@ -57,45 +72,58 @@ def test_append(sim_design_matrix): assert (mats.shape[1] - 4) == (sim_design_matrix.shape[1] - 4) * 2 # Otherwise stack them - mats = sim_design_matrix.append(sim_design_matrix, - keep_separate=False) + mats = sim_design_matrix.append(sim_design_matrix, keep_separate=False) assert mats.shape[1] == sim_design_matrix.shape[1] - assert(mats.shape[0] == sim_design_matrix.shape[0] * 2) - + assert mats.shape[0] == sim_design_matrix.shape[0] * 2 + # Keep a single stimulus column separate - assert sim_design_matrix.append(sim_design_matrix, - unique_cols=['face_A']).shape[1] == 5 + assert ( + sim_design_matrix.append(sim_design_matrix, unique_cols=["face_A"]).shape[1] + == 5 + ) # Keep a common stimulus class separate - assert sim_design_matrix.append(sim_design_matrix, - unique_cols=['face*']).shape[1] == 6 + assert ( + sim_design_matrix.append(sim_design_matrix, unique_cols=["face*"]).shape[1] == 6 + ) # Keep a common stimulus class and a different single stim separate - assert sim_design_matrix.append(sim_design_matrix, - unique_cols=['face*', 'house_A']).shape[1] == 7 + assert ( + sim_design_matrix.append( + sim_design_matrix, unique_cols=["face*", "house_A"] + ).shape[1] + == 7 + ) # Keep multiple stimulus class separate - assert sim_design_matrix.append(sim_design_matrix, - unique_cols=['face*', 'house*']).shape[1] == 8 + assert ( + sim_design_matrix.append( + sim_design_matrix, unique_cols=["face*", "house*"] + ).shape[1] + == 8 + ) # Growing a multi-run design matrix; keeping things separate num_runs = 4 - all_runs = Design_Matrix(sampling_freq=.5) + all_runs = Design_Matrix(sampling_freq=0.5) for i in range(num_runs): - run = Design_Matrix(np.array([ - [1, 0, 0, 0], - [1, 0, 0, 0], - [0, 0, 0, 0], - [0, 1, 0, 0], - [0, 1, 0, 0], - [0, 0, 0, 0], - [0, 0, 1, 0], - [0, 0, 1, 0], - [0, 0, 0, 0], - [0, 0, 0, 1], - [0, 0, 0, 1] - ]), - sampling_freq=.5, - columns=['stim_A', 'stim_B', 'cond_C', 'cond_D'] - ) + run = Design_Matrix( + np.array( + [ + [1, 0, 0, 0], + [1, 0, 0, 0], + [0, 0, 0, 0], + [0, 1, 0, 0], + [0, 1, 0, 0], + [0, 0, 0, 0], + [0, 0, 1, 0], + [0, 0, 1, 0], + [0, 0, 0, 0], + [0, 0, 0, 1], + [0, 0, 0, 1], + ] + ), + sampling_freq=0.5, + columns=["stim_A", "stim_B", "cond_C", "cond_D"], + ) run = run.add_poly(2) - all_runs = all_runs.append(run, unique_cols=['stim*', 'cond*']) + all_runs = all_runs.append(run, unique_cols=["stim*", "cond*"]) assert all_runs.shape == (44, 28) diff --git a/nltools/tests/test_file_reader.py b/nltools/tests/test_file_reader.py index 8314930e..7b3d16f5 100644 --- a/nltools/tests/test_file_reader.py +++ b/nltools/tests/test_file_reader.py @@ -51,7 +51,7 @@ def test_onsets_to_dm(): # Multiple onsets with polynomials auto-added dm = onsets_to_dm([data, data], sampling_freq, run_length, add_poly=2) assert dm.shape == (run_length * 2, data.Stim.nunique() + (3 * 2)) - + dm = onsets_to_dm( [data, data], sampling_freq, run_length, add_poly=2, keep_separate=False ) diff --git a/nltools/tests/test_groupby.py b/nltools/tests/test_groupby.py index dfb83c56..5469e400 100644 --- a/nltools/tests/test_groupby.py +++ b/nltools/tests/test_groupby.py @@ -11,14 +11,14 @@ def test_index(sim_groupby): def test_apply(sim_groupby): - mn = sim_groupby.apply('mean') + mn = 
sim_groupby.apply("mean") assert len(sim_groupby) == len(mn) assert mn[1].shape() == np.sum(sim_groupby.mask[1].data == 1) - reg = sim_groupby.apply('regress') + reg = sim_groupby.apply("regress") assert len(sim_groupby) == len(mn) def test_combine(sim_groupby): - mn = sim_groupby.apply('mean') + mn = sim_groupby.apply("mean") combine_mn = sim_groupby.combine(mn) assert len(combine_mn.shape()) == 1 diff --git a/nltools/tests/test_mask.py b/nltools/tests/test_mask.py index 524f79b2..bd179ced 100644 --- a/nltools/tests/test_mask.py +++ b/nltools/tests/test_mask.py @@ -19,26 +19,26 @@ def test_roi_to_brain(): s1 = create_sphere([15, 10, -8], radius=10) s2 = create_sphere([-15, 10, -8], radius=10) s3 = create_sphere([0, -15, -8], radius=10) - masks = Brain_Data([s1,s2,s3]) + masks = Brain_Data([s1, s2, s3]) - d = [1,2,3] + d = [1, 2, 3] m = roi_to_brain(d, masks) - assert np.all([np.any(m.data==x) for x in d]) + assert np.all([np.any(m.data == x) for x in d]) d = pd.Series([1.1, 2.1, 3.1]) m = roi_to_brain(d, masks) - assert np.all([np.any(m.data==x) for x in d]) + assert np.all([np.any(m.data == x) for x in d]) d = np.array([1, 2, 3]) m = roi_to_brain(d, masks) - assert np.all([np.any(m.data==x) for x in d]) + assert np.all([np.any(m.data == x) for x in d]) - d = pd.DataFrame([np.ones(10)*x for x in [1, 2, 3]]) + d = pd.DataFrame([np.ones(10) * x for x in [1, 2, 3]]) m = roi_to_brain(d, masks) assert len(m) == d.shape[1] - assert np.all([np.any(m[0].data==x) for x in d[0]]) + assert np.all([np.any(m[0].data == x) for x in d[0]]) - d = np.array([np.ones(10)*x for x in [1, 2, 3]]) + d = np.array([np.ones(10) * x for x in [1, 2, 3]]) m = roi_to_brain(d, masks) assert len(m) == d.shape[1] - assert np.all([np.any(m[0].data==x) for x in d[0]]) \ No newline at end of file + assert np.all([np.any(m[0].data == x) for x in d[0]]) diff --git a/nltools/tests/test_simulator.py b/nltools/tests/test_simulator.py index 363b0de1..f979c855 100644 --- a/nltools/tests/test_simulator.py +++ b/nltools/tests/test_simulator.py @@ -1,6 +1,7 @@ from nltools.simulator import Simulator, SimulateGrid import numpy as np + def test_simulator(tmpdir): sim = Simulator() r = 10 @@ -10,17 +11,20 @@ def test_simulator(tmpdir): output_dir = str(tmpdir) shape = (91, 109, 91) dat = sim.create_data(y, sigma, reps=n_reps, output_dir=None) - assert len(dat) == n_reps*len(y) - assert len(dat.Y) == n_reps*len(y) + assert len(dat) == n_reps * len(y) + assert len(dat.Y) == n_reps * len(y) + def test_simulategrid_fpr(tmpdir): grid_width = 10 n_subjects = 25 n_simulations = 100 - thresh = .05 - bonferroni_threshold = thresh/(grid_width**2) - simulation = SimulateGrid(grid_width=grid_width, n_subjects=n_subjects ) - simulation.plot_grid_simulation(threshold=bonferroni_threshold, threshold_type='p', n_simulations=n_simulations) + thresh = 0.05 + bonferroni_threshold = thresh / (grid_width ** 2) + simulation = SimulateGrid(grid_width=grid_width, n_subjects=n_subjects) + simulation.plot_grid_simulation( + threshold=bonferroni_threshold, threshold_type="p", n_simulations=n_simulations + ) assert simulation.isfit assert simulation.grid_width == grid_width @@ -28,19 +32,30 @@ def test_simulategrid_fpr(tmpdir): assert simulation.thresholded.shape == (grid_width, grid_width) assert simulation.fp_percent <= bonferroni_threshold assert len(simulation.multiple_fp) == n_simulations - assert np.sum(simulation.multiple_fp > 0)/n_simulations <= (thresh + .03) + assert np.sum(simulation.multiple_fp > 0) / n_simulations <= (thresh + 0.03) + def 
test_simulategrid_fdr(tmpdir): grid_width = 100 n_subjects = 25 n_simulations = 100 - thresh = .05 + thresh = 0.05 signal_amplitude = 1 signal_width = 10 - simulation = SimulateGrid(signal_amplitude=signal_amplitude, signal_width=signal_width, grid_width=grid_width, n_subjects=n_subjects) - simulation.plot_grid_simulation(threshold=thresh, threshold_type='q', n_simulations=n_simulations, correction='fdr') + simulation = SimulateGrid( + signal_amplitude=signal_amplitude, + signal_width=signal_width, + grid_width=grid_width, + n_subjects=n_subjects, + ) + simulation.plot_grid_simulation( + threshold=thresh, + threshold_type="q", + n_simulations=n_simulations, + correction="fdr", + ) assert len(simulation.multiple_fdr) == n_simulations assert np.mean(simulation.multiple_fdr) < thresh assert simulation.signal_width == signal_width - assert simulation.correction == 'fdr' + assert simulation.correction == "fdr" diff --git a/nltools/tests/test_stats.py b/nltools/tests/test_stats.py index 7dae5547..73a213bd 100644 --- a/nltools/tests/test_stats.py +++ b/nltools/tests/test_stats.py @@ -1,20 +1,22 @@ import numpy as np from numpy import sin, pi, arange import pandas as pd -from nltools.stats import (one_sample_permutation, - two_sample_permutation, - correlation_permutation, - matrix_permutation, - downsample, - upsample, - winsorize, - align, - transform_pairwise, - _calc_pvalue, - find_spikes, - isc, - isfc, - isps) +from nltools.stats import ( + one_sample_permutation, + two_sample_permutation, + correlation_permutation, + matrix_permutation, + downsample, + upsample, + winsorize, + align, + transform_pairwise, + _calc_pvalue, + find_spikes, + isc, + isfc, + isps, +) from nltools.simulator import Simulator from nltools.mask import create_sphere from sklearn.metrics import pairwise_distances @@ -24,17 +26,23 @@ def test_permutation(): - dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 1000) + dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 1000) x = dat[:, 0] y = dat[:, 1] stats = two_sample_permutation(x, y, tail=1, n_permute=1000) - assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001) - stats = one_sample_permutation(x-y, tail=1, n_permute=1000) - assert (stats['mean'] < -2) & (stats['mean'] > -6) & (stats['p'] < .001) - for method in ['permute', 'circle_shift', 'phase_randomize']: - for metric in ['spearman', 'kendall', 'pearson']: - stats = correlation_permutation(x, y, metric=metric, method=method, n_permute=500, tail=1) - assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .05) + assert (stats["mean"] < -2) & (stats["mean"] > -6) & (stats["p"] < 0.001) + stats = one_sample_permutation(x - y, tail=1, n_permute=1000) + assert (stats["mean"] < -2) & (stats["mean"] > -6) & (stats["p"] < 0.001) + for method in ["permute", "circle_shift", "phase_randomize"]: + for metric in ["spearman", "kendall", "pearson"]: + stats = correlation_permutation( + x, y, metric=metric, method=method, n_permute=500, tail=1 + ) + assert ( + (stats["correlation"] > 0.4) + & (stats["correlation"] < 0.85) + & (stats["p"] < 0.05) + ) # with pytest.raises(ValueError): # correlation_permutation(x, y, metric='kendall',tail=3) @@ -48,19 +56,33 @@ def test_permutation(): np.testing.assert_almost_equal(two_sided, sum_p, decimal=3) # Test matrix_permutation - dat = np.random.multivariate_normal([2, 6], [[.5, 2], [.5, 3]], 190) + dat = np.random.multivariate_normal([2, 6], [[0.5, 2], [0.5, 3]], 190) x = squareform(dat[:, 0]) y = squareform(dat[:, 
1]) stats = matrix_permutation(x, y, n_permute=1000) - assert (stats['correlation'] > .4) & (stats['correlation'] < .85) & (stats['p'] < .001) + assert ( + (stats["correlation"] > 0.4) + & (stats["correlation"] < 0.85) + & (stats["p"] < 0.001) + ) def test_downsample(): dat = pd.DataFrame() - dat['x'] = range(0, 100) - dat['y'] = np.repeat(range(1, 11), 10) - assert((dat.groupby('y').mean().values.ravel() == downsample(data=dat['x'], sampling_freq=10, target=1, target_type='hz', method='mean').values).all) - assert((dat.groupby('y').median().values.ravel() == downsample(data=dat['x'], sampling_freq=10, target=1, target_type='hz', method='median').values).all) + dat["x"] = range(0, 100) + dat["y"] = np.repeat(range(1, 11), 10) + assert ( + dat.groupby("y").mean().values.ravel() + == downsample( + data=dat["x"], sampling_freq=10, target=1, target_type="hz", method="mean" + ).values + ).all + assert ( + dat.groupby("y").median().values.ravel() + == downsample( + data=dat["x"], sampling_freq=10, target=1, target_type="hz", method="median" + ).values + ).all # with pytest.raises(ValueError): # downsample(data=list(dat['x']),sampling_freq=10,target=1,target_type='hz',method='median') # with pytest.raises(ValueError): @@ -71,14 +93,14 @@ def test_downsample(): def test_upsample(): dat = pd.DataFrame() - dat['x'] = range(0, 100) - dat['y'] = np.repeat(range(1, 11), 10) + dat["x"] = range(0, 100) + dat["y"] = np.repeat(range(1, 11), 10) fs = 2 - us = upsample(dat, sampling_freq=1, target=fs, target_type='hz') - assert(dat.shape[0]*fs-fs == us.shape[0]) + us = upsample(dat, sampling_freq=1, target=fs, target_type="hz") + assert dat.shape[0] * fs - fs == us.shape[0] fs = 3 - us = upsample(dat, sampling_freq=1, target=fs, target_type='hz') - assert(dat.shape[0]*fs-fs == us.shape[0]) + us = upsample(dat, sampling_freq=1, target=fs, target_type="hz") + assert dat.shape[0] * fs - fs == us.shape[0] # with pytest.raises(ValueError): # upsample(dat,sampling_freq=1,target=fs,target_type='hz',method='doesnotwork') # with pytest.raises(ValueError): @@ -86,28 +108,117 @@ def test_upsample(): def test_winsorize(): - outlier_test = pd.DataFrame([92, 19, 101, 58, 1053, 91, 26, 78, 10, 13, - -40, 101, 86, 85, 15, 89, 89, 28, -5, 41]) - - out = winsorize(outlier_test, cutoff={'quantile': [0.05, .95]}, - replace_with_cutoff=False).values.squeeze() - correct_result = np.array([92, 19, 101, 58, 101, 91, 26, 78, 10, - 13, -5, 101, 86, 85, 15, 89, 89, 28, - -5, 41]) - assert(np.sum(out == correct_result) == 20) - - out = winsorize(outlier_test, cutoff={'std': [2, 2]}, - replace_with_cutoff=False).values.squeeze() - correct_result = np.array([92, 19, 101, 58, 101, 91, 26, 78, 10, 13, - -40, 101, 86, 85, 15, 89, 89, 28, -5, 41]) - assert(np.sum(out == correct_result) == 20) - - out = winsorize(outlier_test, cutoff={'std': [2, 2]}, - replace_with_cutoff=True).values.squeeze() - correct_result = np.array([92., 19., 101., 58., 556.97961997, 91., 26., - 78., 10., 13., -40., 101., 86., 85., 15., 89., - 89., 28., -5., 41.]) - assert(np.round(np.mean(out)) == np.round(np.mean(correct_result))) + outlier_test = pd.DataFrame( + [ + 92, + 19, + 101, + 58, + 1053, + 91, + 26, + 78, + 10, + 13, + -40, + 101, + 86, + 85, + 15, + 89, + 89, + 28, + -5, + 41, + ] + ) + + out = winsorize( + outlier_test, cutoff={"quantile": [0.05, 0.95]}, replace_with_cutoff=False + ).values.squeeze() + correct_result = np.array( + [ + 92, + 19, + 101, + 58, + 101, + 91, + 26, + 78, + 10, + 13, + -5, + 101, + 86, + 85, + 15, + 89, + 89, + 28, + -5, + 41, + 
] + ) + assert np.sum(out == correct_result) == 20 + + out = winsorize( + outlier_test, cutoff={"std": [2, 2]}, replace_with_cutoff=False + ).values.squeeze() + correct_result = np.array( + [ + 92, + 19, + 101, + 58, + 101, + 91, + 26, + 78, + 10, + 13, + -40, + 101, + 86, + 85, + 15, + 89, + 89, + 28, + -5, + 41, + ] + ) + assert np.sum(out == correct_result) == 20 + + out = winsorize( + outlier_test, cutoff={"std": [2, 2]}, replace_with_cutoff=True + ).values.squeeze() + correct_result = np.array( + [ + 92.0, + 19.0, + 101.0, + 58.0, + 556.97961997, + 91.0, + 26.0, + 78.0, + 10.0, + 13.0, + -40.0, + 101.0, + 86.0, + 85.0, + 15.0, + 89.0, + 89.0, + 28.0, + -5.0, + 41.0, + ] + ) + assert np.round(np.mean(out)) == np.round(np.mean(correct_result)) def test_align(): @@ -121,63 +232,87 @@ def test_align(): d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) data = [d1.data, d2.data, d3.data] - out = align(data, method='deterministic_srm') - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape == out['common_model'].shape - transformed = np.dot(data[0], out['transformation_matrix'][0]) - np.testing.assert_almost_equal(np.sum(out['transformed'][0]-transformed.T), 0, decimal=3) - assert len(out['isc']) == out['transformed'][0].shape[0] - - out = align(data, method='probabilistic_srm') - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape == out['common_model'].shape - transformed = np.dot(data[0], out['transformation_matrix'][0]) - np.testing.assert_almost_equal(np.sum(out['transformed'][0]-transformed.T), 0, decimal=3) - assert len(out['isc']) == out['transformed'][0].shape[0] - - out2 = align(data, method='procrustes') - assert len(data) == len(out2['transformed']) - assert data[0].shape == out2['common_model'].shape - assert len(data) == len(out2['transformation_matrix']) - assert len(data) == len(out2['disparity']) - centered = data[0]-np.mean(data[0], 0) - transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0])*out2['scale'][0]) - np.testing.assert_almost_equal(np.sum(out2['transformed'][0]-transformed.T), 0, decimal=3) - assert out2['transformed'][0].shape == out2['transformed'][0].shape - assert out2['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape - assert len(out2['isc']) == out['transformed'][0].shape[0] + out = align(data, method="deterministic_srm") + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape == out["common_model"].shape + transformed = np.dot(data[0], out["transformation_matrix"][0]) + np.testing.assert_almost_equal( + np.sum(out["transformed"][0] - transformed.T), 0, decimal=3 + ) + assert len(out["isc"]) == out["transformed"][0].shape[0] + + out = align(data, method="probabilistic_srm") + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape == out["common_model"].shape + transformed = np.dot(data[0], out["transformation_matrix"][0]) + np.testing.assert_almost_equal( + np.sum(out["transformed"][0] - transformed.T), 0, decimal=3 + ) + assert len(out["isc"]) == out["transformed"][0].shape[0] + + out2 = align(data, method="procrustes") + assert len(data) == len(out2["transformed"]) + assert data[0].shape == out2["common_model"].shape + assert len(data) == len(out2["transformation_matrix"]) + assert len(data) == 
len(out2["disparity"]) + centered = data[0] - np.mean(data[0], 0) + transformed = ( + np.dot(centered / np.linalg.norm(centered), out2["transformation_matrix"][0]) + * out2["scale"][0] + ) + np.testing.assert_almost_equal( + np.sum(out2["transformed"][0] - transformed.T), 0, decimal=3 + ) + assert out2["transformed"][0].shape == out2["transformed"][0].shape + assert ( + out2["transformation_matrix"][0].shape == out2["transformation_matrix"][0].shape + ) + assert len(out2["isc"]) == out["transformed"][0].shape[0] # Test hyperalignment on Brain_Data data = [d1, d2, d3] - out = align(data, method='deterministic_srm') - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape() == out['common_model'].shape - transformed = np.dot(d1.data, out['transformation_matrix'][0].data.T) - np.testing.assert_almost_equal(np.sum(out['transformed'][0].data-transformed), 0, decimal=3) - assert len(out['isc']) == out['transformed'][0].shape[1] - - out = align(data, method='probabilistic_srm') - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape() == out['common_model'].shape - transformed = np.dot(d1.data, out['transformation_matrix'][0].data.T) - np.testing.assert_almost_equal(np.sum(out['transformed'][0].data-transformed), 0, decimal=3) - assert len(out['isc']) == out['transformed'][0].shape[1] - - out2 = align(data, method='procrustes') - assert len(data) == len(out2['transformed']) - assert data[0].shape() == out2['common_model'].shape() - assert len(data) == len(out2['transformation_matrix']) - assert len(data) == len(out2['disparity']) - centered = data[0].data-np.mean(data[0].data, 0) - transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0].data)*out2['scale'][0]) - np.testing.assert_almost_equal(np.sum(out2['transformed'][0].data-transformed), 0, decimal=3) - assert out2['transformed'][0].shape() == out2['transformed'][0].shape() - assert out2['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape - assert len(out2['isc']) == out2['transformed'][0].shape()[1] + out = align(data, method="deterministic_srm") + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape() == out["common_model"].shape + transformed = np.dot(d1.data, out["transformation_matrix"][0].data.T) + np.testing.assert_almost_equal( + np.sum(out["transformed"][0].data - transformed), 0, decimal=3 + ) + assert len(out["isc"]) == out["transformed"][0].shape[1] + + out = align(data, method="probabilistic_srm") + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape() == out["common_model"].shape + transformed = np.dot(d1.data, out["transformation_matrix"][0].data.T) + np.testing.assert_almost_equal( + np.sum(out["transformed"][0].data - transformed), 0, decimal=3 + ) + assert len(out["isc"]) == out["transformed"][0].shape[1] + + out2 = align(data, method="procrustes") + assert len(data) == len(out2["transformed"]) + assert data[0].shape() == out2["common_model"].shape() + assert len(data) == len(out2["transformation_matrix"]) + assert len(data) == len(out2["disparity"]) + centered = data[0].data - np.mean(data[0].data, 0) + transformed = ( + np.dot( + centered / np.linalg.norm(centered), out2["transformation_matrix"][0].data + ) + * out2["scale"][0] + ) + np.testing.assert_almost_equal( + 
np.sum(out2["transformed"][0].data - transformed), 0, decimal=3 + ) + assert out2["transformed"][0].shape() == out2["transformed"][0].shape() + assert ( + out2["transformation_matrix"][0].shape == out2["transformation_matrix"][0].shape + ) + assert len(out2["isc"]) == out2["transformed"][0].shape()[1] # Test hyperalignment on matrix over time (axis=1) sim = Simulator() @@ -189,88 +324,120 @@ def test_align(): d3 = sim.create_data(y, 3, reps=n_reps, output_dir=None).apply_mask(s1) data = [d1.data, d2.data, d3.data] - out = align(data, method='deterministic_srm', axis=1) - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape == out['common_model'].shape - transformed = np.dot(data[0].T, out['transformation_matrix'][0].data) - np.testing.assert_almost_equal(np.sum(out['transformed'][0]-transformed), 0, decimal=3) - assert len(out['isc']) == out['transformed'][0].shape[1] - - out = align(data, method='probabilistic_srm', axis=1) - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape == out['common_model'].shape - transformed = np.dot(data[0].T, out['transformation_matrix'][0]) - np.testing.assert_almost_equal(np.sum(out['transformed'][0]-transformed), 0, decimal=3) - assert len(out['isc']) == out['transformed'][0].shape[1] - - out2 = align(data, method='procrustes', axis=1) - assert len(data) == len(out2['transformed']) - assert data[0].shape == out2['common_model'].shape - assert len(data) == len(out2['transformation_matrix']) - assert len(data) == len(out2['disparity']) - centered = data[0]-np.mean(data[0], 0) - transformed = (np.dot((centered/np.linalg.norm(centered)).T, out2['transformation_matrix'][0].data)*out2['scale'][0]) - np.testing.assert_almost_equal(np.sum(out2['transformed'][0]-transformed), 0, decimal=3) - assert out2['transformed'][0].shape == out2['transformed'][0].shape - assert out2['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape - assert len(out2['isc']) == out2['transformed'][0].shape[0] + out = align(data, method="deterministic_srm", axis=1) + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape == out["common_model"].shape + transformed = np.dot(data[0].T, out["transformation_matrix"][0].data) + np.testing.assert_almost_equal( + np.sum(out["transformed"][0] - transformed), 0, decimal=3 + ) + assert len(out["isc"]) == out["transformed"][0].shape[1] + + out = align(data, method="probabilistic_srm", axis=1) + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape == out["common_model"].shape + transformed = np.dot(data[0].T, out["transformation_matrix"][0]) + np.testing.assert_almost_equal( + np.sum(out["transformed"][0] - transformed), 0, decimal=3 + ) + assert len(out["isc"]) == out["transformed"][0].shape[1] + + out2 = align(data, method="procrustes", axis=1) + assert len(data) == len(out2["transformed"]) + assert data[0].shape == out2["common_model"].shape + assert len(data) == len(out2["transformation_matrix"]) + assert len(data) == len(out2["disparity"]) + centered = data[0] - np.mean(data[0], 0) + transformed = ( + np.dot( + (centered / np.linalg.norm(centered)).T, + out2["transformation_matrix"][0].data, + ) + * out2["scale"][0] + ) + np.testing.assert_almost_equal( + np.sum(out2["transformed"][0] - transformed), 0, decimal=3 + ) + assert 
out2["transformed"][0].shape == out2["transformed"][0].shape + assert ( + out2["transformation_matrix"][0].shape == out2["transformation_matrix"][0].shape + ) + assert len(out2["isc"]) == out2["transformed"][0].shape[0] # Test hyperalignment on Brain_Data over time (axis=1) data = [d1, d2, d3] - out = align(data, method='deterministic_srm', axis=1) - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape() == out['common_model'].shape - transformed = np.dot(d1.data.T, out['transformation_matrix'][0].data).T - np.testing.assert_almost_equal(np.sum(out['transformed'][0].data-transformed), 0, decimal=5) - assert len(out['isc']) == out['transformed'][0].shape[0] - - out = align(data, method='probabilistic_srm', axis=1) - assert len(data) == len(out['transformed']) - assert len(data) == len(out['transformation_matrix']) - assert data[0].shape() == out['common_model'].shape - transformed = np.dot(d1.data.T, out['transformation_matrix'][0].data).T - np.testing.assert_almost_equal(np.sum(out['transformed'][0].data-transformed), 0, decimal=5) - assert len(out['isc']) == out['transformed'][0].shape[0] - - out2 = align(data, method='procrustes', axis=1) - assert len(data) == len(out2['transformed']) - assert data[0].shape() == out2['common_model'].shape() - assert len(data) == len(out2['transformation_matrix']) - assert len(data) == len(out2['disparity']) - centered = data[0].data.T-np.mean(data[0].data.T, 0) - transformed = (np.dot(centered/np.linalg.norm(centered), out2['transformation_matrix'][0].data)*out2['scale'][0]).T - np.testing.assert_almost_equal(np.sum(out2['transformed'][0].data-transformed), 0, decimal=5) - assert out2['transformed'][0].shape() == out2['transformed'][0].shape() - assert out2['transformation_matrix'][0].shape == out2['transformation_matrix'][0].shape - assert len(out2['isc']) == out2['transformed'][0].shape()[1] + out = align(data, method="deterministic_srm", axis=1) + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape() == out["common_model"].shape + transformed = np.dot(d1.data.T, out["transformation_matrix"][0].data).T + np.testing.assert_almost_equal( + np.sum(out["transformed"][0].data - transformed), 0, decimal=5 + ) + assert len(out["isc"]) == out["transformed"][0].shape[0] + + out = align(data, method="probabilistic_srm", axis=1) + assert len(data) == len(out["transformed"]) + assert len(data) == len(out["transformation_matrix"]) + assert data[0].shape() == out["common_model"].shape + transformed = np.dot(d1.data.T, out["transformation_matrix"][0].data).T + np.testing.assert_almost_equal( + np.sum(out["transformed"][0].data - transformed), 0, decimal=5 + ) + assert len(out["isc"]) == out["transformed"][0].shape[0] + + out2 = align(data, method="procrustes", axis=1) + assert len(data) == len(out2["transformed"]) + assert data[0].shape() == out2["common_model"].shape() + assert len(data) == len(out2["transformation_matrix"]) + assert len(data) == len(out2["disparity"]) + centered = data[0].data.T - np.mean(data[0].data.T, 0) + transformed = ( + np.dot( + centered / np.linalg.norm(centered), out2["transformation_matrix"][0].data + ) + * out2["scale"][0] + ).T + np.testing.assert_almost_equal( + np.sum(out2["transformed"][0].data - transformed), 0, decimal=5 + ) + assert out2["transformed"][0].shape() == out2["transformed"][0].shape() + assert ( + out2["transformation_matrix"][0].shape == out2["transformation_matrix"][0].shape + 
) + assert len(out2["isc"]) == out2["transformed"][0].shape()[1] def test_transform_pairwise(): n_features = 50 n_samples = 100 # Test without groups - new_n_samples = int(n_samples * (n_samples-1) / 2) + new_n_samples = int(n_samples * (n_samples - 1) / 2) X = np.random.rand(n_samples, n_features) - y = np.random.rand(n_samples,) + y = np.random.rand( + n_samples, + ) x_new, y_new = transform_pairwise(X, y) assert x_new.shape == (new_n_samples, n_features) assert y_new.shape == (new_n_samples,) assert y_new.ndim == 1 # Test with groups n_subs = 4 - new_n_samples = int(n_subs * ((n_samples/n_subs)*(n_samples/n_subs-1))/2) - groups = np.repeat(np.arange(1, 1+n_subs), n_samples/n_subs) + new_n_samples = int(n_subs * ((n_samples / n_subs) * (n_samples / n_subs - 1)) / 2) + groups = np.repeat(np.arange(1, 1 + n_subs), n_samples / n_subs) y = np.vstack((y, groups)).T x_new, y_new = transform_pairwise(X, y) assert x_new.shape == (new_n_samples, n_features) assert y_new.shape == (new_n_samples, 2) assert y_new.ndim == 2 - a = y_new[:, 1] == np.repeat(np.arange(1, 1+n_subs), ((n_samples/n_subs)*(n_samples/n_subs-1))/2) + a = y_new[:, 1] == np.repeat( + np.arange(1, 1 + n_subs), ((n_samples / n_subs) * (n_samples / n_subs - 1)) / 2 + ) assert a.all() + def test_find_spikes(): sim = Simulator() y = [0, 1] @@ -286,30 +453,52 @@ def test_find_spikes(): assert isinstance(spikes, pd.DataFrame) assert spikes.shape[0] == len(d1) + def test_isc(): n_boot = 100 - dat = np.random.multivariate_normal([0,0,0,0,0], [[1, .2, .5, .7, .3], - [.2, 1, .6, .1, .2], - [.5, .6, 1, .3, .1], - [.7, .1, .3, 1, .4], - [.3, .2, .1, .4, 1]], 500) - for method in ['bootstrap', 'circle_shift', 'phase_randomize']: - for metric in ['median', 'mean']: - stats = isc(dat, method=method, metric=metric, n_bootstraps=n_boot, return_bootstraps=True) - assert stats['isc'] > .1 - assert (stats['isc'] > -1) & (stats['isc'] < 1) - assert (stats['p'] > 0) & (stats['p'] < 1) - assert len(stats['null_distribution']) == n_boot + dat = np.random.multivariate_normal( + [0, 0, 0, 0, 0], + [ + [1, 0.2, 0.5, 0.7, 0.3], + [0.2, 1, 0.6, 0.1, 0.2], + [0.5, 0.6, 1, 0.3, 0.1], + [0.7, 0.1, 0.3, 1, 0.4], + [0.3, 0.2, 0.1, 0.4, 1], + ], + 500, + ) + for method in ["bootstrap", "circle_shift", "phase_randomize"]: + for metric in ["median", "mean"]: + stats = isc( + dat, + method=method, + metric=metric, + n_bootstraps=n_boot, + return_bootstraps=True, + ) + assert stats["isc"] > 0.1 + assert (stats["isc"] > -1) & (stats["isc"] < 1) + assert (stats["p"] > 0) & (stats["p"] < 1) + assert len(stats["null_distribution"]) == n_boot + def test_isfc(): def simulate_sub_roi_data(n_sub, n_tr): sub_dat = [] for i in range(n_sub): - sub_dat.append(np.random.multivariate_normal([0,0,0,0,0], [[1, .2, .5, .7, .3], - [.2, 1, .6, .1, .2], - [.5, .6, 1, .3, .1], - [.7, .1, .3, 1, .4], - [.3, .2, .1, .4, 1]], n_tr)) + sub_dat.append( + np.random.multivariate_normal( + [0, 0, 0, 0, 0], + [ + [1, 0.2, 0.5, 0.7, 0.3], + [0.2, 1, 0.6, 0.1, 0.2], + [0.5, 0.6, 1, 0.3, 0.1], + [0.7, 0.1, 0.3, 1, 0.4], + [0.3, 0.2, 0.1, 0.4, 1], + ], + n_tr, + ) + ) return sub_dat n_sub = 10 @@ -317,24 +506,31 @@ def simulate_sub_roi_data(n_sub, n_tr): isfc_out = isfc(sub_dat) isfc_mean = np.array(isfc_out).mean(axis=0) assert len(isfc_out) == n_sub - assert isfc_mean.shape == (5,5) + assert isfc_mean.shape == (5, 5) np.testing.assert_almost_equal(np.array(isfc_out).mean(axis=0).mean(), 0, decimal=1) - + + def test_isps(): - sampling_freq = .5 + sampling_freq = 0.5 time = arange(0, 200, 1) amplitude 
= 5 - freq = .1 + freq = 0.1 theta = 0 n_sub = 15 simulation = amplitude * sin(2 * pi * freq * time + theta) simulation = np.array([simulation] * n_sub).T - simulation += np.random.randn(simulation.shape[0], simulation.shape[1])*2 - simulation[50:150,:] = np.random.randn(100, simulation.shape[1])*5 - stats = isps(simulation, low_cut=.05, high_cut=.2, sampling_freq=sampling_freq) - - assert stats['average_angle'].shape == time.shape - assert stats['vector_length'].shape == time.shape - assert stats['p'].shape == time.shape - assert stats['p'][50:150].mean() > (np.mean([stats['p'][:50].mean(), stats['p'][150:].mean()])) - assert stats['vector_length'][50:150].mean() < (np.mean([stats['vector_length'][:50].mean(), stats['vector_length'][150:].mean()])) + simulation += np.random.randn(simulation.shape[0], simulation.shape[1]) * 2 + simulation[50:150, :] = np.random.randn(100, simulation.shape[1]) * 5 + stats = isps(simulation, low_cut=0.05, high_cut=0.2, sampling_freq=sampling_freq) + + assert stats["average_angle"].shape == time.shape + assert stats["vector_length"].shape == time.shape + assert stats["p"].shape == time.shape + assert stats["p"][50:150].mean() > ( + np.mean([stats["p"][:50].mean(), stats["p"][150:].mean()]) + ) + assert stats["vector_length"][50:150].mean() < ( + np.mean( + [stats["vector_length"][:50].mean(), stats["vector_length"][150:].mean()] + ) + ) diff --git a/nltools/tests/test_utils.py b/nltools/tests/test_utils.py index 6ece6c53..c12bb0f5 100644 --- a/nltools/tests/test_utils.py +++ b/nltools/tests/test_utils.py @@ -1,17 +1,18 @@ - from nltools.utils import check_brain_data, check_brain_data_is_single from nltools.mask import create_sphere from nltools.data import Brain_Data import numpy as np + def test_check_brain_data(sim_brain_data): mask = Brain_Data(create_sphere([15, 10, -8], radius=10)) a = check_brain_data(sim_brain_data) assert isinstance(a, Brain_Data) b = check_brain_data(sim_brain_data, mask=mask) assert isinstance(b, Brain_Data) - assert b.shape()[1] == np.sum(mask.data==1) + assert b.shape()[1] == np.sum(mask.data == 1) + def test_check_brain_data_is_single(sim_brain_data): assert not check_brain_data_is_single(sim_brain_data) - assert check_brain_data_is_single(sim_brain_data[0]) \ No newline at end of file + assert check_brain_data_is_single(sim_brain_data[0]) diff --git a/nltools/utils.py b/nltools/utils.py index 899d15d6..ae3365e9 100644 --- a/nltools/utils.py +++ b/nltools/utils.py @@ -1,19 +1,20 @@ -''' +""" NeuroLearn Utilities ==================== handy utilities. -''' -__all__ = ['get_resource_path', - 'get_anatomical', - 'set_algorithm', - 'attempt_to_import', - 'all_same', - 'concatenate', - '_bootstrap_apply_func', - 'set_decomposition_algorithm' - ] +""" +__all__ = [ + "get_resource_path", + "get_anatomical", + "set_algorithm", + "attempt_to_import", + "all_same", + "concatenate", + "_bootstrap_apply_func", + "set_decomposition_algorithm", +] __author__ = ["Luke Chang"] __license__ = "MIT" @@ -29,11 +30,10 @@ import collections from types import GeneratorType - + def _df_meta_to_arr(df): - """Check what kind of data exists in pandas columns or index. If string return as numpy array 'S' type, otherwise regular numpy array. Used when saving Brain_Data objects to hdf5. - """ - + """Check what kind of data exists in pandas columns or index. If string return as numpy array 'S' type, otherwise regular numpy array. 
Used when saving Brain_Data objects to hdf5.""" + if len(df.columns): if isinstance(df.columns[0], str): columns = df.columns.values.astype("S") @@ -41,7 +41,7 @@ def _df_meta_to_arr(df): columns = df.columns.values else: columns = [] - + if len(df.index): if isinstance(df.index[0], str): index = df.index.values.astype("S") @@ -55,45 +55,47 @@ def _df_meta_to_arr(df): def get_resource_path(): """ Get path to nltools resource directory. """ - return join(dirname(__file__), 'resources') + pathsep + return join(dirname(__file__), "resources") + pathsep def get_anatomical(): - """ Get nltools default anatomical image. - DEPRECATED. See MNI_Template and resolve_mni_path from nltools.prefs + """Get nltools default anatomical image. + DEPRECATED. See MNI_Template and resolve_mni_path from nltools.prefs """ - return nib.load(os.path.join(get_resource_path(), 'MNI152_T1_2mm.nii.gz')) + return nib.load(os.path.join(get_resource_path(), "MNI152_T1_2mm.nii.gz")) -def get_mni_from_img_resolution(brain, img_type='plot'): +def get_mni_from_img_resolution(brain, img_type="plot"): """ Get the path to the resolution MNI anatomical image that matches the resolution of a Brain_Data instance. Used by Brain_Data.plot() and .iplot() to set backgrounds appropriately. - + Args: brain: Brain_Data instance - + Returns: file_path: path to MNI image """ - - if img_type not in ['plot', 'brain']: + + if img_type not in ["plot", "brain"]: raise ValueError("img_type must be 'plot' or 'brain' ") - + res_array = np.abs(np.diag(brain.nifti_masker.affine_)[:3]) voxel_dims = np.unique(abs(res_array)) if len(voxel_dims) != 1: - raise ValueError("Voxels are not isometric and cannot be visualized in standard space") + raise ValueError( + "Voxels are not isometric and cannot be visualized in standard space" + ) else: - dim = str(int(voxel_dims[0])) + 'mm' - if img_type == 'brain': - mni = f'MNI152_T1_{dim}_brain.nii.gz' + dim = str(int(voxel_dims[0])) + "mm" + if img_type == "brain": + mni = f"MNI152_T1_{dim}_brain.nii.gz" else: - mni = f'MNI152_T1_{dim}.nii.gz' + mni = f"MNI152_T1_{dim}.nii.gz" return os.path.join(get_resource_path(), mni) def set_algorithm(algorithm, *args, **kwargs): - """ Setup the algorithm to use in subsequent prediction analyses. + """Setup the algorithm to use in subsequent prediction analyses. Args: algorithm: The prediction algorithm to use. 
Either a string or an @@ -112,69 +114,79 @@ def set_algorithm(algorithm, *args, **kwargs): # NOTE: function currently located here instead of analysis.py to avoid circular imports predictor_settings = {} - predictor_settings['algorithm'] = algorithm + predictor_settings["algorithm"] = algorithm def load_class(import_string): class_data = import_string.split(".") - module_path = '.'.join(class_data[:-1]) + module_path = ".".join(class_data[:-1]) class_str = class_data[-1] module = importlib.import_module(module_path) return getattr(module, class_str) algs_classify = { - 'svm': 'sklearn.svm.SVC', - 'logistic': 'sklearn.linear_model.LogisticRegression', - 'ridgeClassifier': 'sklearn.linear_model.RidgeClassifier', - 'ridgeClassifierCV': 'sklearn.linear_model.RidgeClassifierCV', - 'randomforestClassifier': 'sklearn.ensemble.RandomForestClassifier' - } + "svm": "sklearn.svm.SVC", + "logistic": "sklearn.linear_model.LogisticRegression", + "ridgeClassifier": "sklearn.linear_model.RidgeClassifier", + "ridgeClassifierCV": "sklearn.linear_model.RidgeClassifierCV", + "randomforestClassifier": "sklearn.ensemble.RandomForestClassifier", + } algs_predict = { - 'svr': 'sklearn.svm.SVR', - 'linear': 'sklearn.linear_model.LinearRegression', - 'lasso': 'sklearn.linear_model.Lasso', - 'lassoCV': 'sklearn.linear_model.LassoCV', - 'ridge': 'sklearn.linear_model.Ridge', - 'ridgeCV': 'sklearn.linear_model.RidgeCV', - 'randomforest': 'sklearn.ensemble.RandomForest' - } + "svr": "sklearn.svm.SVR", + "linear": "sklearn.linear_model.LinearRegression", + "lasso": "sklearn.linear_model.Lasso", + "lassoCV": "sklearn.linear_model.LassoCV", + "ridge": "sklearn.linear_model.Ridge", + "ridgeCV": "sklearn.linear_model.RidgeCV", + "randomforest": "sklearn.ensemble.RandomForest", + } if algorithm in algs_classify.keys(): - predictor_settings['prediction_type'] = 'classification' + predictor_settings["prediction_type"] = "classification" alg = load_class(algs_classify[algorithm]) - predictor_settings['predictor'] = alg(*args, **kwargs) + predictor_settings["predictor"] = alg(*args, **kwargs) elif algorithm in algs_predict: - predictor_settings['prediction_type'] = 'prediction' + predictor_settings["prediction_type"] = "prediction" alg = load_class(algs_predict[algorithm]) - predictor_settings['predictor'] = alg(*args, **kwargs) - elif algorithm == 'lassopcr': - predictor_settings['prediction_type'] = 'prediction' + predictor_settings["predictor"] = alg(*args, **kwargs) + elif algorithm == "lassopcr": + predictor_settings["prediction_type"] = "prediction" from sklearn.linear_model import Lasso from sklearn.decomposition import PCA - predictor_settings['_lasso'] = Lasso() - predictor_settings['_pca'] = PCA() - predictor_settings['predictor'] = Pipeline( - steps=[('pca', predictor_settings['_pca']), - ('lasso', predictor_settings['_lasso'])]) - elif algorithm == 'pcr': - predictor_settings['prediction_type'] = 'prediction' + + predictor_settings["_lasso"] = Lasso() + predictor_settings["_pca"] = PCA() + predictor_settings["predictor"] = Pipeline( + steps=[ + ("pca", predictor_settings["_pca"]), + ("lasso", predictor_settings["_lasso"]), + ] + ) + elif algorithm == "pcr": + predictor_settings["prediction_type"] = "prediction" from sklearn.linear_model import LinearRegression from sklearn.decomposition import PCA - predictor_settings['_regress'] = LinearRegression() - predictor_settings['_pca'] = PCA() - predictor_settings['predictor'] = Pipeline( - steps=[('pca', predictor_settings['_pca']), - ('regress', 
predictor_settings['_regress'])]) + + predictor_settings["_regress"] = LinearRegression() + predictor_settings["_pca"] = PCA() + predictor_settings["predictor"] = Pipeline( + steps=[ + ("pca", predictor_settings["_pca"]), + ("regress", predictor_settings["_regress"]), + ] + ) else: - raise ValueError("""Invalid prediction/classification algorithm name. + raise ValueError( + """Invalid prediction/classification algorithm name. Valid options are 'svm','svr', 'linear', 'logistic', 'lasso', 'lassopcr','lassoCV','ridge','ridgeCV','ridgeClassifier', - 'randomforest', or 'randomforestClassifier'.""") + 'randomforest', or 'randomforestClassifier'.""" + ) return predictor_settings def set_decomposition_algorithm(algorithm, n_components=None, *args, **kwargs): - """ Setup the algorithm to use in subsequent decomposition analyses. + """Setup the algorithm to use in subsequent decomposition analyses. Args: algorithm: The decomposition algorithm to use. Either a string or an @@ -193,35 +205,38 @@ def set_decomposition_algorithm(algorithm, n_components=None, *args, **kwargs): def load_class(import_string): class_data = import_string.split(".") - module_path = '.'.join(class_data[:-1]) + module_path = ".".join(class_data[:-1]) class_str = class_data[-1] module = importlib.import_module(module_path) return getattr(module, class_str) algs = { - 'pca': 'sklearn.decomposition.PCA', - 'ica': 'sklearn.decomposition.FastICA', - 'nnmf': 'sklearn.decomposition.NMF', - 'fa': 'sklearn.decomposition.FactorAnalysis', - 'dictionary': 'sklearn.decomposition.DictionaryLearning', - 'kernelpca': 'sklearn.decomposition.KernelPCA'} + "pca": "sklearn.decomposition.PCA", + "ica": "sklearn.decomposition.FastICA", + "nnmf": "sklearn.decomposition.NMF", + "fa": "sklearn.decomposition.FactorAnalysis", + "dictionary": "sklearn.decomposition.DictionaryLearning", + "kernelpca": "sklearn.decomposition.KernelPCA", + } if algorithm in algs.keys(): alg = load_class(algs[algorithm]) alg = alg(n_components, *args, **kwargs) else: - raise ValueError("""Invalid prediction/classification algorithm name. - Valid options are 'pca','ica', 'nnmf', 'fa'""") + raise ValueError( + """Invalid prediction/classification algorithm name. + Valid options are 'pca','ica', 'nnmf', 'fa'""" + ) return alg def isiterable(obj): - ''' Returns True if the object is one of allowable iterable types. ''' + """ Returns True if the object is one of allowable iterable types. """ return isinstance(obj, (list, tuple, GeneratorType)) module_names = {} -Dependency = collections.namedtuple('Dependency', 'package value') +Dependency = collections.namedtuple("Dependency", "package value") def attempt_to_import(dependency, name=None, fromlist=None): @@ -240,10 +255,10 @@ def all_same(items): def concatenate(data): - '''Concatenate a list of Brain_Data() or Adjacency() objects''' + """Concatenate a list of Brain_Data() or Adjacency() objects""" if not isinstance(data, list): - raise ValueError('Make sure you are passing a list of objects.') + raise ValueError("Make sure you are passing a list of objects.") if all([isinstance(x, data[0].__class__) for x in data]): # Temporarily Removing this for circular imports (LC) @@ -255,22 +270,22 @@ def concatenate(data): for i in data: out = out.append(i) else: - raise ValueError('Make sure all objects in the list are the same type.') + raise ValueError("Make sure all objects in the list are the same type.") return out def _bootstrap_apply_func(data, function, random_state=None, *args, **kwargs): - '''Bootstrap helper function. 
Sample with replacement and apply function''' + """Bootstrap helper function. Sample with replacement and apply function""" random_state = check_random_state(random_state) data_row_id = range(data.shape()[0]) - new_dat = data[random_state.choice(data_row_id, - size=len(data_row_id), - replace=True)] + new_dat = data[ + random_state.choice(data_row_id, size=len(data_row_id), replace=True) + ] return getattr(new_dat, function)(*args, **kwargs) def check_square_numpy_matrix(data): - '''Helper function to make sure matrix is square and numpy array''' + """Helper function to make sure matrix is square and numpy array""" from nltools.data import Adjacency @@ -285,12 +300,14 @@ def check_square_numpy_matrix(data): try: data = squareform(data) except ValueError: - raise ValueError("Array does not contain the correct number of elements to be square") + raise ValueError( + "Array does not contain the correct number of elements to be square" + ) return data def check_brain_data(data, mask=None): - '''Check if data is a Brain_Data Instance.''' + """Check if data is a Brain_Data Instance.""" from nltools.data import Brain_Data if not isinstance(data, Brain_Data): @@ -303,32 +320,37 @@ def check_brain_data(data, mask=None): data = data.apply_mask(mask) return data + def check_brain_data_is_single(data): - '''Logical test if Brain_Data instance is a single image - + """Logical test if Brain_Data instance is a single image + Args: data: brain data - + Returns: (bool) - - ''' + + """ data = check_brain_data(data) if len(data.shape()) > 1: return False else: return True + def _roi_func(brain, roi, algorithm, cv_dict, **kwargs): - '''Brain_Data.predict_multi() helper function''' - return brain.apply_mask(roi).predict(algorithm=algorithm, cv_dict=cv_dict, plot=False, **kwargs) + """Brain_Data.predict_multi() helper function""" + return brain.apply_mask(roi).predict( + algorithm=algorithm, cv_dict=cv_dict, plot=False, **kwargs + ) class AmbiguityError(Exception): pass -def generate_jitter(n_trials, mean_time=5, min_time=2, max_time=12, atol=.2): - '''Generate jitter from exponential distribution with constraints + +def generate_jitter(n_trials, mean_time=5, min_time=2, max_time=12, atol=0.2): + """Generate jitter from exponential distribution with constraints Draws from exponential distribution until the distribution satisfies the constraints: np.abs(np.mean(min_time > data < max_time) - mean_time) <= atol @@ -342,17 +364,17 @@ def generate_jitter(n_trials, mean_time=5, min_time=2, max_time=12, atol=.2): Returns: data: (np.array) jitter for each trial - - ''' + + """ def generate_data(n_trials, scale=5, min_time=2, max_time=12): data = [] - i=0 + i = 0 while i < n_trials: datam = np.random.exponential(scale=5) if (datam > min_time) & (datam < max_time): data.append(datam) - i+=1 + i += 1 return data mean_diff = False diff --git a/nltools/version.py b/nltools/version.py index 548f38dc..3f214748 100644 --- a/nltools/version.py +++ b/nltools/version.py @@ -1,4 +1,4 @@ """Specifies current version of nltools to be used by setup.py and __init__.py """ -__version__ = '0.4.2' +__version__ = "0.4.2" diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..b39a47fa --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,6 @@ +pytest +black +sphinx +sphinx_gallery +sphinx_bootstrap_theme + From 88ab4111a884ba8f624b0405162bb56b89407b2c Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 16:44:53 -0400 Subject: [PATCH 02/20] edit ga to run on PRs. 
--- .github/workflows/conda_coveralls_ci.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index 691602f1..d8735e8d 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -10,7 +10,6 @@ on: branches: - main - master - jobs: # Job (1): Run testing in parallel against multiples OSs and Python versions test: From 7caa7caa97f09ca34b7e50efb7f27e4c3d1aa16f Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 16:46:29 -0400 Subject: [PATCH 03/20] Remove travis and tox. Fix spacing in ga ci to get rid of conflict --- .github/workflows/conda_coveralls_ci.yml | 1 + .travis.yml | 36 ------------------------ tox.ini | 8 ------ 3 files changed, 1 insertion(+), 44 deletions(-) delete mode 100644 .travis.yml delete mode 100644 tox.ini diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index d8735e8d..691602f1 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -10,6 +10,7 @@ on: branches: - main - master + jobs: # Job (1): Run testing in parallel against multiples OSs and Python versions test: diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 7e76b1f5..00000000 --- a/.travis.yml +++ /dev/null @@ -1,36 +0,0 @@ -language: python -sudo: false -python: -- '3.6' -- '3.7' -- '3.8' -services: - - xvfb # https://benlimmer.com/2019/01/14/travis-ci-xvfb/ - -before_script: -- "export DISPLAY=:99.0" # https://docs.travis-ci.com/user/gui-and-headless-browsers -- sleep 3 - -install: -- wget http://repo.continuum.io/miniconda/Miniconda-latest-Linux-x86_64.sh -O miniconda.sh -- bash miniconda.sh -b -p $HOME/miniconda -- export PATH="$HOME/miniconda/bin:$PATH" -- hash -r -- conda config --set always_yes yes --set changeps1 no -- conda update -q conda -- conda info -a -- conda create -q -n testenv python=$TRAVIS_PYTHON_VERSION pip pytest numpy pandas - scipy matplotlib scikit-learn -- source activate testenv -- pip install --upgrade python-coveralls pytest-cov codecov -- pip install -r requirements.txt --upgrade -- pip install -r optional-dependencies.txt --upgrade -- python setup.py install -- cp nltools/tests/matplotlibrc . 
- -script: -- coverage run --source nltools -m py.test - -after_success: -- coveralls -- codecov \ No newline at end of file diff --git a/tox.ini b/tox.ini deleted file mode 100644 index 1902ec2b..00000000 --- a/tox.ini +++ /dev/null @@ -1,8 +0,0 @@ -[tox] -envlist = py27,py36 - -[testenv] -deps= - pytest - -roptional-dependencies.txt -commands=pytest From 1cd3f379126067e97413571337269966f98f5998 Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 16:49:45 -0400 Subject: [PATCH 04/20] Add sphinx-napoleaon to dev reqs --- requirements-dev.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements-dev.txt b/requirements-dev.txt index b39a47fa..7b87f739 100644 --- a/requirements-dev.txt +++ b/requirements-dev.txt @@ -3,4 +3,5 @@ black sphinx sphinx_gallery sphinx_bootstrap_theme +sphinxcontrib-napoleon From c2f04d4d6e6ee482b722a35eb6e3a3def245490d Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 16:50:15 -0400 Subject: [PATCH 05/20] Format setup.py --- setup.py | 48 ++++++++++++++++++++++-------------------------- 1 file changed, 22 insertions(+), 26 deletions(-) diff --git a/setup.py b/setup.py index 45692a99..d37561ae 100644 --- a/setup.py +++ b/setup.py @@ -4,41 +4,37 @@ with open("nltools/version.py") as f: exec(f.read(), version) -with open('requirements.txt') as f: +with open("requirements.txt") as f: requirements = f.read().splitlines() -extra_setuptools_args = dict( - tests_require=['pytest'] -) +extra_setuptools_args = dict(tests_require=["pytest"]) setup( - name = 'nltools', - version = version['__version__'], - author = 'Cosan Lab', - author_email = 'luke.j.chang@dartmouth.edu', - url = 'http://neurolearn.readthedocs.org/en/latest/', - python_requires = '>=3.6', - install_requires = requirements, - extras_require = { - 'interactive_plots':['ipywidgets>=5.2.2'] - }, - packages = find_packages(exclude=['nltools/tests']), - package_data = {'nltools': ['resources/*']}, - include_package_data = True, - license = 'LICENSE.txt', - description = 'A Python package to analyze neuroimaging data', - long_description = 'nltools is a collection of python tools to perform ' - 'preprocessing, univariate GLMs, and predictive ' - 'multivariate modeling of neuroimaging data. It is the ' - 'analysis engine powering www.neuro-learn.org.', - keywords = ['neuroimaging', 'preprocessing', 'analysis','machine-learning'], - classifiers = [ + name="nltools", + version=version["__version__"], + author="Cosan Lab", + author_email="luke.j.chang@dartmouth.edu", + url="http://neurolearn.readthedocs.org/en/latest/", + python_requires=">=3.6", + install_requires=requirements, + extras_require={"interactive_plots": ["ipywidgets>=5.2.2"]}, + packages=find_packages(exclude=["nltools/tests"]), + package_data={"nltools": ["resources/*"]}, + include_package_data=True, + license="LICENSE.txt", + description="A Python package to analyze neuroimaging data", + long_description="nltools is a collection of python tools to perform " + "preprocessing, univariate GLMs, and predictive " + "multivariate modeling of neuroimaging data. 
It is the " + "analysis engine powering www.neuro-learn.org.", + keywords=["neuroimaging", "preprocessing", "analysis", "machine-learning"], + classifiers=[ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Operating System :: OS Independent", "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License" + "License :: OSI Approved :: MIT License", ], **extra_setuptools_args ) From 0cb8c95d37f3464362bd840f29819c0c2ceaebf1 Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 16:57:41 -0400 Subject: [PATCH 06/20] Remove all traces of python 2 and six dep. --- README.md | 1 - docs/install.rst | 1 - nltools/__init__.py | 2 -- nltools/analysis.py | 2 -- nltools/cross_validation.py | 2 -- nltools/data/adjacency.py | 15 +++++---------- nltools/data/brain_data.py | 27 ++++++++++++--------------- nltools/data/design_matrix.py | 5 +---- nltools/external/srm.py | 2 -- nltools/file_reader.py | 7 +++---- nltools/mask.py | 3 +-- nltools/prefs.py | 7 +++---- nltools/simulator.py | 7 +++---- nltools/stats.py | 7 ++----- nltools/tests/test_analysis.py | 1 - nltools/version.py | 2 +- requirements.txt | 1 - 17 files changed, 31 insertions(+), 61 deletions(-) diff --git a/README.md b/README.md index c6ab8e79..de5df652 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,6 @@ nltools requires several dependencies. All are available in pypi. Can use `pip - seaborn>=0.7.0 - matplotlib>=2.1 - scipy - - six - pynv - joblib diff --git a/docs/install.rst b/docs/install.rst index df2e2702..affd8b2b 100644 --- a/docs/install.rst +++ b/docs/install.rst @@ -35,7 +35,6 @@ nltools requires several dependencies. All are available in pypi. Can use *pip - seaborn>=0.7.0 - matplotlib>=2.2.0 - scipy - - six - pynv - joblib - deepdish>=0.3.6 diff --git a/nltools/__init__.py b/nltools/__init__.py index bc327eaa..4be0a1b7 100644 --- a/nltools/__init__.py +++ b/nltools/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import - __all__ = [ "data", "datasets", diff --git a/nltools/analysis.py b/nltools/analysis.py index 5adc8cb3..1df2e51b 100644 --- a/nltools/analysis.py +++ b/nltools/analysis.py @@ -1,5 +1,3 @@ -from __future__ import division - """ NeuroLearn Analysis Tools ========================= diff --git a/nltools/cross_validation.py b/nltools/cross_validation.py index 5952498d..c03bf09c 100644 --- a/nltools/cross_validation.py +++ b/nltools/cross_validation.py @@ -1,5 +1,3 @@ -from __future__ import division - """ Cross-Validation Data Classes ============================= diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py index 1f9c76f7..1583cf45 100644 --- a/nltools/data/adjacency.py +++ b/nltools/data/adjacency.py @@ -1,5 +1,3 @@ -from __future__ import division - """ This data class is for working with similarity/dissimilarity matrices """ @@ -10,7 +8,6 @@ import os import pandas as pd import numpy as np -import six import deepdish as dd from copy import deepcopy from sklearn.metrics.pairwise import pairwise_distances @@ -115,9 +112,7 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=[], **kwargs): self.issymmetric = symmetric_all[0] self.matrix_type = matrix_type_all[0] self.is_single_matrix = False - elif isinstance(data, six.string_types) and ( - (".h5" in data) or (".hdf5" in data) - ): + elif isinstance(data, str) and ((".h5" in data) or (".hdf5" in data)): f = dd.io.load(data) self.data = f["data"] self.Y = pd.DataFrame( @@ -147,7 +142,7 @@ def __init__(self, data=None, 
Y=None, matrix_type=None, labels=[], **kwargs): ) = self._import_single_data(data, matrix_type=matrix_type) if Y is not None: - if isinstance(Y, six.string_types) and os.path.isfile(Y): + if isinstance(Y, str) and os.path.isfile(Y): Y = pd.read_csv(Y, header=None, index_col=None) if isinstance(Y, pd.DataFrame): if self.data.shape[0] != len(Y): @@ -330,7 +325,7 @@ def _test_is_single_matrix(data): def _import_single_data(self, data, matrix_type=None): """ Helper function to import single data matrix.""" - if isinstance(data, six.string_types): + if isinstance(data, str): if os.path.isfile(data): data = pd.read_csv(data) else: @@ -776,9 +771,9 @@ def threshold(self, upper=None, lower=None, binarize=False): """ b = self.copy() - if isinstance(upper, six.string_types) and upper[-1] == "%": + if isinstance(upper, str) and upper[-1] == "%": upper = np.percentile(b.data, float(upper[:-1])) - if isinstance(lower, six.string_types) and lower[-1] == "%": + if isinstance(lower, str) and lower[-1] == "%": lower = np.percentile(b.data, float(lower[:-1])) if upper and lower: diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py index 51bbbff9..5a640f32 100644 --- a/nltools/data/brain_data.py +++ b/nltools/data/brain_data.py @@ -1,5 +1,3 @@ -from __future__ import division - """ NeuroLearn Brain Data ===================== @@ -28,7 +26,6 @@ import warnings import tempfile from copy import deepcopy -import six from sklearn.metrics import balanced_accuracy_score from sklearn.metrics.pairwise import pairwise_distances, cosine_similarity from sklearn.utils import check_random_state @@ -108,7 +105,7 @@ def __init__( ): if mask is not None: if not isinstance(mask, nib.Nifti1Image): - if isinstance(mask, six.string_types): + if isinstance(mask, str): if os.path.isfile(mask): mask = nib.load(mask) else: @@ -121,7 +118,7 @@ def __init__( self.nifti_masker = NiftiMasker(mask_img=self.mask) if data is not None: - if isinstance(data, six.string_types): + if isinstance(data, str): if "http://" in data or "https://" in data: from nltools.datasets import download_nifti @@ -177,7 +174,7 @@ def __init__( if all(isinstance(x, data[0].__class__) for x in data): self.data = [] for i in data: - if isinstance(i, six.string_types): + if isinstance(i, str): self.data.append( self.nifti_masker.fit_transform(nib.load(i)) ) @@ -200,7 +197,7 @@ def __init__( self.data = np.array([]) if Y is not None: - if isinstance(Y, six.string_types) and os.path.isfile(Y): + if isinstance(Y, str) and os.path.isfile(Y): Y = pd.read_csv(Y, header=None, index_col=None) if isinstance(Y, pd.DataFrame): if self.data.shape[0] != len(Y): @@ -212,7 +209,7 @@ def __init__( self.Y = pd.DataFrame() if X is not None: - if isinstance(X, six.string_types) and os.path.isfile(X): + if isinstance(X, str) and os.path.isfile(X): X = pd.read_csv(X, header=None, index_col=None) if isinstance(X, pd.DataFrame): if self.data.shape[0] != X.shape[0]: @@ -570,7 +567,7 @@ def plot( print("threshold is ignored for simple axial plots") if anatomical is not None: if not isinstance(anatomical, nib.Nifti1Image): - if isinstance(anatomical, six.string_types): + if isinstance(anatomical, str): anatomical = nib.load(anatomical) else: raise ValueError("anatomical is not a nibabel instance") @@ -645,7 +642,7 @@ def iplot(self, threshold=0, surface=False, anatomical=None, **kwargs): """ if anatomical is not None: if not isinstance(anatomical, nib.Nifti1Image): - if isinstance(anatomical, six.string_types): + if isinstance(anatomical, str): anatomical = 
nib.load(anatomical) else: raise ValueError("anatomical is not a nibabel instance") @@ -1437,7 +1434,7 @@ def predict_multi( groups = None if method == "rois": - if isinstance(rois, six.string_types): + if isinstance(rois, str): if os.path.isfile(rois): rois_img = Brain_Data(rois, mask=self.mask) elif isinstance(rois, Brain_Data): @@ -1464,7 +1461,7 @@ def predict_multi( process_mask_img = process_mask elif isinstance(process_mask, Brain_Data): process_mask_img = process_mask.to_nifti() - elif isinstance(process_mask, six.string_types): + elif isinstance(process_mask, str): if os.path.isfile(process_mask): process_mask_img = nib.load(process_mask) else: @@ -1763,7 +1760,7 @@ def add_image_to_collection( """ if (len(dat.shape()) > 1) & (dat.shape()[0] > 1): raise ValueError('"dat" must be a single image.') - if not dat.X.empty and isinstance(dat.X.name, six.string_types): + if not dat.X.empty and isinstance(dat.X.name, str): img_name = dat.X.name else: img_name = collection["name"] + "_" + str(index_id) + ".nii.gz" @@ -1917,10 +1914,10 @@ def threshold(self, upper=None, lower=None, binarize=False, coerce_nan=True): if coerce_nan: b.data = np.nan_to_num(b.data) - if isinstance(upper, six.string_types) and upper[-1] == "%": + if isinstance(upper, str) and upper[-1] == "%": upper = np.percentile(b.data, float(upper[:-1])) - if isinstance(lower, six.string_types) and lower[-1] == "%": + if isinstance(lower, str) and lower[-1] == "%": lower = np.percentile(b.data, float(lower[:-1])) if upper and lower: diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py index a9ecf5d0..0d627555 100644 --- a/nltools/data/design_matrix.py +++ b/nltools/data/design_matrix.py @@ -1,5 +1,3 @@ -from __future__ import division - """ NeuroLearn Design Matrix ======================== @@ -18,7 +16,6 @@ import matplotlib.pyplot as plt from scipy.stats import pearsonr from scipy.special import legendre -import six from ..external.hrf import glover_hrf from nltools.stats import downsample, upsample, zscore, make_cosine_basis from nltools.utils import AmbiguityError @@ -539,7 +536,7 @@ def convolve(self, conv_func="hrf", columns=None): if isinstance(conv_func, np.ndarray): if len(conv_func.shape) > 2: raise ValueError("2d conv_func must be formatted as samplex X kernals!") - elif isinstance(conv_func, six.string_types): + elif isinstance(conv_func, str): if conv_func != "hrf": raise ValueError( "Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays." 
diff --git a/nltools/external/srm.py b/nltools/external/srm.py index 9f419f44..33432014 100644 --- a/nltools/external/srm.py +++ b/nltools/external/srm.py @@ -33,8 +33,6 @@ # Authors: Po-Hsuan Chen (Princeton Neuroscience Institute) and Javier Turek # (Intel Labs), 2015 -from __future__ import division - import logging import numpy as np diff --git a/nltools/file_reader.py b/nltools/file_reader.py index 738391b4..73f59247 100644 --- a/nltools/file_reader.py +++ b/nltools/file_reader.py @@ -10,7 +10,6 @@ import pandas as pd import numpy as np -import six from nltools.data import Design_Matrix import warnings @@ -65,7 +64,7 @@ def onsets_to_dm( out = [] TR = 1.0 / sampling_freq for f in F: - if isinstance(f, six.string_types): + if isinstance(f, str): df = pd.read_csv(f, header=header, **kwargs) elif isinstance(f, pd.core.frame.DataFrame): df = f.copy() @@ -84,9 +83,9 @@ def onsets_to_dm( # Try to infer the header if header is None: possibleHeaders = ["Stim", "Onset", "Duration"] - if isinstance(df.iloc[0, 0], six.string_types): + if isinstance(df.iloc[0, 0], str): df.columns = possibleHeaders[: df.shape[1]] - elif isinstance(df.iloc[0, df.shape[1] - 1], six.string_types): + elif isinstance(df.iloc[0, df.shape[1] - 1], str): df.columns = possibleHeaders[1:] + [possibleHeaders[0]] else: raise ValueError( diff --git a/nltools/mask.py b/nltools/mask.py index 5b645412..c8c12d51 100644 --- a/nltools/mask.py +++ b/nltools/mask.py @@ -15,7 +15,6 @@ from nltools.prefs import MNI_Template, resolve_mni_path import pandas as pd import numpy as np -import six import warnings from nilearn.masking import intersect_masks @@ -34,7 +33,7 @@ def create_sphere(coordinates, radius=5, mask=None): if mask is not None: if not isinstance(mask, nib.Nifti1Image): - if isinstance(mask, six.string_types): + if isinstance(mask, str): if os.path.isfile(mask): mask = nib.load(mask) else: diff --git a/nltools/prefs.py b/nltools/prefs.py index 68d6c3f8..b0bfcba4 100644 --- a/nltools/prefs.py +++ b/nltools/prefs.py @@ -10,7 +10,6 @@ import os from nltools.utils import get_resource_path -import six MNI_Template = dict( resolution="2mm", @@ -26,9 +25,9 @@ def resolve_mni_path(MNI_Template): res = MNI_Template["resolution"] m = MNI_Template["mask_type"] - if not isinstance(res, six.string_types): + if not isinstance(res, str): raise ValueError("resolution must be provided as a string!") - if not isinstance(m, six.string_types): + if not isinstance(m, str): raise ValueError("mask_type must be provided as a string!") if res == "3mm": @@ -96,7 +95,7 @@ def resolve_mni_path(MNI_Template): # return strOut # # def use_template(self,template_name): -# if isinstance(template_name,six.string_types): +# if isinstance(template_name, str): # if template_name == '3mm': # self.MNI_Template['mask'] = os.path.join(get_resource_path(),'MNI152_T1_3mm_brain_mask.nii.gz') # self.MNI_Template['plot'] = os.path.join(get_resource_path(),'MNI152_T1_3mm.nii.gz') diff --git a/nltools/simulator.py b/nltools/simulator.py index 31806eaf..984a2a93 100755 --- a/nltools/simulator.py +++ b/nltools/simulator.py @@ -12,7 +12,6 @@ import os -import six import numpy as np import nibabel as nib import pandas as pd @@ -217,7 +216,7 @@ def create_data( dat.Y = self.y # Write Data to files if requested - if output_dir is not None and isinstance(output_dir, six.string_types): + if output_dir is not None and isinstance(output_dir, str): NF_list.write(os.path.join(output_dir, "data.nii.gz")) self.y.to_csv(os.path.join(output_dir, "y.csv"), index=None, header=False) 
self.rep_id.to_csv( @@ -300,7 +299,7 @@ def create_cov_data( # self.y = mv_sim[:,0] # mv_sim = mv_sim[:,1:] # A_4d = np.resize(A,(reps,A.shape[0],A.shape[1],A.shape[2])) - # for i in xrange(len(x)): + # for i in range(len(x)): # A_4d[:,x[i],y[i],z[i]]=mv_sim[:,i] # A_4d = np.rollaxis(A_4d,0,4) # reorder shape of matrix so that time is in 4th dimension # self.data = self.to_nifti(np.add(A_4d,np.random.standard_normal(size=A_4d.shape)*sigma)) # add noise scaled by sigma @@ -308,7 +307,7 @@ def create_cov_data( # Write Data to files if requested if output_dir is not None: - if isinstance(output_dir, six.string_types): + if isinstance(output_dir, str): if not os.path.isdir(output_dir): os.makedirs(output_dir) self.data.to_filename( diff --git a/nltools/stats.py b/nltools/stats.py index d0b6655d..29092703 100644 --- a/nltools/stats.py +++ b/nltools/stats.py @@ -1,5 +1,3 @@ -from __future__ import division - """ NeuroLearn Statistics Tools =========================== @@ -63,7 +61,6 @@ import warnings import itertools from joblib import Parallel, delayed -import six from .utils import attempt_to_import, check_square_numpy_matrix from .external.srm import SRM, DetSRM from sklearn.utils import check_random_state @@ -1031,10 +1028,10 @@ def regress(X, Y, mode="ols", stats="full", **kwargs): """ - if not isinstance(mode, six.string_types): + if not isinstance(mode, str): raise ValueError("mode must be a string") - if not isinstance(stats, six.string_types): + if not isinstance(stats, str): raise ValueError("stats must be a string") if mode not in ["ols", "robust", "arma"]: diff --git a/nltools/tests/test_analysis.py b/nltools/tests/test_analysis.py index 8074913b..95036df0 100644 --- a/nltools/tests/test_analysis.py +++ b/nltools/tests/test_analysis.py @@ -1,4 +1,3 @@ -from __future__ import division from nltools.simulator import Simulator from nltools.analysis import Roc import matplotlib diff --git a/nltools/version.py b/nltools/version.py index 3f214748..725b975c 100644 --- a/nltools/version.py +++ b/nltools/version.py @@ -1,4 +1,4 @@ """Specifies current version of nltools to be used by setup.py and __init__.py """ -__version__ = "0.4.2" +__version__ = "0.4.3" diff --git a/requirements.txt b/requirements.txt index 7af9fedc..01694377 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,7 +6,6 @@ seaborn>=0.7.0 matplotlib>=2.2.0 scipy - six pynv joblib>=0.15 deepdish>=0.3.6 From 874c288ac0f9e46e1dee6aa9ea24417e7eb0f5fc Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 17:21:15 -0400 Subject: [PATCH 07/20] Support path objects in all file-loading operations. 
fixes #369 --- nltools/data/adjacency.py | 9 ++++++--- nltools/data/brain_data.py | 17 +++++++++-------- nltools/data/design_matrix.py | 1 + nltools/file_reader.py | 3 ++- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py index 1583cf45..8d55ab0f 100644 --- a/nltools/data/adjacency.py +++ b/nltools/data/adjacency.py @@ -39,6 +39,7 @@ ) from .design_matrix import Design_Matrix from joblib import Parallel, delayed +from pathlib import Path # Optional dependencies nx = attempt_to_import("networkx", "nx") @@ -112,7 +113,9 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=[], **kwargs): self.issymmetric = symmetric_all[0] self.matrix_type = matrix_type_all[0] self.is_single_matrix = False - elif isinstance(data, str) and ((".h5" in data) or (".hdf5" in data)): + elif (isinstance(data, str) or isinstance(data, Path)) and ( + (".h5" in data) or (".hdf5" in data) + ): f = dd.io.load(data) self.data = f["data"] self.Y = pd.DataFrame( @@ -142,7 +145,7 @@ def __init__(self, data=None, Y=None, matrix_type=None, labels=[], **kwargs): ) = self._import_single_data(data, matrix_type=matrix_type) if Y is not None: - if isinstance(Y, str) and os.path.isfile(Y): + if (isinstance(Y, str) or isinstance(Y, Path)) and os.path.isfile(Y): Y = pd.read_csv(Y, header=None, index_col=None) if isinstance(Y, pd.DataFrame): if self.data.shape[0] != len(Y): @@ -325,7 +328,7 @@ def _test_is_single_matrix(data): def _import_single_data(self, data, matrix_type=None): """ Helper function to import single data matrix.""" - if isinstance(data, str): + if isinstance(data, str) or isinstance(data, Path): if os.path.isfile(data): data = pd.read_csv(data) else: diff --git a/nltools/data/brain_data.py b/nltools/data/brain_data.py index 5a640f32..d336d882 100644 --- a/nltools/data/brain_data.py +++ b/nltools/data/brain_data.py @@ -70,6 +70,7 @@ from nltools.prefs import MNI_Template, resolve_mni_path from nilearn.decoding import SearchLight import deepdish as dd +from pathlib import Path # Optional dependencies @@ -105,7 +106,7 @@ def __init__( ): if mask is not None: if not isinstance(mask, nib.Nifti1Image): - if isinstance(mask, str): + if isinstance(mask, str) or isinstance(mask, Path): if os.path.isfile(mask): mask = nib.load(mask) else: @@ -174,7 +175,7 @@ def __init__( if all(isinstance(x, data[0].__class__) for x in data): self.data = [] for i in data: - if isinstance(i, str): + if isinstance(i, str) or isinstance(i, Path): self.data.append( self.nifti_masker.fit_transform(nib.load(i)) ) @@ -197,7 +198,7 @@ def __init__( self.data = np.array([]) if Y is not None: - if isinstance(Y, str) and os.path.isfile(Y): + if (isinstance(Y, str) or isinstance(Y, Path)) and os.path.isfile(Y): Y = pd.read_csv(Y, header=None, index_col=None) if isinstance(Y, pd.DataFrame): if self.data.shape[0] != len(Y): @@ -209,7 +210,7 @@ def __init__( self.Y = pd.DataFrame() if X is not None: - if isinstance(X, str) and os.path.isfile(X): + if (isinstance(X, str) or isinstance(X, Path)) and os.path.isfile(X): X = pd.read_csv(X, header=None, index_col=None) if isinstance(X, pd.DataFrame): if self.data.shape[0] != X.shape[0]: @@ -567,7 +568,7 @@ def plot( print("threshold is ignored for simple axial plots") if anatomical is not None: if not isinstance(anatomical, nib.Nifti1Image): - if isinstance(anatomical, str): + if isinstance(anatomical, str) or isinstance(anatomical, str): anatomical = nib.load(anatomical) else: raise ValueError("anatomical is not a nibabel instance") 
@@ -642,7 +643,7 @@ def iplot(self, threshold=0, surface=False, anatomical=None, **kwargs): """ if anatomical is not None: if not isinstance(anatomical, nib.Nifti1Image): - if isinstance(anatomical, str): + if isinstance(anatomical, str) or isinstance(anatomical, Path): anatomical = nib.load(anatomical) else: raise ValueError("anatomical is not a nibabel instance") @@ -1434,7 +1435,7 @@ def predict_multi( groups = None if method == "rois": - if isinstance(rois, str): + if isinstance(rois, str) or isinstance(rois, Path): if os.path.isfile(rois): rois_img = Brain_Data(rois, mask=self.mask) elif isinstance(rois, Brain_Data): @@ -1461,7 +1462,7 @@ def predict_multi( process_mask_img = process_mask elif isinstance(process_mask, Brain_Data): process_mask_img = process_mask.to_nifti() - elif isinstance(process_mask, str): + elif isinstance(process_mask, str) or isinstance(process_mask, Path): if os.path.isfile(process_mask): process_mask_img = nib.load(process_mask) else: diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py index 0d627555..44c500ac 100644 --- a/nltools/data/design_matrix.py +++ b/nltools/data/design_matrix.py @@ -19,6 +19,7 @@ from ..external.hrf import glover_hrf from nltools.stats import downsample, upsample, zscore, make_cosine_basis from nltools.utils import AmbiguityError +from pathlib import Path class Design_Matrix_Series(Series): diff --git a/nltools/file_reader.py b/nltools/file_reader.py index 73f59247..8913b7de 100644 --- a/nltools/file_reader.py +++ b/nltools/file_reader.py @@ -12,6 +12,7 @@ import numpy as np from nltools.data import Design_Matrix import warnings +from pathlib import Path def onsets_to_dm( @@ -64,7 +65,7 @@ def onsets_to_dm( out = [] TR = 1.0 / sampling_freq for f in F: - if isinstance(f, str): + if isinstance(f, str) or isinstance(f, Path): df = pd.read_csv(f, header=header, **kwargs) elif isinstance(f, pd.core.frame.DataFrame): df = f.copy() From a83bd3c1230eb8f5fb7d5cec116f6b181c351ffc Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 17:51:51 -0400 Subject: [PATCH 08/20] Turn off matplotib agg for tests. --- nltools/tests/test_analysis.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/nltools/tests/test_analysis.py b/nltools/tests/test_analysis.py index 95036df0..cb3e7fb3 100644 --- a/nltools/tests/test_analysis.py +++ b/nltools/tests/test_analysis.py @@ -1,8 +1,5 @@ from nltools.simulator import Simulator from nltools.analysis import Roc -import matplotlib - -matplotlib.use("TkAgg") def test_roc(tmpdir): From 08d4b0e7606977ef3991dd483d0226f59b5f749a Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 20:03:35 -0400 Subject: [PATCH 09/20] fix ga. fix bug in design matrix repr. pin pandas version until deepdish updates. --- .github/workflows/conda_coveralls_ci.yml | 2 +- nltools/data/design_matrix.py | 15 +++------------ 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index 691602f1..fc0414c1 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -140,7 +140,7 @@ jobs: run: | python3 -m pip install . -r requirements.txt python3 -m pip install . -r requirements-dev.txt - python3 -m pip install . -r optional-dependencies.txt + python3 - m pip install . 
-r optional-dependencies.txt - name: Build docs run: | diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py index 44c500ac..57803a0a 100644 --- a/nltools/data/design_matrix.py +++ b/nltools/data/design_matrix.py @@ -112,16 +112,7 @@ def _sort_cols(self): return self[data_cols + separated_cols + self.polys] def details(self): - return ( - "%s.%s(sampling_freq=%s (hz), shape=%s, multi=%s, convolved=%s, polynomials=%s)" - % self.__class__.__module__, - self.__class__.__name__, - self.sampling_freq, - self.shape, - self.multi, - self.convolved, - self.polys, - ) + return f"{self.__class__.__module__}.{self.__class__.__name__}(sampling_freq={self.sampling_freq} (hz), shape={self.shape}, multi={self.multi}, convolved={self.convolved}, polynomials={self.polys})" def append( self, dm, axis=0, keep_separate=True, unique_cols=None, fill_na=0, verbose=False @@ -588,7 +579,7 @@ def downsample(self, target, **kwargs): sampling_freq=self.sampling_freq, target=target, target_type="hz", - **kwargs + **kwargs, ) ) @@ -616,7 +607,7 @@ def upsample(self, target, **kwargs): sampling_freq=self.sampling_freq, target=target, target_type="hz", - **kwargs + **kwargs, ) ) From 71ec1bb1e16744480ffc991ba1552e2a23463fad Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 20:10:02 -0400 Subject: [PATCH 10/20] fixes #364 --- nltools/stats.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/nltools/stats.py b/nltools/stats.py index 29092703..e2d9dc2c 100644 --- a/nltools/stats.py +++ b/nltools/stats.py @@ -119,6 +119,8 @@ def fdr(p, q=0.05): if not isinstance(p, np.ndarray): raise ValueError("Make sure vector of p-values is a numpy array") + if any(p < 0) or any(p > 1): + raise ValueError("array contains p-values that are outside the range 0-1") s = np.sort(p) nvox = p.shape[0] From d282e68f317d8de201210e5de6db1e0a31edacfa Mon Sep 17 00:00:00 2001 From: ejolly Date: Mon, 22 Mar 2021 20:37:48 -0400 Subject: [PATCH 11/20] Fix max randint issue on windows. Fix typo in ga. --- .github/workflows/conda_coveralls_ci.yml | 2 +- nltools/external/srm.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index fc0414c1..691602f1 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -140,7 +140,7 @@ jobs: run: | python3 -m pip install . -r requirements.txt python3 -m pip install . -r requirements-dev.txt - python3 - m pip install . -r optional-dependencies.txt + python3 -m pip install . 
-r optional-dependencies.txt - name: Build docs run: | diff --git a/nltools/external/srm.py b/nltools/external/srm.py index 33432014..6071eb99 100644 --- a/nltools/external/srm.py +++ b/nltools/external/srm.py @@ -423,7 +423,7 @@ def _srm(self, data): subjects = len(data) self.random_state_ = np.random.RandomState(self.rand_seed) random_states = [ - np.random.RandomState(self.random_state_.randint(2 ** 32)) + np.random.RandomState(self.random_state_.randint(2 ** 32 - 1)) for i in range(len(data)) ] @@ -781,7 +781,7 @@ def _srm(self, data): self.random_state_ = np.random.RandomState(self.rand_seed) random_states = [ - np.random.RandomState(self.random_state_.randint(2 ** 32)) + np.random.RandomState(self.random_state_.randint(2 ** 32 - 1)) for i in range(len(data)) ] From 1a0f1c708af370e317cbfe6719dd3a765f5798b1 Mon Sep 17 00:00:00 2001 From: ejolly Date: Tue, 23 Mar 2021 12:28:53 -0400 Subject: [PATCH 12/20] Added test for 3d mds adj plot checks #381 --- nltools/tests/test_adjacency.py | 1 + 1 file changed, 1 insertion(+) diff --git a/nltools/tests/test_adjacency.py b/nltools/tests/test_adjacency.py index bf730115..261fb150 100644 --- a/nltools/tests/test_adjacency.py +++ b/nltools/tests/test_adjacency.py @@ -247,6 +247,7 @@ def test_plot(sim_adjacency_multiple): def test_plot_mds(sim_adjacency_single): sim_adjacency_single.plot_mds() + sim_adjacency_single.plot_mds(n_components=3) def test_similarity_conversion(sim_adjacency_single): From a597f4a134f8a0524da2bd7825808108ea021957 Mon Sep 17 00:00:00 2001 From: ejolly Date: Tue, 23 Mar 2021 15:20:50 -0400 Subject: [PATCH 13/20] Force int64 dtype for windows error --- nltools/external/srm.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/nltools/external/srm.py b/nltools/external/srm.py index 6071eb99..b7f288fb 100644 --- a/nltools/external/srm.py +++ b/nltools/external/srm.py @@ -423,7 +423,9 @@ def _srm(self, data): subjects = len(data) self.random_state_ = np.random.RandomState(self.rand_seed) random_states = [ - np.random.RandomState(self.random_state_.randint(2 ** 32 - 1)) + np.random.RandomState( + self.random_state_.randint(2 ** 32 - 1, dtype=np.int64) + ) for i in range(len(data)) ] @@ -781,7 +783,9 @@ def _srm(self, data): self.random_state_ = np.random.RandomState(self.rand_seed) random_states = [ - np.random.RandomState(self.random_state_.randint(2 ** 32 - 1)) + np.random.RandomState( + self.random_state_.randint(2 ** 32 - 1, dtype=np.int64) + ) for i in range(len(data)) ] From f5b14bc9109ebad9f2c305f3f0d566fe9a8f0364 Mon Sep 17 00:00:00 2001 From: ejolly Date: Tue, 23 Mar 2021 15:27:29 -0400 Subject: [PATCH 14/20] Try to deploy built docs to gh pages --- .github/workflows/conda_coveralls_ci.yml | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index 691602f1..dc11ba66 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -148,6 +148,15 @@ jobs: make clean make html + - name: Deploy docs + if: success() + uses: crazy-max/ghaction-github-pages@v2 + with: + target_branch: gh-pages + build_dir: docs/_build/html + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + # - name: Deploy docs # uses: peaceiris/actions-gh-pages@v3 # with: From 1ce5b23ebe5e6ad507bd94ffcfa6b8d36fef65d5 Mon Sep 17 00:00:00 2001 From: ejolly Date: Tue, 23 Mar 2021 16:50:16 -0400 Subject: [PATCH 15/20] Add random_state to simulator classes and pin it to see if tests pass. 
Add nojekyll after sphinx builds. --- .github/workflows/conda_coveralls_ci.yml | 1 + nltools/simulator.py | 45 +++++++++++++++--------- nltools/tests/test_simulator.py | 5 ++- 3 files changed, 33 insertions(+), 18 deletions(-) diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index dc11ba66..af72a213 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -147,6 +147,7 @@ jobs: cd docs make clean make html + touch _build/html/.nojekyll - name: Deploy docs if: success() diff --git a/nltools/simulator.py b/nltools/simulator.py index 984a2a93..284b9820 100755 --- a/nltools/simulator.py +++ b/nltools/simulator.py @@ -23,10 +23,13 @@ from nltools.prefs import MNI_Template, resolve_mni_path import csv from copy import deepcopy +from sklearn.utils import check_random_state class Simulator: - def __init__(self, brain_mask=None, output_dir=None): # no scoring param + def __init__( + self, brain_mask=None, output_dir=None, random_state=None + ): # no scoring param # self.resource_folder = os.path.join(os.getcwd(),'resources') if output_dir is None: self.output_dir = os.path.join(os.getcwd()) @@ -41,6 +44,7 @@ def __init__(self, brain_mask=None, output_dir=None): # no scoring param raise ValueError("brain_mask is not a string or a nibabel instance") self.brain_mask = brain_mask self.nifti_masker = NiftiMasker(mask_img=self.brain_mask) + self.random_state = check_random_state(random_state) def gaussian(self, mu, sigma, i_tot): """create a 3D gaussian signal normalized to a given intensity @@ -105,7 +109,7 @@ def normal_noise(self, mu, sigma): self.nifti_masker.fit(self.brain_mask) vlength = int(np.sum(self.brain_mask.get_data())) if sigma != 0: - n = np.random.normal(mu, sigma, vlength) + n = self.random_state.normal(mu, sigma, vlength) else: n = [mu] * vlength m = self.nifti_masker.inverse_transform(n) @@ -256,7 +260,7 @@ def create_cov_data( cov_matrix[0, :] = cor # set covariance with y cov_matrix[:, 0] = cor # set covariance with all other voxels np.fill_diagonal(cov_matrix, 1) # set diagonal to 1 - mv_sim = np.random.multivariate_normal( + mv_sim = self.random_state.multivariate_normal( np.zeros([n_vox + 1]), cov_matrix, size=reps ) print(mv_sim) @@ -266,7 +270,9 @@ def create_cov_data( new_dat = np.ones([mv_sim.shape[0], flat_sphere.shape[1]]) new_dat[:, np.where(flat_sphere == 1)[1]] = mv_sim self.data = self.nifti_masker.inverse_transform( - np.add(new_dat, np.random.standard_normal(size=new_dat.shape) * sigma) + np.add( + new_dat, self.random_state.standard_normal(size=new_dat.shape) * sigma + ) ) # add noise scaled by sigma self.rep_id = [1] * len(y) if n_sub > 1: @@ -278,13 +284,14 @@ def create_cov_data( self.nifti_masker.inverse_transform( np.add( new_dat, - np.random.standard_normal(size=new_dat.shape) * sigma, + self.random_state.standard_normal(size=new_dat.shape) + * sigma, ) ), ], axis=3, ) # add noise scaled by sigma - noise_y = list(y + np.random.randn(len(y)) * sigma) + noise_y = list(y + self.random_state.randn(len(y)) * sigma) self.y = self.y + noise_y self.rep_id = self.rep_id + [s + 1] * len(mv_sim[:, 0]) self.y = np.array(self.y) @@ -295,14 +302,14 @@ def create_cov_data( # cov_matrix[0,:] = cor # set covariance with y # cov_matrix[:,0] = cor # set covariance with all other voxels # np.fill_diagonal(cov_matrix,1) # set diagonal to 1 - # mv_sim = np.random.multivariate_normal(np.zeros([len(x)+1]),cov_matrix, size=reps) # simulate data from multivariate covar + # mv_sim = 
self.random_state.multivariate_normal(np.zeros([len(x)+1]),cov_matrix, size=reps) # simulate data from multivariate covar # self.y = mv_sim[:,0] # mv_sim = mv_sim[:,1:] # A_4d = np.resize(A,(reps,A.shape[0],A.shape[1],A.shape[2])) # for i in range(len(x)): # A_4d[:,x[i],y[i],z[i]]=mv_sim[:,i] # A_4d = np.rollaxis(A_4d,0,4) # reorder shape of matrix so that time is in 4th dimension - # self.data = self.to_nifti(np.add(A_4d,np.random.standard_normal(size=A_4d.shape)*sigma)) # add noise scaled by sigma + # self.data = self.to_nifti(np.add(A_4d,self.random_state.standard_normal(size=A_4d.shape)*sigma)) # add noise scaled by sigma # self.rep_id = ??? # need to add this later # Write Data to files if requested @@ -404,7 +411,7 @@ def create_ncov_data( # these operations happen in one vector that we'll later split into the separate regions print("Generating multivariate normal distribution...") - mv_sim_l = np.random.multivariate_normal( + mv_sim_l = self.random_state.multivariate_normal( np.zeros([np.sum(n_vox) + 1]), cov_matrix, size=reps ) print(mv_sim_l) @@ -422,7 +429,7 @@ def create_ncov_data( rep, start:stop ] - noise = np.random.standard_normal(size=new_dats.shape[1]) * sigma + noise = self.random_state.standard_normal(size=new_dats.shape[1]) * sigma self.data = self.nifti_masker.inverse_transform( np.add(new_dats, noise) ) # append 3d simulated data to list @@ -435,11 +442,13 @@ def create_ncov_data( y = list(self.y) for s in range(1, n_sub): # ask Luke about this new version - noise = np.random.standard_normal(size=new_dats.shape[1]) * sigma + noise = ( + self.random_state.standard_normal(size=new_dats.shape[1]) * sigma + ) next_subj = self.nifti_masker.inverse_transform(np.add(new_dats, noise)) self.data = nib.concat_images([self.data, next_subj], axis=3) - y += list(self.y + np.random.randn(len(self.y)) * sigma) + y += list(self.y + self.random_state.randn(len(self.y)) * sigma) print("y == " + str(len(y))) self.rep_id += [s + 1] * len(mv_sim[:, 0]) self.y = np.array(y) @@ -478,6 +487,7 @@ def __init__( n_subjects=20, sigma=1, signal_amplitude=None, + random_state=None, ): self.isfit = False @@ -490,6 +500,7 @@ def __init__( self.n_subjects = n_subjects self.sigma = sigma self.grid_width = grid_width + self.random_state = check_random_state(random_state) self.data = self._create_noise() if signal_amplitude is not None: @@ -507,7 +518,7 @@ def _create_noise(self): simulated_data (np.array): simulated noise using object parameters """ return ( - np.random.randn(self.grid_width, self.grid_width, self.n_subjects) + self.random_state.randn(self.grid_width, self.grid_width, self.n_subjects) * self.sigma ) @@ -686,7 +697,7 @@ def run_multiple_simulations( if self.signal_mask is None: simulations = [ - self._run_ttest(self._create_noise()) for x in range(n_simulations) + self._run_ttest(self._create_noise()) for _ in range(n_simulations) ] else: signal = ( @@ -697,7 +708,7 @@ def run_multiple_simulations( ) simulations = [ self._run_ttest(self._create_noise() + signal) - for x in range(n_simulations) + for _ in range(n_simulations) ] self.multiple_thresholded = [ @@ -736,9 +747,9 @@ def plot_grid_simulation( ) if self.signal_mask is None: - f, a = plt.subplots(ncols=3, figsize=(15, 5)) + _, a = plt.subplots(ncols=3, figsize=(15, 5)) else: - f, a = plt.subplots(ncols=4, figsize=(18, 5)) + _, a = plt.subplots(ncols=4, figsize=(18, 5)) a[3].hist(self.multiple_tp) a[3].set_ylabel("Frequency", fontsize=18) a[3].set_xlabel("Percent Signal Recovery", fontsize=18) diff --git 
a/nltools/tests/test_simulator.py b/nltools/tests/test_simulator.py index f979c855..259c7f40 100644 --- a/nltools/tests/test_simulator.py +++ b/nltools/tests/test_simulator.py @@ -21,7 +21,9 @@ def test_simulategrid_fpr(tmpdir): n_simulations = 100 thresh = 0.05 bonferroni_threshold = thresh / (grid_width ** 2) - simulation = SimulateGrid(grid_width=grid_width, n_subjects=n_subjects) + simulation = SimulateGrid( + grid_width=grid_width, n_subjects=n_subjects, random_state=0 + ) simulation.plot_grid_simulation( threshold=bonferroni_threshold, threshold_type="p", n_simulations=n_simulations ) @@ -47,6 +49,7 @@ def test_simulategrid_fdr(tmpdir): signal_width=signal_width, grid_width=grid_width, n_subjects=n_subjects, + random_state=0, ) simulation.plot_grid_simulation( threshold=thresh, From a6858d3ba0a2da5b4a664c7bced9507115becef0 Mon Sep 17 00:00:00 2001 From: Eshin Jolly Date: Tue, 23 Mar 2021 17:12:33 -0400 Subject: [PATCH 16/20] dont run 3.9 tests for now so PR messages are clean --- .github/workflows/conda_coveralls_ci.yml | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/conda_coveralls_ci.yml index af72a213..625a71cf 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/conda_coveralls_ci.yml @@ -32,17 +32,17 @@ jobs: python-version: [3.7, 3.8] # By default everything should pass for the workflow to pass ok-fail: [false] - include: +# include: # Rather than include 3.9 in the python versions, do it here so we can ignore failures on mac and windows with 3.9 (they have install issues) - - os: ubuntu-latest - python-version: 3.9 - ok-fail: false - - os: macos-latest - python-version: 3.9 - ok-fail: true - - os: windows-latest - python-version: 3.9 - ok-fail: true +# - os: ubuntu-latest +# python-version: 3.9 +# ok-fail: false +# - os: macos-latest +# python-version: 3.9 +# ok-fail: true +# - os: windows-latest +# python-version: 3.9 +# ok-fail: true steps: # 1. Step up miniconda - name: Download and setup Miniconda From 1843b65f58ab533954e90b6aee8f85cc3a4ef5c1 Mon Sep 17 00:00:00 2001 From: ejolly Date: Wed, 24 Mar 2021 23:33:32 -0400 Subject: [PATCH 17/20] Split testing and doc deploy workflow. Added pypi upload on releases. Updated README. --- .../workflows/deploy_docs_pypi_onrelease.yml | 85 +++++++++++++++++++ ...overalls_ci.yml => tests_and_coverage.yml} | 64 ++++---------- README.md | 54 ++++-------- 3 files changed, 119 insertions(+), 84 deletions(-) create mode 100644 .github/workflows/deploy_docs_pypi_onrelease.yml rename .github/workflows/{conda_coveralls_ci.yml => tests_and_coverage.yml} (72%) diff --git a/.github/workflows/deploy_docs_pypi_onrelease.yml b/.github/workflows/deploy_docs_pypi_onrelease.yml new file mode 100644 index 00000000..9a0b6837 --- /dev/null +++ b/.github/workflows/deploy_docs_pypi_onrelease.yml @@ -0,0 +1,85 @@ +name: nltools + +on: release + +jobs: + # Job (1): Build and deploy docs. 
+ docs: + if: "!contains(github.event.head_commit.message, 'skip ci')" + name: Build & deploy docs + runs-on: ubuntu-latest + steps: + - name: Checkout Code + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + + - name: Upgrade pip + run: | + # install pip=>20.1 to use "pip cache dir" + python3 -m pip install --upgrade pip + + - name: Setup pip-cache + id: pip-cache + run: echo "::set-output name=dir::$(pip cache dir)" + + - name: Cache deps + uses: actions/cache@v2 + with: + path: ${{ steps.pip-cache.outputs.dir }} + key: ${{ runner.os }}-pip-${{ hashFiles('**/requirements.txt') }} + restore-keys: | + ${{ runner.os }}-pip- + + - name: Install deps + run: | + python3 -m pip install . -r requirements.txt + python3 -m pip install . -r requirements-dev.txt + python3 -m pip install . -r optional-dependencies.txt + + - name: Build docs + run: | + cd docs + make clean + make html + touch _build/html/.nojekyll + + - name: Deploy docs + if: success() + uses: crazy-max/ghaction-github-pages@v2 + with: + target_branch: gh-pages + build_dir: docs/_build/html + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + # Job (2): Build package and upload to pypi + deploy: + if: "!contains(github.event.head_commit.message, 'skip ci')" + name: Build & deploy package + runs-on: ubuntu-latest + needs: docs + steps: + - name: Checkout Code + uses: actions/checkout@v2 + + - name: Setup Python + uses: actions/setup-python@v2 + with: + python-version: "3.8" + + - name: Pypa build + run: | + python3 -m pip install build --user + + - name: Wheel and source build + run: | + python3 -m build --sdist --wheel --outdir dist/ + + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@master + with: + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/.github/workflows/conda_coveralls_ci.yml b/.github/workflows/tests_and_coverage.yml similarity index 72% rename from .github/workflows/conda_coveralls_ci.yml rename to .github/workflows/tests_and_coverage.yml index 625a71cf..53f6f99a 100644 --- a/.github/workflows/conda_coveralls_ci.yml +++ b/.github/workflows/tests_and_coverage.yml @@ -1,4 +1,4 @@ -name: nltools +name: Tests and Coverage on: push: @@ -32,30 +32,30 @@ jobs: python-version: [3.7, 3.8] # By default everything should pass for the workflow to pass ok-fail: [false] -# include: - # Rather than include 3.9 in the python versions, do it here so we can ignore failures on mac and windows with 3.9 (they have install issues) -# - os: ubuntu-latest -# python-version: 3.9 -# ok-fail: false -# - os: macos-latest -# python-version: 3.9 -# ok-fail: true -# - os: windows-latest -# python-version: 3.9 -# ok-fail: true + # include: + # Rather than include 3.9 in the python versions, do it here so we can ignore failures on mac and windows with 3.9 (they have install issues) + # - os: ubuntu-latest + # python-version: 3.9 + # ok-fail: false + # - os: macos-latest + # python-version: 3.9 + # ok-fail: true + # - os: windows-latest + # python-version: 3.9 + # ok-fail: true steps: - # 1. Step up miniconda + # Step up miniconda - name: Download and setup Miniconda uses: conda-incubator/setup-miniconda@v2 with: miniconda-version: "latest" python-version: ${{ matrix.python-version }} - # 2. Check out latest code on github + # Check out latest code on github - name: Checkout Code uses: actions/checkout@v2 - # 3. 
Install common sci-py packages via conda as well as testing packages and requirements + # Install common sci-py packages via conda as well as testing packages and requirements # TODO: unpin pandas version when deepdish adds support for 1.2: https://github.com/uchicago-cs/deepdish/issues/45 - name: Install Dependencies run: | @@ -71,14 +71,14 @@ jobs: run: | black nltools --check --diff - # 4. Actually run the tests with coverage + # Actually run the tests with coverage - name: Run Tests run: | conda activate test conda env list coverage run --source=nltools -m pytest -rs -n auto - # 5. Send coverage to coveralls.io but waiting on parallelization to finish + # Send coverage to coveralls.io but waiting on parallelization to finish # Not using the official github action in the marketplace to upload because it requires a .lcov file, which pytest doesn't generate. It's just easier to use the coveralls python library which does the same thing, but works with pytest. - name: Upload Coverage # The coveralls python package has some 422 server issues with uploads from github-actions so try both service providers, for more see: @@ -105,7 +105,7 @@ jobs: env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - # Job (3): Build and deploy docs + # Job (3): Build docs, but don't deploy. This is effectively another layer of testing because of our sphinx-gallery auto-examples docs: if: "!contains(github.event.head_commit.message, 'skip ci')" name: Build & deploy docs @@ -147,31 +147,3 @@ jobs: cd docs make clean make html - touch _build/html/.nojekyll - - - name: Deploy docs - if: success() - uses: crazy-max/ghaction-github-pages@v2 - with: - target_branch: gh-pages - build_dir: docs/_build/html - env: - GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} - - # - name: Deploy docs - # uses: peaceiris/actions-gh-pages@v3 - # with: - # github_token: ${{ secrets.GITHUB_TOKEN }} - # publish_dir: ./site - - # Job (4): Build package and upload to conda/pypi - # deploy: - # if: "!contains(github.event.head_commit.message, 'skip ci')" - # name: Build & deploy package - # runs-on: ubuntu-latest - # needs: test - # steps: - # - name: Say Hi - # shell: bash - # run: | - # echo "hello world. I havent been configured for package deployment yet!" 
diff --git a/README.md b/README.md index de5df652..aef9107e 100644 --- a/README.md +++ b/README.md @@ -1,61 +1,39 @@ [![Package versioning](https://img.shields.io/pypi/v/nltools.svg)](https://pypi.org/project/nltools/) -[![Build Status](https://api.travis-ci.org/cosanlab/nltools.png)](https://travis-ci.org/cosanlab/nltools/) +[![Build Status](https://github.com/cosanlab/nltools/workflows/tests_and_coverage/badge.svg)] [![codecov](https://codecov.io/gh/cosanlab/nltools/branch/master/graph/badge.svg)](https://codecov.io/gh/cosanlab/nltools) [![Codacy Badge](https://api.codacy.com/project/badge/Grade/625677967a0749299f38c2bf8ee269c3)](https://www.codacy.com/app/ljchang/nltools?utm_source=github.com&utm_medium=referral&utm_content=ljchang/nltools&utm_campaign=Badge_Grade) -[![Documentation Status](https://readthedocs.org/projects/neurolearn/badge/?version=latest)](http://neurolearn.readthedocs.io/en/latest/?badge=latest) +[![Documentation Status](https://github.com/cosanlab/nltools/workflows/deploy_docs_pypi_onrelease/badge.svg) [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.2229813.svg)](https://doi.org/10.5281/zenodo.2229813) +![Python Versions](https://img.shields.io/badge/python-3.7%20%7C%203.8-blue) +![Platforms](https://img.shields.io/badge/platform-linux%20%7C%20osx%20%7C%20win-blue) # NLTools -Python toolbox for analyzing neuroimaging data. It is particularly useful for conducting multivariate analyses. It is originally based on Tor Wager's object oriented matlab [canlab core tools](http://wagerlab.colorado.edu/tools) and relies heavily on [nilearn](http://nilearn.github.io) and [scikit learn](http://scikit-learn.org/stable/index.html). Nltools is compatible with Python 3.6+. Python 2.7 was only supported through 0.3.11. We will no longer be supporting Python2 starting with version 0.3.12. +Python toolbox for analyzing neuroimaging data. It is particularly useful for conducting multivariate analyses. It is originally based on Tor Wager's object oriented matlab [canlab core tools](http://wagerlab.colorado.edu/tools) and relies heavily on [nilearn](http://nilearn.github.io) and [scikit learn](http://scikit-learn.org/stable/index.html). Nltools is only compatible with Python 3.7+. -### Installation -1. Method 1 +## Documentation + +Documentation and tutorials are available at https://nltools.org + +## Installation +1. Method 1 (stable) ``` pip install nltools ``` -2. Method 2 (Recommended) +2. Method 2 (bleeding edge) ``` pip install git+https://github.com/cosanlab/nltools ``` -3. Method 3 +3. Method 3 (for development) ``` git clone https://github.com/cosanlab/nltools - python setup.py install - ``` - or - ``` - pip install -e 'path_to_github_directory' + pip install -e nltools ``` -### Dependencies -nltools requires several dependencies. All are available in pypi. Can use `pip install 'package'` - - nibabel>=2.0.1 - - scikit-learn>=0.19.1 - - nilearn>=0.4 - - pandas>=0.20 - - numpy>=1.9 - - seaborn>=0.7.0 - - matplotlib>=2.1 - - scipy - - pynv - - joblib - -### Optional Dependencies - - mne - - requests - - networkx - - ipywidgets >=5.2.2 - -### Documentation -Current Documentation can be found at [readthedocs](http://neurolearn.readthedocs.org/en/latest). - -Please see our [tutorials](http://neurolearn.readthedocs.io/en/latest/auto_examples/index.html), which provide numerous examples for how to use the toolbox. 
- -### Preprocessing -Please see our [cosanlab_preproc](https://github.com/cosanlab/cosanlab_preproc) library for nipype pipelines to perform preprocessing on neuroimaging data. +## Preprocessing +Nltools has minimal routines for pre-processing data. For more complete pre-processing pipelines please see our [cosanlab_preproc](https://github.com/cosanlab/cosanlab_preproc) library built with `nipype`. From bef0b59c851c2c8df1ea00774efa22bef1502f9e Mon Sep 17 00:00:00 2001 From: ejolly Date: Wed, 24 Mar 2021 23:34:31 -0400 Subject: [PATCH 18/20] Rename deploy workflow. --- .github/workflows/deploy_docs_pypi_onrelease.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_docs_pypi_onrelease.yml b/.github/workflows/deploy_docs_pypi_onrelease.yml index 9a0b6837..daeacce8 100644 --- a/.github/workflows/deploy_docs_pypi_onrelease.yml +++ b/.github/workflows/deploy_docs_pypi_onrelease.yml @@ -1,4 +1,4 @@ -name: nltools +name: Deploy Docs and PyPI on: release From 40b830cada2c5cbcf93a2ca77a8da6ae13d5454a Mon Sep 17 00:00:00 2001 From: ejolly Date: Wed, 24 Mar 2021 23:38:15 -0400 Subject: [PATCH 19/20] Update setup.py after paxton's merge. --- setup.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.py b/setup.py index d37561ae..9a9de927 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ version=version["__version__"], author="Cosan Lab", author_email="luke.j.chang@dartmouth.edu", - url="http://neurolearn.readthedocs.org/en/latest/", + url="https://cosanlab.github.io/nltools", python_requires=">=3.6", install_requires=requirements, extras_require={"interactive_plots": ["ipywidgets>=5.2.2"]}, @@ -29,7 +29,6 @@ "analysis engine powering www.neuro-learn.org.", keywords=["neuroimaging", "preprocessing", "analysis", "machine-learning"], classifiers=[ - "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Operating System :: OS Independent", From 86534f3403b039cbb8e42fc2317516cd10bbbb0a Mon Sep 17 00:00:00 2001 From: ejolly Date: Wed, 24 Mar 2021 23:40:59 -0400 Subject: [PATCH 20/20] Rename workflow steps --- .github/workflows/tests_and_coverage.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/tests_and_coverage.yml b/.github/workflows/tests_and_coverage.yml index 53f6f99a..4274808a 100644 --- a/.github/workflows/tests_and_coverage.yml +++ b/.github/workflows/tests_and_coverage.yml @@ -108,7 +108,7 @@ jobs: # Job (3): Build docs, but don't deploy. This is effectively another layer of testing because of our sphinx-gallery auto-examples docs: if: "!contains(github.event.head_commit.message, 'skip ci')" - name: Build & deploy docs + name: Build docs and auto-examples runs-on: ubuntu-latest steps: - name: Checkout Code