From f8e3f05e42c4e4e792ac99a6871440f8834ddd44 Mon Sep 17 00:00:00 2001 From: Luke Chang Date: Sun, 12 Nov 2017 17:20:03 -0500 Subject: [PATCH 1/2] Refactored data module Former-commit-id: 47992913cdcc9b281ba08dc4ca83eb0748aaf3b2 --- nltools/data/__init__.py | 13 + nltools/data/adjacency.py | 602 +++++++++++++ nltools/{data.py => data/brain_data.py} | 1065 +---------------------- nltools/data/design_matrix.py | 449 ++++++++++ nltools/utils.py | 40 +- 5 files changed, 1130 insertions(+), 1039 deletions(-) create mode 100644 nltools/data/__init__.py create mode 100644 nltools/data/adjacency.py rename nltools/{data.py => data/brain_data.py} (57%) create mode 100644 nltools/data/design_matrix.py diff --git a/nltools/data/__init__.py b/nltools/data/__init__.py new file mode 100644 index 00000000..61e090a0 --- /dev/null +++ b/nltools/data/__init__.py @@ -0,0 +1,13 @@ +""" +nltools data types. +""" + +from .brain_data import Brain_Data, Groupby +from .adjacency import Adjacency +from .design_matrix import Design_Matrix, Design_Matrix_Series + +__all__ = ['Brain_Data', + 'Adjacency', + 'Groupby', + 'Design_Matrix', + 'Design_Matrix_Series'] diff --git a/nltools/data/adjacency.py b/nltools/data/adjacency.py new file mode 100644 index 00000000..d9df1df7 --- /dev/null +++ b/nltools/data/adjacency.py @@ -0,0 +1,602 @@ +from __future__ import division + +''' +This data class is for working with similarity/dissimilarity matrices +''' + +__author__ = ["Luke Chang"] +__license__ = "MIT" + +import os +import pandas as pd +import numpy as np +import six +from copy import deepcopy +from sklearn.metrics.pairwise import pairwise_distances +from scipy.spatial.distance import squareform +import seaborn as sns +import matplotlib.pyplot as plt +from nltools.stats import (correlation_permutation, + one_sample_permutation, + two_sample_permutation, + summarize_bootstrap) +from nltools.plotting import (plot_stacked_adjacency, + plot_silhouette) +from nltools.utils import (all_same, + attempt_to_import, + concatenate, + _bootstrap_apply_func) +from joblib import Parallel, delayed + +# Optional dependencies +nx = attempt_to_import('networkx', 'nx') + +class Adjacency(object): + + ''' + Adjacency is a class to represent Adjacency matrices as a vector rather + than a 2-dimensional matrix. This makes it easier to perform data + manipulation and analyses. + + Args: + data: pandas data instance or list of files + matrix_type: (str) type of matrix. 
Possible values include: + ['distance','similarity','directed','distance_flat', + 'similarity_flat','directed_flat'] + Y: Pandas DataFrame of training labels + **kwargs: Additional keyword arguments + + ''' + + def __init__(self, data=None, Y=None, matrix_type=None, **kwargs): + if matrix_type is not None: + if matrix_type.lower() not in ['distance','similarity','directed', + 'distance_flat','similarity_flat', + 'directed_flat']: + raise ValueError("matrix_type must be [None,'distance', " + "'similarity','directed','distance_flat', " + "'similarity_flat','directed_flat']") + + if data is None: + self.data = np.array([]) + self.matrix_type = 'empty' + self.is_single_matrix = np.nan + self.issymmetric = np.nan + elif isinstance(data, list): + if isinstance(data[0], Adjacency): + tmp = concatenate(data) + for item in ['data', 'matrix_type', 'Y','issymmetric']: + setattr(self, item, getattr(tmp,item)) + else: + d_all = []; symmetric_all = []; matrix_type_all = [] + for d in data: + data_tmp, issymmetric_tmp, matrix_type_tmp, _ = self._import_single_data(d, matrix_type=matrix_type) + d_all.append(data_tmp) + symmetric_all.append(issymmetric_tmp) + matrix_type_all.append(matrix_type_tmp) + if not all_same(symmetric_all): + raise ValueError('Not all matrices are of the same ' + 'symmetric type.') + if not all_same(matrix_type_all): + raise ValueError('Not all matrices are of the same matrix ' + 'type.') + self.data = np.array(d_all) + self.issymmetric = symmetric_all[0] + self.matrix_type = matrix_type_all[0] + self.is_single_matrix = False + else: + self.data, self.issymmetric, self.matrix_type, self.is_single_matrix = self._import_single_data(data, matrix_type=matrix_type) + + if Y is not None: + if isinstance(Y, six.string_types): + if os.path.isfile(Y): + Y = pd.read_csv(Y, header=None, index_col=None) + if isinstance(Y, pd.DataFrame): + if self.data.shape[0] != len(Y): + raise ValueError("Y does not match the correct size of " + "data") + self.Y = Y + else: + raise ValueError("Make sure Y is a pandas data frame.") + else: + self.Y = pd.DataFrame() + + def __repr__(self): + return ("%s.%s(shape=%s, square_shape=%s, Y=%s, is_symmetric=%s," + "matrix_type=%s)") % ( + self.__class__.__module__, + self.__class__.__name__, + self.shape(), + self.square_shape(), + len(self.Y), + self.issymmetric, + self.matrix_type) + + def __getitem__(self,index): + new = self.copy() + if isinstance(index, int): + new.data = np.array(self.data[index, :]).flatten() + new.is_single_matrix = True + else: + new.data = np.array(self.data[index, :]) + if not self.Y.empty: + new.Y = self.Y.iloc[index] + return new + + def __len__(self): + if self.is_single_matrix: + return 1 + else: + return self.data.shape[0] + + def __iter__(self): + for x in range(len(self)): + yield self[x] + + def __add__(self, y): + new = deepcopy(self) + if isinstance(y, (int, float)): + new.data = new.data + y + if isinstance(y, Adjacency): + if self.shape() != y.shape(): + raise ValueError('Both Adjacency() instances need to be the ' + 'same shape.') + new.data = new.data + y.data + return new + + def __sub__(self, y): + new = deepcopy(self) + if isinstance(y, (int, float)): + new.data = new.data - y + if isinstance(y, Adjacency): + if self.shape() != y.shape(): + raise ValueError('Both Adjacency() instances need to be the ' + 'same shape.') + new.data = new.data - y.data + return new + + def __mul__(self, y): + new = deepcopy(self) + if isinstance(y, (int, float)): + new.data = new.data * y + if isinstance(y, Adjacency): + if self.shape() != 
y.shape(): + raise ValueError('Both Adjacency() instances need to be the ' + 'same shape.') + new.data = np.multiply(new.data, y.data) + return new + + def _import_single_data(self, data, matrix_type=None): + ''' Helper function to import single data matrix.''' + + if isinstance(data, six.string_types): + if os.path.isfile(data): + data = pd.read_csv(data) + else: + raise ValueError('Make sure you have specified a valid file ' + 'path.') + + def test_is_single_matrix(data): + if len(data.shape) == 1: + return True + else: + return False + + if matrix_type is not None: + if matrix_type.lower() == 'distance_flat': + matrix_type = 'distance' + data = np.array(data) + issymmetric = True + is_single_matrix = test_is_single_matrix(data) + elif matrix_type.lower() == 'similarity_flat': + matrix_type = 'similarity' + data = np.array(data) + issymmetric = True + is_single_matrix = test_is_single_matrix(data) + elif matrix_type.lower() == 'directed_flat': + matrix_type = 'directed' + data = np.array(data).flatten() + issymmetric = False + is_single_matrix = test_is_single_matrix(data) + elif matrix_type.lower() in ['distance', 'similarity', 'directed']: + if data.shape[0] != data.shape[1]: + raise ValueError('Data matrix must be square') + data = np.array(data) + matrix_type = matrix_type.lower() + if matrix_type in ['distance', 'similarity']: + issymmetric = True + data = data[np.triu_indices(data.shape[0], k=1)] + else: + issymmetric = False + if isinstance(data, pd.DataFrame): + data = data.values.flatten() + elif isinstance(data, np.ndarray): + data = data.flatten() + is_single_matrix = True + else: + if len(data.shape) == 1: # Single Vector + try: + data = squareform(data) + except ValueError: + print('Data is not flattened upper triangle from ' + 'similarity/distance matrix or flattened directed ' + 'matrix.') + is_single_matrix = True + elif data.shape[0] == data.shape[1]: # Square Matrix + is_single_matrix = True + else: # Rectangular Matrix + data_all = deepcopy(data) + try: + data = squareform(data_all[0, :]) + except ValueError: + print('Data is not flattened upper triangle from multiple ' + 'similarity/distance matrices or flattened directed ' + 'matrices.') + is_single_matrix = False + + # Test if matrix is symmetrical + if np.all(data[np.triu_indices(data.shape[0], k=1)] == data.T[np.triu_indices(data.shape[0], k=1)]): + issymmetric = True + else: + issymmetric = False + + # Determine matrix type + if issymmetric: + if np.sum(np.diag(data)) == 0: + matrix_type = 'distance' + elif np.sum(np.diag(data)) == data.shape[0]: + matrix_type = 'similarity' + data = data[np.triu_indices(data.shape[0], k=1)] + else: + matrix_type = 'directed' + data = data.flatten() + + if not is_single_matrix: + data = data_all + + return (data, issymmetric, matrix_type, is_single_matrix) + + def isempty(self): + '''Check if Adjacency object is empty''' + return bool(self.matrix_type is 'empty') + + def squareform(self): + '''Convert adjacency back to squareform''' + if self.issymmetric: + if self.is_single_matrix: + return squareform(self.data) + else: + return [squareform(x.data) for x in self] + else: + if self.is_single_matrix: + return self.data.reshape(int(np.sqrt(self.data.shape[0])), + int(np.sqrt(self.data.shape[0]))) + else: + return [x.data.reshape(int(np.sqrt(x.data.shape[0])), + int(np.sqrt(x.data.shape[0]))) for x in self] + + def plot(self, limit=3, **kwargs): + ''' Create Heatmap of Adjacency Matrix''' + if self.is_single_matrix: + return sns.heatmap(self.squareform(), square=True, **kwargs) + 
else: + f, a = plt.subplots(limit) + for i in range(limit): + sns.heatmap(self[i].squareform(), square=True, ax=a[i], + **kwargs) + return f + + def mean(self, axis=0): + ''' Calculate mean of Adjacency + + Args: + axis: calculate mean over features (0) or data (1). + For data it will be on upper triangle. + + Returns: + mean: float if single, adjacency if axis=0, np.array if axis=1 + and multiple + + ''' + + if self.is_single_matrix: + return np.mean(self.data) + else: + if axis == 0: + return Adjacency(data=np.mean(self.data, axis=axis), + matrix_type=self.matrix_type + '_flat') + elif axis == 1: + return np.mean(self.data, axis=axis) + + def std(self, axis=0): + ''' Calculate standard deviation of Adjacency + + Args: + axis: calculate std over features (0) or data (1). + For data it will be on upper triangle. + + Returns: + std: float if single, adjacency if axis=0, np.array if axis=1 and + multiple + + ''' + + if self.is_single_matrix: + return np.std(self.data) + else: + if axis == 0: + return Adjacency(data=np.std(self.data, axis=axis), + matrix_type=self.matrix_type + '_flat') + elif axis == 1: + return np.std(self.data, axis=axis) + + def shape(self): + ''' Calculate shape of data. ''' + return self.data.shape + + def square_shape(self): + ''' Calculate shape of squareform data. ''' + if self.matrix_type is 'empty': + return np.array([]) + else: + if self.is_single_matrix: + return self.squareform().shape + else: + return self[0].squareform().shape + + def copy(self): + ''' Create a copy of Adjacency object.''' + return deepcopy(self) + + def append(self, data): + ''' Append data to Adjacency instance + + Args: + data: Adjacency instance to append + + Returns: + out: new appended Adjacency instance + + ''' + + if not isinstance(data, Adjacency): + raise ValueError('Make sure data is a Adjacency instance.') + + if self.isempty(): + out = data.copy() + else: + out = self.copy() + if self.square_shape() != data.square_shape(): + raise ValueError('Data is not the same shape as Adjacency ' + 'instance.') + + out.data = np.vstack([self.data, data.data]) + out.is_single_matrix = False + if out.Y.size: + out.Y = self.Y.append(data.Y) + + return out + + def write(self, file_name, method='long'): + ''' Write out Adjacency object to csv file. + + Args: + file_name (str): name of file name to write + method (str): method to write out data ['long','square'] + + ''' + if method not in ['long', 'square']: + raise ValueError('Make sure method is ["long","square"].') + if self.is_single_matrix: + if method is 'long': + out = pd.DataFrame(self.data).to_csv(file_name, index=None) + elif method is 'square': + out = pd.DataFrame(self.squareform()).to_csv(file_name, + index=None) + else: + if method is 'long': + out = pd.DataFrame(self.data).to_csv(file_name, index=None) + elif method is 'square': + raise NotImplementedError('Need to decide how we should write ' + 'out multiple matrices. As separate ' + 'files?') + + def similarity(self, data, plot=False, **kwargs): + ''' Calculate similarity between two Adjacency matrices. 
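+            For example (an illustrative sketch; `m1` and `m2` are assumed
+            to be Adjacency objects of matching shape):
+                stats = m1.similarity(m2)
+            Keyword arguments are passed through to
+            nltools.stats.correlation_permutation.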
+ Default is to use spearman correlation and permutation test.'''
+        if not isinstance(data, Adjacency):
+            data2 = Adjacency(data)
+        else:
+            data2 = data.copy()
+        if self.is_single_matrix:
+            if plot:
+                plot_stacked_adjacency(self, data)
+            return correlation_permutation(self.data, data2.data, **kwargs)
+        else:
+            if plot:
+                _, a = plt.subplots(len(self))
+                for i in a:
+                    plot_stacked_adjacency(self, data, ax=i)
+            return [correlation_permutation(x.data, data2.data, **kwargs) for x in self]
+
+    def distance(self, method='correlation', **kwargs):
+        ''' Calculate distance between matrices within an Adjacency() instance.
+
+        Args:
+            method: type of distance metric (can use any scikit-learn or
+                    scipy metric)
+
+        Returns:
+            dist: Outputs a 2D distance matrix.
+
+        '''
+        return Adjacency(pairwise_distances(self.data, metric=method, **kwargs),
+                         matrix_type='distance')
+
+    def threshold(self, thresh=0, binarize=False):
+        '''Threshold Adjacency instance
+
+        Args:
+            thresh: cutoff to threshold matrix (float). If given as a string
+                    ending in '%' (e.g. '50%'), will threshold at that
+                    percentile of the data.
+            binarize (bool): if binarize=True then binarize output
+        Returns:
+            Adjacency: thresholded Adjacency instance
+
+        '''
+
+        b = self.copy()
+        if isinstance(thresh, six.string_types):
+            if thresh[-1] == '%':
+                thresh = np.percentile(b.data, float(thresh[:-1]))
+        if binarize:
+            b.data = b.data > thresh
+        else:
+            b.data[b.data < thresh] = 0
+        return b
+
+    def to_graph(self):
+        ''' Convert Adjacency into networkx graph. Only works on
+            single matrices for now.'''
+
+        if self.is_single_matrix:
+            if self.matrix_type == 'directed':
+                return nx.DiGraph(self.squareform())
+            else:
+                return nx.Graph(self.squareform())
+        else:
+            raise NotImplementedError('This function currently only works on '
+                                      'single matrices.')
+
+    def ttest(self, **kwargs):
+        ''' Calculate ttest across samples. 
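+            Example (an illustrative sketch; `dat` is assumed to be a
+            multi-matrix Adjacency): mn, p = dat.ttest() returns Adjacency
+            objects holding the permuted mean and p-value for each edge.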
'''
+        if self.is_single_matrix:
+            raise ValueError('t-test cannot be run on single matrices.')
+        m = []; p = []
+        for i in range(self.data.shape[1]):
+            stats = one_sample_permutation(self.data[:, i], **kwargs)
+            m.append(stats['mean'])
+            p.append(stats['p'])
+        mn = Adjacency(np.array(m))
+        pval = Adjacency(np.array(p))
+        return (mn, pval)
+
+    def plot_label_distance(self, labels, ax=None):
+        ''' Create a violin plot indicating within and between label distance
+
+        Args:
+            labels (np.array): numpy array of labels to plot
+
+        Returns:
+            violin plot handles
+
+        '''
+
+        if not self.is_single_matrix:
+            raise ValueError('This function only works on single adjacency '
+                             'matrices.')
+
+        distance = pd.DataFrame(self.squareform())
+
+        if len(labels) != distance.shape[0]:
+            raise ValueError('Labels must be same length as distance matrix')
+
+        out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None)
+        for i in np.unique(labels):
+            tmp_w = pd.DataFrame(columns=out.columns, index=None)
+            tmp_w['Distance'] = distance.loc[labels == i, labels == i].values[np.triu_indices(sum(labels == i), k=1)]
+            tmp_w['Type'] = 'Within'
+            tmp_w['Group'] = i
+            tmp_b = pd.DataFrame(columns=out.columns, index=None)
+            # Between-group distances: rows in group i vs. columns outside it
+            tmp_b['Distance'] = distance.loc[labels == i, labels != i].values.flatten()
+            tmp_b['Type'] = 'Between'
+            tmp_b['Group'] = i
+            out = out.append(tmp_w).append(tmp_b)
+        f = sns.violinplot(x="Group", y="Distance", hue="Type", data=out,
+                           split=True, inner='quartile',
+                           palette={"Within": "lightskyblue", "Between": "red"},
+                           ax=ax)
+        f.set_ylabel('Average Distance')
+        f.set_title('Average Group Distance')
+        return f
+
+    def stats_label_distance(self, labels, n_permute=5000, n_jobs=-1):
+        ''' Calculate permutation tests on within and between label distance. 
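+            Example (an illustrative sketch; `labels` is assumed to have one
+            entry per node): stats = dat.stats_label_distance(labels, n_permute=1000)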
+
+        Args:
+            labels (np.array): numpy array of labels to plot
+            n_permute (int): number of permutations to run (default=5000)
+
+        Returns:
+            dict: dictionary of within and between group differences
+                  and p-values
+
+        '''
+
+        if not self.is_single_matrix:
+            raise ValueError('This function only works on single adjacency '
+                             'matrices.')
+
+        distance = pd.DataFrame(self.squareform())
+
+        if len(labels) != distance.shape[0]:
+            raise ValueError('Labels must be same length as distance matrix')
+
+        out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None)
+        for i in np.unique(labels):
+            tmp_w = pd.DataFrame(columns=out.columns, index=None)
+            tmp_w['Distance'] = distance.loc[labels == i, labels == i].values[np.triu_indices(sum(labels == i), k=1)]
+            tmp_w['Type'] = 'Within'
+            tmp_w['Group'] = i
+            tmp_b = pd.DataFrame(columns=out.columns, index=None)
+            tmp_b['Distance'] = distance.loc[labels == i, labels != i].values.flatten()
+            tmp_b['Type'] = 'Between'
+            tmp_b['Group'] = i
+            out = out.append(tmp_w).append(tmp_b)
+        stats = dict()
+        for i in np.unique(labels):
+            # Within vs. between group test
+            tmp1 = out.loc[(out['Group'] == i) & (out['Type'] == 'Within'), 'Distance']
+            tmp2 = out.loc[(out['Group'] == i) & (out['Type'] == 'Between'), 'Distance']
+            stats[str(i)] = two_sample_permutation(tmp1, tmp2,
+                                                   n_permute=n_permute,
+                                                   n_jobs=n_jobs)
+        return stats
+
+    def plot_silhouette(self, labels, ax=None, permutation_test=True,
+                        n_permute=5000, **kwargs):
+        '''Create a silhouette plot'''
+        distance = pd.DataFrame(self.squareform())
+
+        if len(labels) != distance.shape[0]:
+            raise ValueError('Labels must be same length as distance matrix')
+
+        (f, outAll) = plot_silhouette(distance, labels, ax=ax,
+                                      permutation_test=permutation_test,
+                                      n_permute=n_permute, **kwargs)
+        return (f, outAll)
+
+    def bootstrap(self, function, n_samples=5000, save_weights=False,
+                  n_jobs=-1, *args, **kwargs):
+        '''Bootstrap an Adjacency method.
+
+        Example Usage:
+            b = dat.bootstrap('mean', n_samples=5000)
+            b = dat.bootstrap('mean', n_samples=5000, save_weights=True)
+
+        Args:
+            function: (str) method to apply to data for each bootstrap
+            n_samples: (int) number of samples to bootstrap with replacement
+            save_weights: (bool) Save each bootstrap iteration
+                          (useful for aggregating many bootstraps on a cluster)
+            n_jobs: (int) The number of CPUs to use to do the computation. 
+ -1 means all CPUs.Returns: + output: summarized studentized bootstrap output + + ''' + + bootstrapped = Parallel(n_jobs=n_jobs)( + delayed(_bootstrap_apply_func)(self, + function, *args, **kwargs) for i in range(n_samples)) + bootstrapped = Adjacency(bootstrapped) + return summarize_bootstrap(bootstrapped, save_weights=save_weights) diff --git a/nltools/data.py b/nltools/data/brain_data.py similarity index 57% rename from nltools/data.py rename to nltools/data/brain_data.py index f6706360..631fcae4 100644 --- a/nltools/data.py +++ b/nltools/data/brain_data.py @@ -11,23 +11,43 @@ # Notes: # Need to figure out how to speed up loading and resampling of data -__all__ = ['Brain_Data', - 'Adjacency', - 'Groupby', - 'Design_Matrix', - 'Design_Matrix_Series'] __author__ = ["Luke Chang"] __license__ = "MIT" -import os import pickle # import cPickle +from nilearn.signal import clean +from scipy.stats import ttest_1samp, t, norm +from scipy.signal import detrend +from scipy.spatial.distance import squareform +import os +import shutil import nibabel as nib +import seaborn as sns +import matplotlib.pyplot as plt +import numpy as np +import pandas as pd +import warnings +import tempfile +from copy import deepcopy +import six +from sklearn.metrics.pairwise import pairwise_distances +from pynv import Client +from joblib import Parallel, delayed +from nltools.mask import expand_mask, collapse_mask +from nltools.analysis import Roc +from nilearn.input_data import NiftiMasker +from nilearn.image import resample_img +from nilearn.masking import intersect_masks +from nilearn.regions import connected_regions, connected_label_regions +from nilearn.plotting.img_plotting import plot_epi, plot_roi, plot_stat_map from nltools.utils import (get_resource_path, set_algorithm, get_anatomical, make_cosine_basis, glover_hrf, - attempt_to_import) + attempt_to_import, + concatenate, + _bootstrap_apply_func) from nltools.cross_validation import set_cv from nltools.plotting import (dist_from_hyperplane_plot, scatterplot, @@ -47,34 +67,10 @@ zscore, transform_pairwise, summarize_bootstrap) -from nltools.mask import expand_mask, collapse_mask -from nltools.analysis import Roc -from nilearn.input_data import NiftiMasker -from nilearn.image import resample_img -from nilearn.masking import intersect_masks -from nilearn.regions import connected_regions, connected_label_regions -from nilearn.plotting.img_plotting import plot_epi, plot_roi, plot_stat_map -from nilearn.signal import clean -from copy import deepcopy -import pandas as pd -from pandas import DataFrame, Series -import numpy as np -from scipy.stats import ttest_1samp, t, norm -from scipy.signal import detrend -from scipy.spatial.distance import squareform -import six -from sklearn.metrics.pairwise import pairwise_distances from nltools.pbs_job import PBS_Job -import warnings -import shutil -import tempfile -import seaborn as sns -from pynv import Client -import matplotlib.pyplot as plt -from joblib import Parallel, delayed +from .adjacency import Adjacency # Optional dependencies -nx = attempt_to_import('networkx', 'nx') mne_stats = attempt_to_import('mne.stats',name='mne_stats', fromlist= ['spatio_temporal_cluster_1samp_test', 'ttest_1samp_no_p']) @@ -1301,575 +1297,6 @@ def bootstrap(self, function, n_samples=5000, save_weights=False, bootstrapped = Brain_Data(bootstrapped) return summarize_bootstrap(bootstrapped, save_weights=save_weights) -class Adjacency(object): - - ''' - Adjacency is a class to represent Adjacency matrices as a vector rather - than a 2-dimensional 
matrix. This makes it easier to perform data - manipulation and analyses. - - Args: - data: pandas data instance or list of files - matrix_type: (str) type of matrix. Possible values include: - ['distance','similarity','directed','distance_flat', - 'similarity_flat','directed_flat'] - Y: Pandas DataFrame of training labels - **kwargs: Additional keyword arguments - - ''' - - def __init__(self, data=None, Y=None, matrix_type=None, **kwargs): - if matrix_type is not None: - if matrix_type.lower() not in ['distance','similarity','directed','distance_flat','similarity_flat','directed_flat']: - raise ValueError("matrix_type must be [None,'distance', " - "'similarity','directed','distance_flat', " - "'similarity_flat','directed_flat']") - - if data is None: - self.data = np.array([]) - self.matrix_type = 'empty' - self.is_single_matrix = np.nan - self.issymmetric = np.nan - elif isinstance(data, list): - if isinstance(data[0], Adjacency): - tmp = concatenate(data) - for item in ['data', 'matrix_type', 'Y','issymmetric']: - setattr(self, item, getattr(tmp,item)) - else: - d_all = []; symmetric_all = []; matrix_type_all = [] - for d in data: - data_tmp, issymmetric_tmp, matrix_type_tmp, _ = self._import_single_data(d,matrix_type=matrix_type) - d_all.append(data_tmp) - symmetric_all.append(issymmetric_tmp) - matrix_type_all.append(matrix_type_tmp) - if not all_same(symmetric_all): - raise ValueError('Not all matrices are of the same symmetric ' - 'type.') - if not all_same(matrix_type_all): - raise ValueError('Not all matrices are of the same matrix ' - 'type.') - self.data = np.array(d_all) - self.issymmetric = symmetric_all[0] - self.matrix_type = matrix_type_all[0] - self.is_single_matrix = False - else: - self.data, self.issymmetric, self.matrix_type, self.is_single_matrix = self._import_single_data(data, matrix_type=matrix_type) - - if Y is not None: - if isinstance(Y, six.string_types): - if os.path.isfile(Y): - Y = pd.read_csv(Y, header=None, index_col=None) - if isinstance(Y, pd.DataFrame): - if self.data.shape[0] != len(Y): - raise ValueError("Y does not match the correct size of " - "data") - self.Y = Y - else: - raise ValueError("Make sure Y is a pandas data frame.") - else: - self.Y = pd.DataFrame() - - def __repr__(self): - return ("%s.%s(shape=%s, square_shape=%s, Y=%s, is_symmetric=%s," - "matrix_type=%s)") % ( - self.__class__.__module__, - self.__class__.__name__, - self.shape(), - self.square_shape(), - len(self.Y), - self.issymmetric, - self.matrix_type) - - def __getitem__(self,index): - new = self.copy() - if isinstance(index, int): - new.data = np.array(self.data[index, :]).flatten() - new.is_single_matrix = True - else: - new.data = np.array(self.data[index, :]) - if not self.Y.empty: - new.Y = self.Y.iloc[index] - return new - - def __len__(self): - if self.is_single_matrix: - return 1 - else: - return self.data.shape[0] - - def __iter__(self): - for x in range(len(self)): - yield self[x] - - def __add__(self, y): - new = deepcopy(self) - if isinstance(y, (int, float)): - new.data = new.data + y - if isinstance(y, Adjacency): - if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') - new.data = new.data + y.data - return new - - def __sub__(self, y): - new = deepcopy(self) - if isinstance(y, (int, float)): - new.data = new.data - y - if isinstance(y, Adjacency): - if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') - new.data = new.data - y.data - return new - - 
def __mul__(self, y): - new = deepcopy(self) - if isinstance(y, (int, float)): - new.data = new.data * y - if isinstance(y, Adjacency): - if self.shape() != y.shape(): - raise ValueError('Both Adjacency() instances need to be the ' - 'same shape.') - new.data = np.multiply(new.data, y.data) - return new - - def _import_single_data(self, data, matrix_type=None): - ''' Helper function to import single data matrix.''' - - if isinstance(data, six.string_types): - if os.path.isfile(data): - data = pd.read_csv(data) - else: - raise ValueError('Make sure you have specified a valid file ' - 'path.') - - def test_is_single_matrix(data): - if len(data.shape) == 1: - return True - else: - return False - - if matrix_type is not None: - if matrix_type.lower() == 'distance_flat': - matrix_type = 'distance' - data = np.array(data) - issymmetric = True - is_single_matrix = test_is_single_matrix(data) - elif matrix_type.lower() == 'similarity_flat': - matrix_type = 'similarity' - data = np.array(data) - issymmetric = True - is_single_matrix = test_is_single_matrix(data) - elif matrix_type.lower() == 'directed_flat': - matrix_type = 'directed' - data = np.array(data).flatten() - issymmetric = False - is_single_matrix = test_is_single_matrix(data) - elif matrix_type.lower() in ['distance', 'similarity', 'directed']: - if data.shape[0] != data.shape[1]: - raise ValueError('Data matrix must be square') - data = np.array(data) - matrix_type = matrix_type.lower() - if matrix_type in ['distance', 'similarity']: - issymmetric = True - data = data[np.triu_indices(data.shape[0], k=1)] - else: - issymmetric = False - if isinstance(data, pd.DataFrame): - data = data.values.flatten() - elif isinstance(data, np.ndarray): - data = data.flatten() - is_single_matrix = True - else: - if len(data.shape) == 1: # Single Vector - try: - data = squareform(data) - except ValueError: - print('Data is not flattened upper triangle from ' - 'similarity/distance matrix or flattened directed ' - 'matrix.') - is_single_matrix = True - elif data.shape[0] == data.shape[1]: # Square Matrix - is_single_matrix = True - else: # Rectangular Matrix - data_all = deepcopy(data) - try: - data = squareform(data_all[0, :]) - except ValueError: - print('Data is not flattened upper triangle from multiple ' - 'similarity/distance matrices or flattened directed ' - 'matrices.') - is_single_matrix = False - - # Test if matrix is symmetrical - if np.all(data[np.triu_indices(data.shape[0], k=1)] == data.T[np.triu_indices(data.shape[0], k=1)]): - issymmetric = True - else: - issymmetric = False - - # Determine matrix type - if issymmetric: - if np.sum(np.diag(data)) == 0: - matrix_type = 'distance' - elif np.sum(np.diag(data)) == data.shape[0]: - matrix_type = 'similarity' - data = data[np.triu_indices(data.shape[0], k=1)] - else: - matrix_type = 'directed' - data = data.flatten() - - if not is_single_matrix: - data = data_all - - return (data, issymmetric, matrix_type, is_single_matrix) - - def isempty(self): - '''Check if Adjacency object is empty''' - return bool(self.matrix_type is 'empty') - - def squareform(self): - '''Convert adjacency back to squareform''' - if self.issymmetric: - if self.is_single_matrix: - return squareform(self.data) - else: - return [squareform(x.data) for x in self] - else: - if self.is_single_matrix: - return self.data.reshape(int(np.sqrt(self.data.shape[0])), - int(np.sqrt(self.data.shape[0]))) - else: - return [x.data.reshape(int(np.sqrt(x.data.shape[0])), - int(np.sqrt(x.data.shape[0]))) for x in self] - - def plot(self, 
limit=3, **kwargs): - ''' Create Heatmap of Adjacency Matrix''' - if self.is_single_matrix: - return sns.heatmap(self.squareform(), square=True, **kwargs) - else: - f, a = plt.subplots(limit) - for i in range(limit): - sns.heatmap(self[i].squareform(), square=True, ax=a[i], - **kwargs) - return f - - def mean(self, axis=0): - ''' Calculate mean of Adjacency - - Args: - axis: calculate mean over features (0) or data (1). - For data it will be on upper triangle. - - Returns: - mean: float if single, adjacency if axis=0, np.array if axis=1 - and multiple - - ''' - - if self.is_single_matrix: - return np.mean(self.data) - else: - if axis == 0: - return Adjacency(data=np.mean(self.data, axis=axis), - matrix_type=self.matrix_type + '_flat') - elif axis == 1: - return np.mean(self.data, axis=axis) - - def std(self, axis=0): - ''' Calculate standard deviation of Adjacency - - Args: - axis: calculate std over features (0) or data (1). - For data it will be on upper triangle. - - Returns: - std: float if single, adjacency if axis=0, np.array if axis=1 and - multiple - - ''' - - if self.is_single_matrix: - return np.std(self.data) - else: - if axis == 0: - return Adjacency(data=np.std(self.data, axis=axis), - matrix_type=self.matrix_type + '_flat') - elif axis == 1: - return np.std(self.data, axis=axis) - - def shape(self): - ''' Calculate shape of data. ''' - return self.data.shape - - def square_shape(self): - ''' Calculate shape of squareform data. ''' - if self.matrix_type is 'empty': - return np.array([]) - else: - if self.is_single_matrix: - return self.squareform().shape - else: - return self[0].squareform().shape - - def copy(self): - ''' Create a copy of Adjacency object.''' - return deepcopy(self) - - def append(self, data): - ''' Append data to Adjacency instance - - Args: - data: Adjacency instance to append - - Returns: - out: new appended Adjacency instance - - ''' - - if not isinstance(data, Adjacency): - raise ValueError('Make sure data is a Adjacency instance.') - - if self.isempty(): - out = data.copy() - else: - out = self.copy() - if self.square_shape() != data.square_shape(): - raise ValueError('Data is not the same shape as Adjacency ' - 'instance.') - - out.data = np.vstack([self.data, data.data]) - out.is_single_matrix = False - if out.Y.size: - out.Y = self.Y.append(data.Y) - - return out - - def write(self, file_name, method='long'): - ''' Write out Adjacency object to csv file. - - Args: - file_name (str): name of file name to write - method (str): method to write out data ['long','square'] - - ''' - if method not in ['long', 'square']: - raise ValueError('Make sure method is ["long","square"].') - if self.is_single_matrix: - if method is 'long': - out = pd.DataFrame(self.data).to_csv(file_name, index=None) - elif method is 'square': - out = pd.DataFrame(self.squareform()).to_csv(file_name, - index=None) - else: - if method is 'long': - out = pd.DataFrame(self.data).to_csv(file_name, index=None) - elif method is 'square': - raise NotImplementedError('Need to decide how we should write ' - 'out multiple matrices. As separate ' - 'files?') - - def similarity(self, data, plot=False, **kwargs): - ''' Calculate similarity between two Adjacency matrices. 
- Default is to use spearman correlation and permutation test.''' - if not isinstance(data, Adjacency): - data2 = Adjacency(data) - else: - data2 = data.copy() - if self.is_single_matrix: - if plot: - plot_stacked_adjacency(self, data) - return correlation_permutation(self.data, data2.data, **kwargs) - else: - if plot: - _, a = plt.subplots(len(self)) - for i in a: - plot_stacked_adjacency(self, data, ax=i) - return [correlation_permutation(x.data, data2.data, **kwargs) for x in self] - - def distance(self, method='correlation', **kwargs): - ''' Calculate distance between images within an Adjacency() instance. - - Args: - method: type of distance metric (can use any scikit learn or - sciypy metric) - - Returns: - dist: Outputs a 2D distance matrix. - - ''' - return Adjacency(pairwise_distances(self.data, metric=method, **kwargs), - matrix_type='distance') - - def threshold(self, thresh=0, binarize=False): - '''Threshold Adjacency instance - - Args: - thresh: cutoff to threshold image (float). if 'threshold'=50%, - will calculate percentile. - binarize (bool): if 'binarize'=True then binarize output - Returns: - Brain_Data: thresholded Brain_Data instance - - ''' - - b = self.copy() - if isinstance(thresh, six.string_types): - if thresh[-1] is '%': - thresh = np.percentile(b.data, float(thresh[:-1])) - if binarize: - b.data = b.data > thresh - else: - b.data[b.data < thresh] = 0 - return b - - def to_graph(self): - ''' Convert Adjacency into networkx graph. only works on - single_matrix for now.''' - - if self.is_single_matrix: - if self.matrix_type == 'directed': - return nx.DiGraph(self.squareform()) - else: - return nx.Graph(self.squareform()) - else: - raise NotImplementedError('This function currently only works on ' - 'single matrices.') - - def ttest(self, **kwargs): - ''' Calculate ttest across samples. 
''' - if self.is_single_matrix: - raise ValueError('t-test cannot be run on single matrices.') - m = []; p = [] - for i in range(self.data.shape[1]): - stats = one_sample_permutation(self.data[:, i], **kwargs) - m.append(stats['mean']) - p.append(stats['p']) - mn = Adjacency(np.array(m)) - pval = Adjacency(np.array(p)) - return (mn, pval) - - def plot_label_distance(self, labels, ax=None): - ''' Create a violin plot indicating within and between label distance - - Args: - labels (np.array): numpy array of labels to plot - - Returns: - violin plot handles - - ''' - - if not self.is_single_matrix: - raise ValueError('This function only works on single adjacency ' - 'matrices.') - - distance = pd.DataFrame(self.squareform()) - - if len(labels) != distance.shape[0]: - raise ValueError('Labels must be same length as distance matrix') - - within = []; between = [] - out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None) - for i in np.unique(labels): - tmp_w = pd.DataFrame(columns=out.columns, index=None) - tmp_w['Distance'] = distance.loc[labels == i, labels == i].values[np.triu_indices(sum(labels == i), k=1)] - tmp_w['Type'] = 'Within' - tmp_w['Group'] = i - tmp_b = pd.DataFrame(columns=out.columns, index=None) - tmp_b['Distance'] = distance.loc[labels != i, labels != i].values[np.triu_indices(sum(labels == i), k=1)] - tmp_b['Type'] = 'Between' - tmp_b['Group'] = i - out = out.append(tmp_w).append(tmp_b) - f = sns.violinplot(x="Group", y="Distance", hue="Type", data=out, split=True, inner='quartile', - palette={"Within": "lightskyblue", "Between": "red"}, ax=ax) - f.set_ylabel('Average Distance') - f.set_title('Average Group Distance') - return f - - def stats_label_distance(self, labels, n_permute=5000, n_jobs=-1): - ''' Calculate permutation tests on within and between label distance. 
- - Args: - labels (np.array): numpy array of labels to plot - n_permute (int): number of permutations to run (default=5000) - - Returns: - dict: dictionary of within and between group differences - and p-values - - ''' - - if not self.is_single_matrix: - raise ValueError('This function only works on single adjacency ' - 'matrices.') - - distance = pd.DataFrame(self.squareform()) - - if len(labels) != distance.shape[0]: - raise ValueError('Labels must be same length as distance matrix') - - within = []; between = [] - out = pd.DataFrame(columns=['Distance', 'Group', 'Type'], index=None) - for i in np.unique(labels): - tmp_w = pd.DataFrame(columns=out.columns, index=None) - tmp_w['Distance'] = distance.loc[labels == i, labels == i].values[np.triu_indices(sum(labels == i), k=1)] - tmp_w['Type'] = 'Within' - tmp_w['Group'] = i - tmp_b = pd.DataFrame(columns=out.columns, index=None) - tmp_b['Distance'] = distance.loc[labels == i, labels != i].values.flatten() - tmp_b['Type'] = 'Between' - tmp_b['Group'] = i - out = out.append(tmp_w).append(tmp_b) - stats = dict() - for i in np.unique(labels): - # Within group test - tmp1 = out.loc[(out['Group'] == i) & (out['Type'] == 'Within'), 'Distance'] - tmp2 = out.loc[(out['Group'] == i) & (out['Type'] == 'Between'), 'Distance'] - stats[str(i)] = two_sample_permutation(tmp1, tmp2, - n_permute=n_permute, n_jobs=n_jobs) - return stats - - def plot_silhouette(self, labels, ax=None, permutation_test=True, - n_permute=5000, **kwargs): - '''Create a silhouette plot''' - distance = pd.DataFrame(self.squareform()) - - if len(labels) != distance.shape[0]: - raise ValueError('Labels must be same length as distance matrix') - - (f,outAll) = plot_silhouette(distance, labels, ax=None, - permutation_test=True, - n_permute=5000, **kwargs) - return (f,outAll) - - def bootstrap(self, function, n_samples=5000, save_weights=False, - n_jobs=-1, *args, **kwargs): - '''Bootstrap an Adjacency method. - - Example Useage: - b = dat.bootstrap('mean', n_samples=5000) - b = dat.bootstrap('predict', n_samples=5000, algorithm='ridge') - b = dat.bootstrap('predict', n_samples=5000, save_weights=True) - - Args: - function: (str) method to apply to data for each bootstrap - n_samples: (int) number of samples to bootstrap with replacement - save_weights: (bool) Save each bootstrap iteration - (useful for aggregating many bootstraps on a cluster) - n_jobs: (int) The number of CPUs to use to do the computation. - -1 means all CPUs.Returns: - output: summarized studentized bootstrap output - - ''' - - bootstrapped = Parallel(n_jobs=n_jobs)( - delayed(_bootstrap_apply_func)(self, - function, *args, **kwargs) for i in range(n_samples)) - bootstrapped = Adjacency(bootstrapped) - return summarize_bootstrap(bootstrapped, save_weights=save_weights) - - class Groupby(object): def __init__(self, data, mask): @@ -1942,439 +1369,3 @@ def combine(self, value_dict): raise ValueError('No method for aggregation implented for %s ' 'yet.' % type(value_dict[i])) return out.sum() - - -class Design_Matrix_Series(Series): - - """ - This is a sub-class of pandas series. While not having additional methods - of it's own required to retain normal slicing functionality for the - Design_Matrix class, i.e. how slicing is typically handled in pandas. - All methods should be called on Design_Matrix below. 
- """ - - @property - def _constructor(self): - return Design_Matrix_Series - - @property - def _constructor_expanddim(self): - return Design_Matrix - - -class Design_Matrix(DataFrame): - - """Design_Matrix is a class to represent design matrices with convenience - functionality for convolution, upsampling and downsampling. It plays - nicely with Brain_Data and can be used to build an experimental design - to pass to Brain_Data's X attribute. It is essentially an enhanced - pandas df, with extra attributes and methods. Methods always return a - new design matrix instance. - - Args: - convolved (list, optional): on what columns convolution has been performed; defaults to None - hasIntercept (bool, optional): whether the design matrix has an - intercept column; defaults to False - sampling_rate (float, optional): sampling rate of each row in seconds (e.g. TR in neuroimaging); defaults to None - - """ - - _metadata = ['sampling_rate', 'convolved', 'hasIntercept'] - - def __init__(self, *args, **kwargs): - - sampling_rate = kwargs.pop('sampling_rate',None) - convolved = kwargs.pop('convolved', None) - hasIntercept = kwargs.pop('hasIntercept', False) - self.sampling_rate = sampling_rate - self.convolved = convolved - self.hasIntercept = hasIntercept - - super(Design_Matrix, self).__init__(*args, **kwargs) - - @property - def _constructor(self): - return Design_Matrix - - @property - def _constructor_sliced(self): - return Design_Matrix_Series - - - def info(self): - """Print class meta data. - - """ - return '%s.%s(sampling_rate=%s, shape=%s, convolved=%s, hasIntercept=%s)' % ( - self.__class__.__module__, - self.__class__.__name__, - self.sampling_rate, - self.shape, - self.convolved, - self.hasIntercept - ) - - def append(self, df, axis, **kwargs): - """Method for concatenating another design matrix row or column-wise. - Can "uniquify" certain columns when appending row-wise, and by - default will attempt to do that with the intercept. - - Args: - axis (int): 0 for row-wise (vert-cat), 1 for column-wise (horz-cat) - separate (bool,optional): whether try and uniquify columns; - defaults to True; only applies - when axis==0 - addIntercept (bool,optional): whether to add intercepts to matrices - before appending; defaults to False; - only applies when axis==0 - uniqueCols (list,optional): what additional columns to try to keep - separated by uniquifying; defaults to - intercept only; only applies when - axis==0 - - """ - if axis == 1: - return self.horzcat(df) - elif axis == 0: - return self.vertcat(df, **kwargs) - else: - raise ValueError("Axis must be 0 (row) or 1 (column)") - - - def horzcat(self, df): - """Used by .append(). Append another design matrix, column-wise - (horz cat). Always returns a new design_matrix. - - """ - if self.shape[0] != df.shape[0]: - raise ValueError("Can't append differently sized design matrices! " - "Mat 1 has %s rows and Mat 2 has %s rows." - % (self.shape[0]), df.shape[0]) - out = pd.concat([self, df], axis=1) - out.sampling_rate = self.sampling_rate - out.convolved = self.convolved - out.hasIntercept = self.hasIntercept - return out - - - def vertcat(self, df, separate=True, addIntercept=False, uniqueCols=[]): - """Used by .append(). Append another design matrix row-wise (vert cat). - Always returns a new design matrix. - - """ - - outdf = df.copy() - assert self.hasIntercept == outdf.hasIntercept, ("Intercepts are " - "ambigious. 
Both design matrices should match in whether " - "they do or don't have intercepts.") - - if addIntercept: - self['intercept'] = 1 - self.hasIntercept = True - outdf['intercept'] = 1 - outdf.hasIntercept = True - if separate: - if self.hasIntercept: - uniqueCols += ['intercept'] - idx_1 = [] - idx_2 = [] - for col in uniqueCols: - # To match substrings within column names, we loop over each - # element and search for the uniqueCol as a substring within it; - # first we do to check if the uniqueCol actually occurs in both - # design matrices, if so then we change it's name before - # concatenating - joint = set(self.columns) & set(outdf.columns) - shared = [elem for elem in joint if col in elem] - if shared: - idx_1 += [i for i, elem in enumerate(self.columns) if col in elem] - idx_2 += [i for i, elem in enumerate(outdf.columns) if col in elem] - aRename = {self.columns[elem]: '0_'+self.columns[elem] for i, elem in enumerate(idx_1)} - bRename = {outdf.columns[elem]: '1_'+outdf.columns[elem] for i, elem in enumerate(idx_2)} - if aRename: - out = self.rename(columns=aRename) - outdf = outdf.rename(columns=bRename) - colOrder = [] - #retain original column order as closely as possible - for colA,colB in zip(out.columns, outdf.columns): - colOrder.append(colA) - if colA != colB: - colOrder.append(colB) - out = super(Design_Matrix, out).append(outdf, ignore_index=True) - # out = out.append(outdf,separate=False,axis=0,ignore_index=True).fillna(0) - out = out[colOrder] - else: - raise ValueError("Separate concatentation impossible. None of " - "the requested unique columns were found in " - "both design_matrices.") - else: - out = super(Design_Matrix, self).append(outdf, ignore_index=True) - # out = self.append(df,separate=False,axis=0,ignore_index=True).fillna(0) - - out.convolved = self.convolved - out.sampling_rate = self.sampling_rate - out.hasIntercept = self.hasIntercept - - return out - - def vif(self): - """Compute variance inflation factor amongst columns of design matrix, - ignoring the intercept. Much faster that statsmodels and more - reliable too. Uses the same method as Matlab and R (diagonal - elements of the inverted correlation matrix). - - Returns: - vifs (list): list with length == number of columns - intercept - - """ - assert self.shape[1] > 1, "Can't compute vif with only 1 column!" - if self.hasIntercept: - idx = [i for i, elem in enumerate(self.columns) if 'intercept' not in elem] - out = self[self.columns[np.r_[idx]]] - else: - out = self[self.columns] - - if any(out.corr() < 0): - warnings.warn("Correlation matrix has negative values!") - - return np.diag(np.linalg.inv(out.corr()), 0) - - def heatmap(self, figsize=(8, 6), **kwargs): - """Visualize Design Matrix spm style. Use .plot() for typical pandas - plotting functionality. Can pass optional keyword args to seaborn - heatmap. - - """ - fig, ax = plt.subplots(1, figsize=figsize) - ax = sns.heatmap(self, cmap='gray', cbar=False, ax=ax, **kwargs) - for _, spine in ax.spines.items(): - spine.set_visible(True) - for i, label in enumerate(ax.get_yticklabels()): - if i == 0 or i == self.shape[0]-1: - label.set_visible(True) - else: - label.set_visible(False) - ax.axhline(linewidth=4, color="k") - ax.axvline(linewidth=4, color="k") - ax.axhline(y=self.shape[0], color='k', linewidth=4) - ax.axvline(x=self.shape[1], color='k', linewidth=4) - plt.yticks(rotation=0) - - def convolve(self, conv_func='hrf', colNames=None): - """Perform convolution using an arbitrary function. 
- - Args: - conv_func (ndarray or string): either a 1d numpy array containing output of a function that you want to convolve; a samples by kernel 2d array of several kernels to convolve; or th string 'hrf' which defaults to a glover HRF function at the Design_matrix's sampling_freq - colNames (list): what columns to perform convolution on; defaults - to all skipping intercept, and columns containing 'poly' or 'cosine' - - """ - assert self.sampling_rate is not None, "Design_matrix has no sampling_rate set!" - - if colNames is None: - colNames = [col for col in self.columns if 'intercept' not in col and 'poly' not in col and 'cosine' not in col] - nonConvolved = [col for col in self.columns if col not in colNames] - - if isinstance(conv_func,six.string_types): - assert conv_func == 'hrf',"Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays." - - assert self.sampling_rate is not None, "Design_matrix sampling rate not set. Can't figure out how to generate HRF!" - conv_func = glover_hrf(self.sampling_rate, oversampling=1) - - else: - assert type(conv_func) == np.ndarray, 'Must provide a function for convolution!' - - if len(conv_func.shape) > 1: - assert conv_func.shape[0] > conv_func.shape[1], '2d conv_func must be formatted as, samples X kernels!' - conv_mats = [] - for i in range(conv_func.shape[1]): - c = self[colNames].apply(lambda x: np.convolve(x, conv_func[:,i])[:self.shape[0]]) - c.columns = [str(col)+'_c'+str(i) for col in c.columns] - conv_mats.append(c) - out = pd.concat(conv_mats+ [self[nonConvolved]], axis=1) - else: - c = self[colNames].apply(lambda x: np.convolve(x, conv_func)[:self.shape[0]]) - c.columns = [str(col)+'_c0' for col in c.columns] - out = pd.concat([c,self[nonConvolved]], axis=1) - - out.convolved = colNames - out.sampling_rate = self.sampling_rate - out.hasIntercept = self.hasIntercept - return out - - def downsample(self, target,**kwargs): - """Downsample columns of design matrix. Relies on - nltools.stats.downsample, but ensures that returned object is a - design matrix. - - Args: - target(float): downsampling target, typically in samples not - seconds - kwargs: additional inputs to nltools.stats.downsample - - """ - df = downsample(self, sampling_freq=self.sampling_rate, target=target, **kwargs) - - # convert df to a design matrix - newMat = Design_Matrix(df, sampling_rate=target, - convolved=self.convolved, - hasIntercept=self.hasIntercept) - return newMat - - def upsample(self, target,**kwargs): - """Upsample columns of design matrix. Relies on - nltools.stats.upsample, but ensures that returned object is a - design matrix. - - Args: - target(float): downsampling target, typically in samples not - seconds - kwargs: additional inputs to nltools.stats.downsample - - """ - df = upsample(self, sampling_freq=self.sampling_rate, target=target, **kwargs) - - # convert df to a design matrix - newMat = Design_Matrix(df, sampling_rate=target, - convolved=self.convolved, - hasIntercept=self.hasIntercept) - return newMat - - def zscore(self, colNames=[]): - """Z-score specific columns of design matrix. Relies on - nltools.stats.downsample, but ensures that returned object is a - design matrix. 
- - Args: - colNames (list): columns to z-score; defaults to all columns - - """ - colOrder = self.columns - if not list(colNames): - colNames = self.columns - nonZ = [col for col in self.columns if col not in colNames] - df = zscore(self[colNames]) - df = pd.concat([df, self[nonZ]], axis=1) - df = df[colOrder] - newMat = Design_Matrix(df, sampling_rate=self.sampling_rate, - convolved=self.convolved, - hasIntercept=self.hasIntercept) - - return newMat - - def addpoly(self, order=0, include_lower=True): - """Add nth order polynomial terms as columns to design matrix. - - Args: - order (int): what order terms to add; 0 = constant/intercept - (default), 1 = linear, 2 = quadratic, etc - include_lower: (bool) whether to add lower order terms if order > 0 - - """ - - #This method is kind of ugly - polyDict = {} - if include_lower: - if order > 0: - for i in range(0, order+1): - if i == 0: - if self.hasIntercept: - warnings.warn("Design Matrix already has " - "intercept...skipping") - else: - polyDict['intercept'] = np.repeat(1, self.shape[0]) - else: - polyDict['poly_' + str(i)] = (range(self.shape[0]) - np.mean(range(self.shape[0]))) ** i - else: - if self.hasIntercept: - raise ValueError("Design Matrix already has intercept!") - else: - polyDict['intercept'] = np.repeat(1, self.shape[0]) - else: - if order == 0: - if self.hasIntercept: - raise ValueError("Design Matrix already has intercept!") - else: - polyDict['intercept'] = np.repeat(1, self.shape[0]) - else: - polyDict['poly_'+str(order)] = (range(self.shape[0]) - np.mean(range(self.shape[0])))**order - - toAdd = Design_Matrix(polyDict) - out = self.append(toAdd, axis=1) - if 'intercept' in polyDict.keys() or self.hasIntercept: - out.hasIntercept = True - return out - - def add_dct_basis(self,duration=180): - """Adds cosine basis functions to Design_Matrix columns, - based on spm-style discrete cosine transform for use in - high-pass filtering. - - Args: - duration (int): length of filter in seconds - - """ - assert self.sampling_rate is not None, "Design_Matrix has no sampling_rate set!" - basis_mat = make_cosine_basis(self.shape[0],self.sampling_rate,duration) - - basis_frame = Design_Matrix(basis_mat, - sampling_rate=self.sampling_rate, - hasIntercept=False, convolved=False) - - basis_frame.columns = ['cosine_'+str(i+1) for i in range(basis_frame.shape[1])] - - out = self.append(basis_frame,axis=1) - - return out - -def concatenate(data): - '''Concatenate a list of Brain_Data() or Adjacency() objects''' - - if not isinstance(data, list): - raise ValueError('Make sure you are passing a list of objects.') - - if all([type(x) for x in data]): - if not isinstance(data[0], (Brain_Data, Adjacency)): - raise ValueError('Make sure you are passing a list of Brain_Data or Adjacency objects.') - - out = data[0].__class__() - for i in data: - out = out.append(i) - else: - raise ValueError('Make sure all objects in the list are the same type.') - return out - -def all_same(items): - return np.all(x == items[0] for x in items) - -def _vif(X, y): - """ - DEPRECATED - Helper function to compute variance inflation factor. Unclear whether - there are errors with this method relative to stats.models. Seems like - stats.models is sometimes inconsistent with R. R always uses the - diagonals of the inverted covariance matrix which is what's implemented - instead of this. 
-
-    Args:
-        X: (Dataframe) explanatory variables
-        y: (Dataframe/Series) outcome variable
-
-    """
-
-    b,resid, _, _ = np.linalg.lstsq(X, y)
-    SStot = y.var(ddof=0)*len(y)
-    if SStot == 0:
-        SStot = .0001 # to prevent divide by 0 errors
-    r2 = 1.0 - (resid/SStot)
-    if r2 == 1:
-        r2 = 0.9999 # to prevent divide by 0 errors
-    return (1.0/(1.0-r2))[0]
-
-def _bootstrap_apply_func(data, function, *args, **kwargs):
-    '''Bootstrap helper function. Sample with replacement and apply function'''
-    data_row_id = range(data.shape()[0])
-    new_dat = data[np.random.choice(data_row_id,
-                                    size=len(data_row_id),
-                                    replace=True)]
-    return getattr(new_dat, function)( *args, **kwargs)
diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py
new file mode 100644
index 00000000..96c3b8a9
--- /dev/null
+++ b/nltools/data/design_matrix.py
@@ -0,0 +1,449 @@
+from __future__ import division
+
+'''
+NeuroLearn Design Matrix
+========================
+
+Classes to represent design matrices
+
+'''
+
+__all__ = ['Design_Matrix',
+           'Design_Matrix_Series']
+__author__ = ["Luke Chang"]
+__license__ = "MIT"
+
+from pandas import DataFrame, Series
+import pandas as pd
+import numpy as np
+import seaborn as sns
+import matplotlib.pyplot as plt
+import warnings
+import six
+from nltools.utils import (make_cosine_basis,
+                           glover_hrf)
+from nltools.stats import (pearson,
+                           fdr,
+                           threshold,
+                           fisher_r_to_z,
+                           correlation_permutation,
+                           one_sample_permutation,
+                           two_sample_permutation,
+                           downsample,
+                           upsample,
+                           zscore,
+                           )
+
+class Design_Matrix_Series(Series):
+
+    """
+    This is a sub-class of pandas Series. While it has no additional methods
+    of its own, it is required to retain normal slicing functionality for the
+    Design_Matrix class (i.e. how slicing is typically handled in pandas).
+    All methods should be called on Design_Matrix below.
+    """
+
+    @property
+    def _constructor(self):
+        return Design_Matrix_Series
+
+    @property
+    def _constructor_expanddim(self):
+        return Design_Matrix
+
+
+class Design_Matrix(DataFrame):
+
+    """Design_Matrix is a class to represent design matrices with convenience
+    functionality for convolution, upsampling and downsampling. It plays
+    nicely with Brain_Data and can be used to build an experimental design
+    to pass to Brain_Data's X attribute. It is essentially an enhanced
+    pandas DataFrame, with extra attributes and methods. Methods always
+    return a new design matrix instance.
+
+    Args:
+        convolved (list, optional): on what columns convolution has been
+                                    performed; defaults to None
+        hasIntercept (bool, optional): whether the design matrix has an
+                                       intercept column; defaults to False
+        sampling_rate (float, optional): sampling rate of each row in seconds
+                                         (e.g. TR in neuroimaging); defaults
+                                         to None
+
+    """
+
+    _metadata = ['sampling_rate', 'convolved', 'hasIntercept']
+
+    def __init__(self, *args, **kwargs):
+
+        sampling_rate = kwargs.pop('sampling_rate', None)
+        convolved = kwargs.pop('convolved', None)
+        hasIntercept = kwargs.pop('hasIntercept', False)
+        self.sampling_rate = sampling_rate
+        self.convolved = convolved
+        self.hasIntercept = hasIntercept
+
+        super(Design_Matrix, self).__init__(*args, **kwargs)
+
+    @property
+    def _constructor(self):
+        return Design_Matrix
+
+    @property
+    def _constructor_sliced(self):
+        return Design_Matrix_Series
+
+
+    def info(self):
+        """Return class meta data as a string. 
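+        For example (illustrative values), a 100-row design sampled at a
+        2.0s TR might return:
+        'nltools.data.design_matrix.Design_Matrix(sampling_rate=2.0, shape=(100, 5), convolved=None, hasIntercept=True)'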
+
+        """
+        return '%s.%s(sampling_rate=%s, shape=%s, convolved=%s, hasIntercept=%s)' % (
+            self.__class__.__module__,
+            self.__class__.__name__,
+            self.sampling_rate,
+            self.shape,
+            self.convolved,
+            self.hasIntercept
+        )
+
+    def append(self, df, axis, **kwargs):
+        """Method for concatenating another design matrix row or column-wise.
+        Can "uniquify" certain columns when appending row-wise, and by
+        default will attempt to do that with the intercept.
+
+        Args:
+            axis (int): 0 for row-wise (vert-cat), 1 for column-wise (horz-cat)
+            separate (bool,optional): whether to try to uniquify columns;
+                                      defaults to True; only applies
+                                      when axis==0
+            addIntercept (bool,optional): whether to add intercepts to matrices
+                                          before appending; defaults to False;
+                                          only applies when axis==0
+            uniqueCols (list,optional): what additional columns to try to keep
+                                        separated by uniquifying; defaults to
+                                        intercept only; only applies when
+                                        axis==0
+
+        """
+        if axis == 1:
+            return self.horzcat(df)
+        elif axis == 0:
+            return self.vertcat(df, **kwargs)
+        else:
+            raise ValueError("Axis must be 0 (row) or 1 (column)")
+
+
+    def horzcat(self, df):
+        """Used by .append(). Append another design matrix, column-wise
+        (horz cat). Always returns a new design_matrix.
+
+        """
+        if self.shape[0] != df.shape[0]:
+            raise ValueError("Can't append differently sized design matrices! "
+                             "Mat 1 has %s rows and Mat 2 has %s rows."
+                             % (self.shape[0], df.shape[0]))
+        out = pd.concat([self, df], axis=1)
+        out.sampling_rate = self.sampling_rate
+        out.convolved = self.convolved
+        out.hasIntercept = self.hasIntercept
+        return out
+
+
+    def vertcat(self, df, separate=True, addIntercept=False, uniqueCols=[]):
+        """Used by .append(). Append another design matrix row-wise (vert cat).
+        Always returns a new design matrix.
+
+        """
+
+        outdf = df.copy()
+        assert self.hasIntercept == outdf.hasIntercept, ("Intercepts are "
+                "ambiguous. Both design matrices should match in whether "
+                "they do or don't have intercepts.")
+
+        if addIntercept:
+            self['intercept'] = 1
+            self.hasIntercept = True
+            outdf['intercept'] = 1
+            outdf.hasIntercept = True
+        if separate:
+            if self.hasIntercept:
+                uniqueCols += ['intercept']
+            idx_1 = []
+            idx_2 = []
+            for col in uniqueCols:
+                # To match substrings within column names, we loop over each
+                # element and search for the uniqueCol as a substring within it;
+                # first we check whether the uniqueCol actually occurs in both
+                # design matrices, and if so we change its name before
+                # concatenating
+                joint = set(self.columns) & set(outdf.columns)
+                shared = [elem for elem in joint if col in elem]
+                if shared:
+                    idx_1 += [i for i, elem in enumerate(self.columns) if col in elem]
+                    idx_2 += [i for i, elem in enumerate(outdf.columns) if col in elem]
+            aRename = {self.columns[elem]: '0_'+self.columns[elem] for i, elem in enumerate(idx_1)}
+            bRename = {outdf.columns[elem]: '1_'+outdf.columns[elem] for i, elem in enumerate(idx_2)}
+            if aRename:
+                out = self.rename(columns=aRename)
+                outdf = outdf.rename(columns=bRename)
+                colOrder = []
+                # retain original column order as closely as possible
+                for colA, colB in zip(out.columns, outdf.columns):
+                    colOrder.append(colA)
+                    if colA != colB:
+                        colOrder.append(colB)
+                out = super(Design_Matrix, out).append(outdf, ignore_index=True)
+                out = out[colOrder]
+            else:
+                raise ValueError("Separate concatenation impossible. 
+
+    def vertcat(self, df, separate=True, addIntercept=False, uniqueCols=[]):
+        """Used by .append(). Append another design matrix row-wise (vert cat).
+        Always returns a new design matrix.
+
+        """
+
+        outdf = df.copy()
+        assert self.hasIntercept == outdf.hasIntercept, ("Intercepts are "
+                "ambiguous. Both design matrices should match in whether "
+                "they do or don't have intercepts.")
+
+        if addIntercept:
+            self['intercept'] = 1
+            self.hasIntercept = True
+            outdf['intercept'] = 1
+            outdf.hasIntercept = True
+        if separate:
+            if self.hasIntercept:
+                uniqueCols = uniqueCols + ['intercept']
+            idx_1 = []
+            idx_2 = []
+            for col in uniqueCols:
+                # To match substrings within column names, we loop over each
+                # element and search for the uniqueCol as a substring within it;
+                # first we check whether the uniqueCol actually occurs in both
+                # design matrices, and if so we change its name before
+                # concatenating
+                joint = set(self.columns) & set(outdf.columns)
+                shared = [elem for elem in joint if col in elem]
+                if shared:
+                    idx_1 += [i for i, elem in enumerate(self.columns) if col in elem]
+                    idx_2 += [i for i, elem in enumerate(outdf.columns) if col in elem]
+            aRename = {self.columns[elem]: '0_'+self.columns[elem] for elem in idx_1}
+            bRename = {outdf.columns[elem]: '1_'+outdf.columns[elem] for elem in idx_2}
+            if aRename:
+                out = self.rename(columns=aRename)
+                outdf = outdf.rename(columns=bRename)
+                colOrder = []
+                # retain original column order as closely as possible
+                for colA, colB in zip(out.columns, outdf.columns):
+                    colOrder.append(colA)
+                    if colA != colB:
+                        colOrder.append(colB)
+                out = super(Design_Matrix, out).append(outdf, ignore_index=True)
+                out = out[colOrder]
+            else:
+                raise ValueError("Separate concatenation impossible. None of "
+                                 "the requested unique columns were found in "
+                                 "both design matrices.")
+        else:
+            out = super(Design_Matrix, self).append(outdf, ignore_index=True)
+
+        out.convolved = self.convolved
+        out.sampling_rate = self.sampling_rate
+        out.hasIntercept = self.hasIntercept
+
+        return out
+
+    def vif(self):
+        """Compute variance inflation factors amongst columns of the design
+        matrix, ignoring the intercept. Much faster than statsmodels and more
+        reliable too. Uses the same method as Matlab and R (diagonal
+        elements of the inverted correlation matrix).
+
+        Returns:
+            vifs (list): list with length == number of columns - intercept
+
+        """
+        assert self.shape[1] > 1, "Can't compute vif with only 1 column!"
+        if self.hasIntercept:
+            idx = [i for i, elem in enumerate(self.columns) if 'intercept' not in elem]
+            out = self[self.columns[np.r_[idx]]]
+        else:
+            out = self[self.columns]
+
+        if np.any(out.corr().values < 0):
+            warnings.warn("Correlation matrix has negative values!")
+
+        return np.diag(np.linalg.inv(out.corr()), 0)
+
+    def heatmap(self, figsize=(8, 6), **kwargs):
+        """Visualize Design Matrix spm style. Use .plot() for typical pandas
+        plotting functionality. Can pass optional keyword args to seaborn
+        heatmap.
+
+        """
+        fig, ax = plt.subplots(1, figsize=figsize)
+        ax = sns.heatmap(self, cmap='gray', cbar=False, ax=ax, **kwargs)
+        for _, spine in ax.spines.items():
+            spine.set_visible(True)
+        for i, label in enumerate(ax.get_yticklabels()):
+            if i == 0 or i == self.shape[0]-1:
+                label.set_visible(True)
+            else:
+                label.set_visible(False)
+        ax.axhline(linewidth=4, color="k")
+        ax.axvline(linewidth=4, color="k")
+        ax.axhline(y=self.shape[0], color='k', linewidth=4)
+        ax.axvline(x=self.shape[1], color='k', linewidth=4)
+        plt.yticks(rotation=0)
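+
+    # Sketch of a typical vif() check; the threshold of 5 is a common rule
+    # of thumb, not something this patch prescribes:
+    #
+    #   predictors = [c for c in dm.columns if 'intercept' not in c]
+    #   for col, v in zip(predictors, dm.vif()):
+    #       if v > 5:
+    #           print('%s may be collinear (VIF=%.2f)' % (col, v))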
+
+    def convolve(self, conv_func='hrf', colNames=None):
+        """Perform convolution using an arbitrary function.
+
+        Args:
+            conv_func (ndarray or str): either a 1d numpy array containing the
+                                        output of a function that you want to
+                                        convolve; a samples by kernels 2d array
+                                        of several kernels to convolve; or the
+                                        string 'hrf', which defaults to a glover
+                                        HRF function at the Design_Matrix's
+                                        sampling_rate
+            colNames (list): what columns to perform convolution on; defaults
+                             to all columns, skipping the intercept and any
+                             columns containing 'poly' or 'cosine'
+
+        """
+        assert self.sampling_rate is not None, "Design_Matrix has no sampling_rate set!"
+
+        if colNames is None:
+            colNames = [col for col in self.columns if 'intercept' not in col and 'poly' not in col and 'cosine' not in col]
+        nonConvolved = [col for col in self.columns if col not in colNames]
+
+        if isinstance(conv_func, six.string_types):
+            assert conv_func == 'hrf', "Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays."
+            conv_func = glover_hrf(self.sampling_rate, oversampling=1)
+
+        else:
+            assert isinstance(conv_func, np.ndarray), 'Must provide a 1d or 2d numpy array for convolution!'
+
+        if len(conv_func.shape) > 1:
+            assert conv_func.shape[0] > conv_func.shape[1], '2d conv_func must be formatted as samples X kernels!'
+            conv_mats = []
+            for i in range(conv_func.shape[1]):
+                c = self[colNames].apply(lambda x: np.convolve(x, conv_func[:, i])[:self.shape[0]])
+                c.columns = [str(col)+'_c'+str(i) for col in c.columns]
+                conv_mats.append(c)
+            out = pd.concat(conv_mats + [self[nonConvolved]], axis=1)
+        else:
+            c = self[colNames].apply(lambda x: np.convolve(x, conv_func)[:self.shape[0]])
+            c.columns = [str(col)+'_c0' for col in c.columns]
+            out = pd.concat([c, self[nonConvolved]], axis=1)
+
+        out.convolved = colNames
+        out.sampling_rate = self.sampling_rate
+        out.hasIntercept = self.hasIntercept
+        return out
+
+    def downsample(self, target, **kwargs):
+        """Downsample columns of design matrix. Relies on
+        nltools.stats.downsample, but ensures that the returned object is a
+        design matrix.
+
+        Args:
+            target (float): downsampling target, typically in samples not
+                            seconds
+            kwargs: additional inputs to nltools.stats.downsample
+
+        """
+        df = downsample(self, sampling_freq=self.sampling_rate, target=target, **kwargs)
+
+        # convert df to a design matrix
+        newMat = Design_Matrix(df, sampling_rate=target,
+                               convolved=self.convolved,
+                               hasIntercept=self.hasIntercept)
+        return newMat
+
+    def upsample(self, target, **kwargs):
+        """Upsample columns of design matrix. Relies on
+        nltools.stats.upsample, but ensures that the returned object is a
+        design matrix.
+
+        Args:
+            target (float): upsampling target, typically in samples not
+                            seconds
+            kwargs: additional inputs to nltools.stats.upsample
+
+        """
+        df = upsample(self, sampling_freq=self.sampling_rate, target=target, **kwargs)
+
+        # convert df to a design matrix
+        newMat = Design_Matrix(df, sampling_rate=target,
+                               convolved=self.convolved,
+                               hasIntercept=self.hasIntercept)
+        return newMat
+
+    def zscore(self, colNames=[]):
+        """Z-score specific columns of design matrix. Relies on
+        nltools.stats.zscore, but ensures that the returned object is a
+        design matrix.
+
+        Args:
+            colNames (list): columns to z-score; defaults to all columns
+
+        """
+        colOrder = self.columns
+        if not list(colNames):
+            colNames = self.columns
+        nonZ = [col for col in self.columns if col not in colNames]
+        df = zscore(self[colNames])
+        df = pd.concat([df, self[nonZ]], axis=1)
+        df = df[colOrder]
+        newMat = Design_Matrix(df, sampling_rate=self.sampling_rate,
+                               convolved=self.convolved,
+                               hasIntercept=self.hasIntercept)
+
+        return newMat
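+
+    # Sketch of a typical preprocessing chain (dm is a hypothetical
+    # Design_Matrix with sampling_rate set; the target value is illustrative):
+    #
+    #   dm_conv = dm.convolve()               # glover HRF at dm.sampling_rate
+    #   dm_ds = dm_conv.downsample(target=1)  # still returns a Design_Matrix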
+
+    def addpoly(self, order=0, include_lower=True):
+        """Add nth order polynomial terms as columns to design matrix.
+
+        Args:
+            order (int): what order terms to add; 0 = constant/intercept
+                         (default), 1 = linear, 2 = quadratic, etc.
+            include_lower (bool): whether to add lower order terms if order > 0
+
+        """
+
+        polyDict = {}
+        if include_lower:
+            if order > 0:
+                for i in range(0, order+1):
+                    if i == 0:
+                        if self.hasIntercept:
+                            warnings.warn("Design Matrix already has "
+                                          "intercept...skipping")
+                        else:
+                            polyDict['intercept'] = np.repeat(1, self.shape[0])
+                    else:
+                        polyDict['poly_' + str(i)] = (np.arange(self.shape[0]) - np.mean(np.arange(self.shape[0]))) ** i
+            else:
+                if self.hasIntercept:
+                    raise ValueError("Design Matrix already has intercept!")
+                else:
+                    polyDict['intercept'] = np.repeat(1, self.shape[0])
+        else:
+            if order == 0:
+                if self.hasIntercept:
+                    raise ValueError("Design Matrix already has intercept!")
+                else:
+                    polyDict['intercept'] = np.repeat(1, self.shape[0])
+            else:
+                polyDict['poly_'+str(order)] = (np.arange(self.shape[0]) - np.mean(np.arange(self.shape[0]))) ** order
+
+        toAdd = Design_Matrix(polyDict)
+        out = self.append(toAdd, axis=1)
+        if 'intercept' in polyDict.keys() or self.hasIntercept:
+            out.hasIntercept = True
+        return out
+
+    def add_dct_basis(self, duration=180):
+        """Add cosine basis functions to Design_Matrix columns, based on
+        spm-style discrete cosine transform, for use in high-pass filtering.
+
+        Args:
+            duration (int): length of filter in seconds
+
+        """
+        assert self.sampling_rate is not None, "Design_Matrix has no sampling_rate set!"
+        basis_mat = make_cosine_basis(self.shape[0], self.sampling_rate, duration)
+
+        basis_frame = Design_Matrix(basis_mat,
+                                    sampling_rate=self.sampling_rate,
+                                    hasIntercept=False, convolved=None)
+
+        basis_frame.columns = ['cosine_'+str(i+1) for i in range(basis_frame.shape[1])]
+
+        out = self.append(basis_frame, axis=1)
+
+        return out
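+
+    # Sketch of typical nuisance-regressor usage (dm is a hypothetical
+    # Design_Matrix with sampling_rate set):
+    #
+    #   dm_poly = dm.addpoly(order=2)      # intercept + linear + quadratic
+    #   dm_full = dm_poly.add_dct_basis()  # spm-style high-pass cosine basis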
+
+def all_same(items):
+    return np.all([x == items[0] for x in items])
+
+def _vif(X, y):
+    """
+    DEPRECATED
+    Helper function to compute variance inflation factor. Unclear whether
+    there are errors with this method relative to statsmodels, which itself
+    is sometimes inconsistent with R. R uses the diagonals of the inverted
+    correlation matrix, which is what Design_Matrix.vif() implements instead
+    of this approach.
+
+    Args:
+        X: (Dataframe) explanatory variables
+        y: (Dataframe/Series) outcome variable
+
+    """
+
+    b, resid, _, _ = np.linalg.lstsq(X, y)
+    SStot = y.var(ddof=0)*len(y)
+    if SStot == 0:
+        SStot = .0001  # to prevent divide by 0 errors
+    r2 = 1.0 - (resid/SStot)
+    if r2 == 1:
+        r2 = 0.9999  # to prevent divide by 0 errors
+    return (1.0/(1.0-r2))[0]
diff --git a/nltools/utils.py b/nltools/utils.py
index 9c82f970..ef62b352 100644
--- a/nltools/utils.py
+++ b/nltools/utils.py
@@ -13,7 +13,12 @@
     'spm_time_derivative',
     'glover_time_derivative',
     'spm_dispersion_derivative',
-    'make_cosine_basis']
+    'make_cosine_basis',
+    'attempt_to_import',
+    'all_same',
+    'concatenate',
+    '_bootstrap_apply_func',
+    ]
 __author__ = ["Luke Chang"]
 __license__ = "MIT"
@@ -274,7 +279,8 @@ def make_cosine_basis(nsamples,sampling_freq,filter_length):
     #Drop constant
     C = C[:,1:]
     if C.size == 0:
-        raise ValueError('Basis function creation failed! nsamples is too small for requested filter_length.')
+        raise ValueError('Basis function creation failed!'
+                         ' nsamples is too small for requested filter_length.')
     else:
         return C
@@ -294,3 +300,33 @@ def attempt_to_import(dependency, name=None, fromlist=None):
         mod = None
     module_names[name] = Dependency(dependency, mod)
     return mod
+
+def all_same(items):
+    return np.all([x == items[0] for x in items])
+
+def concatenate(data):
+    '''Concatenate a list of Brain_Data() or Adjacency() objects'''
+
+    if not isinstance(data, list):
+        raise ValueError('Make sure you are passing a list of objects.')
+
+    if all([isinstance(x, type(data[0])) for x in data]):
+        # Temporarily removing this check because of circular imports (LC)
+        # if not isinstance(data[0], (Brain_Data, Adjacency)):
+        #     raise ValueError('Make sure you are passing a list of Brain_Data'
+        #                      ' or Adjacency objects.')
+
+        out = data[0].__class__()
+        for i in data:
+            out = out.append(i)
+    else:
+        raise ValueError('Make sure all objects in the list are the same type.')
+    return out
+
+def _bootstrap_apply_func(data, function, *args, **kwargs):
+    '''Bootstrap helper function. Sample with replacement and apply function'''
+    data_row_id = range(data.shape()[0])
+    new_dat = data[np.random.choice(data_row_id,
+                                    size=len(data_row_id),
+                                    replace=True)]
+    return getattr(new_dat, function)(*args, **kwargs)

From cfcdd69c733c308e19691d10c6c17163a81fc2ef Mon Sep 17 00:00:00 2001
From: Luke Chang
Date: Sun, 12 Nov 2017 17:21:21 -0500
Subject: [PATCH 2/2] moved all_same to utils

Former-commit-id: ba5f15d8a0e332d050388da2fbb11e0b3b5c2454
---
 nltools/data/design_matrix.py | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/nltools/data/design_matrix.py b/nltools/data/design_matrix.py
index 96c3b8a9..a94e1f9a 100644
--- a/nltools/data/design_matrix.py
+++ b/nltools/data/design_matrix.py
@@ -421,9 +421,6 @@ def add_dct_basis(self, duration=180):
 
     return out
 
-def all_same(items):
-    return np.all([x == items[0] for x in items])
-
 def _vif(X, y):
     """
     DEPRECATED