Appending guarantees deterministic sort order. Rewrote and cleaned up Design Matrix tutorial.

Former-commit-id: 0fa3615
ejolly committed Apr 28, 2018
1 parent 1663463 commit 774a3a6
Showing 28 changed files with 571 additions and 281 deletions.
78 changes: 44 additions & 34 deletions docs/auto_examples/01_DataOperations/plot_design_matrix.ipynb

Large diffs are not rendered by default.

201 changes: 146 additions & 55 deletions docs/auto_examples/01_DataOperations/plot_design_matrix.py

Large diffs are not rendered by default.

@@ -1 +1 @@
-674c1c7e55385d8ebc2e109812f5c3b8
+d25dc9c6791965dca7df05760d6549bf
284 changes: 181 additions & 103 deletions docs/auto_examples/01_DataOperations/plot_design_matrix.rst

Large diffs are not rendered by default.

Binary file not shown.
Binary file modified docs/auto_examples/01_DataOperations/plot_mask_codeobj.pickle
Binary file not shown.
Binary file not shown.
Binary file modified docs/auto_examples/02_Analysis/plot_decomposition_codeobj.pickle
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file modified docs/auto_examples/auto_examples_jupyter.zip
Binary file not shown.
Binary file modified docs/auto_examples/auto_examples_python.zip
Binary file not shown.
4 changes: 2 additions & 2 deletions docs/auto_examples/index.rst
@@ -235,13 +235,13 @@ Neuroimaging Analysis Examples
     .. container:: sphx-glr-download

-        :download:`Download all examples in Python source code: auto_examples_python.zip <//Users/lukechang/Github/nltools/docs/auto_examples/auto_examples_python.zip>`
+        :download:`Download all examples in Python source code: auto_examples_python.zip <//Users/Esh/Documents/Python/Cosan/nltools/docs/auto_examples/auto_examples_python.zip>`

     .. container:: sphx-glr-download

-        :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip <//Users/lukechang/Github/nltools/docs/auto_examples/auto_examples_jupyter.zip>`
+        :download:`Download all examples in Jupyter notebooks: auto_examples_jupyter.zip <//Users/Esh/Documents/Python/Cosan/nltools/docs/auto_examples/auto_examples_jupyter.zip>`

     .. only:: html
201 changes: 146 additions & 55 deletions examples/01_DataOperations/plot_design_matrix.py

Large diffs are not rendered by default.

57 changes: 39 additions & 18 deletions nltools/data/design_matrix.py
@@ -105,6 +105,14 @@ def _inherit_attributes(self,
             setattr(dm_out, item, getattr(self,item))
         return dm_out

+    def _sort_cols(self):
+        """
+        Helper function that ensures the columns of a Design_Matrix are sorted deterministically: a) columns not separated during append operations, b) columns separated during append operations, c) polynomials. Called primarily during vertical concatenation and cleaning.
+        """
+        data_cols = [elem for elem in self.columns if not elem.split('_')[0].isdigit() and elem not in self.polys]
+        separated_cols = [elem for elem in self.columns if elem.split('_')[0].isdigit() and elem not in self.polys]
+        return self[data_cols + separated_cols + self.polys]
+
     def details(self):
         """Print class meta data.
@@ -144,7 +152,7 @@ def append(self, dm, axis=0, keep_separate = True, unique_cols = [], fill_na=0,
         if not all([isinstance(elem,self.__class__) for elem in to_append]):
             raise TypeError("Each object to be appended must be a Design_Matrix!")
         if not all([elem.sampling_freq == self.sampling_freq for elem in to_append]):
-            raise ValueError("All Design Matrices must have the same sampling rate!")
+            raise ValueError("All Design Matrices must have the same sampling frequency!")

         if axis == 1:
             if any([not set(self.columns).isdisjoint(elem.columns) for elem in to_append]):
@@ -190,6 +198,7 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
         modify_to_append = []
         all_polys = []
         cols_to_separate = []
+        all_separated = []

         if len(unique_cols):
             if not keep_separate:
@@ -216,7 +225,10 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
                         count = c.split('_')[0]
                         unique_count.append(int(count))
                     else:
-                        to_rename[c] = '0_' + c
+                        new_name = '0_' + c
+                        all_separated.append(new_name)
+                        to_rename[c] = new_name
+                        all_separated.append(new_name)
                 cols_to_separate.append(searchstr)

             if to_rename:
@@ -256,10 +268,12 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
                             count = int(c.split('_')[0])
                             name = '_'.join(c.split('_')[1:])
                             count += max_unique_count + 1
-                            to_rename[c] = str(count) + '_' + name
+                            new_name = str(count) + '_' + name
+                            to_rename[c] = new_name
                         else:
-                            to_rename[c] = str(max_unique_count + 1) + '_' + c
-
+                            new_name = str(max_unique_count + 1) + '_' + c
+                            to_rename[c] = new_name
+                            all_separated.append(new_name)
                     modify_to_append.append(dm.rename(columns=to_rename))
                     max_unique_count += 1
                 else:
@@ -282,9 +296,12 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
                             count = int(c.split('_')[0])
                             name = '_'.join(c.split('_')[1:])
                             count += max_unique_count + 1
-                            to_rename[c] = str(count) + '_' + name
+                            new_name = str(count) + '_' + name
+                            to_rename[c] = new_name
                         else:
-                            to_rename[c] = str(max_unique_count + 1) + '_' + c
+                            new_name = str(max_unique_count + 1) + '_' + c
+                            to_rename[c] = new_name
+                            all_separated.append(new_name)
                     modify_to_append.append(dm.rename(columns=to_rename))
                     max_unique_count += 1
                 else:
@@ -339,7 +356,6 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
                     current_poly_max += 1
                     all_polys += list(to_rename.values())
-

             # Handle renaming additional unique cols to keep separate
             if cols_to_separate:
                 if verbose:
@@ -353,9 +369,12 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
                         count = int(c.split('_')[0])
                         name = '_'.join(c.split('_')[1:])
                         count += max_unique_count + 1
-                        to_rename[c] = str(count) + '_' + name
+                        new_name = str(count) + '_' + name
+                        to_rename[c] = new_name
                     else:
-                        to_rename[c] = str(max_unique_count + 1) + '_' + c
+                        new_name = str(max_unique_count + 1) + '_' + c
+                        to_rename[c] = new_name
+                        all_separated.append(new_name)

                 # Combine renamed polynomials and renamed unique_cols
                 modify_to_append.append(temp_dm.rename(columns=to_rename))
@@ -382,10 +401,12 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
                         count = int(c.split('_')[0])
                         name = '_'.join(c.split('_')[1:])
                         count += max_unique_count + 1
-                        to_rename[c] = str(count) + '_' + name
+                        new_name = str(count) + '_' + name
+                        to_rename[c] = new_name
                     else:
-                        to_rename[c] = str(max_unique_count + 1) + '_' + c
-
+                        new_name = str(max_unique_count + 1) + '_' + c
+                        to_rename[c] = new_name
+                        all_separated.append(new_name)
                     modify_to_append.append(dm.rename(to_rename))
                     max_unique_count += 1
                 else:
@@ -403,10 +424,8 @@ def _vertcat(self, df, keep_separate, unique_cols, fill_na, verbose):
         out.convolved = self.convolved
         out.multi = True
         out.polys = all_polys
-        data_cols = [elem for elem in out.columns if elem not in out.polys]
-        out = out[data_cols + out.polys]

-        return out
+        return out._sort_cols()

     def vif(self,exclude_polys=True):
         """Compute variance inflation factor amongst columns of design matrix,
@@ -472,7 +491,7 @@ def convolve(self, conv_func='hrf', columns=None):
             assert len(conv_func.shape) <= 2, "2d conv_func must be formatted as samples X kernels!"
         elif isinstance(conv_func, six.string_types):
             assert conv_func == 'hrf', "Did you mean 'hrf'? 'hrf' can generate a kernel for you, otherwise custom kernels should be passed in as 1d or 2d arrays."
-            conv_func = glover_hrf(1. / self.sampling_freq, oversampling=1)
+            conv_func = glover_hrf(1. / self.sampling_freq, oversampling=1.)

         else:
             raise TypeError("conv_func must be a 1d or 2d numpy array organized as samples x kernels, or the string 'hrf' for the canonical glover hrf")
@@ -611,7 +630,7 @@ def add_dct_basis(self,duration=180,drop=0):
         if any([elem.count('_') == 2 and 'cosine' in elem for elem in self.polys]):
             raise AmbiguityError("It appears that this Design Matrix contains cosine bases that were kept separate from a previous append operation. This makes it ambiguous when adding polynomial terms. Try calling .add_dct_basis() on each separate Design Matrix before appending them instead.")

-        basis_mat = make_cosine_basis(self.shape[0],self.sampling_freq,duration,drop=drop)
+        basis_mat = make_cosine_basis(self.shape[0],1./self.sampling_freq,duration,drop=drop)

         basis_frame = Design_Matrix(basis_mat,
                                     sampling_freq=self.sampling_freq,columns = [str(elem) for elem in range(basis_mat.shape[1])])
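
The fix matters because make_cosine_basis (see nltools/stats.py below) effectively expects the sampling period, not the frequency. A usage sketch under that assumption:

    import numpy as np
    from nltools.data import Design_Matrix

    # 100 samples at TR = 2 s gives a 200 s run; 180 s is the high-pass cutoff
    dm = Design_Matrix(np.ones((100, 1)), sampling_freq=.5, columns=['stim'])
    dm = dm.add_dct_basis(duration=180)  # drift regressors land in dm.polys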
@@ -688,6 +707,8 @@ def clean(self,fill_na=0,exclude_polys=False,thresh=.95,verbose=True):
                         remove.append(j)
         if remove:
             out = out.drop(remove, axis=1)
+            out.polys = [elem for elem in out.polys if elem not in remove]
+            out = out._sort_cols()
         else:
             print("Dropping columns not needed...skipping")
         np.seterr(**old_settings)
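
Pruning dropped columns from .polys keeps later _sort_cols and vif(exclude_polys=True) calls consistent. A sketch of the thresholding behavior (assumes the default .95 correlation threshold in the signature above):

    import numpy as np
    from nltools.data import Design_Matrix

    dm = Design_Matrix(np.random.randn(20, 2), sampling_freq=.5, columns=['a', 'b'])
    dm['b'] = dm['a']               # perfectly correlated duplicate column
    cleaned = dm.clean(thresh=.95)  # drops 'b', keeps 'a', then re-sorts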
2 changes: 1 addition & 1 deletion nltools/file_reader.py
@@ -75,7 +75,7 @@ def onsets_to_dm(F, sampling_freq, run_length, header='infer', sort=False, keep_
     df['Onset'] = df['Onset'].apply(lambda x: int(np.floor(x/TR)))

     # Build dummy codes
-    X = Design_Matrix(np.zeros([run_length,len(df['Stim'].unique())]),columns=df['Stim'].unique(),sampling_rate=TR)
+    X = Design_Matrix(np.zeros([run_length,len(df['Stim'].unique())]),columns=df['Stim'].unique(),sampling_freq=sampling_freq)
     for i, row in df.iterrows():
         if df.shape[1] == 3:
             dur = np.ceil(row['Duration']/TR)
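
The file reader now forwards the frequency it was called with instead of the removed sampling_rate=TR keyword. A hypothetical call (the file name and column layout are invented; a 2 s TR corresponds to sampling_freq = .5):

    from nltools.file_reader import onsets_to_dm

    # assumes 'onsets.csv' has 'Stim' and 'Onset' (and optionally 'Duration') columns
    dm = onsets_to_dm('onsets.csv', sampling_freq=.5, run_length=100)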
9 changes: 5 additions & 4 deletions nltools/stats.py
@@ -563,14 +563,15 @@ def make_cosine_basis(nsamples, sampling_freq, filter_length, unit_scale=True, d
     # Drop intercept ala SPM
     C = C[:,1:]

+    if C.size == 0:
+        raise ValueError('Basis function creation failed! nsamples is too small for requested filter_length.')
+
     if unit_scale:
         C *= 1. / C[0,0]

     C = C[:, drop:]
-    if C.size == 0:
-        raise ValueError('Basis function creation failed! nsamples is too small for requested filter_length.')
-    else:
-        return C
+
+    return C

 def transform_pairwise(X, y):
     '''Transforms data into pairs with balanced labels for ranking
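
Moving the emptiness check ahead of unit scaling fails fast with a clear message instead of an IndexError from C[0,0] on a zero-column basis. A sketch of both paths (argument values are illustrative; the second argument behaves as the sampling period, which is why add_dct_basis above now passes 1./sampling_freq):

    from nltools.stats import make_cosine_basis

    basis = make_cosine_basis(128, 2., 180)  # 128 TRs at 2 s with a 180 s cutoff
    print(basis.shape)                       # (128, n_basis_functions)

    try:
        make_cosine_basis(5, 2., 180)        # run far too short for the cutoff
    except ValueError as e:
        print(e)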
14 changes: 6 additions & 8 deletions nltools/tests/test_data.py
@@ -617,13 +617,13 @@ def test_designmat(tmpdir):
     assert mat.add_poly(2,include_lower=False).shape[1] == 5

     matpd = matp.add_dct_basis()
-    assert matpd.shape[1] == 9
+    assert matpd.shape[1] == 18

     assert all(matpd.vif() < 2.0)
     assert not all(matpd.vif(exclude_polys=False) < 2.0)

     matc = matpd.clean()
-    assert matc.shape[1] == 7
+    assert matc.shape[1] == 16

     # Standard convolve
     assert matpd.convolve().shape == matpd.shape
@@ -653,18 +682,17 @@ def test_designmat(tmpdir):
     # Otherwise stack them
     assert matpd.append(matpd,keep_separate=False).shape[1] == matpd.shape[1]
     # Keep a single stimulus column separate
-    assert matpd.append(matpd,unique_cols=['face_A']).shape[1] == 15
+    assert matpd.append(matpd,unique_cols=['face_A']).shape[1] == 33
     # Keep a common stimulus class separate
-    assert matpd.append(matpd,unique_cols=['face*']).shape[1] == 16
+    assert matpd.append(matpd,unique_cols=['face*']).shape[1] == 34
     # Keep a common stimulus class and a different single stim separate
-    assert matpd.append(matpd,unique_cols=['face*','house_A']).shape[1] == 17
+    assert matpd.append(matpd,unique_cols=['face*','house_A']).shape[1] == 35
     # Keep multiple stimulus classes separate
-    assert matpd.append(matpd,unique_cols=['face*','house*']).shape[1] == 18
+    assert matpd.append(matpd,unique_cols=['face*','house*']).shape[1] == 36

     # Growing a multi-run design matrix; keeping things separate
     num_runs = 4
     all_runs = Design_Matrix(sampling_freq=.5)
-    run_list = []
     for i in range(num_runs):
         run = Design_Matrix(np.array([
                                      [1,0,0,0],
@@ -683,7 +682,6 @@ def test_designmat(tmpdir):
                             columns=['stim_A','stim_B','cond_C','cond_D']
                             )
         run = run.add_poly(2)
-        run_list.append(run)
         all_runs = all_runs.append(run,unique_cols=['stim*','cond*'])

     assert all_runs.shape == (44, 28)
