diff --git a/ModelPipeline.py b/ModelPipeline.py index ef943cf..8a29248 100644 --- a/ModelPipeline.py +++ b/ModelPipeline.py @@ -9,16 +9,18 @@ import scipy.io as sio import torch import matplotlib.pyplot as plt +import matplotlib from sklearn.linear_model import ridge_regression from scipy import io from scipy.sparse.linalg import eigsh from utils import test_train_split, evaluate_model_torch, subtract_spont, corrcoef, PCA -from sklearn.decomposition import SparsePCA +from sklearn.decomposition import SparsePCA#, PCA import pandas as pd from scipy import stats import matplotlib.gridspec as gridspec from sklearn.decomposition import NMF import time +from sklearn.decomposition import LatentDirichletAllocation class ModelPipeline(): def __init__(self,data_path, save_path, model,nr_of_components,lambdas=None, alphas=None): @@ -28,12 +30,20 @@ def __init__(self,data_path, save_path, model,nr_of_components,lambdas=None, alp self.lambdas=lambdas self.alphas=alphas self.nr_of_components=nr_of_components - + self.mat_file_lst=['natimg2800_M170717_MP034_2017-09-11.mat']#, +#'natimg2800_M160825_MP027_2016-12-14.mat', +#'natimg2800_M161025_MP030_2017-05-29.mat','natimg2800_M170604_MP031_2017-06-28.mat','natimg2800_M170714_MP032_2017-09-14.mat','natimg2800_M170714_MP032_2017-08-07.mat','natimg2800_M170717_MP033_2017-08-20.mat'] + def fit_model(self): #for filename in glob.glob(os.path.join(self.data_path, '*MP034_2017-09-11.mat')): - for filename in glob.glob(os.path.join(self.data_path, '*.mat')): - print(filename[45:85]) - data = io.loadmat(filename) + #for filename in glob.glob(os.path.join(self.data_path, '*.mat')): + #self.mat_file_lst=[#'natimg2800_M170717_MP034_2017-09-11.mat',#'natimg2800_M160825_MP027_2016-12-14.mat', +#'natimg2800_M161025_MP030_2017-05-29.mat'#, +#'natimg2800_M170604_MP031_2017-06-28.mat','natimg2800_M170714_MP032_2017-09-14.mat','natimg2800_M170714_MP032_2017-08-07.mat','natimg2800_M170717_MP033_2017-08-20.mat' +#] + for filename in self.mat_file_lst: + print(filename) + data = io.loadmat(self.data_path+filename) resp = data['stim'][0]['resp'][0] spont =data['stim'][0]['spont'][0] if self.model=='EnsemblePursuit': @@ -47,13 +57,24 @@ def fit_model(self): end=time.time() tm=end-start print('Time', tm) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_V_ep.npy',V) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_U_ep.npy',U) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_cost_ep.npy',cost_lst) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_n_neurons_ep.npy',nr_of_neurons) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_ensemble_neuron_lst.npy',ensemble_neuron_lst) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_time_ep.npy',tm) - np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_seed_neurons.npy',seed_neurons) + #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_V_ep.npy',V) + + np.save(self.save_path+filename+'_V_ep.npy',V) + + np.save(self.save_path+filename+'_U_ep.npy',U) + + np.save(self.save_path+filename+'_ensemble_pursuit_lst_ep.npy',ensemble_neuron_lst) + np.save(self.save_path+filename+'_seed_neurons_ep.npy', seed_neurons) + np.save(self.save_path+filename+'_time_ep.npy', tm) + + + +#np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_U_ep.npy',U) + #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_cost_ep.npy',cost_lst) + #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_n_neurons_ep.npy',nr_of_neurons) + #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_ensemble_neuron_lst.npy',ensemble_neuron_lst) + #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_time_ep.npy',tm) + #np.save(self.save_path+filename[45:85]+'_n_av_n_'+str(neuron_init_dict['parameters']['n_av_neurons'])+'_'+str(lambd_)+'_'+str(self.nr_of_components)+'_seed_neurons.npy',seed_neurons) if self.model=='SparsePCA': X=subtract_spont(spont,resp) X=stats.zscore(X) @@ -79,10 +100,43 @@ def fit_model(self): X-=X.min(axis=0) for alpha in self.alphas: model = NMF(n_components=self.nr_of_components, init='nndsvd', random_state=7,alpha=alpha) + start=time.time() V=model.fit_transform(X) + end=time.time() + time_=end-start + print(end-start) U=model.components_ np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_U_NMF.npy',U) np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_V_NMF.npy',V) + np.save(self.save_path+filename[45:85]+'_'+str(alpha)+'_'+str(self.nr_of_components)+'_time_NMF.npy',time_) + if self.model=='PCA': + X=subtract_spont(spont,resp) + X=stats.zscore(X) + pca=PCA(n_components=self.nr_of_components) + start=time.time() + V=pca.fit_transform(X) + U=pca.components_ + end=time.time() + elapsed_time=end-start + #V=pca.components_ + var=pca.explained_variance_ + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_V_pca.npy',V) + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_time_pca.npy',elapsed_time) + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_var_pca.npy',var) + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_U_pca.npy',U) + if self.model=='LDA': + X=resp + X-=X.min(axis=0) + lda=LatentDirichletAllocation(n_components=self.nr_of_components, random_state=7) + start=time.time() + V=lda.fit_transform(X) + end=time.time() + elapsed_time=end-start + print('time',elapsed_time) + U=lda.components_ + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_V_lda.npy',V) + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_U_lda.npy',U) + np.save(self.save_path+filename[45:85]+'_'+str(self.nr_of_components)+'_time_lda.npy',elapsed_time) def knn(self): @@ -92,12 +146,32 @@ def knn(self): model_string='*_V_ep.npy' if self.model=='NMF': model_string='*_V_NMF.npy' + if self.model=='PCA': + model_string='*_V_pca.npy' + if self.model=='LDA': + model_string='*_V_lda.npy' + if self.model=='all': + #self.save_path=self.data_path + model_string='*.mat' columns=['Experiment','accuracy'] acc_df=pd.DataFrame(columns=columns) + print(self.save_path) for filename in glob.glob(os.path.join(self.save_path, model_string)): - V=np.load(filename) + if self.model=='all': + data = io.loadmat(filename) + resp = data['stim'][0]['resp'][0] + spont =data['stim'][0]['spont'][0] + X=subtract_spont(spont,resp) + V=stats.zscore(X) + else: + print(filename) + V=np.load(filename) + #if self.model='PCA': + print(V.shape) #print(self.data_path+'/'+filename[43:78]+'.mat') - istim=sio.loadmat(self.data_path+'/'+filename[43:78]+'.mat')['stim']['istim'][0][0].astype(np.int32) + istim_path=filename[len(self.save_path):len(self.save_path)+len(self.mat_file_lst[0])] + print(istim_path) + istim=sio.loadmat(self.data_path+istim_path)['stim']['istim'][0][0].astype(np.int32) istim -= 1 # get out of MATLAB convention istim = istim[:,0] nimg = istim.max() # these are blank stims (exclude them) @@ -105,7 +179,7 @@ def knn(self): istim = istim[istim