diff --git a/DataPreprocessing/Neuroscan_locs_orig.mat b/DataPreprocessing/Neuroscan_locs_orig.mat
new file mode 100644
index 0000000..bc431be
Binary files /dev/null and b/DataPreprocessing/Neuroscan_locs_orig.mat differ
diff --git a/DataPreprocessing/__pycache__/utils.cpython-36.pyc b/DataPreprocessing/__pycache__/utils.cpython-36.pyc
new file mode 100644
index 0000000..135d629
Binary files /dev/null and b/DataPreprocessing/__pycache__/utils.cpython-36.pyc differ
diff --git a/DataPreprocessing/data_split_cross.py b/DataPreprocessing/data_split_cross.py
new file mode 100644
index 0000000..91761c7
--- /dev/null
+++ b/DataPreprocessing/data_split_cross.py
@@ -0,0 +1,79 @@
+import scipy.io as sio
+import scipy.io
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+'''
+cross-subject data splitting
+'''
+
+mat = scipy.io.loadmat('ucieeg.mat')
+
+
+data = mat['X'].astype('float32')
+
+#data = np.transpose (data, (0, 3 ,2, 1))
+
+print (np.shape (data))
+
+label_alcoholism = mat['y_alcoholic'].astype('int')
+label_alcoholism = label_alcoholism.reshape(np.shape(data)[0])
+
+label_stimulus = mat['y_stimulus'].astype('int')- 1
+label_stimulus = label_stimulus.reshape(np.shape(data)[0])
+
+label_id = mat['subjectid'].astype('int') - 1
+label_id = label_id.reshape(np.shape (data)[0])
+
+
+#train_data, test_data, val_data = [], [], []
+#train_label, test_label, val_label = [], [], []
+
+num_subject = 122
+num_datapoint = data.shape[0]
+
+mask = np.zeros(num_subject)
+
+# 7-2-1 train-test-validation cross-subject split: each subject is assigned to a set at random
+for i in range(num_subject):
+    r = np.random.rand()
+    if r < 0.7:
+        mask[i] = 0
+    elif r >= 0.7 and r < 0.9:
+        mask[i] = 1
+    else:
+        mask[i] = 2
+
+#split according to subject id
+#~70% of subjects go to the training set
+train_data = [data[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+train_label_alcoholism = [label_alcoholism[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+train_label_stimulus = [label_stimulus[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+train_label_id = [label_id[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+
+#~20% of subjects go to the test set
+test_data = [data[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+test_label_alcoholism = [label_alcoholism[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+test_label_stimulus = [label_stimulus[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+test_label_id = [label_id[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+
+#~10% of subjects go to the validation set
+val_data = [data[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+val_label_alcoholism = [label_alcoholism[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+val_label_stimulus = [label_stimulus[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+val_label_id = [label_id[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+
+#save data
+sio.savemat( 'uci_eeg_train_cross.mat',
+            {'data': train_data, 'label_alcoholism':np.reshape(train_label_alcoholism,(-1,1)),
+             'label_stimulus': np.reshape(train_label_stimulus,(-1,1)), 'label_id':np.reshape(train_label_id,(-1,1))})
+
+sio.savemat( 'uci_eeg_test_cross.mat',
+            {'data': test_data, 'label_alcoholism':np.reshape(test_label_alcoholism,(-1,1)),
+             'label_stimulus':np.reshape(test_label_stimulus,(-1,1)), 'label_id':np.reshape(test_label_id,(-1,1))})
+
+sio.savemat( 'uci_eeg_validation_cross.mat',
+            {'data': val_data,
'label_alcoholism':np.reshape(val_label_alcoholism,(-1,1)), + 'label_stimulus': np.reshape(val_label_stimulus,(-1,1)), 'label_id':np.reshape(val_label_id,(-1,1))}) + + diff --git a/DataPreprocessing/data_split_within.py b/DataPreprocessing/data_split_within.py new file mode 100644 index 0000000..1b20051 --- /dev/null +++ b/DataPreprocessing/data_split_within.py @@ -0,0 +1,113 @@ +import scipy.io as sio +import scipy.io +import numpy as np +from sklearn.model_selection import train_test_split + +''' +within-subject data spliting +''' + +mat = sio.loadmat('ucieeg.mat') + + +data = mat['X'].astype('float32') + +#data = np.transpose (data, (0, 3 ,2, 1)) + +print (np.shape (data)) + +label_alcoholism = mat['y_alcoholic'].astype('int') +label_alcoholism = label_alcoholism.reshape(np.shape(data)[0]) + +label_stimulus = mat['y_stimulus'].astype('int')- 1 #label start from 0 +label_stimulus = label_stimulus.reshape(np.shape(data)[0]) #label start from 0 + +label_id = mat['subjectid'].astype('int') - 1 +label_id = label_id.reshape(np.shape (data)[0]) + + +train_data = [] +train_label = [] + +train_cyc_data = [] +train_cyc_label = [] + +val_data = [] +val_label = [] + +test_data = [] +test_label = [] + +num_subject = 122 + +#loop through each subject to split data within subject +for i in range(num_subject): + index_i = np.where(label_id == i) + data_i = data[index_i] + #print (np.shape (data_i)) + + label_alcoholism_i = label_alcoholism[index_i] + label_stimulus_i = label_stimulus[index_i] + label_id_i = label_id[index_i] + + #print (np.shape(data_i)) + + # try 8-1-1 train-test-validation splitting + label_stack_i = np.stack((label_alcoholism_i, label_stimulus_i, label_id_i), axis=1) + X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(data_i, label_stack_i, test_size=0.2,random_state = 1) + X_test_i, X_val_i, y_test_i, y_val_i = train_test_split(X_test_i, y_test_i, test_size=0.5,random_state = 1) + + # if alcoholic, sample 70% data for the cyclegan-based model training => balanced data + if label_alcoholism_i[0] == 0 or (label_alcoholism_i[0] == 1 and np.random.rand()<=0.7): + train_cyc_data.append(X_train_i) + + # X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(data_i, label_stack_i, test_size=0.45,random_state = 1) + # X_test_i, X_val_i, y_test_i, y_val_i = train_test_split(X_test_i, y_test_i, test_size=0.33,random_state = 1) + + #else: + # X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(data_i, label_stack_i, test_size=0.1,random_state = 1) + # X_test_i, X_val_i, y_test_i, y_val_i = train_test_split(X_test_i, y_test_i, test_size=0.5,random_state = 1) + + train_data.append (X_train_i) + + val_data.append (X_val_i) + + test_data.append (X_test_i) + + train_label.append (y_train_i) + + test_label.append (y_test_i) + + val_label.append (y_val_i) + + +train_data = np.concatenate(train_data) +train_label = np.concatenate(train_label) + +val_data = np.concatenate(val_data) +val_label = np.concatenate(val_label) + +test_data = np.concatenate(test_data) +test_label = np.concatenate(test_label) + + + +print (np.shape (train_label)) +print (np.shape (val_label)) +print (np.shape (test_label)) + + + +sio.savemat( 'uci_eeg_train_within_8.mat', + {'data': train_data, 'label_alcoholism':np.reshape(train_label[:,0],(-1,1)), + 'label_stimulus': np.reshape(train_label[:,1],(-1,1)), 'label_id':np.reshape(train_label[:,2],(-1,1))}) + +sio.savemat( 'uci_eeg_test_within_1.mat', + {'data': test_data, 'label_alcoholism':np.reshape(test_label[:,0],(-1,1)), + 
'label_stimulus':np.reshape(test_label[:,1],(-1,1)), 'label_id':np.reshape(test_label[:,2],(-1,1))}) + +sio.savemat( 'uci_eeg_validation_within_1.mat', + {'data': val_data, 'label_alcoholism':np.reshape(val_label[:,0],(-1,1)), + 'label_stimulus': np.reshape(val_label[:,1],(-1,1)), 'label_id':np.reshape(val_label[:,2],(-1,1))}) + + diff --git a/DataPreprocessing/eegtoimg.py b/DataPreprocessing/eegtoimg.py new file mode 100644 index 0000000..2779d49 --- /dev/null +++ b/DataPreprocessing/eegtoimg.py @@ -0,0 +1,281 @@ +from __future__ import print_function +import time + +import numpy as np +np.random.seed(1234) +from functools import reduce +import math as m + +import scipy.io + +from scipy.interpolate import griddata +from sklearn.preprocessing import scale +from utils import augment_EEG, cart2sph, pol2cart +import scipy.io as sio + +import matplotlib.pyplot as plt + +from sklearn import preprocessing +#import mne +#from plotly import tools +#from plotly.graph_objs import Layout, YAxis, Scatter, Annotation, Annotations, Data, Figure, Marker, Font +#import matplotlib.pyplot as plt +#import plotly.plotly as py +#import plotly.offline as offline +#import plotly.graph_objs as go + +#since it is my personal account for plotly, do not use it too often. QAQ +#tools.set_credentials_file(username='yy.fy.cn',api_key='s4aYwqqYayPPjcqFxk7p') + +def azim_proj(pos): + """ + Computes the Azimuthal Equidistant Projection of input point in 3D Cartesian Coordinates. + Imagine a plane being placed against (tangent to) a globe. If + a light source inside the globe projects the graticule onto + the plane the result would be a planar, or azimuthal, map + projection. + + :param pos: position in 3D Cartesian coordinates + :return: projected coordinates using Azimuthal Equidistant Projection + """ + [r, elev, az] = cart2sph(pos[0], pos[1], pos[2]) + return pol2cart(az, m.pi / 2 - elev) + + +def gen_images(locs, features, n_gridpoints, normalize=True, + augment=False, pca=False, std_mult=0.1, n_components=2, edgeless=False): + """ + Generates EEG images given electrode locations in 2D space and multiple feature values for each electrode + + :param locs: An array with shape [n_electrodes, 2] containing X, Y + coordinates for each electrode. + :param features: Feature matrix as [n_samples, n_features] + Features are as columns. + Features corresponding to each frequency band are concatenated. + (alpha1, alpha2, ..., beta1, beta2,...) + :param n_gridpoints: Number of pixels in the output images + :param normalize: Flag for whether to normalize each band over all samples + :param augment: Flag for generating augmented images + :param pca: Flag for PCA based data augmentation + :param std_mult Multiplier for std of added noise + :param n_components: Number of components in PCA to retain for augmentation + :param edgeless: If True generates edgeless images by adding artificial channels + at four corners of the image with value = 0 (default=False). + :return: Tensor of size [samples, colors, W, H] containing generated + images. 
+ """ + feat_array_temp = [] + nElectrodes = locs.shape[0] # Number of electrodes + # Test whether the feature vector length is divisible by number of electrodes + assert features.shape[1] % nElectrodes == 0 + n_colors = int(features.shape[1] / nElectrodes) + for c in range(n_colors): + feat_array_temp.append(features[:, c * nElectrodes : nElectrodes * (c+1)]) + if augment: + if pca: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=True, n_components=n_components) + else: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=False, n_components=n_components) + nSamples = features.shape[0] + # Interpolate the values + grid_x, grid_y = np.mgrid[ + min(locs[:, 0]):max(locs[:, 0]):n_gridpoints*1j, + min(locs[:, 1]):max(locs[:, 1]):n_gridpoints*1j + ] + temp_interp = [] + for c in range(n_colors): + temp_interp.append(np.zeros([nSamples, n_gridpoints, n_gridpoints])) + # Generate edgeless images + if edgeless: + min_x, min_y = np.min(locs, axis=0) + max_x, max_y = np.max(locs, axis=0) + locs = np.append(locs, np.array([[min_x, min_y], [min_x, max_y],[max_x, min_y],[max_x, max_y]]),axis=0) + for c in range(n_colors): + feat_array_temp[c] = np.append(feat_array_temp[c], np.zeros((nSamples, 4)), axis=1) + # Interpolating + for i in range(nSamples): + for c in range(n_colors): + temp_interp[c][i, :, :] = griddata(locs, feat_array_temp[c][i, :], (grid_x, grid_y), + method='cubic', fill_value=np.nan) + print('Interpolating {0}/{1}\r'.format(i+1, nSamples), end='\r') + + # Normalizing + for c in range(n_colors): + if normalize: + temp_interp[c][~np.isnan(temp_interp[c])] = \ + scale(temp_interp[c][~np.isnan(temp_interp[c])]) + temp_interp[c] = np.nan_to_num(temp_interp[c]) #Replace nan with zero and inf with large finite numbers. + return np.swapaxes(np.asarray(temp_interp), 0, 1) # swap axes to have [samples, colors, W, H] + +def get_fft(snippet): + Fs = 256.0; # sampling rate + #Ts = len(snippet)/Fs/Fs; # sampling interval + snippet_time = len(snippet)/Fs + Ts = 1.0/Fs; # sampling interval + t = np.arange(0,snippet_time,Ts) # time vector + + # ff = 5; # frequency of the signal + # y = np.sin(2*np.pi*ff*t) + y = snippet +# print('Ts: ',Ts) +# print(t) +# print(y.shape) + n = len(y) # length of the signal + k = np.arange(n) + T = n/Fs + frq = k/T # two sides frequency range + frq = frq[range(int(n/2))] # one side frequency range + + Y = np.fft.fft(y)/n # fft computing and normalization + Y = Y[range(int(n/2))] + #Added in: (To remove bias.) 
+ #Y[0] = 0 + return frq,abs(Y) + +def theta_alpha_beta_averages(f,Y): + theta_range = (4,8) + alpha_range = (8,13) + beta_range = (13,30) + theta = Y[(f>theta_range[0]) & (f<=theta_range[1])].mean() + alpha = Y[(f>alpha_range[0]) & (f<=alpha_range[1])].mean() + beta = Y[(f>beta_range[0]) & (f<=beta_range[1])].mean() + return theta, alpha, beta + + +def make_frames(df,frame_duration): + ''' + in: dataframe or array with all channels, frame duration in seconds + out: array of theta, alpha, beta averages for each probe for each time step + shape: (n-frames,m-probes,k-brainwave bands) + ''' + Fs = 256.0 + frame_length = Fs*frame_duration + + frames = [] + print('df shape',np.shape(df)) + + for i in range(0, np.shape(df)[0]): + frame = [] + + for channel in range(0, np.shape(df)[1]): + snippet = df[i][channel] + #print(i, channel) + f,Y = get_fft(snippet) + #print (Y.shape) + theta, alpha, beta = theta_alpha_beta_averages(f,Y) + #print (theta, alpha, beta) + frame.append([theta, alpha, beta]) + + frames.append(frame) + return np.array(frames) + +''' +def visual_one_eeg (images, img_path): + + #images = np.reshape(images, [self.input_height, self.input_width]) + n_channels = np.shape(images)[0] + step = 1. / n_channels + data = images + eeg_time = np.linspace(1,n_channels,n_channels) + kwargs = dict(domain=[1 - step, 1], showticklabels=False, zeroline=False, showgrid=False) + + # create objects for layout and traces + layout = Layout(yaxis=YAxis(kwargs), showlegend=False) + traces = [Scatter(x=eeg_time, y=data.T[:, 0])] + + # loop over the channels + for ii in range(1, n_channels): + kwargs.update(domain=[1 - (ii + 1) * step, 1 - ii * step]) + layout.update({'yaxis%d' % (ii + 1): YAxis(kwargs), 'showlegend': False}) + traces.append(Scatter(x=eeg_time, y=data.T[:, ii], yaxis='y%d' % (ii + 1))) + + # add channel names using Annotations + + # set the size of the figure and plot it + layout.update(autosize=False, width=1000, height=600) + fig = Figure(data=Data(traces), layout=layout) + #py.image.save_as(fig, filename='') + py.image.save_as(fig, filename=img_path) +''' + +if __name__ == '__main__': + from utils import reformatInput + + # Load electrode locations + print('Loading data...') + locs = scipy.io.loadmat('Neuroscan_locs_orig.mat') + locs_3d = locs['A'] + locs_2d = [] + # Convert to 2D + for e in locs_3d: + locs_2d.append(azim_proj(e)) + + + + for s in ('train','test','validation'): + mat = sio.loadmat('uci_eeg_'+ s +'_within.mat') + + data = mat['data'] + label_alcoholic = mat['label_alcoholism'] + label_stimulus = mat['label_stimulus'] + label_id = mat['label_id'] + + tras_X = np.transpose(data, (0, 2, 1)) + print (np.shape(tras_X)) + #visual_one_eeg (tras_X[0], "One-eeg-plot.png") + + X = make_frames (tras_X, 1) + #print (np.shape(X)) + X_1 = X.reshape (np.shape(X)[0], np.shape(X)[1] * np.shape(X)[2]) + + + print('Generating images...') + + images = gen_images(np.array(locs_2d), + X_1, + 32, normalize=False) + + print(images.shape) + + images = np.transpose (images, (0, 3 ,2, 1)) + + ########################### add normazation ################################# + #trs_data = images.reshape (np.shape(images)[0], np.shape(images)[1] * np.shape(images)[2] * np.shape(images)[3]) + + #max_abs_scaler = preprocessing.MaxAbsScaler() + + #trs_data = max_abs_scaler.fit_transform (trs_data) + + #real_data = trs_data.reshape (np.shape(images)[0], np.shape(images)[1], np.shape(images)[2], np.shape(images)[3]) + + #images = real_data + + 
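+    # Note: if the MaxAbs normalisation above is re-enabled, a separate scaler is fitted for each of
+    # the train/test/validation splits; reusing the scaler fitted on the training split would keep
+    # all three splits on a consistent scale.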
########################################################################################### + + sio.savemat( 'uci_eeg_images_'+ s +'_within.mat', + {'data': images, 'label_alcoholism': label_alcoholic, 'label_stimulus': label_stimulus, 'label_id': label_id}) + + print (np.max(images)) + print (np.min(images)) + + img_in = images[0,:,:,:] + img_in -= np.min(img_in) + img_in /= np.max(img_in) + + plt.clf() + plt.subplot(1,1,1) + plt.imshow(img_in) + #plt.pause(50) + + ''' + print (np.shape(images)) + plt.clf() + plt.subplot(1,1,1) + plt.imshow(images[0]) + plt.pause(20) + ''' + print('\n') + + \ No newline at end of file diff --git a/DataPreprocessing/gen_joint_train_set.py b/DataPreprocessing/gen_joint_train_set.py new file mode 100644 index 0000000..f63702e --- /dev/null +++ b/DataPreprocessing/gen_joint_train_set.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed May 20 01:17:56 2020 + +@author: konaw +""" + +import scipy.io as sio +import numpy as np + + +''' +Generate joint training set by combining the dummy EEG images with the training set +''' + +data_real = sio.loadmat('uci_eeg_images_train_within.mat') +data_dummy = sio.loadmat('eeg_dummy_images_w_label_step3_within.mat') + + +data = np.append(data_real['data'], data_dummy['data'],axis=0) +label_alc = np.append(data_real['label_alcoholism'], data_dummy['label_alcoholism'],axis=0) +label_stimulus = np.append(data_real['label_stimulus'], data_dummy['label_stimulus'],axis=0) + +sio.savemat('eeg_images_train_augmented_within.mat',{'data':data, 'label_alcoholism':label_alc, + 'label_stimulus':label_stimulus}) \ No newline at end of file diff --git a/DataPreprocessing/grand_avg.py b/DataPreprocessing/grand_avg.py new file mode 100644 index 0000000..74980f0 --- /dev/null +++ b/DataPreprocessing/grand_avg.py @@ -0,0 +1,283 @@ +from __future__ import print_function +import time + +import scipy.io as sio +import matplotlib.pyplot as plt +import numpy as np + +np.random.seed(1234) +from functools import reduce +import math as m + +from scipy.interpolate import griddata +from sklearn.preprocessing import scale +from utils import augment_EEG, cart2sph, pol2cart + +from sklearn import preprocessing + +''' +generate dummy identities using grand average +''' + + +''' +helper function for EEG2Img, 3D location projected to 2D plane +code from Yao +''' +def azim_proj(pos): + """ + Computes the Azimuthal Equidistant Projection of input point in 3D Cartesian Coordinates. + Imagine a plane being placed against (tangent to) a globe. If + a light source inside the globe projects the graticule onto + the plane the result would be a planar, or azimuthal, map + projection. + + :param pos: position in 3D Cartesian coordinates + :return: projected coordinates using Azimuthal Equidistant Projection + """ + [r, elev, az] = cart2sph(pos[0], pos[1], pos[2]) + return pol2cart(az, m.pi / 2 - elev) + +''' +helper function for EEG2Img, generate EEG images +code from Yao +''' +def gen_images(locs, features, n_gridpoints, normalize=True, + augment=False, pca=False, std_mult=0.1, n_components=2, edgeless=False): + """ + Generates EEG images given electrode locations in 2D space and multiple feature values for each electrode + + :param locs: An array with shape [n_electrodes, 2] containing X, Y + coordinates for each electrode. + :param features: Feature matrix as [n_samples, n_features] + Features are as columns. + Features corresponding to each frequency band are concatenated. + (alpha1, alpha2, ..., beta1, beta2,...) 
+ :param n_gridpoints: Number of pixels in the output images + :param normalize: Flag for whether to normalize each band over all samples + :param augment: Flag for generating augmented images + :param pca: Flag for PCA based data augmentation + :param std_mult Multiplier for std of added noise + :param n_components: Number of components in PCA to retain for augmentation + :param edgeless: If True generates edgeless images by adding artificial channels + at four corners of the image with value = 0 (default=False). + :return: Tensor of size [samples, colors, W, H] containing generated + images. + """ + feat_array_temp = [] + nElectrodes = locs.shape[0] # Number of electrodes + # Test whether the feature vector length is divisible by number of electrodes + assert features.shape[1] % nElectrodes == 0 + n_colors = int(features.shape[1] / nElectrodes) + for c in range(n_colors): + feat_array_temp.append(features[:, c * nElectrodes : nElectrodes * (c+1)]) + if augment: + if pca: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=True, n_components=n_components) + else: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=False, n_components=n_components) + nSamples = features.shape[0] + # Interpolate the values + grid_x, grid_y = np.mgrid[ + min(locs[:, 0]):max(locs[:, 0]):n_gridpoints*1j, + min(locs[:, 1]):max(locs[:, 1]):n_gridpoints*1j + ] + temp_interp = [] + for c in range(n_colors): + temp_interp.append(np.zeros([nSamples, n_gridpoints, n_gridpoints])) + # Generate edgeless images + if edgeless: + min_x, min_y = np.min(locs, axis=0) + max_x, max_y = np.max(locs, axis=0) + locs = np.append(locs, np.array([[min_x, min_y], [min_x, max_y],[max_x, min_y],[max_x, max_y]]),axis=0) + for c in range(n_colors): + feat_array_temp[c] = np.append(feat_array_temp[c], np.zeros((nSamples, 4)), axis=1) + # Interpolating + for i in range(nSamples): + for c in range(n_colors): + temp_interp[c][i, :, :] = griddata(locs, feat_array_temp[c][i, :], (grid_x, grid_y), + method='cubic', fill_value=np.nan) + print('Interpolating {0}/{1}\r'.format(i+1, nSamples), end='\r') + + # Normalizing + for c in range(n_colors): + if normalize: + temp_interp[c][~np.isnan(temp_interp[c])] = \ + scale(temp_interp[c][~np.isnan(temp_interp[c])]) + temp_interp[c] = np.nan_to_num(temp_interp[c]) #Replace nan with zero and inf with large finite numbers. 
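+    # griddata() with method='cubic' returns fill_value (NaN) for pixels outside the convex hull of
+    # the electrode locations; nan_to_num() above maps those pixels to 0, leaving the image corners blank.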
+    return np.swapaxes(np.asarray(temp_interp), 0, 1)     # swap axes to have [samples, colors, W, H]
+
+'''
+helper function for EEG2Img, obtain features for the 3 frequency bands by averaging the spectrum amplitude
+code from Yao
+'''
+def theta_alpha_beta_averages(f,Y):
+    theta_range = (4,8)
+    alpha_range = (8,13)
+    beta_range = (13,30)
+    theta = Y[(f>theta_range[0]) & (f<=theta_range[1])].mean()
+    alpha = Y[(f>alpha_range[0]) & (f<=alpha_range[1])].mean()
+    beta = Y[(f>beta_range[0]) & (f<=beta_range[1])].mean()
+    return theta, alpha, beta
+
+
+def make_frames(df,frame_duration):
+    '''
+    in: dataframe or array with all channels, frame duration in seconds
+    out: array of theta, alpha, beta averages for each probe for each time step
+        shape: (n-frames,m-probes,k-brainwave bands)
+    '''
+    Fs = 256.0
+    frame_length = Fs*frame_duration
+
+    frames = []
+    #print('df shape',np.shape(df))
+    for i in range(0, np.shape(df)[0]):
+        frame = []
+
+        for channel in range(0, np.shape(df)[1]):
+            snippet = df[i][channel]
+            #print(i, channel)
+            #f,Y = get_fft(snippet)
+            #print (len(snippet))
+            #the spectra from t2f.py have 1 Hz bins (Fs = 256 Hz, 1 s snippets),
+            #so the bin index is used directly as the frequency axis here
+            theta, alpha, beta = theta_alpha_beta_averages(np.array(range(len(snippet))), snippet)
+            #print (theta, alpha, beta)
+            frame.append([theta, alpha, beta])
+
+        frames.append(frame)
+        if i % 100 == 0:
+            print('===== %d end ====='%(i))
+    return np.array(frames)
+
+
+
+if __name__ == '__main__':
+
+    #load EEG spectrums in training set
+    data = sio.loadmat('eeg_spectrum_train_within.mat')
+
+
+    label_disease_range = 2
+    label_stimulus_range = 5
+    num_ch, band_width = data['data'][0].shape
+    num_subjects = 120
+
+
+    ####### Group the EEG spectrums #######
+    #generate 10 groups: 10 combinations of alcoholism and stimulus attributes
+    groups = [{} for i in range(label_disease_range * label_stimulus_range)] #map: subject id -> data
+
+    #allocate each trial of EEG signals to the corresponding group
+    for i in range(len(data['data'])):
+        index_label = label_stimulus_range * data['label_alcoholism'][i,0] + data['label_stimulus'][i,0]
+        index_id = data['label_id'][i,0]
+
+        #add EEG data to the corresponding group and subject
+        if index_id in groups[index_label]:
+            groups[index_label][index_id].append(data['data'][i])
+        else:
+            groups[index_label][index_id] = [data['data'][i]]
+
+    ####### visualise #######
+    #g0 = groups[0]
+    #s = [g0[121][3][17], g0[70][3][17], g0[72][3][17]]
+    #cnt = 0;
+    #for c in s:
+    #    plt.figure()
+    #    plt.xticks([])
+    #    plt.yticks([])
+    #    plt.plot(c)
+    #    plt.savefig('EEG_spectum_candi_%d.png'%cnt, bbox_inches='tight')
+    #    cnt += 1
+    #avg = np.mean(s,axis=0)
+    #plt.figure()
+    #plt.xticks([])
+    #plt.yticks([])
+    #plt.plot(avg)
+    #plt.savefig('EEG_spectum_candi_avg.png', bbox_inches='tight')
+    #cnt += 1
+
+    ####### generate labelled EEG spectrums with dummy identities #######
+    dummy = []
+    dummy_label_disease = []
+    dummy_label_stimulus = []
+
+    #loop through the 10 groups
+    for i in range(len(groups)):
+        #sort each group w.r.t. the number of EEG signals for each subject
+        candidate_list = sorted(list(groups[i].values()), key=len)
+        step = 3 #a sliding window of 3
+        #average trials of EEG signals of the subjects within the window
+        for j in range(len(candidate_list) - step + 1):
+            for k in range(len(candidate_list[j])):
+                #average across subjects
+                new = np.mean([item[k] for item in candidate_list[j:j+step]], axis=0)
+                dummy.append(new)
+                #label the dummy identity
+                dummy_label_disease.append(i//label_stimulus_range)
+                dummy_label_stimulus.append(i%label_stimulus_range)
+
+    dummy = np.array(dummy)
+
+
+    ####### generate EEG images
with dummy identities ####### + # Load electrode locations + print('Loading data...') + locs = sio.loadmat('Neuroscan_locs_orig.mat') + locs_3d = locs['A'] + locs_2d = [] + # Convert to 2D + for e in locs_3d: + locs_2d.append(azim_proj(e)) + + X = make_frames(dummy, 1) + #print (np.shape(X)) + X_1 = X.reshape(np.shape(X)[0], np.shape(X)[1] * np.shape(X)[2]) + + + print('Generating images...') + + images = gen_images(np.array(locs_2d), + X_1, + 32, normalize=False) + + images = np.transpose (images, (0, 3 ,2, 1)) + + # save the dummy EEG images + sio.savemat( 'eeg_dummy_images_w_label_step3_within.mat', + {'data': images,'label_alcoholism':np.reshape(dummy_label_disease,(-1,1)), + 'label_stimulus':np.reshape(dummy_label_stimulus,(-1,1))}) + + + + + # print (np.max(images)) + # print (np.min(images)) + + # img_in = images[0,:,:,:] + # img_in -= np.min(img_in) + # img_in /= np.max(img_in) + + # plt.clf() + # plt.subplot(1,1,1) + # plt.imshow(img_in) + # #plt.pause(50) + + # print (np.shape(images)) + # plt.clf() + # plt.subplot(1,1,1) + # plt.imshow(images[0]) + # plt.pause(20) + + + #print('\n') + + #print(spectrums.shape) + #sample0 = np.array(data['X'][:88]).mean(axis=0) + + #sample_n = data['X'][6001] + + #plt.plot(sample_n) + diff --git a/DataPreprocessing/images/EEG_image_dummy_sample.png b/DataPreprocessing/images/EEG_image_dummy_sample.png new file mode 100644 index 0000000..422d69c Binary files /dev/null and b/DataPreprocessing/images/EEG_image_dummy_sample.png differ diff --git a/DataPreprocessing/images/EEG_spectum.png b/DataPreprocessing/images/EEG_spectum.png new file mode 100644 index 0000000..23ba128 Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum.png differ diff --git a/DataPreprocessing/images/EEG_spectum_0.png b/DataPreprocessing/images/EEG_spectum_0.png new file mode 100644 index 0000000..b0dd8c9 Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum_0.png differ diff --git a/DataPreprocessing/images/EEG_spectum_1.png b/DataPreprocessing/images/EEG_spectum_1.png new file mode 100644 index 0000000..ee3798f Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum_1.png differ diff --git a/DataPreprocessing/images/EEG_spectum_2.png b/DataPreprocessing/images/EEG_spectum_2.png new file mode 100644 index 0000000..7bc63e7 Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum_2.png differ diff --git a/DataPreprocessing/images/EEG_time_series.png b/DataPreprocessing/images/EEG_time_series.png new file mode 100644 index 0000000..6af6776 Binary files /dev/null and b/DataPreprocessing/images/EEG_time_series.png differ diff --git a/DataPreprocessing/images/loc3D.png b/DataPreprocessing/images/loc3D.png new file mode 100644 index 0000000..89d1e35 Binary files /dev/null and b/DataPreprocessing/images/loc3D.png differ diff --git a/DataPreprocessing/t2f.py b/DataPreprocessing/t2f.py new file mode 100644 index 0000000..8f317a0 --- /dev/null +++ b/DataPreprocessing/t2f.py @@ -0,0 +1,61 @@ +import scipy.io as sio +import matplotlib.pyplot as plt +import numpy as np + +''' +time-frequency convertion using FFT +''' +def get_fft(snippet): + Fs = 256.0; # sampling rate + #Ts = len(snippet)/Fs/Fs; # sampling interval + #snippet_time = len(snippet)/Fs + #Ts = 1.0/Fs; # sampling interval + #t = np.arange(0,snippet_time,Ts) # time vector + + # ff = 5; # frequency of the signal + # y = np.sin(2*np.pi*ff*t) + y = snippet +# print('Ts: ',Ts) +# print(t) +# print(y.shape) + n = len(y) # length of the signal + k = np.arange(n) + T = n/Fs + frq = k/T # two 
sides frequency range + frq = frq[range(int(n/2))] # one side frequency range + + Y = np.fft.fft(y)/n # fft computing and normalization + Y = Y[range(int(n/2))] + #Added in: (To remove bias.) + #Y[0] = 0 + return frq,abs(Y) + + + +if __name__ == '__main__': + + data = sio.loadmat('uci_eeg_train_within.mat') + + X = np.transpose(data['data'], (0, 2, 1)) + + num_exp, num_ch, rate = X.shape + + spectrums = [] + + # time-frequency convertion on all the EEG signals + for i in range(num_exp): + spectrum = [] + for ch in range(num_ch): + time_domain = X[i][ch] + f , magnitude = get_fft(time_domain) + spectrum.append(magnitude) + spectrums.append(spectrum) + + dummy = np.array([]) + + sio.savemat('eeg_spectrum_train_within.mat', {'data': spectrums, 'label_alcoholism':data['label_alcoholism'], + 'label_stimulus':data['label_stimulus'], 'label_id':data['label_id']}) + + + + \ No newline at end of file diff --git a/DataPreprocessing/utils.py b/DataPreprocessing/utils.py new file mode 100644 index 0000000..43057a7 --- /dev/null +++ b/DataPreprocessing/utils.py @@ -0,0 +1,242 @@ + +import numpy as np +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import ImageGrid +import scipy + + +import math as m +import numpy as np +np.random.seed(123) +import scipy.io +from sklearn.decomposition import PCA + +def cart2sph(x, y, z): + """ + Transform Cartesian coordinates to spherical + :param x: X coordinate + :param y: Y coordinate + :param z: Z coordinate + :return: radius, elevation, azimuth + """ + x2_y2 = x**2 + y**2 + r = m.sqrt(x2_y2 + z**2) # r + elev = m.atan2(z, m.sqrt(x2_y2)) # Elevation + az = m.atan2(y, x) # Azimuth + return r, elev, az + + +def pol2cart(theta, rho): + """ + Transform polar coordinates to Cartesian + :param theta: angle value + :param rho: radius value + :return: X, Y + """ + return rho * m.cos(theta), rho * m.sin(theta) + + +def augment_EEG(data, stdMult, pca=False, n_components=2): + """ + Augment data by adding normal noise to each feature. + + :param data: EEG feature data as a matrix (n_samples x n_features) + :param stdMult: Multiplier for std of added noise + :param pca: if True will perform PCA on data and add noise proportional to PCA components. + :param n_components: Number of components to consider when using PCA. + :return: Augmented data as a matrix (n_samples x n_features) + """ + augData = np.zeros(data.shape) + if pca: + pca = PCA(n_components=n_components) + pca.fit(data) + components = pca.components_ + variances = pca.explained_variance_ratio_ + coeffs = np.random.normal(scale=stdMult, size=pca.n_components) * variances + for s, sample in enumerate(data): + augData[s, :] = sample + (components * coeffs.reshape((n_components, -1))).sum(axis=0) + else: + # Add Gaussian noise with std determined by weighted std of each feature + for f, feat in enumerate(data.transpose()): + augData[:, f] = feat + np.random.normal(scale=stdMult*np.std(feat), size=feat.size) + return augData + + +def augment_EEG_image(image, std_mult, pca=False, n_components=2): + """ + Augment data by adding normal noise to each feature. + + :param image: EEG feature data as a a colored image [n_samples, n_colors, W, H] + :param std_mult: Multiplier for std of added noise + :param pca: if True will perform PCA on data and add noise proportional to PCA components. + :param n_components: Number of components to consider when using PCA. 
+ :return: Augmented data as a matrix (n_samples x n_features) + """ + augData = np.zeros((data.shape[0], data.shape[1], data.shape[2] * data.shape[3])) + for c in xrange(image.shape[1]): + reshData = np.reshape(data['featMat'][:, c, :, :], (data['featMat'].shape[0], -1)) + if pca: + augData[:, c, :] = augment_EEG(reshData, std_mult, pca=True, n_components=n_components) + else: + augData[:, c, :] = augment_EEG(reshData, std_mult, pca=False) + return np.reshape(augData, data['featMat'].shape) + + +def load_data(data_file): + """ + Loads the data from MAT file. MAT file should contain two + variables. 'featMat' which contains the feature matrix in the + shape of [samples, features] and 'labels' which contains the output + labels as a vector. Label numbers are assumed to start from 1. + + Parameters + ---------- + data_file: str + + Returns + ------- + data: array_like + """ + print("Loading data from %s" % (data_file)) + + dataMat = scipy.io.loadmat(data_file, mat_dtype=True) + + print("Data loading complete. Shape is %r" % (dataMat['featMat'].shape,)) + return dataMat['features'][:, :-1], dataMat['features'][:, -1] - 1 # Sequential indices + + +def reformatInput(data, labels, indices): + """ + Receives the the indices for train and test datasets. + Outputs the train, validation, and test data and label datasets. + """ + + trainIndices = indices[0][len(indices[1]):] + validIndices = indices[0][:len(indices[1])] + testIndices = indices[1] + # Shuffling training data + # shuffledIndices = np.random.permutation(len(trainIndices)) + # trainIndices = trainIndices[shuffledIndices] + if data.ndim == 4: + return [(data[trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)), + (data[validIndices], np.squeeze(labels[validIndices]).astype(np.int32)), + (data[testIndices], np.squeeze(labels[testIndices]).astype(np.int32))] + elif data.ndim == 5: + return [(data[:, trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)), + (data[:, validIndices], np.squeeze(labels[validIndices]).astype(np.int32)), + (data[:, testIndices], np.squeeze(labels[testIndices]).astype(np.int32))] + + +def get_params(sess): + variables = tf.trainable_variables() + params = {} + for i in range(len(variables)): + name = variables[i].name + params[name] = sess.run(variables[i]) + return params + + +def to_one_hot(x, N = -1): + x = x.astype('int32') + if np.min(x) !=0 and N == -1: + x = x - np.min(x) + x = x.reshape(-1) + if N == -1: + N = np.max(x) + 1 + label = np.zeros((x.shape[0],N)) + idx = range(x.shape[0]) + label[idx,x] = 1 + return label.astype('float32') + +def image_mean(x): + x_mean = x.mean((0, 1, 2)) + return x_mean + +def shape(tensor): + """ + Get the shape of a tensor. This is a compile-time operation, + meaning that it runs when building the graph, not running it. + This means that it cannot know the shape of any placeholders + or variables with shape determined by feed_dict. + """ + return tuple([d.value for d in tensor.get_shape()]) + + + +def tfvar2str(tf_vars): + names = [] + for i in range(len(tf_vars)): + names.append(tf_vars[i].name) + return names + + +def shuffle_aligned_list(data): + """Shuffle arrays in a list by shuffling each array identically.""" + num = data[0].shape[0] + p = np.random.permutation(num) + return [d[p] for d in data] + + +def batch_generator(data, batch_size, shuffle=True): + """Generate batches of data. + + Given a list of array-like objects, generate batches of a given + size by yielding a list of array-like objects corresponding to the + same slice of each input. 
+ """ + if shuffle: + data = shuffle_aligned_list(data) + + batch_count = 0 + while True: + if batch_count * batch_size + batch_size >= len(data[0]): + batch_count = 0 + + if shuffle: + data = shuffle_aligned_list(data) + + start = batch_count * batch_size + end = start + batch_size + batch_count += 1 + yield [d[start:end] for d in data] + + + +def dic2list(sources, targets): + names_dic = {} + for key in sources: + names_dic[sources[key]] = key + for key in targets: + names_dic[targets[key]] = key + names = [] + for i in range(len(names_dic)): + names.append(names_dic[i]) + return names + +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum(axis=0) + +def norm_matrix(X, l): + Y = np.zeros(X.shape); + for i in range(X.shape[0]): + Y[i] = X[i]/np.linalg.norm(X[i],l) + return Y + + +def description(sources, targets): + source_names = sources.keys() + target_names = targets.keys() + N = min(len(source_names), 4) + description = source_names[0] + for i in range(1,N): + description = description + '_' + source_names[i] + description = description + '-' + target_names[0] + return description + + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) \ No newline at end of file diff --git a/DataPreprocessing/visualizer.py b/DataPreprocessing/visualizer.py new file mode 100644 index 0000000..ada6288 --- /dev/null +++ b/DataPreprocessing/visualizer.py @@ -0,0 +1,82 @@ +import scipy.io as sio +import numpy as np +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +from matplotlib.axes import Axes +import os + +''' +Generate some sample images or plots for the report +''' + +def time_freq_signals(root_path): + data_time = sio.loadmat('uci_eeg_train_within.mat') + data_spectrum = sio.loadmat('eeg_spectrum_train_within.mat') + + eeg_time_sample = data_time['data'][0][:,0] + eeg_spectrum_sample = data_spectrum['data'][0][0] + + plt.figure() + plt.plot(np.linspace(0,1,256), eeg_time_sample) + plt.xlabel('Time (s)', fontsize=16) + plt.ylabel('Amplitude (mV)', fontsize=16) + plt.tick_params(axis='both', which='major', labelsize=12) + plt.savefig('%s\EEG_time_series.png'%root_path, bbox_inches='tight') + + plt.figure() + plt.plot(eeg_spectrum_sample) + plt.xlabel('Frequency (Hz)', fontsize=16) + plt.ylabel('Normalised Amplitude (mV)', fontsize=14) + plt.tick_params(axis='both', which='major', labelsize=12) + plt.savefig('%s\EEG_spectum.png'%root_path, bbox_inches='tight') + + +def spectrum_simple(root_path, n): + data_spectrum = sio.loadmat('eeg_spectrum_train_within.mat') + eeg_spectrum_sample = data_spectrum['data'][0] + + for i in range(n): + ch = eeg_spectrum_sample[i] + fig = plt.figure() + plt.xticks([]) + plt.yticks([]) + plt.plot(ch) + plt.savefig('%s\EEG_spectum_%d.png'%(root_path, i), bbox_inches='tight') + + +def dummy_EEG_img(root_path, i): + data = sio.loadmat('eeg_dummy_images_w_label_step3_within.mat') + + images = data['data'] + + img_in = images[i,:,:,:] + img_in -= np.min(img_in) + img_in /= np.max(img_in) + + plt.clf() + plt.subplot(1,1,1) + plt.axis('off') + plt.imshow(img_in) + plt.savefig('%s\EEG_image_dummy_sample.png'%root_path, bbox_inches='tight') + + +def locations_3D(root_path): + locs = sio.loadmat('Neuroscan_locs_orig.mat') + locs_3d = locs['A'] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.set_xticklabels([]) + ax.set_yticklabels([]) + ax.set_zticklabels([]) + ax.scatter(locs_3d[:,0],locs_3d[:,1],locs_3d[:,2]) + plt.savefig('%s\loc3D.png'%root_path, 
bbox_inches='tight')
+
+if __name__ == '__main__':
+    img_folder = 'images'
+    if not os.path.isdir(img_folder):
+        os.makedirs(img_folder)
+    time_freq_signals(img_folder)
+    spectrum_simple(img_folder, 3)
+    dummy_EEG_img(img_folder, 11)
+    locations_3D(img_folder)
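Note on usage: a minimal sketch of the assumed run order for the within-subject pipeline, inferred from the filenames each script reads and writes; the runner below is illustrative only and is not part of this change. One caveat: data_split_within.py currently writes uci_eeg_train_within_8.mat / uci_eeg_test_within_1.mat / uci_eeg_validation_within_1.mat, while t2f.py and eegtoimg.py load uci_eeg_*_within.mat, so the split files may need renaming in between.

    import subprocess, sys

    for script in (
        'data_split_within.py',    # 1. split the raw UCI EEG trials into train/val/test within each subject
        't2f.py',                  # 2. FFT magnitude spectra of the training split (input to grand averaging)
        'eegtoimg.py',             # 3. convert the time-domain splits into 32x32 topographic EEG images
        'grand_avg.py',            # 4. grand-average spectra across subjects into dummy identities, then image them
        'gen_joint_train_set.py',  # 5. merge the dummy images with the real training images
    ):
        subprocess.run([sys.executable, script], check=True)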