diff --git a/DataPreprocessing/Neuroscan_locs_orig.mat b/DataPreprocessing/Neuroscan_locs_orig.mat
new file mode 100644
index 0000000..bc431be
Binary files /dev/null and b/DataPreprocessing/Neuroscan_locs_orig.mat differ
diff --git a/DataPreprocessing/__pycache__/utils.cpython-36.pyc b/DataPreprocessing/__pycache__/utils.cpython-36.pyc
new file mode 100644
index 0000000..135d629
Binary files /dev/null and b/DataPreprocessing/__pycache__/utils.cpython-36.pyc differ
diff --git a/DataPreprocessing/data_split_cross.py b/DataPreprocessing/data_split_cross.py
new file mode 100644
index 0000000..91761c7
--- /dev/null
+++ b/DataPreprocessing/data_split_cross.py
@@ -0,0 +1,79 @@
+import scipy.io as sio
+import scipy.io
+import numpy as np
+from sklearn.model_selection import train_test_split
+
+'''
+cross-subject data splitting
+'''
+
+mat = scipy.io.loadmat('ucieeg.mat')
+
+
+data = mat['X'].astype('float32')
+
+#data = np.transpose (data, (0, 3 ,2, 1))
+
+print (np.shape (data))
+
+label_alcoholism = mat['y_alcoholic'].astype('int')
+label_alcoholism = label_alcoholism.reshape(np.shape(data)[0])
+
+label_stimulus = mat['y_stimulus'].astype('int')- 1
+label_stimulus = label_stimulus.reshape(np.shape(data)[0])
+
+label_id = mat['subjectid'].astype('int') - 1
+label_id = label_id.reshape(np.shape (data)[0])
+
+
+#train_data, test_data, val_data = [], [], []
+#train_label, test_label, val_label = [], [], []
+
+num_subject = 122
+num_datapoint = data.shape[0]
+
+mask = np.zeros(num_subject)
+
+# 7-2-1 train-test-validation cross-subject split: each subject is assigned to a set at random
+for i in range(num_subject):
+    r = np.random.rand()
+    if r < 0.7:
+        mask[i] = 0
+    elif r >= 0.7 and r < 0.9:
+        mask[i] = 1
+    else:
+        mask[i] = 2
+
+#split according to subject id
+#~70% of subjects go to the training set
+train_data = [data[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+train_label_alcoholism = [label_alcoholism[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+train_label_stimulus = [label_stimulus[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+train_label_id = [label_id[i] for i in range(num_datapoint) if mask[label_id[i]] == 0]
+
+#~20% of subjects go to the test set
+test_data = [data[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+test_label_alcoholism = [label_alcoholism[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+test_label_stimulus = [label_stimulus[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+test_label_id = [label_id[i] for i in range(num_datapoint) if mask[label_id[i]] == 1]
+
+#~10% of subjects go to the validation set
+val_data = [data[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+val_label_alcoholism = [label_alcoholism[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+val_label_stimulus = [label_stimulus[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+val_label_id = [label_id[i] for i in range(num_datapoint) if mask[label_id[i]] == 2]
+
+#save data
+sio.savemat( 'uci_eeg_train_cross.mat',
+            {'data': train_data, 'label_alcoholism':np.reshape(train_label_alcoholism,(-1,1)),
+             'label_stimulus': np.reshape(train_label_stimulus,(-1,1)), 'label_id':np.reshape(train_label_id,(-1,1))})
+
+sio.savemat( 'uci_eeg_test_cross.mat',
+            {'data': test_data, 'label_alcoholism':np.reshape(test_label_alcoholism,(-1,1)),
+             'label_stimulus':np.reshape(test_label_stimulus,(-1,1)), 'label_id':np.reshape(test_label_id,(-1,1))})
+
+sio.savemat( 'uci_eeg_validation_cross.mat',
+            {'data': val_data,
'label_alcoholism':np.reshape(val_label_alcoholism,(-1,1)), + 'label_stimulus': np.reshape(val_label_stimulus,(-1,1)), 'label_id':np.reshape(val_label_id,(-1,1))}) + + diff --git a/DataPreprocessing/data_split_within.py b/DataPreprocessing/data_split_within.py new file mode 100644 index 0000000..1b20051 --- /dev/null +++ b/DataPreprocessing/data_split_within.py @@ -0,0 +1,113 @@ +import scipy.io as sio +import scipy.io +import numpy as np +from sklearn.model_selection import train_test_split + +''' +within-subject data spliting +''' + +mat = sio.loadmat('ucieeg.mat') + + +data = mat['X'].astype('float32') + +#data = np.transpose (data, (0, 3 ,2, 1)) + +print (np.shape (data)) + +label_alcoholism = mat['y_alcoholic'].astype('int') +label_alcoholism = label_alcoholism.reshape(np.shape(data)[0]) + +label_stimulus = mat['y_stimulus'].astype('int')- 1 #label start from 0 +label_stimulus = label_stimulus.reshape(np.shape(data)[0]) #label start from 0 + +label_id = mat['subjectid'].astype('int') - 1 +label_id = label_id.reshape(np.shape (data)[0]) + + +train_data = [] +train_label = [] + +train_cyc_data = [] +train_cyc_label = [] + +val_data = [] +val_label = [] + +test_data = [] +test_label = [] + +num_subject = 122 + +#loop through each subject to split data within subject +for i in range(num_subject): + index_i = np.where(label_id == i) + data_i = data[index_i] + #print (np.shape (data_i)) + + label_alcoholism_i = label_alcoholism[index_i] + label_stimulus_i = label_stimulus[index_i] + label_id_i = label_id[index_i] + + #print (np.shape(data_i)) + + # try 8-1-1 train-test-validation splitting + label_stack_i = np.stack((label_alcoholism_i, label_stimulus_i, label_id_i), axis=1) + X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(data_i, label_stack_i, test_size=0.2,random_state = 1) + X_test_i, X_val_i, y_test_i, y_val_i = train_test_split(X_test_i, y_test_i, test_size=0.5,random_state = 1) + + # if alcoholic, sample 70% data for the cyclegan-based model training => balanced data + if label_alcoholism_i[0] == 0 or (label_alcoholism_i[0] == 1 and np.random.rand()<=0.7): + train_cyc_data.append(X_train_i) + + # X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(data_i, label_stack_i, test_size=0.45,random_state = 1) + # X_test_i, X_val_i, y_test_i, y_val_i = train_test_split(X_test_i, y_test_i, test_size=0.33,random_state = 1) + + #else: + # X_train_i, X_test_i, y_train_i, y_test_i = train_test_split(data_i, label_stack_i, test_size=0.1,random_state = 1) + # X_test_i, X_val_i, y_test_i, y_val_i = train_test_split(X_test_i, y_test_i, test_size=0.5,random_state = 1) + + train_data.append (X_train_i) + + val_data.append (X_val_i) + + test_data.append (X_test_i) + + train_label.append (y_train_i) + + test_label.append (y_test_i) + + val_label.append (y_val_i) + + +train_data = np.concatenate(train_data) +train_label = np.concatenate(train_label) + +val_data = np.concatenate(val_data) +val_label = np.concatenate(val_label) + +test_data = np.concatenate(test_data) +test_label = np.concatenate(test_label) + + + +print (np.shape (train_label)) +print (np.shape (val_label)) +print (np.shape (test_label)) + + + +sio.savemat( 'uci_eeg_train_within_8.mat', + {'data': train_data, 'label_alcoholism':np.reshape(train_label[:,0],(-1,1)), + 'label_stimulus': np.reshape(train_label[:,1],(-1,1)), 'label_id':np.reshape(train_label[:,2],(-1,1))}) + +sio.savemat( 'uci_eeg_test_within_1.mat', + {'data': test_data, 'label_alcoholism':np.reshape(test_label[:,0],(-1,1)), + 
'label_stimulus':np.reshape(test_label[:,1],(-1,1)), 'label_id':np.reshape(test_label[:,2],(-1,1))}) + +sio.savemat( 'uci_eeg_validation_within_1.mat', + {'data': val_data, 'label_alcoholism':np.reshape(val_label[:,0],(-1,1)), + 'label_stimulus': np.reshape(val_label[:,1],(-1,1)), 'label_id':np.reshape(val_label[:,2],(-1,1))}) + + diff --git a/DataPreprocessing/eegtoimg.py b/DataPreprocessing/eegtoimg.py new file mode 100644 index 0000000..2779d49 --- /dev/null +++ b/DataPreprocessing/eegtoimg.py @@ -0,0 +1,281 @@ +from __future__ import print_function +import time + +import numpy as np +np.random.seed(1234) +from functools import reduce +import math as m + +import scipy.io + +from scipy.interpolate import griddata +from sklearn.preprocessing import scale +from utils import augment_EEG, cart2sph, pol2cart +import scipy.io as sio + +import matplotlib.pyplot as plt + +from sklearn import preprocessing +#import mne +#from plotly import tools +#from plotly.graph_objs import Layout, YAxis, Scatter, Annotation, Annotations, Data, Figure, Marker, Font +#import matplotlib.pyplot as plt +#import plotly.plotly as py +#import plotly.offline as offline +#import plotly.graph_objs as go + +#since it is my personal account for plotly, do not use it too often. QAQ +#tools.set_credentials_file(username='yy.fy.cn',api_key='s4aYwqqYayPPjcqFxk7p') + +def azim_proj(pos): + """ + Computes the Azimuthal Equidistant Projection of input point in 3D Cartesian Coordinates. + Imagine a plane being placed against (tangent to) a globe. If + a light source inside the globe projects the graticule onto + the plane the result would be a planar, or azimuthal, map + projection. + + :param pos: position in 3D Cartesian coordinates + :return: projected coordinates using Azimuthal Equidistant Projection + """ + [r, elev, az] = cart2sph(pos[0], pos[1], pos[2]) + return pol2cart(az, m.pi / 2 - elev) + + +def gen_images(locs, features, n_gridpoints, normalize=True, + augment=False, pca=False, std_mult=0.1, n_components=2, edgeless=False): + """ + Generates EEG images given electrode locations in 2D space and multiple feature values for each electrode + + :param locs: An array with shape [n_electrodes, 2] containing X, Y + coordinates for each electrode. + :param features: Feature matrix as [n_samples, n_features] + Features are as columns. + Features corresponding to each frequency band are concatenated. + (alpha1, alpha2, ..., beta1, beta2,...) + :param n_gridpoints: Number of pixels in the output images + :param normalize: Flag for whether to normalize each band over all samples + :param augment: Flag for generating augmented images + :param pca: Flag for PCA based data augmentation + :param std_mult Multiplier for std of added noise + :param n_components: Number of components in PCA to retain for augmentation + :param edgeless: If True generates edgeless images by adding artificial channels + at four corners of the image with value = 0 (default=False). + :return: Tensor of size [samples, colors, W, H] containing generated + images. 
+ """ + feat_array_temp = [] + nElectrodes = locs.shape[0] # Number of electrodes + # Test whether the feature vector length is divisible by number of electrodes + assert features.shape[1] % nElectrodes == 0 + n_colors = int(features.shape[1] / nElectrodes) + for c in range(n_colors): + feat_array_temp.append(features[:, c * nElectrodes : nElectrodes * (c+1)]) + if augment: + if pca: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=True, n_components=n_components) + else: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=False, n_components=n_components) + nSamples = features.shape[0] + # Interpolate the values + grid_x, grid_y = np.mgrid[ + min(locs[:, 0]):max(locs[:, 0]):n_gridpoints*1j, + min(locs[:, 1]):max(locs[:, 1]):n_gridpoints*1j + ] + temp_interp = [] + for c in range(n_colors): + temp_interp.append(np.zeros([nSamples, n_gridpoints, n_gridpoints])) + # Generate edgeless images + if edgeless: + min_x, min_y = np.min(locs, axis=0) + max_x, max_y = np.max(locs, axis=0) + locs = np.append(locs, np.array([[min_x, min_y], [min_x, max_y],[max_x, min_y],[max_x, max_y]]),axis=0) + for c in range(n_colors): + feat_array_temp[c] = np.append(feat_array_temp[c], np.zeros((nSamples, 4)), axis=1) + # Interpolating + for i in range(nSamples): + for c in range(n_colors): + temp_interp[c][i, :, :] = griddata(locs, feat_array_temp[c][i, :], (grid_x, grid_y), + method='cubic', fill_value=np.nan) + print('Interpolating {0}/{1}\r'.format(i+1, nSamples), end='\r') + + # Normalizing + for c in range(n_colors): + if normalize: + temp_interp[c][~np.isnan(temp_interp[c])] = \ + scale(temp_interp[c][~np.isnan(temp_interp[c])]) + temp_interp[c] = np.nan_to_num(temp_interp[c]) #Replace nan with zero and inf with large finite numbers. + return np.swapaxes(np.asarray(temp_interp), 0, 1) # swap axes to have [samples, colors, W, H] + +def get_fft(snippet): + Fs = 256.0; # sampling rate + #Ts = len(snippet)/Fs/Fs; # sampling interval + snippet_time = len(snippet)/Fs + Ts = 1.0/Fs; # sampling interval + t = np.arange(0,snippet_time,Ts) # time vector + + # ff = 5; # frequency of the signal + # y = np.sin(2*np.pi*ff*t) + y = snippet +# print('Ts: ',Ts) +# print(t) +# print(y.shape) + n = len(y) # length of the signal + k = np.arange(n) + T = n/Fs + frq = k/T # two sides frequency range + frq = frq[range(int(n/2))] # one side frequency range + + Y = np.fft.fft(y)/n # fft computing and normalization + Y = Y[range(int(n/2))] + #Added in: (To remove bias.) 
+ #Y[0] = 0 + return frq,abs(Y) + +def theta_alpha_beta_averages(f,Y): + theta_range = (4,8) + alpha_range = (8,13) + beta_range = (13,30) + theta = Y[(f>theta_range[0]) & (f<=theta_range[1])].mean() + alpha = Y[(f>alpha_range[0]) & (f<=alpha_range[1])].mean() + beta = Y[(f>beta_range[0]) & (f<=beta_range[1])].mean() + return theta, alpha, beta + + +def make_frames(df,frame_duration): + ''' + in: dataframe or array with all channels, frame duration in seconds + out: array of theta, alpha, beta averages for each probe for each time step + shape: (n-frames,m-probes,k-brainwave bands) + ''' + Fs = 256.0 + frame_length = Fs*frame_duration + + frames = [] + print('df shape',np.shape(df)) + + for i in range(0, np.shape(df)[0]): + frame = [] + + for channel in range(0, np.shape(df)[1]): + snippet = df[i][channel] + #print(i, channel) + f,Y = get_fft(snippet) + #print (Y.shape) + theta, alpha, beta = theta_alpha_beta_averages(f,Y) + #print (theta, alpha, beta) + frame.append([theta, alpha, beta]) + + frames.append(frame) + return np.array(frames) + +''' +def visual_one_eeg (images, img_path): + + #images = np.reshape(images, [self.input_height, self.input_width]) + n_channels = np.shape(images)[0] + step = 1. / n_channels + data = images + eeg_time = np.linspace(1,n_channels,n_channels) + kwargs = dict(domain=[1 - step, 1], showticklabels=False, zeroline=False, showgrid=False) + + # create objects for layout and traces + layout = Layout(yaxis=YAxis(kwargs), showlegend=False) + traces = [Scatter(x=eeg_time, y=data.T[:, 0])] + + # loop over the channels + for ii in range(1, n_channels): + kwargs.update(domain=[1 - (ii + 1) * step, 1 - ii * step]) + layout.update({'yaxis%d' % (ii + 1): YAxis(kwargs), 'showlegend': False}) + traces.append(Scatter(x=eeg_time, y=data.T[:, ii], yaxis='y%d' % (ii + 1))) + + # add channel names using Annotations + + # set the size of the figure and plot it + layout.update(autosize=False, width=1000, height=600) + fig = Figure(data=Data(traces), layout=layout) + #py.image.save_as(fig, filename='') + py.image.save_as(fig, filename=img_path) +''' + +if __name__ == '__main__': + from utils import reformatInput + + # Load electrode locations + print('Loading data...') + locs = scipy.io.loadmat('Neuroscan_locs_orig.mat') + locs_3d = locs['A'] + locs_2d = [] + # Convert to 2D + for e in locs_3d: + locs_2d.append(azim_proj(e)) + + + + for s in ('train','test','validation'): + mat = sio.loadmat('uci_eeg_'+ s +'_within.mat') + + data = mat['data'] + label_alcoholic = mat['label_alcoholism'] + label_stimulus = mat['label_stimulus'] + label_id = mat['label_id'] + + tras_X = np.transpose(data, (0, 2, 1)) + print (np.shape(tras_X)) + #visual_one_eeg (tras_X[0], "One-eeg-plot.png") + + X = make_frames (tras_X, 1) + #print (np.shape(X)) + X_1 = X.reshape (np.shape(X)[0], np.shape(X)[1] * np.shape(X)[2]) + + + print('Generating images...') + + images = gen_images(np.array(locs_2d), + X_1, + 32, normalize=False) + + print(images.shape) + + images = np.transpose (images, (0, 3 ,2, 1)) + + ########################### add normazation ################################# + #trs_data = images.reshape (np.shape(images)[0], np.shape(images)[1] * np.shape(images)[2] * np.shape(images)[3]) + + #max_abs_scaler = preprocessing.MaxAbsScaler() + + #trs_data = max_abs_scaler.fit_transform (trs_data) + + #real_data = trs_data.reshape (np.shape(images)[0], np.shape(images)[1], np.shape(images)[2], np.shape(images)[3]) + + #images = real_data + + 
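+    # Note: if the MaxAbs normalisation above is re-enabled, a separate scaler is fitted for each of
+    # the train/test/validation splits; reusing the scaler fitted on the training split would keep
+    # all three splits on a consistent scale.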
########################################################################################### + + sio.savemat( 'uci_eeg_images_'+ s +'_within.mat', + {'data': images, 'label_alcoholism': label_alcoholic, 'label_stimulus': label_stimulus, 'label_id': label_id}) + + print (np.max(images)) + print (np.min(images)) + + img_in = images[0,:,:,:] + img_in -= np.min(img_in) + img_in /= np.max(img_in) + + plt.clf() + plt.subplot(1,1,1) + plt.imshow(img_in) + #plt.pause(50) + + ''' + print (np.shape(images)) + plt.clf() + plt.subplot(1,1,1) + plt.imshow(images[0]) + plt.pause(20) + ''' + print('\n') + + \ No newline at end of file diff --git a/DataPreprocessing/gen_joint_train_set.py b/DataPreprocessing/gen_joint_train_set.py new file mode 100644 index 0000000..f63702e --- /dev/null +++ b/DataPreprocessing/gen_joint_train_set.py @@ -0,0 +1,25 @@ +# -*- coding: utf-8 -*- +""" +Created on Wed May 20 01:17:56 2020 + +@author: konaw +""" + +import scipy.io as sio +import numpy as np + + +''' +Generate joint training set by combining the dummy EEG images with the training set +''' + +data_real = sio.loadmat('uci_eeg_images_train_within.mat') +data_dummy = sio.loadmat('eeg_dummy_images_w_label_step3_within.mat') + + +data = np.append(data_real['data'], data_dummy['data'],axis=0) +label_alc = np.append(data_real['label_alcoholism'], data_dummy['label_alcoholism'],axis=0) +label_stimulus = np.append(data_real['label_stimulus'], data_dummy['label_stimulus'],axis=0) + +sio.savemat('eeg_images_train_augmented_within.mat',{'data':data, 'label_alcoholism':label_alc, + 'label_stimulus':label_stimulus}) \ No newline at end of file diff --git a/DataPreprocessing/grand_avg.py b/DataPreprocessing/grand_avg.py new file mode 100644 index 0000000..74980f0 --- /dev/null +++ b/DataPreprocessing/grand_avg.py @@ -0,0 +1,283 @@ +from __future__ import print_function +import time + +import scipy.io as sio +import matplotlib.pyplot as plt +import numpy as np + +np.random.seed(1234) +from functools import reduce +import math as m + +from scipy.interpolate import griddata +from sklearn.preprocessing import scale +from utils import augment_EEG, cart2sph, pol2cart + +from sklearn import preprocessing + +''' +generate dummy identities using grand average +''' + + +''' +helper function for EEG2Img, 3D location projected to 2D plane +code from Yao +''' +def azim_proj(pos): + """ + Computes the Azimuthal Equidistant Projection of input point in 3D Cartesian Coordinates. + Imagine a plane being placed against (tangent to) a globe. If + a light source inside the globe projects the graticule onto + the plane the result would be a planar, or azimuthal, map + projection. + + :param pos: position in 3D Cartesian coordinates + :return: projected coordinates using Azimuthal Equidistant Projection + """ + [r, elev, az] = cart2sph(pos[0], pos[1], pos[2]) + return pol2cart(az, m.pi / 2 - elev) + +''' +helper function for EEG2Img, generate EEG images +code from Yao +''' +def gen_images(locs, features, n_gridpoints, normalize=True, + augment=False, pca=False, std_mult=0.1, n_components=2, edgeless=False): + """ + Generates EEG images given electrode locations in 2D space and multiple feature values for each electrode + + :param locs: An array with shape [n_electrodes, 2] containing X, Y + coordinates for each electrode. + :param features: Feature matrix as [n_samples, n_features] + Features are as columns. + Features corresponding to each frequency band are concatenated. + (alpha1, alpha2, ..., beta1, beta2,...) 
+ :param n_gridpoints: Number of pixels in the output images + :param normalize: Flag for whether to normalize each band over all samples + :param augment: Flag for generating augmented images + :param pca: Flag for PCA based data augmentation + :param std_mult Multiplier for std of added noise + :param n_components: Number of components in PCA to retain for augmentation + :param edgeless: If True generates edgeless images by adding artificial channels + at four corners of the image with value = 0 (default=False). + :return: Tensor of size [samples, colors, W, H] containing generated + images. + """ + feat_array_temp = [] + nElectrodes = locs.shape[0] # Number of electrodes + # Test whether the feature vector length is divisible by number of electrodes + assert features.shape[1] % nElectrodes == 0 + n_colors = int(features.shape[1] / nElectrodes) + for c in range(n_colors): + feat_array_temp.append(features[:, c * nElectrodes : nElectrodes * (c+1)]) + if augment: + if pca: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=True, n_components=n_components) + else: + for c in range(n_colors): + feat_array_temp[c] = augment_EEG(feat_array_temp[c], std_mult, pca=False, n_components=n_components) + nSamples = features.shape[0] + # Interpolate the values + grid_x, grid_y = np.mgrid[ + min(locs[:, 0]):max(locs[:, 0]):n_gridpoints*1j, + min(locs[:, 1]):max(locs[:, 1]):n_gridpoints*1j + ] + temp_interp = [] + for c in range(n_colors): + temp_interp.append(np.zeros([nSamples, n_gridpoints, n_gridpoints])) + # Generate edgeless images + if edgeless: + min_x, min_y = np.min(locs, axis=0) + max_x, max_y = np.max(locs, axis=0) + locs = np.append(locs, np.array([[min_x, min_y], [min_x, max_y],[max_x, min_y],[max_x, max_y]]),axis=0) + for c in range(n_colors): + feat_array_temp[c] = np.append(feat_array_temp[c], np.zeros((nSamples, 4)), axis=1) + # Interpolating + for i in range(nSamples): + for c in range(n_colors): + temp_interp[c][i, :, :] = griddata(locs, feat_array_temp[c][i, :], (grid_x, grid_y), + method='cubic', fill_value=np.nan) + print('Interpolating {0}/{1}\r'.format(i+1, nSamples), end='\r') + + # Normalizing + for c in range(n_colors): + if normalize: + temp_interp[c][~np.isnan(temp_interp[c])] = \ + scale(temp_interp[c][~np.isnan(temp_interp[c])]) + temp_interp[c] = np.nan_to_num(temp_interp[c]) #Replace nan with zero and inf with large finite numbers. 
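+    # griddata() with method='cubic' returns fill_value (NaN) for pixels outside the convex hull of
+    # the electrode locations; nan_to_num() above maps those pixels to 0, leaving the image corners blank.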
+    return np.swapaxes(np.asarray(temp_interp), 0, 1)     # swap axes to have [samples, colors, W, H]
+
+'''
+helper function for EEG2Img, obtain features for the 3 frequency bands by averaging the spectrum amplitude
+code from Yao
+'''
+def theta_alpha_beta_averages(f,Y):
+    theta_range = (4,8)
+    alpha_range = (8,13)
+    beta_range = (13,30)
+    theta = Y[(f>theta_range[0]) & (f<=theta_range[1])].mean()
+    alpha = Y[(f>alpha_range[0]) & (f<=alpha_range[1])].mean()
+    beta = Y[(f>beta_range[0]) & (f<=beta_range[1])].mean()
+    return theta, alpha, beta
+
+
+def make_frames(df,frame_duration):
+    '''
+    in: dataframe or array with all channels, frame duration in seconds
+    out: array of theta, alpha, beta averages for each probe for each time step
+        shape: (n-frames,m-probes,k-brainwave bands)
+    '''
+    Fs = 256.0
+    frame_length = Fs*frame_duration
+
+    frames = []
+    #print('df shape',np.shape(df))
+    for i in range(0, np.shape(df)[0]):
+        frame = []
+
+        for channel in range(0, np.shape(df)[1]):
+            snippet = df[i][channel]
+            #print(i, channel)
+            #f,Y = get_fft(snippet)
+            #print (len(snippet))
+            #the spectra from t2f.py have 1 Hz bins (Fs = 256 Hz, 1 s snippets),
+            #so the bin index is used directly as the frequency axis here
+            theta, alpha, beta = theta_alpha_beta_averages(np.array(range(len(snippet))), snippet)
+            #print (theta, alpha, beta)
+            frame.append([theta, alpha, beta])
+
+        frames.append(frame)
+        if i % 100 == 0:
+            print('===== %d end ====='%(i))
+    return np.array(frames)
+
+
+
+if __name__ == '__main__':
+
+    #load EEG spectrums in training set
+    data = sio.loadmat('eeg_spectrum_train_within.mat')
+
+
+    label_disease_range = 2
+    label_stimulus_range = 5
+    num_ch, band_width = data['data'][0].shape
+    num_subjects = 120
+
+
+    ####### Group the EEG spectrums #######
+    #generate 10 groups: 10 combinations of alcoholism and stimulus attributes
+    groups = [{} for i in range(label_disease_range * label_stimulus_range)] #map: subject id -> data
+
+    #allocate each trial of EEG signals to the corresponding group
+    for i in range(len(data['data'])):
+        index_label = label_stimulus_range * data['label_alcoholism'][i,0] + data['label_stimulus'][i,0]
+        index_id = data['label_id'][i,0]
+
+        #add EEG data to the corresponding group and subject
+        if index_id in groups[index_label]:
+            groups[index_label][index_id].append(data['data'][i])
+        else:
+            groups[index_label][index_id] = [data['data'][i]]
+
+    ####### visualise #######
+    #g0 = groups[0]
+    #s = [g0[121][3][17], g0[70][3][17], g0[72][3][17]]
+    #cnt = 0;
+    #for c in s:
+    #    plt.figure()
+    #    plt.xticks([])
+    #    plt.yticks([])
+    #    plt.plot(c)
+    #    plt.savefig('EEG_spectum_candi_%d.png'%cnt, bbox_inches='tight')
+    #    cnt += 1
+    #avg = np.mean(s,axis=0)
+    #plt.figure()
+    #plt.xticks([])
+    #plt.yticks([])
+    #plt.plot(avg)
+    #plt.savefig('EEG_spectum_candi_avg.png', bbox_inches='tight')
+    #cnt += 1
+
+    ####### generate labelled EEG spectrums with dummy identities #######
+    dummy = []
+    dummy_label_disease = []
+    dummy_label_stimulus = []
+
+    #loop through the 10 groups
+    for i in range(len(groups)):
+        #sort each group w.r.t. the number of EEG signals for each subject
+        candidate_list = sorted(list(groups[i].values()), key=len)
+        step = 3 #a sliding window of 3
+        #average trials of EEG signals of the subjects within the window
+        for j in range(len(candidate_list) - step + 1):
+            for k in range(len(candidate_list[j])):
+                #average across subjects
+                new = np.mean([item[k] for item in candidate_list[j:j+step]], axis=0)
+                dummy.append(new)
+                #label the dummy identity
+                dummy_label_disease.append(i//label_stimulus_range)
+                dummy_label_stimulus.append(i%label_stimulus_range)
+
+    dummy = np.array(dummy)
+
+
+    ####### generate EEG images
with dummy identities ####### + # Load electrode locations + print('Loading data...') + locs = sio.loadmat('Neuroscan_locs_orig.mat') + locs_3d = locs['A'] + locs_2d = [] + # Convert to 2D + for e in locs_3d: + locs_2d.append(azim_proj(e)) + + X = make_frames(dummy, 1) + #print (np.shape(X)) + X_1 = X.reshape(np.shape(X)[0], np.shape(X)[1] * np.shape(X)[2]) + + + print('Generating images...') + + images = gen_images(np.array(locs_2d), + X_1, + 32, normalize=False) + + images = np.transpose (images, (0, 3 ,2, 1)) + + # save the dummy EEG images + sio.savemat( 'eeg_dummy_images_w_label_step3_within.mat', + {'data': images,'label_alcoholism':np.reshape(dummy_label_disease,(-1,1)), + 'label_stimulus':np.reshape(dummy_label_stimulus,(-1,1))}) + + + + + # print (np.max(images)) + # print (np.min(images)) + + # img_in = images[0,:,:,:] + # img_in -= np.min(img_in) + # img_in /= np.max(img_in) + + # plt.clf() + # plt.subplot(1,1,1) + # plt.imshow(img_in) + # #plt.pause(50) + + # print (np.shape(images)) + # plt.clf() + # plt.subplot(1,1,1) + # plt.imshow(images[0]) + # plt.pause(20) + + + #print('\n') + + #print(spectrums.shape) + #sample0 = np.array(data['X'][:88]).mean(axis=0) + + #sample_n = data['X'][6001] + + #plt.plot(sample_n) + diff --git a/DataPreprocessing/images/EEG_image_dummy_sample.png b/DataPreprocessing/images/EEG_image_dummy_sample.png new file mode 100644 index 0000000..422d69c Binary files /dev/null and b/DataPreprocessing/images/EEG_image_dummy_sample.png differ diff --git a/DataPreprocessing/images/EEG_spectum.png b/DataPreprocessing/images/EEG_spectum.png new file mode 100644 index 0000000..23ba128 Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum.png differ diff --git a/DataPreprocessing/images/EEG_spectum_0.png b/DataPreprocessing/images/EEG_spectum_0.png new file mode 100644 index 0000000..b0dd8c9 Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum_0.png differ diff --git a/DataPreprocessing/images/EEG_spectum_1.png b/DataPreprocessing/images/EEG_spectum_1.png new file mode 100644 index 0000000..ee3798f Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum_1.png differ diff --git a/DataPreprocessing/images/EEG_spectum_2.png b/DataPreprocessing/images/EEG_spectum_2.png new file mode 100644 index 0000000..7bc63e7 Binary files /dev/null and b/DataPreprocessing/images/EEG_spectum_2.png differ diff --git a/DataPreprocessing/images/EEG_time_series.png b/DataPreprocessing/images/EEG_time_series.png new file mode 100644 index 0000000..6af6776 Binary files /dev/null and b/DataPreprocessing/images/EEG_time_series.png differ diff --git a/DataPreprocessing/images/loc3D.png b/DataPreprocessing/images/loc3D.png new file mode 100644 index 0000000..89d1e35 Binary files /dev/null and b/DataPreprocessing/images/loc3D.png differ diff --git a/DataPreprocessing/t2f.py b/DataPreprocessing/t2f.py new file mode 100644 index 0000000..8f317a0 --- /dev/null +++ b/DataPreprocessing/t2f.py @@ -0,0 +1,61 @@ +import scipy.io as sio +import matplotlib.pyplot as plt +import numpy as np + +''' +time-frequency convertion using FFT +''' +def get_fft(snippet): + Fs = 256.0; # sampling rate + #Ts = len(snippet)/Fs/Fs; # sampling interval + #snippet_time = len(snippet)/Fs + #Ts = 1.0/Fs; # sampling interval + #t = np.arange(0,snippet_time,Ts) # time vector + + # ff = 5; # frequency of the signal + # y = np.sin(2*np.pi*ff*t) + y = snippet +# print('Ts: ',Ts) +# print(t) +# print(y.shape) + n = len(y) # length of the signal + k = np.arange(n) + T = n/Fs + frq = k/T # two 
sides frequency range + frq = frq[range(int(n/2))] # one side frequency range + + Y = np.fft.fft(y)/n # fft computing and normalization + Y = Y[range(int(n/2))] + #Added in: (To remove bias.) + #Y[0] = 0 + return frq,abs(Y) + + + +if __name__ == '__main__': + + data = sio.loadmat('uci_eeg_train_within.mat') + + X = np.transpose(data['data'], (0, 2, 1)) + + num_exp, num_ch, rate = X.shape + + spectrums = [] + + # time-frequency convertion on all the EEG signals + for i in range(num_exp): + spectrum = [] + for ch in range(num_ch): + time_domain = X[i][ch] + f , magnitude = get_fft(time_domain) + spectrum.append(magnitude) + spectrums.append(spectrum) + + dummy = np.array([]) + + sio.savemat('eeg_spectrum_train_within.mat', {'data': spectrums, 'label_alcoholism':data['label_alcoholism'], + 'label_stimulus':data['label_stimulus'], 'label_id':data['label_id']}) + + + + \ No newline at end of file diff --git a/DataPreprocessing/utils.py b/DataPreprocessing/utils.py new file mode 100644 index 0000000..43057a7 --- /dev/null +++ b/DataPreprocessing/utils.py @@ -0,0 +1,242 @@ + +import numpy as np +import matplotlib.patches as mpatches +import matplotlib.pyplot as plt +from mpl_toolkits.axes_grid1 import ImageGrid +import scipy + + +import math as m +import numpy as np +np.random.seed(123) +import scipy.io +from sklearn.decomposition import PCA + +def cart2sph(x, y, z): + """ + Transform Cartesian coordinates to spherical + :param x: X coordinate + :param y: Y coordinate + :param z: Z coordinate + :return: radius, elevation, azimuth + """ + x2_y2 = x**2 + y**2 + r = m.sqrt(x2_y2 + z**2) # r + elev = m.atan2(z, m.sqrt(x2_y2)) # Elevation + az = m.atan2(y, x) # Azimuth + return r, elev, az + + +def pol2cart(theta, rho): + """ + Transform polar coordinates to Cartesian + :param theta: angle value + :param rho: radius value + :return: X, Y + """ + return rho * m.cos(theta), rho * m.sin(theta) + + +def augment_EEG(data, stdMult, pca=False, n_components=2): + """ + Augment data by adding normal noise to each feature. + + :param data: EEG feature data as a matrix (n_samples x n_features) + :param stdMult: Multiplier for std of added noise + :param pca: if True will perform PCA on data and add noise proportional to PCA components. + :param n_components: Number of components to consider when using PCA. + :return: Augmented data as a matrix (n_samples x n_features) + """ + augData = np.zeros(data.shape) + if pca: + pca = PCA(n_components=n_components) + pca.fit(data) + components = pca.components_ + variances = pca.explained_variance_ratio_ + coeffs = np.random.normal(scale=stdMult, size=pca.n_components) * variances + for s, sample in enumerate(data): + augData[s, :] = sample + (components * coeffs.reshape((n_components, -1))).sum(axis=0) + else: + # Add Gaussian noise with std determined by weighted std of each feature + for f, feat in enumerate(data.transpose()): + augData[:, f] = feat + np.random.normal(scale=stdMult*np.std(feat), size=feat.size) + return augData + + +def augment_EEG_image(image, std_mult, pca=False, n_components=2): + """ + Augment data by adding normal noise to each feature. + + :param image: EEG feature data as a a colored image [n_samples, n_colors, W, H] + :param std_mult: Multiplier for std of added noise + :param pca: if True will perform PCA on data and add noise proportional to PCA components. + :param n_components: Number of components to consider when using PCA. 
+ :return: Augmented data as a matrix (n_samples x n_features) + """ + augData = np.zeros((data.shape[0], data.shape[1], data.shape[2] * data.shape[3])) + for c in xrange(image.shape[1]): + reshData = np.reshape(data['featMat'][:, c, :, :], (data['featMat'].shape[0], -1)) + if pca: + augData[:, c, :] = augment_EEG(reshData, std_mult, pca=True, n_components=n_components) + else: + augData[:, c, :] = augment_EEG(reshData, std_mult, pca=False) + return np.reshape(augData, data['featMat'].shape) + + +def load_data(data_file): + """ + Loads the data from MAT file. MAT file should contain two + variables. 'featMat' which contains the feature matrix in the + shape of [samples, features] and 'labels' which contains the output + labels as a vector. Label numbers are assumed to start from 1. + + Parameters + ---------- + data_file: str + + Returns + ------- + data: array_like + """ + print("Loading data from %s" % (data_file)) + + dataMat = scipy.io.loadmat(data_file, mat_dtype=True) + + print("Data loading complete. Shape is %r" % (dataMat['featMat'].shape,)) + return dataMat['features'][:, :-1], dataMat['features'][:, -1] - 1 # Sequential indices + + +def reformatInput(data, labels, indices): + """ + Receives the the indices for train and test datasets. + Outputs the train, validation, and test data and label datasets. + """ + + trainIndices = indices[0][len(indices[1]):] + validIndices = indices[0][:len(indices[1])] + testIndices = indices[1] + # Shuffling training data + # shuffledIndices = np.random.permutation(len(trainIndices)) + # trainIndices = trainIndices[shuffledIndices] + if data.ndim == 4: + return [(data[trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)), + (data[validIndices], np.squeeze(labels[validIndices]).astype(np.int32)), + (data[testIndices], np.squeeze(labels[testIndices]).astype(np.int32))] + elif data.ndim == 5: + return [(data[:, trainIndices], np.squeeze(labels[trainIndices]).astype(np.int32)), + (data[:, validIndices], np.squeeze(labels[validIndices]).astype(np.int32)), + (data[:, testIndices], np.squeeze(labels[testIndices]).astype(np.int32))] + + +def get_params(sess): + variables = tf.trainable_variables() + params = {} + for i in range(len(variables)): + name = variables[i].name + params[name] = sess.run(variables[i]) + return params + + +def to_one_hot(x, N = -1): + x = x.astype('int32') + if np.min(x) !=0 and N == -1: + x = x - np.min(x) + x = x.reshape(-1) + if N == -1: + N = np.max(x) + 1 + label = np.zeros((x.shape[0],N)) + idx = range(x.shape[0]) + label[idx,x] = 1 + return label.astype('float32') + +def image_mean(x): + x_mean = x.mean((0, 1, 2)) + return x_mean + +def shape(tensor): + """ + Get the shape of a tensor. This is a compile-time operation, + meaning that it runs when building the graph, not running it. + This means that it cannot know the shape of any placeholders + or variables with shape determined by feed_dict. + """ + return tuple([d.value for d in tensor.get_shape()]) + + + +def tfvar2str(tf_vars): + names = [] + for i in range(len(tf_vars)): + names.append(tf_vars[i].name) + return names + + +def shuffle_aligned_list(data): + """Shuffle arrays in a list by shuffling each array identically.""" + num = data[0].shape[0] + p = np.random.permutation(num) + return [d[p] for d in data] + + +def batch_generator(data, batch_size, shuffle=True): + """Generate batches of data. + + Given a list of array-like objects, generate batches of a given + size by yielding a list of array-like objects corresponding to the + same slice of each input. 
+ """ + if shuffle: + data = shuffle_aligned_list(data) + + batch_count = 0 + while True: + if batch_count * batch_size + batch_size >= len(data[0]): + batch_count = 0 + + if shuffle: + data = shuffle_aligned_list(data) + + start = batch_count * batch_size + end = start + batch_size + batch_count += 1 + yield [d[start:end] for d in data] + + + +def dic2list(sources, targets): + names_dic = {} + for key in sources: + names_dic[sources[key]] = key + for key in targets: + names_dic[targets[key]] = key + names = [] + for i in range(len(names_dic)): + names.append(names_dic[i]) + return names + +def softmax(x): + """Compute softmax values for each sets of scores in x.""" + e_x = np.exp(x - np.max(x)) + return e_x / e_x.sum(axis=0) + +def norm_matrix(X, l): + Y = np.zeros(X.shape); + for i in range(X.shape[0]): + Y[i] = X[i]/np.linalg.norm(X[i],l) + return Y + + +def description(sources, targets): + source_names = sources.keys() + target_names = targets.keys() + N = min(len(source_names), 4) + description = source_names[0] + for i in range(1,N): + description = description + '_' + source_names[i] + description = description + '-' + target_names[0] + return description + + + +def sigmoid(x): + return 1 / (1 + np.exp(-x)) \ No newline at end of file diff --git a/DataPreprocessing/visualizer.py b/DataPreprocessing/visualizer.py new file mode 100644 index 0000000..ada6288 --- /dev/null +++ b/DataPreprocessing/visualizer.py @@ -0,0 +1,82 @@ +import scipy.io as sio +import numpy as np +import matplotlib.pyplot as plt +from mpl_toolkits.mplot3d import Axes3D +from matplotlib.axes import Axes +import os + +''' +Generate some sample images or plots for the report +''' + +def time_freq_signals(root_path): + data_time = sio.loadmat('uci_eeg_train_within.mat') + data_spectrum = sio.loadmat('eeg_spectrum_train_within.mat') + + eeg_time_sample = data_time['data'][0][:,0] + eeg_spectrum_sample = data_spectrum['data'][0][0] + + plt.figure() + plt.plot(np.linspace(0,1,256), eeg_time_sample) + plt.xlabel('Time (s)', fontsize=16) + plt.ylabel('Amplitude (mV)', fontsize=16) + plt.tick_params(axis='both', which='major', labelsize=12) + plt.savefig('%s\EEG_time_series.png'%root_path, bbox_inches='tight') + + plt.figure() + plt.plot(eeg_spectrum_sample) + plt.xlabel('Frequency (Hz)', fontsize=16) + plt.ylabel('Normalised Amplitude (mV)', fontsize=14) + plt.tick_params(axis='both', which='major', labelsize=12) + plt.savefig('%s\EEG_spectum.png'%root_path, bbox_inches='tight') + + +def spectrum_simple(root_path, n): + data_spectrum = sio.loadmat('eeg_spectrum_train_within.mat') + eeg_spectrum_sample = data_spectrum['data'][0] + + for i in range(n): + ch = eeg_spectrum_sample[i] + fig = plt.figure() + plt.xticks([]) + plt.yticks([]) + plt.plot(ch) + plt.savefig('%s\EEG_spectum_%d.png'%(root_path, i), bbox_inches='tight') + + +def dummy_EEG_img(root_path, i): + data = sio.loadmat('eeg_dummy_images_w_label_step3_within.mat') + + images = data['data'] + + img_in = images[i,:,:,:] + img_in -= np.min(img_in) + img_in /= np.max(img_in) + + plt.clf() + plt.subplot(1,1,1) + plt.axis('off') + plt.imshow(img_in) + plt.savefig('%s\EEG_image_dummy_sample.png'%root_path, bbox_inches='tight') + + +def locations_3D(root_path): + locs = sio.loadmat('Neuroscan_locs_orig.mat') + locs_3d = locs['A'] + fig = plt.figure() + ax = fig.add_subplot(111, projection='3d') + ax.set_xticklabels([]) + ax.set_yticklabels([]) + ax.set_zticklabels([]) + ax.scatter(locs_3d[:,0],locs_3d[:,1],locs_3d[:,2]) + plt.savefig('%s\loc3D.png'%root_path, 
bbox_inches='tight')
+
+if __name__ == '__main__':
+    img_folder = 'images'
+    if not os.path.isdir(img_folder):
+        os.makedirs(img_folder)
+    time_freq_signals(img_folder)
+    spectrum_simple(img_folder, 3)
+    dummy_EEG_img(img_folder, 11)
+    locations_3D(img_folder)
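Note on usage: a minimal sketch of the assumed run order for the within-subject pipeline, inferred from the filenames each script reads and writes; the runner below is illustrative only and is not part of this change. One caveat: data_split_within.py currently writes uci_eeg_train_within_8.mat / uci_eeg_test_within_1.mat / uci_eeg_validation_within_1.mat, while t2f.py and eegtoimg.py load uci_eeg_*_within.mat, so the split files may need renaming in between.

    import subprocess, sys

    for script in (
        'data_split_within.py',    # 1. split the raw UCI EEG trials into train/val/test within each subject
        't2f.py',                  # 2. FFT magnitude spectra of the training split (input to grand averaging)
        'eegtoimg.py',             # 3. convert the time-domain splits into 32x32 topographic EEG images
        'grand_avg.py',            # 4. grand-average spectra across subjects into dummy identities, then image them
        'gen_joint_train_set.py',  # 5. merge the dummy images with the real training images
    ):
        subprocess.run([sys.executable, script], check=True)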