197 changes: 101 additions & 96 deletions gumpy/features.py
@@ -12,7 +12,8 @@
import pywt


def sequential_feature_selector(features, labels, classifier, k_features, kfold, selection_type, plot=True, **kwargs):
def sequential_feature_selector(features, labels, classifier, k_features,
kfold, selection_type, plot=True, **kwargs):
"""Sequential feature selection to reduce the number of features.

The function reduces a d-dimensional feature space to a k-dimensional
@@ -28,14 +29,15 @@ def sequential_feature_selector(features, labels, classifier, k_features, kfold,
features: The original d-dimensional feature space
labels: corresponding labels
classifier (str or object): The classifier which should be used for
feature selection. This can be either a string (name of a classifier
known to gumpy) or an instance of a classifier which adheres
to the sklearn classifier interface.
feature selection. This can be either a string (name of a
classifier known to gumpy) or an instance of a classifier which
adheres to the sklearn classifier interface.
k_features (int): Number of features to select
kfold (int): k-fold cross validation
selection_type (str): One of ``SFS`` (Sequential Forward Selection),
``SBS`` (Sequential Backward Selection), ``SFFS`` (Sequential Forward
Floating Selection), ``SBFS`` (Sequential Backward Floating Selection)
``SBS`` (Sequential Backward Selection), ``SFFS`` (Sequential
Forward Floating Selection), ``SBFS`` (Sequential Backward Floating
Selection)
plot (bool): Plot the results of the dimensionality reduction
**kwargs: Additional keyword arguments that will be passed to the
Classifier instantiation
@@ -52,14 +54,16 @@ def sequential_feature_selector(features, labels, classifier, k_features, kfold,
# retrieve the appropriate classifier
if isinstance(classifier, str):
if not (classifier in available_classifiers):
raise ClassifierError("Unknown classifier {c}".format(c=classifier.__repr__()))
raise ClassifierError(
"Unknown classifier {c}".format(c=classifier.__repr__()))

kwopts = kwargs.pop('opts', dict())
# opts = dict()

# retrieve the options that we need to forward to the classifier
# TODO: should we forward all arguments to sequential_feature_selector ?
opts = available_classifiers[classifier].static_opts('sequential_feature_selector', features=features)
opts = available_classifiers[classifier].static_opts(
'sequential_feature_selector', features=features)
opts.update(kwopts)

# XXX: now merged into the static_opts invocation. TODO: test
@@ -82,31 +86,29 @@ def sequential_feature_selector(features, labels, classifier, k_features, kfold,
# if we received a classifier object we'll just use this one
clf = classifier.clf


if selection_type == 'SFS':
algorithm = "Sequential Forward Selection (SFS)"
sfs = SFS(clf, k_features, forward=True, floating=False,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

elif selection_type == 'SBS':
algorithm = "Sequential Backward Selection (SBS)"
sfs = SFS(clf, k_features, forward=False, floating=False,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

elif selection_type == 'SFFS':
algorithm = "Sequential Forward Floating Selection (SFFS)"
sfs = SFS(clf, k_features, forward=True, floating=True,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

elif selection_type == 'SBFS':
algorithm = "Sequential Backward Floating Selection (SFFS)"
sfs = SFS(clf, k_features, forward=True, floating=True,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

else:
raise Exception("Unknown selection type '{}'".format(selection_type))


pipe = make_pipeline(StandardScaler(), sfs)
pipe.fit(features, labels)
subsets = sfs.subsets_
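
For reference, a minimal sketch of the pipeline this function assembles, using mlxtend's SequentialFeatureSelector directly; the synthetic data and the LinearSVC estimator are illustrative assumptions, not part of gumpy:

import numpy as np
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Synthetic stand-ins for features/labels (illustrative only)
X = np.random.randn(100, 10)
y = np.random.randint(0, 2, 100)

# forward=True, floating=False corresponds to selection_type='SFS'
sfs = SFS(LinearSVC(), k_features=3, forward=True, floating=False,
          scoring='accuracy', cv=5, n_jobs=-1)
pipe = make_pipeline(StandardScaler(), sfs)
pipe.fit(X, y)
print(sfs.k_feature_idx_)  # indices of the selected features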
@@ -137,38 +139,38 @@ def CSP(tasks):

"""
if len(tasks) < 2:
print("Must have at least 2 tasks for filtering.")
return (None,) * len(tasks)
else:
filters = ()
# CSP algorithm
# For each task x, find the mean covariance matrices Rx and not_Rx, which are used to compute the spatial filter SFx
iterator = range(0,len(tasks))
for x in iterator:
# Find Rx
Rx = covarianceMatrix(tasks[x][0])
for t in range(1,len(tasks[x])):
Rx += covarianceMatrix(tasks[x][t])
Rx = Rx / len(tasks[x])

# Find not_Rx
count = 0
not_Rx = Rx * 0
for not_x in [element for element in iterator if element != x]:
for t in range(0,len(tasks[not_x])):
not_Rx += covarianceMatrix(tasks[not_x][t])
count += 1
not_Rx = not_Rx / count

# Find the spatial filter SFx
SFx = spatialFilter(Rx,not_Rx)
filters += (SFx,)

# Special case: only two tasks, no need to compute any more mean variances
if len(tasks) == 2:
filters += (spatialFilter(not_Rx,Rx),)
break
return filters
print("Must have at least 2 tasks for filtering.")
return (None,) * len(tasks)
else:
filters = ()
# CSP algorithm
# For each task x, find the mean covariance matrices Rx and not_Rx, which are used to compute the spatial filter SFx
iterator = range(0, len(tasks))
for x in iterator:
# Find Rx
Rx = covarianceMatrix(tasks[x][0])
for t in range(1, len(tasks[x])):
Rx += covarianceMatrix(tasks[x][t])
Rx = Rx / len(tasks[x])

# Find not_Rx
count = 0
not_Rx = Rx * 0
for not_x in [element for element in iterator if element != x]:
for t in range(0, len(tasks[not_x])):
not_Rx += covarianceMatrix(tasks[not_x][t])
count += 1
not_Rx = not_Rx / count

# Find the spatial filter SFx
SFx = spatialFilter(Rx, not_Rx)
filters += (SFx,)

# Special case: only two tasks, no need to compute any more mean variances
if len(tasks) == 2:
filters += (spatialFilter(not_Rx, Rx),)
break
return filters
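
A short usage sketch, assuming each task is a sequence of trials shaped (channels, samples), which is the layout covarianceMatrix below expects from np.cov:

import numpy as np

# Two synthetic tasks, 20 trials each of 8 channels x 256 samples (illustrative only)
task_a = np.random.randn(20, 8, 256)
task_b = np.random.randn(20, 8, 256)

sf_a, sf_b = CSP((task_a, task_b))   # one spatial filter per task
projected = np.dot(sf_a, task_a[0])  # project a single trial
print(projected.shape)               # (8, 256)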


# covarianceMatrix takes a matrix A and returns its covariance matrix
@@ -181,13 +183,13 @@ def covarianceMatrix(A):
Returns:
The 2D covariance matrix of A
"""
#Ca = np.dot(A,np.transpose(A))/np.trace(np.dot(A,np.transpose(A)))
Ca = np.cov(A)
return Ca
# Ca = np.dot(A,np.transpose(A))/np.trace(np.dot(A,np.transpose(A)))
Ca = np.cov(A)
return Ca
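
Note that np.cov treats each row as one variable, so A must be passed as (channels, samples); a quick illustration:

import numpy as np

A = np.random.randn(4, 100)  # 4 channels, 100 time points
Ca = np.cov(A)               # rows are variables, columns are observations
print(Ca.shape)              # (4, 4)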


def spatialFilter(Ra,Rb):
"""This function extracts spatial filters
def spatialFilter(Ra, Rb):
"""This function extracts spatial filters

Args:
Ra, Rb: Covariance matrices Ra and Rb
@@ -196,33 +198,33 @@ def spatialFilter(Ra,Rb):
A 2D spatial filter matrix
"""

R = Ra + Rb
E,U = la.eig(R)
R = Ra + Rb
E, U = la.eig(R)

# CSP requires the eigenvalues E and eigenvectors U to be sorted in descending order
ord = np.argsort(E)
ord = ord[::-1] # argsort gives ascending order, flip to get descending
E = E[ord]
U = U[:,ord]
# CSP requires the eigenvalues E and eigenvectors U to be sorted in descending order
ord = np.argsort(E)
ord = ord[::-1] # argsort gives ascending order, flip to get descending
E = E[ord]
U = U[:, ord]

# Find the whitening transformation matrix
P = np.dot(np.sqrt(la.inv(np.diag(E))),np.transpose(U))
# Find the whitening transformation matrix
P = np.dot(np.sqrt(la.inv(np.diag(E))), np.transpose(U))

# The mean covariance matrices may now be transformed
Sa = np.dot(P,np.dot(Ra,np.transpose(P)))
Sb = np.dot(P,np.dot(Rb,np.transpose(P)))
# The mean covariance matrices may now be transformed
Sa = np.dot(P, np.dot(Ra, np.transpose(P)))
Sb = np.dot(P, np.dot(Rb, np.transpose(P)))

# Find and sort the generalized eigenvalues and eigenvectors
E1,U1 = la.eig(Sa,Sb)
ord1 = np.argsort(E1)
ord1 = ord1[::-1]
E1 = E1[ord1]
U1 = U1[:,ord1]
# Find and sort the generalized eigenvalues and eigenvectors
E1, U1 = la.eig(Sa, Sb)
ord1 = np.argsort(E1)
ord1 = ord1[::-1]
E1 = E1[ord1]
U1 = U1[:, ord1]

# The projection matrix (the spatial filter) may now be obtained
SFa = np.dot(np.transpose(U1),P)
#return SFa.astype(np.float32)
return SFa
# The projection matrix (the spatial filter) may now be obtained
SFa = np.dot(np.transpose(U1), P)
# return SFa.astype(np.float32)
return SFa
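
A quick numerical check of the whitening step above, as a sketch with synthetic symmetric positive-definite inputs (assuming la is scipy.linalg, as the generalized eig call suggests); by construction P should satisfy P (Ra + Rb) P^T ≈ I:

import numpy as np
import scipy.linalg as la

rng = np.random.default_rng(0)
Ra = np.cov(rng.standard_normal((8, 200)))
Rb = np.cov(rng.standard_normal((8, 200)))

E, U = la.eig(Ra + Rb)
P = np.dot(np.sqrt(la.inv(np.diag(E))), np.transpose(U))
identity = np.dot(P, np.dot(Ra + Rb, np.transpose(P)))
print(np.allclose(identity.real, np.eye(8), atol=1e-8))  # True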


def PCA_dim_red(features, var_desired):
@@ -245,7 +247,8 @@ def PCA_dim_red(features, var_desired):
for n, v in enumerate(pca.explained_variance_ratio_):
var += v
if var / var_sum >= var_desired:
features_reduced = sklearn.decomposition.PCA(n_components=n+1).fit_transform(features)
features_reduced = sklearn.decomposition.PCA(
n_components=n+1).fit_transform(features)
return features_reduced
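
As a side note, sklearn can perform the same variance-threshold reduction in a single call: a float n_components in (0, 1) keeps just enough components to reach that explained-variance ratio. A minimal sketch with synthetic data:

import numpy as np
import sklearn.decomposition

features = np.random.randn(200, 30)
reduced = sklearn.decomposition.PCA(n_components=0.95).fit_transform(features)
print(reduced.shape)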


@@ -254,9 +257,9 @@ def RMS_features_extraction(data, trial_list, window_size, window_shift):

Args:
data: 2D (time points, Channels)
trial_list: list of the trials
window_size: Size of the window for extracting features
window_shift: size of the overlap
trial_list: list of the trials
window_size: Size of the window for extracting features
window_shift: size of the overlap

Returns:
The features matrix (trials, features)
@@ -273,11 +276,11 @@ def RMS_features_extraction(data, trial_list, window_size, window_shift):
t = 0
for trial in trial_list:
# x3 is the worst of all with 43.3% average performance
x1=gumpy.signal.rms(trial[0], fs, window_size, window_shift)
x2=gumpy.signal.rms(trial[1], fs, window_size, window_shift)
x3=gumpy.signal.rms(trial[2], fs, window_size, window_shift)
x4=gumpy.signal.rms(trial[3], fs, window_size, window_shift)
x=np.concatenate((x1, x2, x3, x4))
x1 = gumpy.signal.rms(trial[0], fs, window_size, window_shift)
x2 = gumpy.signal.rms(trial[1], fs, window_size, window_shift)
x3 = gumpy.signal.rms(trial[2], fs, window_size, window_shift)
x4 = gumpy.signal.rms(trial[3], fs, window_size, window_shift)
x = np.concatenate((x1, x2, x3, x4))
X[t, :] = np.array([x])
t += 1
return X
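
gumpy.signal.rms itself is not shown in this diff; a plausible NumPy equivalent of a sliding-window RMS, stated purely as an assumption of its behaviour (the fs argument above presumably converts window units to samples):

import numpy as np

def windowed_rms(x, window_size, window_shift):
    # Assumed behaviour: RMS over windows of window_size samples,
    # advancing by window_shift samples each step.
    starts = range(0, len(x) - window_size + 1, window_shift)
    return np.array([np.sqrt(np.mean(x[s:s + window_size] ** 2)) for s in starts])

x = np.random.randn(1000)
print(windowed_rms(x, window_size=100, window_shift=50).shape)  # (19,)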
@@ -303,10 +306,13 @@ def dwt_features(data, trials, level, sampling_freq, w, n, wavelet):

# Extract Features
for t, trial in enumerate(trials):
signals = data[trial + fs*4 + (w[0]) : trial + fs*4 + (w[1])]
coeffs_c3 = pywt.wavedec(data = signals[:,0], wavelet=wavelet, level=level)
coeffs_c4 = pywt.wavedec(data = signals[:,1], wavelet=wavelet, level=level)
coeffs_cz = pywt.wavedec(data = signals[:,2], wavelet=wavelet, level=level)
signals = data[trial + fs*4 + (w[0]): trial + fs*4 + (w[1])]
coeffs_c3 = pywt.wavedec(data=signals[:, 0], wavelet=wavelet,
level=level)
coeffs_c4 = pywt.wavedec(data=signals[:, 1], wavelet=wavelet,
level=level)
coeffs_cz = pywt.wavedec(data=signals[:, 2], wavelet=wavelet,
level=level)

X[t, :] = np.array([
np.std(coeffs_c3[n]), np.mean(coeffs_c3[n]**2),
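
A standalone sketch of the per-channel decomposition used here; the wavelet name and level are illustrative assumptions:

import numpy as np
import pywt

signal = np.random.randn(512)
coeffs = pywt.wavedec(data=signal, wavelet='db4', level=5)
# coeffs[0] holds the approximation coefficients; coeffs[1:] hold the
# detail coefficients from coarsest to finest level.
n = 1
print(np.std(coeffs[n]), np.mean(coeffs[n] ** 2))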
@@ -320,7 +326,7 @@


def alpha_subBP_features(data):
"""Extract alpha bands
"""Extract alpha bands

Args:
data: 2D (time points, Channels)
@@ -339,7 +345,7 @@ def beta_subBP_features(data):


def beta_subBP_features(data):
"""Extract beta bands
"""Extract beta bands

Args:
data: 2D (time points, Channels)
@@ -355,7 +361,7 @@ def powermean(data, trial, fs, w):


def powermean(data, trial, fs, w):
"""Compute the mean power of the data
"""Compute the mean power of the data

Args:
data: 2D (time points, Channels)
@@ -366,13 +372,13 @@ def powermean(data, trial, fs, w):
Returns:
The mean power
"""
return np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1],0],2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1],1],2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1],2],2).mean()
return np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1], 0], 2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1], 1], 2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1], 2], 2).mean()
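
A usage sketch with synthetic data; the fs*4 offset apparently skips the first four seconds after the trial marker, and the window w is given in samples:

import numpy as np

fs = 256                            # assumed sampling rate in Hz
data = np.random.randn(fs * 10, 3)  # 10 s of 3-channel data (C3, C4, Cz)
trial = 0                           # sample index of the trial onset
w = [0, fs]                         # 1 s window
p_c3, p_c4, p_cz = powermean(data, trial, fs, w)
print(p_c3, p_c4, p_cz)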


def log_subBP_feature_extraction(alpha, beta, trials, fs, w):
"""Extract the log power of alpha and beta bands
"""Extract the log power of alpha and beta bands

Args:
alpha: filtered data in the alpha band
@@ -395,7 +401,8 @@ def log_subBP_feature_extraction(alpha, beta, trials, fs, w):
power_c32, power_c42, power_cz2 = powermean(alpha[1], trial, fs, w)
power_c33, power_c43, power_cz3 = powermean(alpha[2], trial, fs, w)
power_c34, power_c44, power_cz4 = powermean(alpha[3], trial, fs, w)
power_c31_b, power_c41_b, power_cz1_b = powermean(beta[0], trial, fs, w)
power_c31_b, power_c41_b, power_cz1_b = powermean(beta[0], trial,
fs, w)

X[t, :] = np.array(
[np.log(power_c31), np.log(power_c41), np.log(power_cz1),
Expand All @@ -405,5 +412,3 @@ def log_subBP_feature_extraction(alpha, beta, trials, fs, w):
np.log(power_c31_b), np.log(power_c41_b), np.log(power_cz1_b)])

return X
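
Putting the sub-band power functions together, a minimal end-to-end sketch with synthetic data; the band-extraction internals are truncated in this diff, so the shapes here are assumptions:

import numpy as np

fs = 256
data = np.random.randn(fs * 60, 3)       # one minute of 3-channel data
trials = np.arange(0, fs * 50, fs * 10)  # assumed trial-onset sample indices
w = [0, fs]

alpha = alpha_subBP_features(data)
beta = beta_subBP_features(data)
X = log_subBP_feature_extraction(alpha, beta, trials, fs, w)
print(X.shape)  # (n_trials, n_features)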