197 changes: 101 additions & 96 deletions gumpy/features.py
@@ -12,7 +12,8 @@
import pywt


def sequential_feature_selector(features, labels, classifier, k_features, kfold, selection_type, plot=True, **kwargs):
def sequential_feature_selector(features, labels, classifier, k_features,
kfold, selection_type, plot=True, **kwargs):
"""Sequential feature selection to reduce the number of features.

The function reduces a d-dimensional feature space to a k-dimensional
@@ -28,14 +29,15 @@ def sequential_feature_selector(features, labels, classifier, k_features, kfold,
features: The original d-dimensional feature space
labels: corresponding labels
classifier (str or object): The classifier which should be used for
feature selection. This can be either a string (name of a classifier
known to gumpy) or an instance of a classifier which adheres
to the sklearn classifier interface.
feature selection. This can be either a string (name of a
classifier known to gumpy) or an instance of a classifier which
adheres to the sklearn classifier interface.
k_features (int): Number of features to select
kfold (int): k-fold cross validation
selection_type (str): One of ``SFS`` (Sequential Forward Selection),
``SBS`` (Sequential Backward Selection), ``SFFS`` (Sequential Forward
Floating Selection), ``SBFS`` (Sequential Backward Floating Selection)
``SBS`` (Sequential Backward Selection), ``SFFS`` (Sequential
Forward Floating Selection), ``SBFS`` (Sequential Backward Floating
Selection)
plot (bool): Plot the results of the dimensionality reduction
**kwargs: Additional keyword arguments that will be passed to the
Classifier instantiation
@@ -52,14 +54,16 @@ def sequential_feature_selector(features, labels, classifier, k_features, kfold,
# retrieve the appropriate classifier
if isinstance(classifier, str):
if not (classifier in available_classifiers):
raise ClassifierError("Unknown classifier {c}".format(c=classifier.__repr__()))
raise ClassifierError(
"Unknown classifier {c}".format(c=classifier.__repr__()))

kwopts = kwargs.pop('opts', dict())
# opts = dict()

# retrieve the options that we need to forward to the classifier
# TODO: should we forward all arguments to sequential_feature_selector ?
opts = available_classifiers[classifier].static_opts('sequential_feature_selector', features=features)
opts = available_classifiers[classifier].static_opts(
'sequential_feature_selector', features=features)
opts.update(kwopts)

# XXX: now merged into the static_opts invocation. TODO: test
@@ -82,31 +86,29 @@ def sequential_feature_selector(features, labels, classifier, k_features, kfold,
# if we received a classifier object we'll just use this one
clf = classifier.clf


if selection_type == 'SFS':
algorithm = "Sequential Forward Selection (SFS)"
sfs = SFS(clf, k_features, forward=True, floating=False,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

elif selection_type == 'SBS':
algorithm = "Sequential Backward Selection (SBS)"
sfs = SFS(clf, k_features, forward=False, floating=False,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

elif selection_type == 'SFFS':
algorithm = "Sequential Forward Floating Selection (SFFS)"
sfs = SFS(clf, k_features, forward=True, floating=True,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

elif selection_type == 'SBFS':
algorithm = "Sequential Backward Floating Selection (SFFS)"
sfs = SFS(clf, k_features, forward=True, floating=True,
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)
verbose=2, scoring='accuracy', cv=kfold, n_jobs=-1)

else:
raise Exception("Unknown selection type '{}'".format(selection_type))


pipe = make_pipeline(StandardScaler(), sfs)
pipe.fit(features, labels)
subsets = sfs.subsets_
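
For reference, a minimal sketch of the pipeline this function assembles, using mlxtend's SequentialFeatureSelector directly; the synthetic data and the LinearSVC estimator are illustrative assumptions, not part of gumpy:

import numpy as np
from mlxtend.feature_selection import SequentialFeatureSelector as SFS
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import LinearSVC

# Synthetic stand-ins for features/labels (illustrative only)
X = np.random.randn(100, 10)
y = np.random.randint(0, 2, 100)

# forward=True, floating=False corresponds to selection_type='SFS'
sfs = SFS(LinearSVC(), k_features=3, forward=True, floating=False,
          scoring='accuracy', cv=5, n_jobs=-1)
pipe = make_pipeline(StandardScaler(), sfs)
pipe.fit(X, y)
print(sfs.k_feature_idx_)  # indices of the selected features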
@@ -137,38 +139,38 @@ def CSP(tasks):

"""
if len(tasks) < 2:
print("Must have at least 2 tasks for filtering.")
return (None,) * len(tasks)
else:
filters = ()
# CSP algorithm
# For each task x, find the mean covariance matrices Rx and not_Rx, which are used to compute the spatial filter SFx
iterator = range(0,len(tasks))
for x in iterator:
# Find Rx
Rx = covarianceMatrix(tasks[x][0])
for t in range(1,len(tasks[x])):
Rx += covarianceMatrix(tasks[x][t])
Rx = Rx / len(tasks[x])

# Find not_Rx
count = 0
not_Rx = Rx * 0
for not_x in [element for element in iterator if element != x]:
for t in range(0,len(tasks[not_x])):
not_Rx += covarianceMatrix(tasks[not_x][t])
count += 1
not_Rx = not_Rx / count

# Find the spatial filter SFx
SFx = spatialFilter(Rx,not_Rx)
filters += (SFx,)

# Special case: only two tasks, no need to compute any more mean variances
if len(tasks) == 2:
filters += (spatialFilter(not_Rx,Rx),)
break
return filters
print("Must have at least 2 tasks for filtering.")
return (None,) * len(tasks)
else:
filters = ()
# CSP algorithm
# For each task x, find the mean covariance matrices Rx and not_Rx, which are used to compute the spatial filter SFx
iterator = range(0, len(tasks))
for x in iterator:
# Find Rx
Rx = covarianceMatrix(tasks[x][0])
for t in range(1, len(tasks[x])):
Rx += covarianceMatrix(tasks[x][t])
Rx = Rx / len(tasks[x])

# Find not_Rx
count = 0
not_Rx = Rx * 0
for not_x in [element for element in iterator if element != x]:
for t in range(0, len(tasks[not_x])):
not_Rx += covarianceMatrix(tasks[not_x][t])
count += 1
not_Rx = not_Rx / count

# Find the spatial filter SFx
SFx = spatialFilter(Rx, not_Rx)
filters += (SFx,)

# Special case: only two tasks, no need to compute any more mean variances
if len(tasks) == 2:
filters += (spatialFilter(not_Rx, Rx),)
break
return filters
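
A short usage sketch, assuming each task is a sequence of trials shaped (channels, samples), which is the layout covarianceMatrix below expects from np.cov:

import numpy as np

# Two synthetic tasks, 20 trials each of 8 channels x 256 samples (illustrative only)
task_a = np.random.randn(20, 8, 256)
task_b = np.random.randn(20, 8, 256)

sf_a, sf_b = CSP((task_a, task_b))   # one spatial filter per task
projected = np.dot(sf_a, task_a[0])  # project a single trial
print(projected.shape)               # (8, 256)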


# covarianceMatrix takes a matrix A and returns its covariance matrix
@@ -181,13 +183,13 @@ def covarianceMatrix(A):
Returns:
The 2D covariance matrix of A
"""
#Ca = np.dot(A,np.transpose(A))/np.trace(np.dot(A,np.transpose(A)))
Ca = np.cov(A)
return Ca
# Ca = np.dot(A,np.transpose(A))/np.trace(np.dot(A,np.transpose(A)))
Ca = np.cov(A)
return Ca
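
Note that np.cov treats each row as one variable, so A must be passed as (channels, samples); a quick illustration:

import numpy as np

A = np.random.randn(4, 100)  # 4 channels, 100 time points
Ca = np.cov(A)               # rows are variables, columns are observations
print(Ca.shape)              # (4, 4)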


def spatialFilter(Ra,Rb):
"""This function extracts spatial filters
def spatialFilter(Ra, Rb):
"""This function extracts spatial filters

Args:
Ra, Rb: Covariance matrices Ra and Rb
@@ -196,33 +198,33 @@ def spatialFilter(Ra,Rb):
A 2D spatial filter matrix
"""

R = Ra + Rb
E,U = la.eig(R)
R = Ra + Rb
E, U = la.eig(R)

# CSP requires the eigenvalues E and eigenvectors U to be sorted in descending order
ord = np.argsort(E)
ord = ord[::-1] # argsort gives ascending order, flip to get descending
E = E[ord]
U = U[:,ord]
# CSP requires the eigenvalues E and eigenvectors U to be sorted in descending order
ord = np.argsort(E)
ord = ord[::-1] # argsort gives ascending order, flip to get descending
E = E[ord]
U = U[:, ord]

# Find the whitening transformation matrix
P = np.dot(np.sqrt(la.inv(np.diag(E))),np.transpose(U))
# Find the whitening transformation matrix
P = np.dot(np.sqrt(la.inv(np.diag(E))), np.transpose(U))

# The mean covariance matrices may now be transformed
Sa = np.dot(P,np.dot(Ra,np.transpose(P)))
Sb = np.dot(P,np.dot(Rb,np.transpose(P)))
# The mean covariance matrices may now be transformed
Sa = np.dot(P, np.dot(Ra, np.transpose(P)))
Sb = np.dot(P, np.dot(Rb, np.transpose(P)))

# Find and sort the generalized eigenvalues and eigenvectors
E1,U1 = la.eig(Sa,Sb)
ord1 = np.argsort(E1)
ord1 = ord1[::-1]
E1 = E1[ord1]
U1 = U1[:,ord1]
# Find and sort the generalized eigenvalues and eigenvectors
E1, U1 = la.eig(Sa, Sb)
ord1 = np.argsort(E1)
ord1 = ord1[::-1]
E1 = E1[ord1]
U1 = U1[:, ord1]

# The projection matrix (the spatial filter) may now be obtained
SFa = np.dot(np.transpose(U1),P)
#return SFa.astype(np.float32)
return SFa
# The projection matrix (the spatial filter) may now be obtained
SFa = np.dot(np.transpose(U1), P)
# return SFa.astype(np.float32)
return SFa
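
A quick numerical check of the whitening step above, as a sketch with synthetic symmetric positive-definite inputs (assuming la is scipy.linalg, as the generalized eig call suggests); by construction P should satisfy P (Ra + Rb) P^T ≈ I:

import numpy as np
import scipy.linalg as la

rng = np.random.default_rng(0)
Ra = np.cov(rng.standard_normal((8, 200)))
Rb = np.cov(rng.standard_normal((8, 200)))

E, U = la.eig(Ra + Rb)
P = np.dot(np.sqrt(la.inv(np.diag(E))), np.transpose(U))
identity = np.dot(P, np.dot(Ra + Rb, np.transpose(P)))
print(np.allclose(identity.real, np.eye(8), atol=1e-8))  # True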


def PCA_dim_red(features, var_desired):
@@ -245,7 +247,8 @@ def PCA_dim_red(features, var_desired):
for n, v in enumerate(pca.explained_variance_ratio_):
var += v
if var / var_sum >= var_desired:
features_reduced = sklearn.decomposition.PCA(n_components=n+1).fit_transform(features)
features_reduced = sklearn.decomposition.PCA(
n_components=n+1).fit_transform(features)
return features_reduced
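
As a side note, sklearn can perform the same variance-threshold reduction in a single call: a float n_components in (0, 1) keeps just enough components to reach that explained-variance ratio. A minimal sketch with synthetic data:

import numpy as np
import sklearn.decomposition

features = np.random.randn(200, 30)
reduced = sklearn.decomposition.PCA(n_components=0.95).fit_transform(features)
print(reduced.shape)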


@@ -254,9 +257,9 @@ def RMS_features_extraction(data, trial_list, window_size, window_shift):

Args:
data: 2D (time points, Channels)
trial_list: list of the trials
window_size: Size of the window for extracting features
window_shift: size of the overlap
trial_list: list of the trials
window_size: Size of the window for extracting features
window_shift: size of the overlap

Returns:
The features matrix (trials, features)
@@ -273,11 +276,11 @@ def RMS_features_extraction(data, trial_list, window_size, window_shift):
t = 0
for trial in trial_list:
# x3 is the worst of all with 43.3% average performance
x1=gumpy.signal.rms(trial[0], fs, window_size, window_shift)
x2=gumpy.signal.rms(trial[1], fs, window_size, window_shift)
x3=gumpy.signal.rms(trial[2], fs, window_size, window_shift)
x4=gumpy.signal.rms(trial[3], fs, window_size, window_shift)
x=np.concatenate((x1, x2, x3, x4))
x1 = gumpy.signal.rms(trial[0], fs, window_size, window_shift)
x2 = gumpy.signal.rms(trial[1], fs, window_size, window_shift)
x3 = gumpy.signal.rms(trial[2], fs, window_size, window_shift)
x4 = gumpy.signal.rms(trial[3], fs, window_size, window_shift)
x = np.concatenate((x1, x2, x3, x4))
X[t, :] = np.array([x])
t += 1
return X
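
gumpy.signal.rms itself is not shown in this diff; a plausible NumPy equivalent of a sliding-window RMS, stated purely as an assumption of its behaviour (the fs argument above presumably converts window units to samples):

import numpy as np

def windowed_rms(x, window_size, window_shift):
    # Assumed behaviour: RMS over windows of window_size samples,
    # advancing by window_shift samples each step.
    starts = range(0, len(x) - window_size + 1, window_shift)
    return np.array([np.sqrt(np.mean(x[s:s + window_size] ** 2)) for s in starts])

x = np.random.randn(1000)
print(windowed_rms(x, window_size=100, window_shift=50).shape)  # (19,)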
@@ -303,10 +306,13 @@ def dwt_features(data, trials, level, sampling_freq, w, n, wavelet):

# Extract Features
for t, trial in enumerate(trials):
signals = data[trial + fs*4 + (w[0]) : trial + fs*4 + (w[1])]
coeffs_c3 = pywt.wavedec(data = signals[:,0], wavelet=wavelet, level=level)
coeffs_c4 = pywt.wavedec(data = signals[:,1], wavelet=wavelet, level=level)
coeffs_cz = pywt.wavedec(data = signals[:,2], wavelet=wavelet, level=level)
signals = data[trial + fs*4 + (w[0]): trial + fs*4 + (w[1])]
coeffs_c3 = pywt.wavedec(data=signals[:, 0], wavelet=wavelet,
level=level)
coeffs_c4 = pywt.wavedec(data=signals[:, 1], wavelet=wavelet,
level=level)
coeffs_cz = pywt.wavedec(data=signals[:, 2], wavelet=wavelet,
level=level)

X[t, :] = np.array([
np.std(coeffs_c3[n]), np.mean(coeffs_c3[n]**2),
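
A standalone sketch of the per-channel decomposition used here; the wavelet name and level are illustrative assumptions:

import numpy as np
import pywt

signal = np.random.randn(512)
coeffs = pywt.wavedec(data=signal, wavelet='db4', level=5)
# coeffs[0] holds the approximation coefficients; coeffs[1:] hold the
# detail coefficients from coarsest to finest level.
n = 1
print(np.std(coeffs[n]), np.mean(coeffs[n] ** 2))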
@@ -320,7 +326,7 @@


def alpha_subBP_features(data):
"""Extract alpha bands
"""Extract alpha bands

Args:
data: 2D (time points, Channels)
@@ -339,7 +345,7 @@ def beta_subBP_features(data):


def beta_subBP_features(data):
"""Extract beta bands
"""Extract beta bands

Args:
data: 2D (time points, Channels)
@@ -355,7 +361,7 @@ def powermean(data, trial, fs, w):


def powermean(data, trial, fs, w):
"""Compute the mean power of the data
"""Compute the mean power of the data

Args:
data: 2D (time points, Channels)
@@ -366,13 +372,13 @@ def powermean(data, trial, fs, w):
Returns:
The mean power
"""
return np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1],0],2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1],1],2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1],2],2).mean()
return np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1], 0], 2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1], 1], 2).mean(), \
np.power(data[trial+fs*4+w[0]: trial+fs*4+w[1], 2], 2).mean()
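
A usage sketch with synthetic data; the fs*4 offset apparently skips the first four seconds after the trial marker, and the window w is given in samples:

import numpy as np

fs = 256                            # assumed sampling rate in Hz
data = np.random.randn(fs * 10, 3)  # 10 s of 3-channel data (C3, C4, Cz)
trial = 0                           # sample index of the trial onset
w = [0, fs]                         # 1 s window
p_c3, p_c4, p_cz = powermean(data, trial, fs, w)
print(p_c3, p_c4, p_cz)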


def log_subBP_feature_extraction(alpha, beta, trials, fs, w):
"""Extract the log power of alpha and beta bands
"""Extract the log power of alpha and beta bands

Args:
alpha: filtered data in the alpha band
@@ -395,7 +401,8 @@ def log_subBP_feature_extraction(alpha, beta, trials, fs, w):
power_c32, power_c42, power_cz2 = powermean(alpha[1], trial, fs, w)
power_c33, power_c43, power_cz3 = powermean(alpha[2], trial, fs, w)
power_c34, power_c44, power_cz4 = powermean(alpha[3], trial, fs, w)
power_c31_b, power_c41_b, power_cz1_b = powermean(beta[0], trial, fs, w)
power_c31_b, power_c41_b, power_cz1_b = powermean(beta[0], trial,
fs, w)

X[t, :] = np.array(
[np.log(power_c31), np.log(power_c41), np.log(power_cz1),
Expand All @@ -405,5 +412,3 @@ def log_subBP_feature_extraction(alpha, beta, trials, fs, w):
np.log(power_c31_b), np.log(power_c41_b), np.log(power_cz1_b)])

return X
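
Putting the sub-band power functions together, a minimal end-to-end sketch with synthetic data; the band-extraction internals are truncated in this diff, so the shapes here are assumptions:

import numpy as np

fs = 256
data = np.random.randn(fs * 60, 3)       # one minute of 3-channel data
trials = np.arange(0, fs * 50, fs * 10)  # assumed trial-onset sample indices
w = [0, fs]

alpha = alpha_subBP_features(data)
beta = beta_subBP_features(data)
X = log_subBP_feature_extraction(alpha, beta, trials, fs, w)
print(X.shape)  # (n_trials, n_features)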