-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathfeature_rank.m
143 lines (127 loc) · 6.1 KB
/
feature_rank.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
% Function to compute Fisher-Score or Discriminating Coefficient
% Inputs: Input: input data matrix where each row is a
% feature and each column corresponds to an instance or example
% labels: grouping variable that contains class
% labels. It can be cell array of strings,
% numerical array or logical array
% numIndices: (optional)
% Number of significant features to be returned
% method: (optional)
% Method for feature-ranking
% 'Fisher_Score'(default) or
% 'Discriminating_Coefficient'
%
% Outputs: featureScore: (second output) score of each feature according
% to the ranking criterion used, sorted in descending order
% index: (first output) indices of the features, ordered
% from the highest to the lowest feature score
%
% References: Y. W. Chen and C. J. Lin,
% “Combining SVMs with various feature selection strategies”, Feature Extraction, Foundations and
% Applications. New York, Springer-Verlag, 2006
% T. Markiewicz and S. Osowski,
% "Data mining techniques for feature selection in blood cell recognition",
% Proceedings of European Symposium on Artificial Neural Networks, April 2006
%
% Author: Vishnu Muralidharan
% Department of Electrical and
% Computer Engineering
% University of Alabama in Huntsville
function [index,featureScore] = feature_rank(Input,labels,numIndices,method)
% FEATURE_RANK  Rank the features of a two-class data set.
%
%   [index,featureScore] = feature_rank(Input,labels)
%   [index,featureScore] = feature_rank(Input,labels,numIndices)
%   [index,featureScore] = feature_rank(Input,labels,numIndices,method)
%
%   Input       numFeat-by-numInst matrix: one row per feature, one column
%               per instance.
%   labels      grouping variable with one entry per instance (row or
%               column). Cell array of strings, numeric array or logical
%               array. Only the first two values returned by unique(labels)
%               are used as class A and class B.
%   numIndices  (optional) number of top-ranked features to return.
%               Omitted, [] or NaN returns every feature. Values larger
%               than the number of features are clamped.
%   method      (optional) 'Fisher_Score' (default) or
%               'Discriminating_Coefficient'. Any other value falls back
%               to the Fisher score.
%
%   index         column vector of feature (row) indices, best first.
%   featureScore  column vector of the matching scores, descending.
%                 Features whose score is NaN (e.g. 0/0 for a constant
%                 feature) are ranked last.
%
%   Errors are raised when the number of labels does not match the number
%   of columns of Input, when fewer than two classes are present, or when
%   the label data type is not supported.

%% Defaults for the optional arguments
if nargin < 3 || isempty(numIndices)
    numIndices = NaN;               % NaN means "return all features"
end
if nargin < 4 || isempty(method)
    criteria = 'Fisher_Score';
else
    criteria = method;
end

%% Check that there is exactly one label per instance (column of Input)
numFeat = size(Input,1);            % number of features in dataset
labels  = labels(:);                % accept row or column grouping vectors
if numel(labels) ~= size(Input,2)
    error('Number of labels must equal the number of columns (instances) of Input');
end

%% Indices of the instances belonging to each class
groupLabels = unique(labels);       % unique labels, i.e. the classes
if numel(groupLabels) < 2
    error('labels must contain at least two distinct classes');
end
if iscell(labels)
    idxA = find(strcmp(labels,groupLabels{1}));
    idxB = find(strcmp(labels,groupLabels{2}));
elseif isnumeric(labels) || islogical(labels)
    % element-wise comparison; find(X,n) would return the first n nonzeros
    idxA = find(labels == groupLabels(1));
    idxB = find(labels == groupLabels(2));
else
    error('Grouping vector of undefined data type');
end
numClassA = length(idxA);           % number of instances of class A
numClassB = length(idxB);           % number of instances of class B

%% Per-feature class statistics (vectorised over the feature rows)
dataA = Input(:,idxA);
dataB = Input(:,idxB);
muA = mean(dataA,2);                % class A mean of every feature
muB = mean(dataB,2);                % class B mean of every feature

%% Compute Discriminating Coefficient or Fisher Score
if strcmp(criteria,'Discriminating_Coefficient')
    % |muA - muB| / (sigmaA + sigmaB), sample standard deviations
    scores = abs(muA - muB) ./ (std(dataA,0,2) + std(dataB,0,2));
else
    % Fisher score: between-class scatter over the sum of the
    % within-class sample variances
    muFeat = mean(Input,2);         % mean of every feature over all instances
    numer  = (muA - muFeat).^2 + (muB - muFeat).^2;
    % bsxfun keeps the code usable on releases without implicit expansion
    varA   = sum(bsxfun(@minus,dataA,muA).^2,2) / (numClassA - 1);
    varB   = sum(bsxfun(@minus,dataB,muB).^2,2) / (numClassB - 1);
    scores = numer ./ (varA + varB);
end

%% Rank features according to score (descending, ties keep feature order)
sortKey = scores;
sortKey(isnan(sortKey)) = -Inf;     % undefined scores go to the bottom
[~,order] = sort(sortKey,'descend');

%% Outputs
if ~isnan(numIndices)
    % never request more features than exist
    order = order(1:min(numIndices,numFeat));
end
index = order;
featureScore = scores(order);