Skip to content

Commit

Permalink
Added option to not normalize features
Browse files Browse the repository at this point in the history
  • Loading branch information
Ekeany committed Sep 27, 2020
1 parent 892df14 commit f72d553
Showing 1 changed file with 22 additions and 7 deletions.
29 changes: 22 additions & 7 deletions src/BorutaShap.py
Original file line number Diff line number Diff line change
Expand Up @@ -253,7 +253,8 @@ def Train_model(self, X, y):



def fit(self, X, y, n_trials = 20, random_state=0, sample=False, train_or_test = 'test', verbose=True):
def fit(self, X, y, n_trials = 20, random_state=0, sample=False,
train_or_test = 'test', normalize=True, verbose=True):

"""
The main body of the program this method it computes the following
Expand Down Expand Up @@ -297,7 +298,7 @@ def fit(self, X, y, n_trials = 20, random_state=0, sample=False, train_or_test =
A random state for reproducibility of results
Sample: Boolean
if true then the a rowise sample of the data will be used to calculate the feature importance values
if true then a row-wise sample of the data will be used to calculate the feature importance values
sample_fraction: float
The sample fraction of the original data used in calculating the feature importance values only
Expand All @@ -307,6 +308,9 @@ def fit(self, X, y, n_trials = 20, random_state=0, sample=False, train_or_test =
Decides whether the feature importance should be calculated on out of sample data see the discussion here.
https://compstat-lmu.github.io/iml_methods_limitations/pfi-data.html#introduction-to-test-vs.training-data
normalize: boolean
if true, the importance values will be normalized using the z-score formula
verbose: Boolean
a flag indicator to print out all the rejected or accepted features.
Expand Down Expand Up @@ -349,7 +353,7 @@ def fit(self, X, y, n_trials = 20, random_state=0, sample=False, train_or_test =

self.Check_if_chose_train_or_test_and_train_model()

self.X_feature_import, self.Shadow_feature_import = self.feature_importance()
self.X_feature_import, self.Shadow_feature_import = self.feature_importance(normalize=normalize)
self.update_importance_history()
self.hits += self.calculate_hits()
self.test_features(iteration=trial+1)
Expand Down Expand Up @@ -559,15 +563,18 @@ def calculate_Zscore(array):
return [(element-mean_value)/std_value for element in array]


def feature_importance(self):
def feature_importance(self, normalize):

"""
Calculates the feature importance scores of the model
Parameters
----------
importance_measure: string
allows the user to choose either the Shap or Gini importance metrics
allows the user to choose either the Shap or Gini importance metrics
normalize: boolean
if true, the importance values will be normalized using the z-score formula
Returns:
array of normalized feature importance scores for both the shadow and original features.
Expand All @@ -582,21 +589,29 @@ def feature_importance(self):

self.explain()
vals = self.shap_values
vals = self.calculate_Zscore(vals)

if normalize:
vals = self.calculate_Zscore(vals)

X_feature_import = vals[:len(self.X.columns)]
Shadow_feature_import = vals[len(self.X_shadow.columns):]


elif self.importance_measure == 'gini':

feature_importances_ = self.calculate_Zscore(np.abs(self.model.feature_importances_))
feature_importances_ = np.abs(self.model.feature_importances_)

if normalize:
feature_importances_ = self.calculate_Zscore(feature_importances_)

X_feature_import = feature_importances_[:len(self.X.columns)]
Shadow_feature_import = feature_importances_[len(self.X.columns):]

else:

raise ValueError('No Importance_measure was specified select one of (shap, gini)')


return X_feature_import, Shadow_feature_import


Expand Down

0 comments on commit f72d553

Please sign in to comment.