From 974ba0ac09da7ca60b9118368f96f7d2de84434f Mon Sep 17 00:00:00 2001 From: Guillaume VIGNAL Date: Fri, 8 Mar 2024 17:32:31 +0100 Subject: [PATCH 1/2] compute predictions and probabilities in compile --- shapash/explainer/smart_explainer.py | 14 +++++++++----- shapash/explainer/smart_plotter.py | 18 +++--------------- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index 72489c5b..1799cf2c 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -291,6 +291,15 @@ def compile( x_init = inverse_transform(self.x_encoded, self.preprocessing) self.x_init = handle_categorical_missing(x_init) self.y_pred = check_y(self.x_init, y_pred, y_name="y_pred") + if not hasattr(self, "y_pred") or self.y_pred is None: + if hasattr(self.model, "predict"): + self.predict() + if self._case == "classification": + if hasattr(self.model, "predict_proba"): + self.predict_proba() + else: + self.proba_values = None + self.y_target = check_y(self.x_init, y_target, y_name="y_target") self.prediction_error = predict_error(self.y_target, self.y_pred, self._case) @@ -895,7 +904,6 @@ def to_pandas( ) # Matching with y_pred if proba: - self.predict_proba() if proba else None proba_values = self.proba_values else: proba_values = None @@ -1006,8 +1014,6 @@ def init_app(self, settings: dict = None): Possible settings (dict keys) are 'rows', 'points', 'violin', 'features' Values should be positive ints """ - if self.y_pred is None: - self.predict() self.smartapp = SmartApp(self, settings) def run_app( @@ -1046,8 +1052,6 @@ def run_app( if title_story is not None: self.title_story = title_story - if self.y_pred is None: - self.predict() if hasattr(self, "_case"): self.smartapp = SmartApp(self, settings) if host is None: diff --git a/shapash/explainer/smart_plotter.py b/shapash/explainer/smart_plotter.py index 78ec8981..e42b447f 100644 --- a/shapash/explainer/smart_plotter.py +++ b/shapash/explainer/smart_plotter.py @@ -949,9 +949,7 @@ def local_pred(self, index, label=None): float: Predict or predict_proba value """ if self.explainer._case == "classification": - if hasattr(self.explainer.model, "predict_proba"): - if not hasattr(self.explainer, "proba_values"): - self.explainer.predict_proba() + if self.explainer.proba_values is not None: value = self.explainer.proba_values.iloc[:, [label]].loc[index].values[0] else: value = None @@ -1237,9 +1235,7 @@ def contribution_plot( col_value = self.explainer._classes[label_num] subtitle = f"Response: {label_value}" # predict proba Color scale - if proba and hasattr(self.explainer.model, "predict_proba"): - if not hasattr(self.explainer, "proba_values"): - self.explainer.predict_proba() + if proba and self.explainer.proba_values is not None: proba_values = self.explainer.proba_values.iloc[:, [label_num]] if not hasattr(self, "pred_colorscale"): self.pred_colorscale = {} @@ -3209,12 +3205,7 @@ def _prediction_classification_plot( label_num, _, label_value = self.explainer.check_label_name(label) # predict proba Color scale - if hasattr(self.explainer.model, "predict_proba"): - if not hasattr(self.explainer, "proba_values"): - self.explainer.predict_proba() - if hasattr(self.explainer.model, "predict"): - if not hasattr(self.explainer, "y_pred") or self.explainer.y_pred is None: - self.explainer.predict() + if self.explainer.proba_values is not None: # Assign proba values of the target df_proba_target = self.explainer.proba_values.copy() df_proba_target["proba_target"] = df_proba_target.iloc[:, label_num] @@ -3333,9 +3324,6 @@ def _prediction_regression_plot( fig = go.Figure() subtitle = None - if self.explainer.y_pred is None: - if hasattr(self.explainer.model, "predict"): - self.explainer.predict() prediction_error = self.explainer.prediction_error if prediction_error is not None: if (self.explainer.y_target == 0).any()[0]: From 0dad3f79c2c022677c2c5211f3d0e50470c5b8dc Mon Sep 17 00:00:00 2001 From: Guillaume VIGNAL Date: Mon, 11 Mar 2024 11:29:34 +0100 Subject: [PATCH 2/2] Set proba_values in the method add of the smart_explainer --- shapash/explainer/smart_explainer.py | 35 +++++++++++++++++++++------- 1 file changed, 26 insertions(+), 9 deletions(-) diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index 1799cf2c..4fbda92d 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -240,7 +240,14 @@ def __init__( self.features_imp = None def compile( - self, x, contributions=None, y_pred=None, y_target=None, additional_data=None, additional_features_dict=None + self, + x, + contributions=None, + y_pred=None, + proba_values=None, + y_target=None, + additional_data=None, + additional_features_dict=None, ): """ The compile method is the first step to understand model and @@ -266,6 +273,11 @@ def compile( This is an interesting parameter for more explicit outputs. Shapash lets users define their own predict, as they may wish to set their own threshold (classification) + proba_values : pandas.Series or pandas.DataFrame, optional (default: None) + Probability values (1 column only). + The index must be identical to the index of x_init. + This is an interesting parameter for more explicit outputs. + Shapash lets users define their own probability values y_target : pandas.Series or pandas.DataFrame, optional (default: None) Target values (1 column only). The index must be identical to the index of x_init. @@ -291,14 +303,12 @@ def compile( x_init = inverse_transform(self.x_encoded, self.preprocessing) self.x_init = handle_categorical_missing(x_init) self.y_pred = check_y(self.x_init, y_pred, y_name="y_pred") - if not hasattr(self, "y_pred") or self.y_pred is None: - if hasattr(self.model, "predict"): - self.predict() - if self._case == "classification": - if hasattr(self.model, "predict_proba"): - self.predict_proba() - else: - self.proba_values = None + if (self.y_pred is None) and (hasattr(self.model, "predict")): + self.predict() + + self.proba_values = check_y(self.x_init, proba_values, y_name="proba_values") + if (self._case == "classification") and (self.proba_values is None) and (hasattr(self.model, "predict_proba")): + self.predict_proba() self.y_target = check_y(self.x_init, y_target, y_name="y_target") self.prediction_error = predict_error(self.y_target, self.y_pred, self._case) @@ -414,6 +424,7 @@ def define_style(self, palette_name=None, colors_dict=None): def add( self, y_pred=None, + proba_values=None, y_target=None, label_dict=None, features_dict=None, @@ -432,6 +443,9 @@ def add( y_pred : pandas.Series, optional (default: None) Prediction values (1 column only). The index must be identical to the index of x_init. + proba_values : pandas.Series, optional (default: None) + Probability values (1 column only). + The index must be identical to the index of x_init. label_dict: dict, optional (default: None) Dictionary mapping integer labels to domain names. features_dict: dict, optional (default: None) @@ -455,6 +469,8 @@ def add( self.y_pred = check_y(self.x_init, y_pred, y_name="y_pred") if hasattr(self, "y_target"): self.prediction_error = predict_error(self.y_target, self.y_pred, self._case) + if proba_values is not None: + self.proba_values = check_y(self.x_init, proba_values, y_name="proba_values") if y_target is not None: self.y_target = check_y(self.x_init, y_target, y_name="y_target") if hasattr(self, "y_pred"): @@ -904,6 +920,7 @@ def to_pandas( ) # Matching with y_pred if proba: + self.predict_proba() proba_values = self.proba_values else: proba_values = None