Skip to content

Commit

Permalink
Merge branch 'master' into hotfix/report_demo
Browse files Browse the repository at this point in the history
  • Loading branch information
MLecardonnel authored Apr 25, 2024
2 parents 915dc6f + 641405b commit 4d79a86
Show file tree
Hide file tree
Showing 8 changed files with 54 additions and 38 deletions.
2 changes: 1 addition & 1 deletion requirements.dev.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ nbsphinx==0.8.8
sphinx_material==0.0.35
pytest>=6.2.5
pytest-cov>=2.8.1
scikit-learn>=1.0.1,<1.4
scikit-learn>=1.4.0
xgboost>=1.0.0
nbformat>4.2.0
numba>=0.53.1
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
"dash-table>=5.0.0",
"nbformat>4.2.0",
"numba>=0.53.1",
"scikit-learn>=1.0.1,<1.4",
"scikit-learn>=1.4.0",
"category_encoders>=2.6.0",
"scipy>=0.19.1",
]
Expand Down
33 changes: 27 additions & 6 deletions shapash/explainer/smart_explainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,14 @@ def __init__(
self.features_imp = None

def compile(
self, x, contributions=None, y_pred=None, y_target=None, additional_data=None, additional_features_dict=None
self,
x,
contributions=None,
y_pred=None,
proba_values=None,
y_target=None,
additional_data=None,
additional_features_dict=None,
):
"""
The compile method is the first step to understand model and
Expand All @@ -266,6 +273,11 @@ def compile(
This is an interesting parameter for more explicit outputs.
Shapash lets users define their own predict,
as they may wish to set their own threshold (classification)
proba_values : pandas.Series or pandas.DataFrame, optional (default: None)
Probability values (1 column only).
The index must be identical to the index of x_init.
This is an interesting parameter for more explicit outputs.
Shapash lets users define their own probability values
y_target : pandas.Series or pandas.DataFrame, optional (default: None)
Target values (1 column only).
The index must be identical to the index of x_init.
Expand All @@ -291,6 +303,13 @@ def compile(
x_init = inverse_transform(self.x_encoded, self.preprocessing)
self.x_init = handle_categorical_missing(x_init)
self.y_pred = check_y(self.x_init, y_pred, y_name="y_pred")
if (self.y_pred is None) and (hasattr(self.model, "predict")):
self.predict()

self.proba_values = check_y(self.x_init, proba_values, y_name="proba_values")
if (self._case == "classification") and (self.proba_values is None) and (hasattr(self.model, "predict_proba")):
self.predict_proba()

self.y_target = check_y(self.x_init, y_target, y_name="y_target")
self.prediction_error = predict_error(self.y_target, self.y_pred, self._case)

Expand Down Expand Up @@ -405,6 +424,7 @@ def define_style(self, palette_name=None, colors_dict=None):
def add(
self,
y_pred=None,
proba_values=None,
y_target=None,
label_dict=None,
features_dict=None,
Expand All @@ -423,6 +443,9 @@ def add(
y_pred : pandas.Series, optional (default: None)
Prediction values (1 column only).
The index must be identical to the index of x_init.
proba_values : pandas.Series, optional (default: None)
Probability values (1 column only).
The index must be identical to the index of x_init.
label_dict: dict, optional (default: None)
Dictionary mapping integer labels to domain names.
features_dict: dict, optional (default: None)
Expand All @@ -446,6 +469,8 @@ def add(
self.y_pred = check_y(self.x_init, y_pred, y_name="y_pred")
if hasattr(self, "y_target"):
self.prediction_error = predict_error(self.y_target, self.y_pred, self._case)
if proba_values is not None:
self.proba_values = check_y(self.x_init, proba_values, y_name="proba_values")
if y_target is not None:
self.y_target = check_y(self.x_init, y_target, y_name="y_target")
if hasattr(self, "y_pred"):
Expand Down Expand Up @@ -895,7 +920,7 @@ def to_pandas(
)
# Matching with y_pred
if proba:
self.predict_proba() if proba else None
self.predict_proba()
proba_values = self.proba_values
else:
proba_values = None
Expand Down Expand Up @@ -1006,8 +1031,6 @@ def init_app(self, settings: dict = None):
Possible settings (dict keys) are 'rows', 'points', 'violin', 'features'
Values should be positive ints
"""
if self.y_pred is None:
self.predict()
self.smartapp = SmartApp(self, settings)

def run_app(
Expand Down Expand Up @@ -1046,8 +1069,6 @@ def run_app(

if title_story is not None:
self.title_story = title_story
if self.y_pred is None:
self.predict()
if hasattr(self, "_case"):
self.smartapp = SmartApp(self, settings)
if host is None:
Expand Down
18 changes: 3 additions & 15 deletions shapash/explainer/smart_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -949,9 +949,7 @@ def local_pred(self, index, label=None):
float: Predict or predict_proba value
"""
if self.explainer._case == "classification":
if hasattr(self.explainer.model, "predict_proba"):
if not hasattr(self.explainer, "proba_values"):
self.explainer.predict_proba()
if self.explainer.proba_values is not None:
value = self.explainer.proba_values.iloc[:, [label]].loc[index].values[0]
else:
value = None
Expand Down Expand Up @@ -1237,9 +1235,7 @@ def contribution_plot(
col_value = self.explainer._classes[label_num]
subtitle = f"Response: <b>{label_value}</b>"
# predict proba Color scale
if proba and hasattr(self.explainer.model, "predict_proba"):
if not hasattr(self.explainer, "proba_values"):
self.explainer.predict_proba()
if proba and self.explainer.proba_values is not None:
proba_values = self.explainer.proba_values.iloc[:, [label_num]]
if not hasattr(self, "pred_colorscale"):
self.pred_colorscale = {}
Expand Down Expand Up @@ -3209,12 +3205,7 @@ def _prediction_classification_plot(

label_num, _, label_value = self.explainer.check_label_name(label)
# predict proba Color scale
if hasattr(self.explainer.model, "predict_proba"):
if not hasattr(self.explainer, "proba_values"):
self.explainer.predict_proba()
if hasattr(self.explainer.model, "predict"):
if not hasattr(self.explainer, "y_pred") or self.explainer.y_pred is None:
self.explainer.predict()
if self.explainer.proba_values is not None:
# Assign proba values of the target
df_proba_target = self.explainer.proba_values.copy()
df_proba_target["proba_target"] = df_proba_target.iloc[:, label_num]
Expand Down Expand Up @@ -3333,9 +3324,6 @@ def _prediction_regression_plot(
fig = go.Figure()

subtitle = None
if self.explainer.y_pred is None:
if hasattr(self.explainer.model, "predict"):
self.explainer.predict()
prediction_error = self.explainer.prediction_error
if prediction_error is not None:
if (self.explainer.y_target == 0).any()[0]:
Expand Down
16 changes: 8 additions & 8 deletions shapash/utils/columntransformer_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import numpy as np
import pandas as pd
from sklearn.preprocessing import FunctionTransformer

from shapash.utils.category_encoder_backend import (
category_encoder_binary,
Expand Down Expand Up @@ -91,7 +92,7 @@ def inv_transform_ct(x_in, encoding):

# columns not encode
elif name_encoding == "remainder":
if ct_encoding == "passthrough":
if isinstance(ct_encoding, FunctionTransformer):
nb_col = len(col_encoding)
frame = x_in.iloc[:, init : init + nb_col]
else:
Expand Down Expand Up @@ -249,7 +250,7 @@ def calc_inv_contrib_ct(x_contrib, encoding, agg_columns):
init += nb_col

elif name_encoding == "remainder":
if ct_encoding == "passthrough":
if isinstance(ct_encoding, FunctionTransformer):
nb_col = len(col_encoding)
frame = x_contrib.iloc[:, init : init + nb_col]
rst = pd.concat([rst, frame], axis=1)
Expand Down Expand Up @@ -366,7 +367,9 @@ def get_feature_names(column_transformer):
List of returned features names when ColumnTransformer is applied.
"""
feature_names = []
l_transformers = list(column_transformer._iter(fitted=True))
l_transformers = list(
column_transformer._iter(fitted=True, column_as_labels=False, skip_drop=True, skip_empty_columns=True)
)

for name, trans, column, _ in l_transformers:
feature_names.extend(get_names(name, trans, column, column_transformer))
Expand Down Expand Up @@ -463,11 +466,8 @@ def get_col_mapping_ct(encoder, x_encoded):
else:
raise NotImplementedError(f"Estimator not supported : {estimator}")

elif estimator == "passthrough":
try:
features_out = encoder.feature_names_in_[features]
except Exception:
features_out = encoder._feature_names_in[features] # for oldest sklearn version
elif isinstance(estimator, FunctionTransformer):
features_out = encoder.feature_names_in_[features]
for f_name in features_out:
dict_col_mapping[f_name] = [x_encoded.columns.to_list()[idx_encoded]]
idx_encoded += 1
Expand Down
4 changes: 3 additions & 1 deletion shapash/utils/transform.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
"""
Transform Module
"""

import re

import numpy as np
import pandas as pd
from sklearn.preprocessing import FunctionTransformer

from shapash.utils.category_encoder_backend import (
get_col_mapping_ce,
Expand Down Expand Up @@ -185,7 +187,7 @@ def check_transformers(list_encoding):
if (str(type(ct_encoding)) not in supported_sklearn) and (
str(type(ct_encoding)) not in supported_category_encoder
):
if str(type(ct_encoding)) != "<class 'str'>":
if not isinstance(ct_encoding, str) and not isinstance(ct_encoding, FunctionTransformer):
raise ValueError("One of the encoders used in ColumnTransformers isn't supported.")

elif str(type(enc)) in supported_category_encoder:
Expand Down
8 changes: 6 additions & 2 deletions tests/unit_tests/explainer/test_smart_plotter.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ def setUp(self):
self.smart_explainer._case, self.smart_explainer._classes = check_model(model)
self.smart_explainer.state = MultiDecorator(SmartState())
self.smart_explainer.y_pred = None
self.smart_explainer.proba_values = None
self.smart_explainer.features_desc = dict(self.x_init.nunique())
self.smart_explainer.features_compacity = self.features_compacity

Expand Down Expand Up @@ -863,7 +864,7 @@ def test_contribution_plot_8(self):
xpl.model = model
np_hv = [f"Id: {x}<br />Predict: {y}" for x, y in zip(xpl.x_init.index, xpl.y_pred.iloc[:, 0].tolist())]
np_hv.sort()
output = xpl.plot.contribution_plot(col)
output = xpl.plot.contribution_plot(col, proba=False)
annot_list = []
for data_plot in output.data:
annot_list.extend(data_plot.hovertext.tolist())
Expand Down Expand Up @@ -895,7 +896,7 @@ def test_contribution_plot_9(self):
model = lambda: None
model.classes_ = np.array([0, 1])
xpl.model = model
output = xpl.plot.contribution_plot(col, max_points=39)
output = xpl.plot.contribution_plot(col, max_points=39, proba=False)
assert len(output.data) == 4
for elem in output.data:
assert elem.type == "violin"
Expand Down Expand Up @@ -1266,6 +1267,9 @@ def test_features_importance_4(self):

def test_local_pred_1(self):
xpl = self.smart_explainer
xpl.proba_values = pd.DataFrame(
data=np.array([[0.4, 0.6], [0.3, 0.7]]), columns=["class_1", "class_2"], index=xpl.x_encoded.index.values
)
output = xpl.plot.local_pred("person_A", label=0)
assert isinstance(output, float)

Expand Down
9 changes: 5 additions & 4 deletions tests/unit_tests/utils/test_columntransformer_backend.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Unit test of Inverse Transform
"""

import unittest

import catboost as cb
Expand Down Expand Up @@ -959,25 +960,25 @@ def test_get_names_1(self):
enc_4.fit(train)

feature_names_1 = []
l_transformers = list(enc_1._iter(fitted=True))
l_transformers = list(enc_1._iter(fitted=True, column_as_labels=False, skip_drop=True, skip_empty_columns=True))

for name, trans, column, _ in l_transformers:
feature_names_1.extend(get_names(name, trans, column, enc_1))

feature_names_2 = []
l_transformers = list(enc_2._iter(fitted=True))
l_transformers = list(enc_2._iter(fitted=True, column_as_labels=False, skip_drop=True, skip_empty_columns=True))

for name, trans, column, _ in l_transformers:
feature_names_2.extend(get_names(name, trans, column, enc_2))

feature_names_3 = []
l_transformers = list(enc_3._iter(fitted=True))
l_transformers = list(enc_3._iter(fitted=True, column_as_labels=False, skip_drop=True, skip_empty_columns=True))

for name, trans, column, _ in l_transformers:
feature_names_3.extend(get_names(name, trans, column, enc_3))

feature_names_4 = []
l_transformers = list(enc_4._iter(fitted=True))
l_transformers = list(enc_4._iter(fitted=True, column_as_labels=False, skip_drop=True, skip_empty_columns=True))

for name, trans, column, _ in l_transformers:
feature_names_4.extend(get_names(name, trans, column, enc_4))
Expand Down

0 comments on commit 4d79a86

Please sign in to comment.