diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 5e9c5589..0b5065ab 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -17,21 +17,17 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - python -m pip install --upgrade pip wheel setuptools - pip install flake8 pytest - if [ -f requirements.dev.txt ]; then pip install -r requirements.dev.txt; fi - - name: Lint with flake8 - run: | - # stop the build if there are Python syntax errors or undefined names - flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics + python -m pip install --upgrade pip + pip install .[all] + - name: Run pre-commit hook + run: pre-commit run --all-files - name: Test with pytest run: | python -m pytest - name: Build python package run: | - python setup.py sdist bdist_wheel + python -m pip install --upgrade build + python -m build - name: Deploy to PyPI if: success() && startsWith(github.ref, 'refs/tags') && matrix.python-version == '3.11' uses: pypa/gh-action-pypi-publish@release/v1 diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 5c1355b3..bced5df7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,13 +2,13 @@ default_language_version: python: python3 repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v3.2.0 + rev: v4.6.0 hooks: - id: check-ast - - id: check-byte-order-marker + - id: fix-byte-order-marker - id: check-case-conflict - id: check-docstring-first - - id: check-executables-have-shebangs + - id: check-shebang-scripts-are-executable - id: check-json - id: check-yaml exclude: ^chart/ @@ -28,30 +28,12 @@ repos: args: ['--maxkb=500'] - id: no-commit-to-branch args: ['--branch', 'master', '--branch', 'develop'] -- repo: https://github.com/psf/black - rev: 21.12b0 +- repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.5.2 hooks: - - id: black - args: [--line-length=120] - additional_dependencies: ['click==8.0.4'] -#- repo: https://github.com/pre-commit/mirrors-mypy -# rev: 'v0.931' -# hooks: -# - id: mypy -# args: [--ignore-missing-imports, --disallow-untyped-defs, --show-error-codes, --no-site-packages] -# files: src -- repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 - hooks: - - id: flake8 - exclude: ^tests/ - args: ['--ignore=E501,D2,D3,D4,D104,D100,D106,D107,W503,D105,E203', '--per-file-ignores=__init__.py:F401'] - additional_dependencies: [ flake8-docstrings, "flake8-bugbear==22.8.23" ] -- repo: https://github.com/pre-commit/mirrors-isort - rev: v5.4.2 - hooks: - - id: isort - args: ["--profile", "black", "-l", "120"] + - id: ruff + args: [--fix] + - id: ruff-format - repo: https://github.com/asottile/pyupgrade rev: v2.7.2 hooks: @@ -62,8 +44,13 @@ repos: hooks: - id: blacken-docs additional_dependencies: [black==21.12b0] +- repo: https://github.com/pypa/pip-audit + rev: v2.7.3 + hooks: + - id: pip-audit + args: ["--skip-editable"] - repo: https://github.com/compilerla/conventional-pre-commit - rev: v2.1.1 + rev: v3.3.0 hooks: - id: conventional-pre-commit stages: [commit-msg] diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..494394e0 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,158 @@ +[build-system] +requires = ["setuptools", "wheel"] +build-backend = "setuptools.build_meta" + +[project] +name = "shapash" +version = "2.6.0" +authors = [ + {name = "Yann Golhen"}, + {name = "Sebastien Bidault"}, + {name = "Yann Lagre"}, + {name = "Maxime Gendre"}, + {name = "Thomas Bouché", email = "thomas.bouche@maif.fr"}, + {name = "Maxime Lecardonnel"}, + {name = "Guillaume Vignal"}, +] +description = "Shapash is a Python library which aims to make machine learning interpretable and understandable by everyone." +readme = "README.md" +requires-python = ">3.8, <3.13" +license = {text = "Apache Software License 2.0"} +keywords = ["shapash"] +classifiers = [ + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "License :: OSI Approved :: Apache Software License", + "Operating System :: OS Independent", +] +dependencies = [ + "plotly>=5.0.0", + "matplotlib>=3.2.0", + "numpy>1.18.0,<2", + "pandas>=2.1.0", + "shap>=0.45.0", + "Flask>=1.0.4", + "dash>=2.3.1", + "dash-bootstrap-components>=1.1.0", + "dash-core-components>=2.0.0", + "dash-daq>=0.5.0", + "dash-html-components>=2.0.0", + "dash-renderer==1.8.3", + "dash-table>=5.0.0", + "nbformat>4.2.0", + "numba>=0.53.1", + "scikit-learn>=1.4.0", + "category_encoders>=2.6.0", + "scipy>=0.19.1", +] + +[project.optional-dependencies] # Optional +report = [ + "nbconvert>=6.0.7", + "papermill>=2.0.0", + "jupyter-client>=7.4.0", + "seaborn==0.12.2", + "notebook", + "Jinja2>=2.11.0", + "phik", +] +xgboost = ["xgboost>=1.0.0"] +lightgbm = ["lightgbm>=2.3.0"] +catboost = ["catboost>=1.0.1"] +lime = ["lime>=0.2.0.0"] + +dev = ["pre-commit", "mypy", "ruff"] +test = ["pytest", "pytest-cov"] +mypy = ["mypy"] +ruff = ["ruff"] +doc = [ + "Sphinx==4.5.0", + "sphinxcontrib-applehelp==1.0.2", + "sphinxcontrib-devhelp==1.0.2", + "sphinxcontrib-htmlhelp==2.0.0", + "sphinxcontrib-jsmath==1.0.1", + "sphinxcontrib-qthelp==1.0.3", + "sphinxcontrib-serializinghtml==1.1.5", + "nbsphinx==0.8.8", + "sphinx_material==0.0.35", +] + +all = ["shapash[dev, test, mypy, ruff, report, xgboost, lightgbm, catboost, lime, doc]"] + +[project.urls] +Homepage = "https://github.com/MAIF/shapash" + +[tool.setuptools] +package-dir = {"" = "shapash"} + +[tool.setuptools.packages.find] +where = ["shapash"] + + +[tool.setuptools.package-data] +"shapash" = ["*.csv", "*json", "*.yml", "*.css", "*.js", "*.png"] + +[tool.pytest.ini_options] +pythonpath = ["."] +testpaths = ["tests"] + +[tool.mypy] +exclude = ["tests", "tutorial"] +ignore_missing_imports = true + +[tool.ruff] +line-length = 120 +exclude = [ + "tests", + "docs", + "tutorial", +] + +[tool.ruff.lint] +select = [ + "E", # pycodestyle errors + "F", # pyflakes + "W", # pycodestyle warnings + "A", # flake8-builtins + "PLC", # pylint conventions + "PLE", # pylint errors + "PLW", # pylint warnings + "UP", # pyupgrade + "S", # flake8-bandit, + "B", # flake8-bugbear + "I", # isort + "D", # pydocstyle + "NPY", # NumPy-specific rules +] +ignore = ["E501", "D2", "D3", "D4", "D104", "D100", "D105", "D106", "D107", "S311"] +exclude = ["tests/*", "*.ipynb"] + +[tool.ruff.lint.per-file-ignores] +"shapash/__init__.py" = ["F401"] +"shapash/backend/__init__.py" = ["F401"] +"shapash/backend/base_backend.py" = ["S101"] +"shapash/backend/lime_backend.py" = ["PLW2901"] +"shapash/data/data_loader.py" = ["S310", "B904"] +"shapash/explainer/consistency.py" = ["PLW2901", "NPY002", "UP031", "E741"] +"shapash/explainer/smart_explainer.py" = ["S104", "B904"] +"shapash/explainer/smart_plotter.py" = ["PLW3301", "A001", "S101"] +"shapash/explainer/smart_predictor.py" = ["S101", "B904", "E721"] +"shapash/manipulation/summarize.py" = ["B028"] +"shapash/plots/plot_line_comparison.py" = ["B028", "A001"] +"shapash/plots/plot_scatter_prediction.py" = ["PLW0127", "PLW3301"] +"shapash/report/__init__.py" = ["B904"] +"shapash/report/plots.py" = ["A002"] +"shapash/report/visualisation.py" = ["UP031"] +"shapash/report/project_report.py" = ["S101", "S701"] +"shapash/utils/columntransformer_backend.py" = ["PLW0127"] +"shapash/utils/explanation_metrics.py" = ["S101"] +"shapash/utils/io.py" = ["S301"] +"shapash/webapp/utils/callbacks.py" = ["A002", "E721"] +"shapash/webapp/utils/utils.py" = ["UP031"] +"shapash/webapp/utils/MyGraph.py" = ["A002"] +"shapash/webapp/smart_app.py" = ["A002", "S307", "E721", "A001"] +"shapash/webapp/webapp_launch_DVF.py" = ["S104"] +"shapash/webapp/webapp_launch.py" = ["S104", "S301"] diff --git a/requirements.dev.txt b/requirements.dev.txt deleted file mode 100644 index 864c16e3..00000000 --- a/requirements.dev.txt +++ /dev/null @@ -1,43 +0,0 @@ -pip>=23.2.0 -numpy>1.18.0,<2 -dash==2.15.0 -catboost>=1.0.1 -category-encoders>=2.6.0 -Flask<2.3.0 -dash-bootstrap-components==1.1.0 -dash-core-components==2.0.0 -dash-daq==0.5.0 -dash-html-components==2.0.0 -dash-renderer==1.8.3 -dash-table==5.0.0 -lightgbm==2.3.1 -pandas>=2.1.0 -plotly==5.6.0 -shap>=0.45.0 -Sphinx==4.5.0 -sphinxcontrib-applehelp==1.0.2 -sphinxcontrib-devhelp==1.0.2 -sphinxcontrib-htmlhelp==2.0.0 -sphinxcontrib-jsmath==1.0.1 -sphinxcontrib-qthelp==1.0.3 -sphinxcontrib-serializinghtml==1.1.5 -nbsphinx==0.8.8 -sphinx_material==0.0.35 -pytest>=6.2.5 -pytest-cov>=2.8.1 -scikit-learn>=1.4.0 -xgboost>=1.0.0 -nbformat>4.2.0 -numba>=0.53.1 -nbconvert>=6.0.7 -papermill>=2.0.0 -matplotlib>=3.3.0 -seaborn==0.12.2 -scipy>=0.19.1 -notebook>=6.0.0 -jupyter-client<8.0.0 -Jinja2>=2.11.0 -phik>=0.12.0 -skranger>=0.8.0 -lime>=0.2.0.0 -regex diff --git a/setup.cfg b/setup.cfg deleted file mode 100644 index 6dbdfc77..00000000 --- a/setup.cfg +++ /dev/null @@ -1,16 +0,0 @@ -[bdist_wheel] -universal = 1 - -[flake8] -exclude = docs -max-line-length = 100 - -[aliases] -test = pytest - -[tool:pytest] -collect_ignore = ['setup.py'] -pep8maxlinelength = 100 - -[pep8] -max-line-length = 100 diff --git a/setup.py b/setup.py deleted file mode 100644 index 9b7b5bed..00000000 --- a/setup.py +++ /dev/null @@ -1,121 +0,0 @@ -#!/usr/bin/env python - -"""The setup script.""" -import os - -from setuptools import setup - -here = os.path.abspath(os.path.dirname(__file__)) - -with open("README.md", encoding="utf8") as readme_file: - long_description = readme_file.read() - -# Load the package's __version__.py module as a dictionary. -version_d: dict = {} -with open(os.path.join(here, "shapash", "__version__.py")) as f: - exec(f.read(), version_d) - - -requirements = [ - "plotly>=5.0.0", - "matplotlib>=3.2.0", - "numpy>1.18.0,<2", - "pandas>=2.1.0", - "shap>=0.45.0", - "Flask>=1.0.4", - "dash>=2.3.1", - "dash-bootstrap-components>=1.1.0", - "dash-core-components>=2.0.0", - "dash-daq>=0.5.0", - "dash-html-components>=2.0.0", - "dash-renderer==1.8.3", - "dash-table>=5.0.0", - "nbformat>4.2.0", - "numba>=0.53.1", - "scikit-learn>=1.4.0", - "category_encoders>=2.6.0", - "scipy>=0.19.1", -] - -extras = dict() - -# This list should be identical to the list in shapash/report/__init__.py -extras["report"] = [ - "nbconvert>=6.0.7", - "papermill>=2.0.0", - "jupyter-client>=7.4.0", - "seaborn==0.12.2", - "notebook", - "Jinja2>=2.11.0", - "phik", -] - -extras["xgboost"] = ["xgboost>=1.0.0"] -extras["lightgbm"] = ["lightgbm>=2.3.0"] -extras["catboost"] = ["catboost>=1.0.1"] -extras["lime"] = ["lime>=0.2.0.0"] - -setup_requirements = [ - "pytest-runner", -] - -test_requirements = [ - "pytest", -] - -setup( - name="shapash", - version=version_d["__version__"], - python_requires=">3.8, <3.13", - url="https://github.com/MAIF/shapash", - author="Yann Golhen, Sebastien Bidault, Yann Lagre, Maxime Gendre, Thomas Bouché, Maxime Lecardonnel, Guillaume Vignal", - author_email="yann.golhen@maif.fr", - description="Shapash is a Python library which aims to make machine learning interpretable and understandable by everyone.", - long_description=long_description, - long_description_content_type="text/markdown", - classifiers=[ - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent", - ], - install_requires=requirements, - extras_require=extras, - license="Apache Software License 2.0", - keywords="shapash", - package_dir={ - "shapash": "shapash", - "shapash.data": "shapash/data", - "shapash.decomposition": "shapash/decomposition", - "shapash.explainer": "shapash/explainer", - "shapash.backend": "shapash/backend", - "shapash.manipulation": "shapash/manipulation", - "shapash.report": "shapash/report", - "shapash.utils": "shapash/utils", - "shapash.webapp": "shapash/webapp", - "shapash.webapp.utils": "shapash/webapp/utils", - "shapash.style": "shapash/style", - }, - packages=[ - "shapash", - "shapash.data", - "shapash.decomposition", - "shapash.explainer", - "shapash.backend", - "shapash.manipulation", - "shapash.utils", - "shapash.webapp", - "shapash.webapp.utils", - "shapash.report", - "shapash.style", - ], - data_files=[("style", ["shapash/style/colors.json"])], - include_package_data=True, - setup_requires=setup_requirements, - test_suite="tests", - tests_require=test_requirements, - zip_safe=False, -) diff --git a/shapash/data/data_loader.py b/shapash/data/data_loader.py index e1ad6699..aca69db4 100644 --- a/shapash/data/data_loader.py +++ b/shapash/data/data_loader.py @@ -1,6 +1,7 @@ """ Data loader module """ + import json import os from pathlib import Path diff --git a/shapash/decomposition/contributions.py b/shapash/decomposition/contributions.py index 79f21409..b447ce63 100644 --- a/shapash/decomposition/contributions.py +++ b/shapash/decomposition/contributions.py @@ -117,7 +117,6 @@ def assign_contributions(ranked): """ if len(ranked) != 3: raise ValueError( - "Expected lenght : 3, observed lenght : {}," - "please check the outputs of rank_contributions.".format(len(ranked)) + f"Expected lenght : 3, observed lenght : {len(ranked)}," "please check the outputs of rank_contributions." ) return {"contrib_sorted": ranked[0], "x_sorted": ranked[1], "var_dict": ranked[2]} diff --git a/shapash/explainer/consistency.py b/shapash/explainer/consistency.py index 078124b8..27bdc7c8 100644 --- a/shapash/explainer/consistency.py +++ b/shapash/explainer/consistency.py @@ -306,7 +306,7 @@ def plot_comparison(self, mean_distances): mean_distances : DataFrame DataFrame storing all pairwise distances between methods """ - font = {"color": "#{:02x}{:02x}{:02x}".format(50, 50, 50)} + font = {"color": f"#{50:02x}{50:02x}{50:02x}"} fig, ax = plt.subplots(ncols=1, figsize=(10, 6)) @@ -426,13 +426,13 @@ def plot_examples(self, method_1, method_2, l2, index, backend_name_1, backend_n idx = np.flip(i.argsort()) i, j = i[idx], j[idx] - axes[n].barh(y, i, label="method 1", left=0, color="#{:02x}{:02x}{:02x}".format(255, 166, 17)) + axes[n].barh(y, i, label="method 1", left=0, color=f"#{255:02x}{166:02x}{17:02x}") axes[n].barh( y, j, label="method 2", left=np.abs(np.max(i)) + np.abs(np.min(j)) + np.max(i) / 3, - color="#{:02x}{:02x}{:02x}".format(117, 152, 189), + color=f"#{117:02x}{152:02x}{189:02x}", ) # /3 to add space # set gray background @@ -567,10 +567,8 @@ def plot_pairwise_consistency(self, weights, x, top_features, methods, file_name # Plot the distribution for i, c in enumerate(top_features): - switch = False if c in encoder.cols: - switch = True mapping = encoder.mapping[encoder.cols.index(c)]["mapping"] diff --git a/shapash/explainer/multi_decorator.py b/shapash/explainer/multi_decorator.py index ffeeca49..ad0403d4 100644 --- a/shapash/explainer/multi_decorator.py +++ b/shapash/explainer/multi_decorator.py @@ -1,6 +1,7 @@ """ Multi Decorator module """ + from shapash.explainer.smart_state import SmartState @@ -75,7 +76,7 @@ def check_args(self, args, name): """ if not args: raise ValueError( - "{} is applied without arguments," "please check that you have specified contributions.".format(name) + f"{name} is applied without arguments," "please check that you have specified contributions." ) def check_method(self, method, name): @@ -115,8 +116,8 @@ def check_first_arg(self, arg, name): """ if not isinstance(arg, list): raise ValueError( - "{} is not applied to a list of contributions," - "please check that you are dealing with a multi-class problem.".format(name) + f"{name} is not applied to a list of contributions," + "please check that you are dealing with a multi-class problem." ) def assign_contributions(self, ranked): @@ -229,7 +230,7 @@ def summarize(self, s_contribs, var_dicts, xs_sorted, masks, columns_dict, featu def compute_features_import(self, contributions, norm=1): """ Compute a relative features importance, sum of absolute values - ​​of the contributions for each + \u200b\u200bof the contributions for each features importance compute in base 100 Parameters diff --git a/shapash/explainer/smart_explainer.py b/shapash/explainer/smart_explainer.py index 7ca69df9..99c5892f 100644 --- a/shapash/explainer/smart_explainer.py +++ b/shapash/explainer/smart_explainer.py @@ -153,7 +153,7 @@ class SmartExplainer: model: model object model used to check the different values of target estimate predict proba features_desc: dict - Dictionary that references the numbers of feature values ​​in the x_init + Dictionary that references the numbers of feature values \u200b\u200bin the x_init features_imp: pandas.Series (regression) or list (classification) Features importance values local_neighbors: dict @@ -717,11 +717,9 @@ def check_attributes(self, attribute): """ if not hasattr(self, attribute): raise ValueError( + f""" + attribute {attribute} isn't an attribute of the explainer precised. """ - attribute {} isn't an attribute of the explainer precised. - """.format( - attribute - ) ) return self.__dict__[attribute] diff --git a/shapash/explainer/smart_plotter.py b/shapash/explainer/smart_plotter.py index 93a62e54..3422d0ed 100644 --- a/shapash/explainer/smart_plotter.py +++ b/shapash/explainer/smart_plotter.py @@ -1643,7 +1643,6 @@ def stability_plot( # Plot 2 : Show distribution of variability else: - # If set, only keep features with the highest mean amplitude if max_features is not None: keep = mean_amplitude.argsort()[::-1][:max_features] diff --git a/shapash/explainer/smart_predictor.py b/shapash/explainer/smart_predictor.py index cd9f1225..a931e068 100644 --- a/shapash/explainer/smart_predictor.py +++ b/shapash/explainer/smart_predictor.py @@ -1,6 +1,7 @@ """ Smart predictor module """ + import copy import pandas as pd @@ -121,11 +122,9 @@ def __init__( for params in params_dict: if (params is not None) and (not isinstance(params, dict)): raise ValueError( + f""" + {str(params)} must be a dict. """ - {} must be a dict. - """.format( - str(params) - ) ) self.model = model @@ -284,7 +283,7 @@ def check_dataset_type(self, x=None): Raw dataset used by the model to perform the prediction (not preprocessed). """ - if not (type(x) in [pd.DataFrame, dict]): + if type(x) not in [pd.DataFrame, dict]: raise ValueError( """ x must be a dict or a pandas.DataFrame. diff --git a/shapash/explainer/smart_state.py b/shapash/explainer/smart_state.py index 05d3b12a..d2d3fb41 100644 --- a/shapash/explainer/smart_state.py +++ b/shapash/explainer/smart_state.py @@ -1,6 +1,7 @@ """ Smart State Module """ + import numpy as np import pandas as pd @@ -304,7 +305,7 @@ def summarize(self, s_contrib, var_dict, x_sorted, mask, columns_dict, features_ def compute_features_import(self, contributions, norm=1): """ Compute a relative features importance, sum of absolute values - ​​of the contributions for each + \u200b\u200bof the contributions for each features importance compute in base 100 Parameters ---------- diff --git a/shapash/manipulation/filters.py b/shapash/manipulation/filters.py index 0860cf7f..98394f08 100644 --- a/shapash/manipulation/filters.py +++ b/shapash/manipulation/filters.py @@ -1,6 +1,7 @@ """ Filters module """ + import numpy as np import pandas as pd @@ -144,5 +145,5 @@ def combine_masks(masks_list): mask_final = np.min(masks_cube, axis=2) return pd.DataFrame( - mask_final, columns=["contrib_{}".format(i + 1) for i in range(mask_final.shape[1])], index=masks_list[0].index + mask_final, columns=[f"contrib_{i + 1}" for i in range(mask_final.shape[1])], index=masks_list[0].index ) diff --git a/shapash/manipulation/mask.py b/shapash/manipulation/mask.py index 6dd65062..686ec15a 100644 --- a/shapash/manipulation/mask.py +++ b/shapash/manipulation/mask.py @@ -1,6 +1,7 @@ """ Mask module """ + import numpy as np import numpy.ma as ma import pandas as pd diff --git a/shapash/manipulation/select_lines.py b/shapash/manipulation/select_lines.py index c2ec0f54..0bf39352 100644 --- a/shapash/manipulation/select_lines.py +++ b/shapash/manipulation/select_lines.py @@ -1,6 +1,7 @@ """ Select Lines Module """ + import pandas as pd from pandas.core.common import flatten diff --git a/shapash/manipulation/summarize.py b/shapash/manipulation/summarize.py index 94a3dfea..daef49ba 100644 --- a/shapash/manipulation/summarize.py +++ b/shapash/manipulation/summarize.py @@ -47,7 +47,7 @@ def summarize_el(dataframe, mask, prefix): def compute_features_import(dataframe, norm=1): """ Compute a relative features importance, sum of absolute values - ​​of the contributions for each + \u200b\u200bof the contributions for each features importance compute in base 100 Parameters ---------- diff --git a/shapash/plots/plot_bar_chart.py b/shapash/plots/plot_bar_chart.py index ef8acd93..d7553007 100644 --- a/shapash/plots/plot_bar_chart.py +++ b/shapash/plots/plot_bar_chart.py @@ -114,17 +114,12 @@ def plot_bar_chart( feat_groups_values = x_init[features_groups[group_name]].loc[index_value[0]] hoverlabel = "
".join( [ - "{} :{}".format( - add_line_break(features_dict.get(f_name, f_name), 40, maxlen=120), - add_line_break(f_value, 40, maxlen=160), - ) + f"{add_line_break(features_dict.get(f_name, f_name), 40, maxlen=120)} :{add_line_break(f_value, 40, maxlen=160)}" for f_name, f_value in feat_groups_values.to_dict().items() ] ) else: - hoverlabel = "{} :
{}".format( - add_line_break(feat_name, 40, maxlen=120), add_line_break(x_val_el, 40, maxlen=160) - ) + hoverlabel = f"{add_line_break(feat_name, 40, maxlen=120)} :
{add_line_break(x_val_el, 40, maxlen=160)}" trunc_value = truncate_str(feat_name, 45) if not zoom: # Truncate value if length is upper than 30 @@ -141,7 +136,7 @@ def plot_bar_chart( or (features_groups is not None and group_name not in features_groups.keys()) ): # ylabel is based on trunc_new_value - ylabel = "{} :
{}".format(trunc_new_value, truncate_str(x_val_el, 45)) + ylabel = f"{trunc_new_value} :
{truncate_str(x_val_el, 45)}" else: ylabel = f"{trunc_new_value}" # colors diff --git a/shapash/plots/plot_contribution.py b/shapash/plots/plot_contribution.py index 125b19c7..45e4cee6 100644 --- a/shapash/plots/plot_contribution.py +++ b/shapash/plots/plot_contribution.py @@ -101,10 +101,7 @@ def plot_scatter( "%{hovertext}
" + "Contribution: %{y:.4f}
" + "
".join( - [ - "{}: %{{text[{}]}}".format(text_groups_features_keys[i], i) - for i in range(len(text_groups_features_keys)) - ] + [f"{text_groups_features_keys[i]}: %{{text[{i}]}}" for i in range(len(text_groups_features_keys))] ) + "" ) @@ -752,7 +749,7 @@ def _add_violin_trace(fig, name, x, y, side, line_color, hovertext, secondary_y= """Adds a Violin trace to the figure.""" # Violin plot has a problem if for one violin all the points have the same contribution value rng = np.random.default_rng(seed=79) - y = y + rng.normal(size=y.shape) * (max(y.max(), 0) - min(y.min(), 0)) / 10 ** 8 + y = y + rng.normal(size=y.shape) * (max(y.max(), 0) - min(y.min(), 0)) / 10**8 violin_trace = go.Violin( name=name, x=x, diff --git a/shapash/plots/plot_correlations.py b/shapash/plots/plot_correlations.py index f4a4a3fb..e716fcfc 100644 --- a/shapash/plots/plot_correlations.py +++ b/shapash/plots/plot_correlations.py @@ -94,7 +94,7 @@ def cluster_corr(corr, degree, inplace=False): if corr.shape[0] < 2: return corr - pairwise_distances = sch.distance.pdist(corr ** degree) + pairwise_distances = sch.distance.pdist(corr**degree) linkage = sch.linkage(pairwise_distances, method="complete") cluster_distance_threshold = pairwise_distances.max() / 2 idx_to_cluster_array = sch.fcluster(linkage, cluster_distance_threshold, criterion="distance") diff --git a/shapash/plots/plot_feature_importance.py b/shapash/plots/plot_feature_importance.py index f2a5491c..7628b840 100644 --- a/shapash/plots/plot_feature_importance.py +++ b/shapash/plots/plot_feature_importance.py @@ -500,7 +500,7 @@ def _plot_feature_contributions_cumulative( col_scale = get_pyplot_color(colors=style_dict["feature_contributions_cumulative"]) cmap = LinearSegmentedColormap.from_list("feature_contributions_cumulative", col_scale, N=256) colors = [cmap(i / num_features) for i in range(num_features)] - colors_hex = ["#{:02x}{:02x}{:02x}".format(int(r * 255), int(g * 255), int(b * 255)) for r, g, b, _ in colors] + colors_hex = [f"#{int(r * 255):02x}{int(g * 255):02x}{int(b * 255):02x}" for r, g, b, _ in colors] # Initialize data for storing the series data = [] @@ -555,7 +555,7 @@ def _plot_feature_contributions_cumulative( # Apply initial degree-based normalization if degree not in [0, "slider"]: - serie_values /= serie_tot ** degree + serie_values /= serie_tot**degree # Append the trace for the current series figs.append( diff --git a/shapash/plots/plot_scatter_prediction.py b/shapash/plots/plot_scatter_prediction.py index e13610b2..c2e65b08 100644 --- a/shapash/plots/plot_scatter_prediction.py +++ b/shapash/plots/plot_scatter_prediction.py @@ -453,7 +453,7 @@ def _prediction_regression_plot(y_target, y_pred, prediction_error, list_ind, st fig.layout.coloraxis.colorbar = { "title": {"text": colorbar_title}, "tickvals": [col_scale[0][0], col_scale[-1][0] - 0.15], - "ticktext": [float("{:0.3f}".format(equal_bins[0])), float("{:0.3f}".format(equal_bins[-2]))], + "ticktext": [float(f"{equal_bins[0]:0.3f}"), float(f"{equal_bins[-2]:0.3f}")], "tickformat": ".2s", "yanchor": "top", "y": 1.1, diff --git a/shapash/report/generation.py b/shapash/report/generation.py index 32853330..2ab8dac2 100644 --- a/shapash/report/generation.py +++ b/shapash/report/generation.py @@ -1,6 +1,7 @@ """ Report generation helper module. """ + import os from typing import Optional, Union diff --git a/shapash/report/plots.py b/shapash/report/plots.py index e611a33c..0f06bcbe 100644 --- a/shapash/report/plots.py +++ b/shapash/report/plots.py @@ -147,7 +147,7 @@ def generate_fig_univariate_categorical( for p in ax.patches: ax.annotate( - "{:.1f}%".format(np.nan_to_num(p.get_width(), nan=0)), + f"{np.nan_to_num(p.get_width(), nan=0):.1f}%", xy=(p.get_width(), p.get_y() + p.get_height() / 2), xytext=(5, 0), textcoords="offset points", diff --git a/shapash/utils/columntransformer_backend.py b/shapash/utils/columntransformer_backend.py index 09a0ab09..591ae09a 100644 --- a/shapash/utils/columntransformer_backend.py +++ b/shapash/utils/columntransformer_backend.py @@ -440,7 +440,6 @@ def get_col_mapping_ct(encoder, x_encoded): idx_encoded = 0 for name, estimator, features in encoder.transformers_: if name != "remainder": - if str(type(estimator)) in dummies_sklearn: features_out, categories_out = get_feature_out(estimator, features) for i, f_name in enumerate(features): diff --git a/shapash/utils/io.py b/shapash/utils/io.py index 628fcc37..68c64566 100644 --- a/shapash/utils/io.py +++ b/shapash/utils/io.py @@ -1,6 +1,7 @@ """ IO module """ + import pickle try: diff --git a/shapash/utils/load_smartpredictor.py b/shapash/utils/load_smartpredictor.py index 13946079..b6fb3c5b 100644 --- a/shapash/utils/load_smartpredictor.py +++ b/shapash/utils/load_smartpredictor.py @@ -1,6 +1,7 @@ """ load_smartpredictor module """ + from shapash.explainer.smart_predictor import SmartPredictor from shapash.utils.io import load_pickle diff --git a/shapash/utils/model.py b/shapash/utils/model.py index 19b55052..51d09f7c 100644 --- a/shapash/utils/model.py +++ b/shapash/utils/model.py @@ -1,6 +1,7 @@ """ Model Module """ + from inspect import ismethod import pandas as pd diff --git a/shapash/utils/sampling.py b/shapash/utils/sampling.py index 2a73aff6..53b3d0ad 100644 --- a/shapash/utils/sampling.py +++ b/shapash/utils/sampling.py @@ -103,7 +103,7 @@ def _intelligent_sampling(data, max_points, col, col_value_count, random_seed): cluster_labels = pd.Series(kmeans.fit_predict(data[col].values.reshape(-1, 1))) cluster_counts = cluster_labels.value_counts() - weights = cluster_counts.apply(lambda x: (x ** 0.5) / x).to_dict() + weights = cluster_counts.apply(lambda x: (x**0.5) / x).to_dict() selection_weights = cluster_labels.apply(lambda x: weights[x]) selection_weights /= selection_weights.sum() selected_indices = rng.choice(data.index.tolist(), max_points, p=selection_weights, replace=False) diff --git a/shapash/utils/threading.py b/shapash/utils/threading.py index 211f669a..719237e4 100644 --- a/shapash/utils/threading.py +++ b/shapash/utils/threading.py @@ -1,6 +1,7 @@ """ Override threading custom module """ + import sys import threading diff --git a/shapash/utils/transform.py b/shapash/utils/transform.py index 37f56655..c8caca50 100644 --- a/shapash/utils/transform.py +++ b/shapash/utils/transform.py @@ -215,7 +215,7 @@ def check_transformers(list_encoding): # check that Category encoding is apply on different columns col = [] for enc in list_encoding: - if not str(type(enc)) in ("", "", columntransformer): + if str(type(enc)) not in ("", "", columntransformer): col += enc.cols duplicate = {x for x in col if col.count(x) > 1} if duplicate: diff --git a/shapash/utils/utils.py b/shapash/utils/utils.py index 23a9a664..60226308 100644 --- a/shapash/utils/utils.py +++ b/shapash/utils/utils.py @@ -1,6 +1,7 @@ """ Utils is a group of function for the library """ + import math import socket diff --git a/shapash/webapp/utils/callbacks.py b/shapash/webapp/utils/callbacks.py index 8fbf9c95..9f4b52c7 100644 --- a/shapash/webapp/utils/callbacks.py +++ b/shapash/webapp/utils/callbacks.py @@ -682,7 +682,7 @@ def create_filter_modalities_selection(value: str, id: dict, round_dataframe: pd type="number", style={"width": "60px"}, ), - " <= {} in [{}, {}]<= ".format(value, round_dataframe[value].min(), round_dataframe[value].max()), + f" <= {value} in [{round_dataframe[value].min()}, {round_dataframe[value].max()}]<= ", dcc.Input( id={"type": "upper", "index": id["index"]}, value=upper_value, diff --git a/shapash/webapp/webapp_launch.py b/shapash/webapp/webapp_launch.py index 1c768311..dc4a7f26 100644 --- a/shapash/webapp/webapp_launch.py +++ b/shapash/webapp/webapp_launch.py @@ -2,6 +2,7 @@ Webapp launch module This is an example in python how to launch app from explainer """ + import pandas as pd from category_encoders import one_hot from lightgbm import LGBMClassifier, LGBMRegressor diff --git a/tests/data/predictor_to_load_39.pkl b/tests/data/predictor_to_load_39.pkl index 94302837..0602433f 100644 Binary files a/tests/data/predictor_to_load_39.pkl and b/tests/data/predictor_to_load_39.pkl differ diff --git a/tests/unit_tests/explainer/test_smart_explainer.py b/tests/unit_tests/explainer/test_smart_explainer.py index 76db4cc1..5b3f03e2 100644 --- a/tests/unit_tests/explainer/test_smart_explainer.py +++ b/tests/unit_tests/explainer/test_smart_explainer.py @@ -992,8 +992,8 @@ def test_compute_features_import_2(self): expect1 = expect1 / expect1.sum() expect2 = contrib2.abs().sum().sort_values(ascending=True) expect2 = expect2 / expect2.sum() - assert expect1.equals(xpl.features_imp[0]) - assert expect2.equals(xpl.features_imp[1]) + assert expect1.round(8).equals(xpl.features_imp[0].round(8)) + assert expect2.round(8).equals(xpl.features_imp[1].round(8)) def test_to_smartpredictor_1(self): """