From fd69745128098710a55ada495682d57d12718604 Mon Sep 17 00:00:00 2001 From: Synchon Mandal Date: Thu, 30 Nov 2023 10:03:19 +0100 Subject: [PATCH] [MAINT]: Add support for `ruff` linter (#231) * update: add ruff config in pyproject.toml * chore: remove isort and flake8 configs and envs from tox.ini * chore: add ruff badge in README * update: expand target-version for black and cosmetic changes in pyproject.toml * update: use ruff in lint.yml * chore: ruff autofixes * chore: fix ruff N803 issues * chore: fix ruff C901 issues * chore: add missing module, function, class and method docstrings * chore: further ruff fixes and improve typing and docstrings * chore: ruff F401 checks * chore: sort imports via ruff * fix: typo * chore: add entries in nitpick_ignore_regex in conf.py to build docs * chore: add changelog 231.misc * chore: remove unnecessary commands from tox.ini * fix: E721 issue in test_confounds.py * Fix codespell * Add joblib to intersphinx * Fix some sklearn ignores * chore: update lint.yml to fix ruff config * remove this file --------- Co-authored-by: Fede Co-authored-by: Fede Raimondo --- .github/workflows/lint.yml | 26 +- .gitignore | 1 + README.md | 1 + docs/changes/newsfragments/231.misc | 1 + docs/conf.py | 13 +- docs/contributing.rst | 2 +- docs/sphinxext/gh_substitutions.py | 2 +- .../plot_confound_removal_classification.py | 2 +- ignore_words.txt | 1 + julearn/api.py | 34 +- julearn/base/column_types.py | 36 ++- julearn/base/estimators.py | 56 +++- julearn/base/tests/test_base_estimators.py | 2 +- julearn/base/tests/test_column_types.py | 4 +- julearn/config.py | 2 + julearn/conftest.py | 14 +- julearn/inspect/_cv.py | 15 +- julearn/inspect/_pipeline.py | 35 ++- julearn/inspect/_preprocess.py | 4 +- julearn/inspect/inspector.py | 87 +++++- julearn/inspect/tests/test_cv.py | 68 ++-- julearn/inspect/tests/test_inspector.py | 83 +++-- julearn/inspect/tests/test_pipeline.py | 220 ++++++++++--- julearn/inspect/tests/test_preprocess.py | 55 +++- 
.../continuous_stratified_kfold.py | 10 +- .../model_selection/stratified_bootstrap.py | 10 +- .../tests/test_continous_stratified_kfold.py | 12 +- .../tests/test_stratified_bootstrap.py | 2 +- julearn/models/available_models.py | 3 +- julearn/models/dynamic.py | 15 +- julearn/models/tests/test_models.py | 4 +- julearn/pipeline/merger.py | 7 +- julearn/pipeline/pipeline_creator.py | 32 +- julearn/pipeline/target_pipeline.py | 16 +- julearn/pipeline/target_pipeline_creator.py | 2 + julearn/pipeline/test/test_merger.py | 3 +- .../pipeline/test/test_pipeline_creator.py | 29 +- julearn/pipeline/test/test_target_pipeline.py | 59 ++-- julearn/prepare.py | 29 +- julearn/scoring/available_scorers.py | 33 +- .../scoring/tests/test_available_scorers.py | 4 +- julearn/stats/corrected_ttest.py | 8 +- julearn/stats/tests/test_corrected_ttest.py | 5 + julearn/tests/test_api.py | 88 +++--- julearn/tests/test_config.py | 3 +- julearn/tests/test_prepare.py | 20 +- .../transformers/available_transformers.py | 10 +- julearn/transformers/cbpm.py | 76 +++-- julearn/transformers/confound_remover.py | 14 +- .../dataframe/change_column_types.py | 17 +- .../transformers/dataframe/drop_columns.py | 16 +- .../transformers/dataframe/filter_columns.py | 15 +- .../dataframe/set_column_types.py | 8 +- .../tests/test_change_column_types.py | 27 +- .../dataframe/tests/test_set_column_types.py | 8 +- julearn/transformers/ju_column_transformer.py | 59 ++-- .../target/ju_target_transformer.py | 27 +- .../target/ju_transformed_target_model.py | 49 ++- .../target/target_confound_remover.py | 8 +- .../tests/test_ju_transformed_target_model.py | 22 +- .../tests/test_target_confound_remover.py | 2 +- .../tests/test_available_transformers.py | 6 +- julearn/transformers/tests/test_cbpm.py | 157 +++++----- julearn/transformers/tests/test_confounds.py | 10 +- .../tests/test_jucolumntransformers.py | 66 ++-- julearn/utils/_cv.py | 9 +- julearn/utils/checks.py | 2 +- julearn/utils/logging.py | 22 +- 
julearn/utils/testing.py | 23 +- julearn/utils/tests/test_logging.py | 8 +- julearn/utils/tests/test_version.py | 2 +- julearn/utils/typing.py | 293 +++++++++++++++++- julearn/viz/_scores.py | 28 +- pyproject.toml | 87 +++++- tox.ini | 70 +---- 75 files changed, 1570 insertions(+), 729 deletions(-) create mode 100644 docs/changes/newsfragments/231.misc diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index e129f0af8..a8cd2ba90 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -6,26 +6,18 @@ on: jobs: lint: - runs-on: ${{ matrix.os }} - strategy: - fail-fast: false - matrix: - os: [ubuntu-latest] - python-version: ['3.11'] - + runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - - name: Set up Python ${{ matrix.python-version }} + - name: Install Python uses: actions/setup-python@v4 with: - python-version: ${{ matrix.python-version }} + python-version: "3.11" - name: Install dependencies run: | - python -m pip install --upgrade pip setuptools wheel - python -m pip install tox tox-gh-actions - - name: Check with flake8 - run: | - tox -e flake8 - - name: Check with codespell - run: | - tox -e codespell + python -m pip install --upgrade pip + python -m pip install tox "ruff>=0.1.0" + - name: Run ruff + run: ruff check --output-format=github . 
+ - name: Run codespell + run: tox -e codespell diff --git a/.gitignore b/.gitignore index 4d9f6686a..3ca9d088e 100644 --- a/.gitignore +++ b/.gitignore @@ -73,6 +73,7 @@ docs/_build/ docs/auto_examples docs/**/generated docs/make.bat +docs/sg_execution_times.rst # PyBuilder .pybuilder/ diff --git a/README.md b/README.md index 4da0cd76f..5e5fc3ca7 100644 --- a/README.md +++ b/README.md @@ -5,6 +5,7 @@ ![PyPI - Wheel](https://img.shields.io/pypi/wheel/julearn?style=flat-square) ![GitHub](https://img.shields.io/github/license/juaml/julearn?style=flat-square) ![Codecov](https://img.shields.io/codecov/c/github/juaml/julearn?style=flat-square) +[![Ruff](https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/charliermarsh/ruff/main/assets/badge/v2.json)](https://github.com/charliermarsh/ruff) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) ## About diff --git a/docs/changes/newsfragments/231.misc b/docs/changes/newsfragments/231.misc new file mode 100644 index 000000000..6978c04c1 --- /dev/null +++ b/docs/changes/newsfragments/231.misc @@ -0,0 +1 @@ +Adopt ``ruff`` as the only linter for the codebase by `Synchon Mandal`_ \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 7522fe3db..8ac7c076e 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -5,10 +5,9 @@ # https://www.sphinx-doc.org/en/master/usage/configuration.html import os import re -from sphinx_gallery.sorting import ExplicitOrder +import sys # -- Path setup -------------------------------------------------------------- - # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. 
@@ -16,9 +15,8 @@ # import os # import sys # sys.path.insert(0, os.path.abspath('.')) - from pathlib import Path -import sys + # Check if sphinx-multiversion is installed use_multiversion = False @@ -87,6 +85,7 @@ # Sklearn doc issue to be solved in next release ("py:class", "pipeline.Pipeline"), ("py:class", "sklearn.utils.metadata_routing.MetadataRequest"), + ("py:class", "julearn.inspect._pipeline.PipelineInspector"), ] @@ -157,9 +156,9 @@ "nilearn": ("https://nilearn.github.io/stable/", None), "nibabel": ("https://nipy.org/nibabel/", None), "numpy": ("https://numpy.org/doc/stable/", None), - "numpy": ("https://numpy.org/doc/stable/", None), "pandas": ("https://pandas.pydata.org/pandas-docs/dev", None), # "sqlalchemy": ("https://docs.sqlalchemy.org/en/20/", None), + "joblib": ("https://joblib.readthedocs.io/en/latest/", None), "scipy": ("https://docs.scipy.org/doc/scipy/", None), } @@ -219,7 +218,7 @@ def __init__(self, src_dir): self.regex = re.compile(r"^([\w ]+)\n-", re.MULTILINE) def __repr__(self): - return "<%s>" % (self.__class__.__name__,) + return f"<{self.__class__.__name__}>" def __call__(self, directory): src_path = os.path.normpath(os.path.join(self.src_dir, directory)) @@ -231,7 +230,7 @@ def __call__(self, directory): readme = os.path.join(src_path, "README.txt") try: - with open(readme, "r") as f: + with open(readme) as f: content = f.read() except FileNotFoundError: return directory diff --git a/docs/contributing.rst b/docs/contributing.rst index ee11aa8af..b42558f1e 100644 --- a/docs/contributing.rst +++ b/docs/contributing.rst @@ -165,7 +165,7 @@ Writing Examples The format used for text is reST. Check the `sphinx RST reference`_ for more details. The examples are run and displayed in HTML format using `sphinx gallery`_. 
To add an example, just create a ``.py`` file that starts -either with ``plot_`` or ``run_``, dependending on whether the example generates +either with ``plot_`` or ``run_``, depending on whether the example generates a figure or not. The first lines of the example should be a Python block comment with a title, diff --git a/docs/sphinxext/gh_substitutions.py b/docs/sphinxext/gh_substitutions.py index 47b5e86f6..38901d5a5 100644 --- a/docs/sphinxext/gh_substitutions.py +++ b/docs/sphinxext/gh_substitutions.py @@ -1,8 +1,8 @@ -# -*- coding: utf-8 -*- from docutils.nodes import reference from docutils.parsers.rst.roles import set_classes + # Taken from MNE-Python # adapted from # https://doughellmann.com/blog/2010/05/09/defining-custom-roles-in-sphinx/ diff --git a/examples/04_confounds/plot_confound_removal_classification.py b/examples/04_confounds/plot_confound_removal_classification.py index 024238978..c179805f2 100644 --- a/examples/04_confounds/plot_confound_removal_classification.py +++ b/examples/04_confounds/plot_confound_removal_classification.py @@ -151,7 +151,7 @@ ############################################################################### # While this plot allows us to see the mean performance values and compare # them, these samples are paired. In order to see if there is a systematic -# difference, we need to check the distribution of differeces between the +# difference, we need to check the distribution of differences between the # the models. 
# # First, we remove the column "confounds" from the index and make the difference diff --git a/ignore_words.txt b/ignore_words.txt index 06132dd91..585932d33 100644 --- a/ignore_words.txt +++ b/ignore_words.txt @@ -5,3 +5,4 @@ fpr master whis jupyter +arange \ No newline at end of file diff --git a/julearn/api.py b/julearn/api.py index 33af1a3b8..37bf77ddf 100644 --- a/julearn/api.py +++ b/julearn/api.py @@ -4,8 +4,7 @@ # Sami Hamdan # License: AGPL -from typing import List, Optional, Union, Dict - +from typing import Dict, List, Optional, Union import numpy as np import pandas as pd @@ -14,26 +13,26 @@ from sklearn.model_selection._search import BaseSearchCV from sklearn.pipeline import Pipeline +from .inspect import Inspector from .pipeline import PipelineCreator from .pipeline.merger import merge_pipelines from .prepare import check_consistency, prepare_input_data from .scoring import check_scoring -from .utils import logger, raise_error, _compute_cvmdsum -from .inspect import Inspector +from .utils import _compute_cvmdsum, logger, raise_error -def run_cross_validation( - X: List[str], +def run_cross_validation( # noqa: C901 + X: List[str], # noqa: N803 y: str, model: Union[str, PipelineCreator, BaseEstimator, List[PipelineCreator]], - X_types: Optional[Dict] = None, + X_types: Optional[Dict] = None, # noqa: N803 data: Optional[pd.DataFrame] = None, problem_type: Optional[str] = None, preprocess: Union[None, str, List[str]] = None, return_estimator: Optional[str] = None, return_inspector: bool = False, return_train_score: bool = False, - cv: Union[None, int] = None, + cv: Optional[int] = None, groups: Optional[str] = None, scoring: Union[str, List[str], None] = None, pos_labels: Union[str, List[str], None] = None, @@ -47,10 +46,10 @@ def run_cross_validation( Parameters ---------- - X : str, list(str) or numpy.array + X : list of str The features to use. See :ref:`data_usage` for details. - y : str or numpy.array + y : str The targets to predict. 
See :ref:`data_usage` for details. model : str or scikit-learn compatible model. @@ -106,7 +105,7 @@ def run_cross_validation( * CV Splitter (see scikit-learn documentation on CV) * An iterable yielding (train, test) splits as arrays of indices. - groups : str or numpy.array | None + groups : str | None The grouping labels in case a Group CV is used. See :ref:`data_usage` for details. scoring : ScorerLike, optional @@ -145,11 +144,16 @@ def run_cross_validation( seed : int | None If not None, set the random seed before any operation. Useful for reproducibility. + n_jobs : int, optional + Number of jobs to run in parallel. Training the estimator and computing + the score are parallelized over the cross-validation splits. + ``None`` means 1 unless in a :obj:`joblib.parallel_backend` context. + ``-1`` means using all processors (default None). verbose: int Verbosity level of outer cross-validation. Follows scikit-learn/joblib converntions. 0 means no additional information is printed. - Larger number genereally mean more information is printed. + Larger number generally mean more information is printed. Note: verbosity up to 50 will print into standard error, while larger than 50 will print in standrad output. 
@@ -237,7 +241,7 @@ def run_cross_validation( else: model = [model] - problem_types = set([m.problem_type for m in model]) + problem_types = {m.problem_type for m in model} if len(problem_types) > 1: raise_error( "If model is a list of PipelineCreator, all elements must have" @@ -345,9 +349,7 @@ def run_cross_validation( check_consistency(y, cv, groups, problem_type) cv_return_estimator = return_estimator in ["cv", "all"] - scoring = check_scoring(pipeline, scoring, - wrap_score=wrap_score - ) + scoring = check_scoring(pipeline, scoring, wrap_score=wrap_score) cv_mdsum = _compute_cvmdsum(cv_outer) fit_params = {} diff --git a/julearn/base/column_types.py b/julearn/base/column_types.py index 0048ec3b4..e256577ae 100644 --- a/julearn/base/column_types.py +++ b/julearn/base/column_types.py @@ -48,7 +48,7 @@ def get_column_type(column): return column.split("__:type:__")[1] -def get_renamer(X_df): +def get_renamer(X_df): # noqa: N803 """Get the dictionary that will rename the columns to add the type. Parameters @@ -68,26 +68,28 @@ def get_renamer(X_df): } -class make_type_selector(object): - def __init__(self, pattern): - """Make a type selector. +class make_type_selector: + """Make a type selector. - This type selector is to be used with - :class:`sklearn.compose.ColumnTransformer` + This type selector is to be used with + :class:`sklearn.compose.ColumnTransformer` - Parameters - ---------- - pattern : str - The pattern to select the columns. + Parameters + ---------- + pattern : str + The pattern to select the columns. - Returns - ------- - function - The type selector. - """ + Returns + ------- + function + The type selector. + + """ + + def __init__(self, pattern): self.pattern = pattern - def __call__(self, X_df): + def __call__(self, X_df): # noqa: N803 """Select the columns based on the pattern. 
Parameters @@ -137,7 +139,7 @@ def __init__(self, column_types: ColumnTypesLike): if isinstance(column_types, ColumnTypes): _types = column_types._column_types.copy() elif isinstance(column_types, str): - _types = set([column_types]) + _types = {column_types} elif not isinstance(column_types, Set): _types = set(column_types) elif isinstance(column_types, Set): diff --git a/julearn/base/estimators.py b/julearn/base/estimators.py index 9ad920077..f4fc1ebf0 100644 --- a/julearn/base/estimators.py +++ b/julearn/base/estimators.py @@ -49,7 +49,7 @@ def check(self): return check -def _ensure_dataframe(X: DataLike) -> pd.DataFrame: +def _ensure_dataframe(X: DataLike) -> pd.DataFrame: # noqa: N803 """Ensure that the input is a pandas DataFrame. Parameters @@ -101,6 +101,7 @@ def get_needed_types(self) -> ColumnTypes: ------- ColumnTypes The column types needed by the estimator. + """ needed_types = self.get_apply_to().copy() if self.needed_types is not None: @@ -114,10 +115,11 @@ def get_apply_to(self) -> ColumnTypes: ------- ColumnTypes The column types the estimator applies to. + """ return ensure_column_types(self.apply_to) - def filter_columns(self, X: pd.DataFrame) -> pd.DataFrame: + def filter_columns(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Get the `apply_to` columns of a pandas DataFrame. Parameters @@ -156,21 +158,41 @@ def __init__( self, apply_to: ColumnTypesLike, needed_types: Optional[ColumnTypesLike] = None, - row_select_col_type: Optional[ColumnTypesLike] = None, - row_select_vals: Optional[Union[str, int, list, bool]] = None, + row_select_col_type: Optional[ColumnTypesLike] = None, + row_select_vals: Optional[Union[str, int, list, bool]] = None, ): self.apply_to = apply_to self.needed_types = needed_types self.row_select_col_type = row_select_col_type self.row_select_vals = row_select_vals - def fit(self, X, y=None, **fit_params): + def fit(self, X, y=None, **fit_params): # noqa: N803 + """Fit the model. 
+ + This method will fit the model using only the columns selected by + `apply_to`. + + Parameters + ---------- + X : pd.DataFrame + The data to fit the model on. + y : DataLike, optional + The target data (default is None). + **fit_params : Any + Additional parameters to pass to the model's fit method. + + Returns + ------- + JuTransformer + The fitted model. + """ if self.row_select_col_type is None: return self._fit(X, y, **fit_params) self._col_to_select_rows = make_type_selector( - ColumnTypes(self.row_select_col_type)._to_pattern())(X) + ColumnTypes(self.row_select_col_type)._to_pattern() + )(X) if len(self._col_to_select_rows) != 1: raise_error( "Only, one column can be selected for row_select_col_type." @@ -182,7 +204,7 @@ def fit(self, X, y=None, **fit_params): return self._fit(**self._select_rows(X, y, **fit_params)) def _add_backed_filtered( - self, X: pd.DataFrame, X_trans: pd.DataFrame + self, X: pd.DataFrame, X_trans: pd.DataFrame # noqa: N803 ) -> pd.DataFrame: """Add the left-out columns back to the transformed data. @@ -205,7 +227,7 @@ def _add_backed_filtered( ] return pd.concat((X.loc[:, non_filtered_columns], X_trans), axis=1) - def _select_rows(self, X, y, **fit_params): + def _select_rows(self, X, y, **fit_params): # noqa: N803 idx = X.query( f"`{self._col_to_select_rows}` in @self.row_select_vals" ).index.values @@ -262,7 +284,10 @@ def __init__( super().__init__(apply_to=apply_to, needed_types=needed_types) def fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None, **fit_params: Any + self, + X: pd.DataFrame, # noqa: N803 + y: Optional[DataLike] = None, + **fit_params: Any, ) -> "WrapModel": """Fit the model. @@ -292,7 +317,7 @@ def fit( self.model_.fit(Xt, y, **fit_params) return self - def predict(self, X: pd.DataFrame) -> DataLike: + def predict(self, X: pd.DataFrame) -> DataLike: # noqa: N803 """Predict using the model. Parameters @@ -304,11 +329,12 @@ def predict(self, X: pd.DataFrame) -> DataLike: ------- DataLike The predictions. 
+ """ Xt = self.filter_columns(X) return self.model_.predict(Xt) - def score(self, X: pd.DataFrame, y: DataLike) -> float: + def score(self, X: pd.DataFrame, y: DataLike) -> float: # noqa: N803 """Score the model. Parameters @@ -327,7 +353,7 @@ def score(self, X: pd.DataFrame, y: DataLike) -> float: return self.model_.score(Xt, y) @available_if(_wrapped_model_has("predict_proba")) - def predict_proba(self, X: pd.DataFrame) -> np.ndarray: + def predict_proba(self, X: pd.DataFrame) -> np.ndarray: # noqa: N803 """Compute probabilities of possible outcomes for samples in X. Parameters @@ -346,7 +372,7 @@ def predict_proba(self, X: pd.DataFrame) -> np.ndarray: return self.model_.predict_proba(Xt) # type: ignore @available_if(_wrapped_model_has("decision_function")) - def decision_function(self, X: pd.DataFrame) -> np.ndarray: + def decision_function(self, X: pd.DataFrame) -> np.ndarray: # noqa: N803 """Evaluate the decision function for the samples in X. Parameters @@ -364,7 +390,7 @@ def decision_function(self, X: pd.DataFrame) -> np.ndarray: return self.model_.decision_function(Xt) # type: ignore @available_if(_wrapped_model_has("predict_log_proba")) - def predict_log_proba(self, X: pd.DataFrame) -> np.ndarray: + def predict_log_proba(self, X: pd.DataFrame) -> np.ndarray: # noqa: N803 """Compute probabilities of possible outcomes for samples in X. Parameters @@ -416,7 +442,7 @@ def set_params(self, **kwargs: Any) -> "WrapModel": Parameters ---------- - **params : dict + **kwargs : dict Model parameters. 
Returns diff --git a/julearn/base/tests/test_base_estimators.py b/julearn/base/tests/test_base_estimators.py index a4778c12b..e69284116 100644 --- a/julearn/base/tests/test_base_estimators.py +++ b/julearn/base/tests/test_base_estimators.py @@ -75,7 +75,7 @@ def model(request): ], ) def test_WrapModel( - X_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 y_iris: pd.DataFrame, model: Type[ModelLike], apply_to: ColumnTypesLike, diff --git a/julearn/base/tests/test_column_types.py b/julearn/base/tests/test_column_types.py index be85745ba..64f7abf9a 100644 --- a/julearn/base/tests/test_column_types.py +++ b/julearn/base/tests/test_column_types.py @@ -43,7 +43,7 @@ ], ) def test_make_column_selector( - X_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 pattern: str, column_types: List[str], selection: slice, @@ -158,7 +158,7 @@ def test_ColumnTypes_patterns( ], ) def test_ColumnTypes_to_column_selector( - X_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 selected_column_types: ColumnTypesLike, data_column_types: List[str], selection: slice, diff --git a/julearn/config.py b/julearn/config.py index 27fe0332f..afa2e016f 100644 --- a/julearn/config.py +++ b/julearn/config.py @@ -4,8 +4,10 @@ # License: AGPL from typing import Any + from .utils import logger, raise_error + _global_config = {} _global_config["MAX_X_WARNS"] = 5000 _global_config["disable_x_check"] = False diff --git a/julearn/conftest.py b/julearn/conftest.py index eb5a0ddf5..4364ce761 100644 --- a/julearn/conftest.py +++ b/julearn/conftest.py @@ -102,9 +102,9 @@ def y_iris() -> pd.Series: @fixture( params=[ None, - dict(), - dict(duck=["petal_length"]), - dict(duck=["petal_length"], confound=["petal_width"]), + {}, + {"duck": ["petal_length"]}, + {"duck": ["petal_length"], "confound": ["petal_width"]}, ], scope="function", ) @@ -162,10 +162,10 @@ def all_problem_types(request: FixtureRequest) -> str: @fixture( params=[ None, - dict(), - dict(kind="grid"), - dict(kind="random", 
n_iter=2), - dict(kind="random", n_iter=2, cv=3), + {}, + {"kind": "grid"}, + {"kind": "random", "n_iter": 2}, + {"kind": "random", "n_iter": 2, "cv": 3}, ], scope="function", ) diff --git a/julearn/inspect/_cv.py b/julearn/inspect/_cv.py index 94233e740..5ba648d09 100644 --- a/julearn/inspect/_cv.py +++ b/julearn/inspect/_cv.py @@ -1,11 +1,16 @@ -from typing import List, Union, Optional +"""Provide base classes and functions to inspect folds of cross-validation.""" -from sklearn.model_selection import BaseCrossValidator, check_cv -from sklearn.utils.metaestimators import available_if +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +from typing import List, Optional, Union import pandas as pd +from sklearn.model_selection import BaseCrossValidator, check_cv +from sklearn.utils.metaestimators import available_if -from ..utils import raise_error, _compute_cvmdsum, is_nonoverlapping_cv +from ..utils import _compute_cvmdsum, is_nonoverlapping_cv, raise_error from ._pipeline import PipelineInspector @@ -55,7 +60,7 @@ def __init__( self, scores: pd.DataFrame, cv: BaseCrossValidator, - X: Union[str, List[str]], + X: Union[str, List[str]], # noqa: N803 y: str, func: str = "predict", groups: Optional[str] = None, diff --git a/julearn/inspect/_pipeline.py b/julearn/inspect/_pipeline.py index a085bc124..1920d6c71 100644 --- a/julearn/inspect/_pipeline.py +++ b/julearn/inspect/_pipeline.py @@ -1,3 +1,9 @@ +"""Provide base classes for pipeline and estimator inspectors.""" + +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + import re from sklearn.utils.validation import check_is_fitted @@ -5,8 +11,7 @@ from ..transformers import JuColumnTransformer -class PipelineInspector(): - +class PipelineInspector: def __init__(self, model): check_is_fitted(model) self._model = model @@ -21,27 +26,27 @@ def get_step(self, name, as_estimator=False): return step def get_params(self): - if hasattr(self._model, "best_estimator_"): 
self._model.best_estimator_.get_params() return self._model.get_params() def get_fitted_params(self): fitted_params = {} - model = (self._model.best_estimator_ - if hasattr(self._model, "best_estimator_") - else self._model - ) + model = ( + self._model.best_estimator_ + if hasattr(self._model, "best_estimator_") + else self._model + ) for name, step in model.steps: params = _EstimatorInspector(step).get_fitted_params() fitted_params = { **fitted_params, - ** {f"{name}__{param}": val for param, val in params.items()} + **{f"{name}__{param}": val for param, val in params.items()}, } return fitted_params -class _EstimatorInspector(): +class _EstimatorInspector: def __init__(self, estimator): self._estimator = estimator @@ -53,12 +58,16 @@ def get_fitted_params(self): if isinstance(self._estimator, JuColumnTransformer): all_params = { **all_params, - **vars(self._estimator.column_transformer_.transformers_[0][1]) + **vars( + self._estimator.column_transformer_.transformers_[0][1] + ), } - return {param: val for param, val in all_params.items() - if re.match(r"^[a-zA-Z].*[a-zA-Z0-9]*_$", param) - } + return { + param: val + for param, val in all_params.items() + if re.match(r"^[a-zA-Z].*[a-zA-Z0-9]*_$", param) + } @property def estimator(self): diff --git a/julearn/inspect/_preprocess.py b/julearn/inspect/_preprocess.py index b7b4bd0b4..ec7cb05da 100644 --- a/julearn/inspect/_preprocess.py +++ b/julearn/inspect/_preprocess.py @@ -1,4 +1,4 @@ -"""Functions to inspect the preprocessing steps of pipeline.""" +"""Provide functions to inspect the preprocessing steps of pipeline.""" # Authors: Federico Raimondo # Sami Hamdan @@ -14,7 +14,7 @@ def preprocess( pipeline: Pipeline, - X: List[str], + X: List[str], # noqa: N803 data: pd.DataFrame, until: Optional[str] = None, with_column_types: bool = False, diff --git a/julearn/inspect/inspector.py b/julearn/inspect/inspector.py index a40774cb0..ccb80388f 100644 --- a/julearn/inspect/inspector.py +++ 
b/julearn/inspect/inspector.py @@ -1,19 +1,58 @@ -from ._pipeline import PipelineInspector -from ._cv import FoldsInspector +"""Provide base class for inspector.""" + +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +from typing import TYPE_CHECKING, List, Optional, Union from ..utils.logging import raise_error +from ._cv import FoldsInspector +from ._pipeline import PipelineInspector + + +if TYPE_CHECKING: + import pandas as pd + from sklearn.base import BaseEstimator + + from ..pipeline.pipeline_creator import PipelineCreator class Inspector: + """Base class for inspector. + + Parameters + ---------- + scores : pd.DataFrame + The scores as dataframe. + model : str, optional + The model to inspect (default None). + X : list of str, optional + The features as list (default None). + y : str, optional + The target (default None). + groups : str, optional + The grouping labels in case a group CV is used (default None). + cv : int, optional + The number of folds for cross-validation (default None). + + """ + def __init__( self, - scores, - model=None, - X=None, - y=None, - groups=None, - cv=None, - ): + scores: "pd.DataFrame", + model: Union[ + str, + "PipelineCreator", + List["PipelineCreator"], + "BaseEstimator", + None, + ] = None, + X: Optional[List[str]] = None, # noqa: N803 + y: Optional[str] = None, + groups: Optional[str] = None, + cv: Optional[int] = None, + ) -> None: self._scores = scores self._model = model self._X = X @@ -22,13 +61,39 @@ def __init__( self._cv = cv @property - def model(self): + def model(self) -> PipelineInspector: + """Return the model. + + Returns + ------- + PipelineInspector + A PipelineInspector instance with model set. + + Raises + ------ + ValueError + If no ``model`` is provided. + + """ if self._model is None: raise_error("No model was provided. Cannot inspect the model.") return PipelineInspector(model=self._model) @property - def folds(self): + def folds(self) -> FoldsInspector: + """Return the folds. 
+ + Returns + ------- + FoldsInspector + A FoldsInspector instance with parameters set. + + Raises + ------ + ValueError + If no ``cv``, ``X`` or ``y`` is provided. + + """ if self._cv is None: raise_error("No cv was provided. Cannot inspect the folds.") if self._X is None: diff --git a/julearn/inspect/tests/test_cv.py b/julearn/inspect/tests/test_cv.py index 7d0461d62..cced789ea 100644 --- a/julearn/inspect/tests/test_cv.py +++ b/julearn/inspect/tests/test_cv.py @@ -1,47 +1,69 @@ +"""Provide tests for cross-validation inspection.""" + +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +import numpy as np import pandas as pd -from sklearn.model_selection import RepeatedKFold +import pytest +from pandas.testing import assert_frame_equal from sklearn.base import BaseEstimator +from sklearn.model_selection import RepeatedKFold + from julearn.base.estimators import WrapModel from julearn.inspect import FoldsInspector, PipelineInspector -from julearn.utils import _compute_cvmdsum from julearn.pipeline import PipelineCreator -import numpy as np -import pytest -from pandas.testing import assert_frame_equal +from julearn.utils import _compute_cvmdsum class MockModelReturnsIndex(BaseEstimator): - def fit(self, X, y=None, **fit_params): + """Class for mock model.""" + + def fit(self, X, y=None, **fit_params): # noqa: N803 + """Fit the model.""" return self - def predict(self, X): + def predict(self, X): # noqa: N803 + """Predict using the model.""" return np.array(X.index)[:, None] - def predict_proba(self, X): + def predict_proba(self, X): # noqa: N803 + """Predict probability using the model.""" return np.array(X.index)[:, None] - def predict_log_proba(self, X): + def predict_log_proba(self, X): # noqa: N803 + """Predict log probability using the model.""" return np.array(X.index)[:, None] - def decision_function(self, X): + def decision_function(self, X): # noqa: N803 + """Decision function.""" return np.array(X.index)[:, None] def 
__sklearn_is_fitted__(self): + """Check if model is fitted on data.""" return True class MockRegressorReturnsIndex(BaseEstimator): - def fit(self, X, y=None, **fit_params): + """Class for mock regressor.""" + + def fit(self, X, y=None, **fit_params): # noqa: N803 + """Fit the model.""" return self - def predict(self, X): + def predict(self, X): # noqa: N803 + """Predict using the model.""" return np.array(X.index) def __sklearn_is_fitted__(self): + """Check if model is fitted on data.""" return True def scores(df_typed_iris, n_iters=5, mock_model=None): + """Pre-define scores.""" + X = df_typed_iris.iloc[:, :-1] y = df_typed_iris.iloc[:, -1] @@ -51,17 +73,19 @@ def scores(df_typed_iris, n_iters=5, mock_model=None): estimators = [WrapModel(mock_model()).fit(X, y) for _ in range(n_iters)] return pd.DataFrame( - dict( - estimator=estimators, - test_scores=[0.5] * n_iters, - repeat=0, - fold=range(n_iters), - ) + { + "estimator": estimators, + "test_scores": [0.5] * n_iters, + "repeat": 0, + "fold": range(n_iters), + } ) @pytest.fixture def get_cv_scores(request, df_typed_iris): + """Fixture for getting CV scores.""" + n_iters = request.param mock_model = None if isinstance(n_iters, list): @@ -76,6 +100,8 @@ def get_cv_scores(request, df_typed_iris): @pytest.mark.parametrize("get_cv_scores", [2, 5, 10], indirect=True) def test_get_predictions(get_cv_scores, df_typed_iris): + """Test predictions.""" + X = df_typed_iris.iloc[:, :-1] y = df_typed_iris.iloc[:, -1] cv, df_scores = get_cv_scores @@ -94,6 +120,8 @@ def test_get_predictions(get_cv_scores, df_typed_iris): "get_cv_scores", [[2, MockRegressorReturnsIndex]], indirect=True ) def test_predictions_available(get_cv_scores, df_typed_iris): + """Test available predictions.""" + X = df_typed_iris.iloc[:, :-1] y = df_typed_iris.iloc[:, -1] cv, df_scores = get_cv_scores @@ -121,6 +149,8 @@ def test_predictions_available(get_cv_scores, df_typed_iris): "get_cv_scores", [[2, MockRegressorReturnsIndex]], indirect=True ) def 
test_invalid_func(get_cv_scores, df_typed_iris): + """Test invalid function.""" + X = df_typed_iris.iloc[:, :-1] y = df_typed_iris.iloc[:, -1] cv, df_scores = get_cv_scores @@ -131,6 +161,8 @@ def test_invalid_func(get_cv_scores, df_typed_iris): @pytest.mark.parametrize("get_cv_scores", [5], indirect=True) def test_foldsinspector_iter(get_cv_scores, df_typed_iris): + """Test folds inspector iterations.""" + X = df_typed_iris.iloc[:, :-1] y = df_typed_iris.iloc[:, -1] cv, df_scores = get_cv_scores diff --git a/julearn/inspect/tests/test_inspector.py b/julearn/inspect/tests/test_inspector.py index c593d1cff..6bd5d1487 100644 --- a/julearn/inspect/tests/test_inspector.py +++ b/julearn/inspect/tests/test_inspector.py @@ -1,51 +1,92 @@ -from julearn.inspect import Inspector -from julearn import run_cross_validation, PipelineCreator +"""Provide tests for base inspector.""" + +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +from typing import TYPE_CHECKING + import pytest +from julearn import PipelineCreator, run_cross_validation +from julearn.inspect import Inspector + -def test_no_cv(): - inspector = Inspector(dict()) +if TYPE_CHECKING: + import pandas as pd + + +def test_no_cv() -> None: + """Test inspector with no cross-validation.""" + inspector = Inspector({}) with pytest.raises(ValueError, match="No cv"): - inspector.folds + _ = inspector.folds -def test_no_X(): - inspector = Inspector(dict(), cv=5) +def test_no_X() -> None: + """Test inspector with no features.""" + inspector = Inspector({}, cv=5) with pytest.raises(ValueError, match="No X"): - inspector.folds + _ = inspector.folds -def test_no_y(): - inspector = Inspector(dict(), cv=5, X=[1, 2, 3]) +def test_no_y() -> None: + """Test inspector with no targets.""" + inspector = Inspector({}, cv=5, X=[1, 2, 3]) with pytest.raises(ValueError, match="No y"): - inspector.folds + _ = inspector.folds -def test_no_model(): - inspector = Inspector(dict()) +def test_no_model() -> None: + """Test inspector 
with no model.""" + inspector = Inspector({}) with pytest.raises(ValueError, match="No model"): - inspector.model + _ = inspector.model + +def test_normal_usage(df_iris: "pd.DataFrame") -> None: + """Test inspector. -def test_normal_usage(df_iris): + Parameters + ---------- + df_iris : pd.DataFrame + The iris dataset. + + """ X = list(df_iris.iloc[:, :-1].columns) scores, pipe, inspect = run_cross_validation( - X=X, y="species", data=df_iris, model="svm", - return_estimator="all", return_inspector=True, - problem_type="classification" + X=X, + y="species", + data=df_iris, + model="svm", + return_estimator="all", + return_inspector=True, + problem_type="classification", ) assert pipe == inspect.model._model for (_, score), inspect_fold in zip(scores.iterrows(), inspect.folds): assert score["estimator"] == inspect_fold.model._model -def test_normal_usage_with_search(df_iris): +def test_normal_usage_with_search(df_iris: "pd.DataFrame") -> None: + """Test inspector with search. + + Parameters + ---------- + df_iris : pd.DataFrame + The iris dataset. 
+ + """ X = list(df_iris.iloc[:, :-1].columns) pipe = PipelineCreator(problem_type="classification").add("svm", C=[1, 2]) _, pipe, inspect = run_cross_validation( - X=X, y="species", data=df_iris, model=pipe, - return_estimator="all", return_inspector=True, + X=X, + y="species", + data=df_iris, + model=pipe, + return_estimator="all", + return_inspector=True, ) assert pipe == inspect.model._model inspect.model.get_fitted_params() diff --git a/julearn/inspect/tests/test_pipeline.py b/julearn/inspect/tests/test_pipeline.py index 264eac5fb..54cf36773 100644 --- a/julearn/inspect/tests/test_pipeline.py +++ b/julearn/inspect/tests/test_pipeline.py @@ -1,59 +1,150 @@ +"""Provide tests for pipeline and estimator inspectors.""" + +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type + import pytest from sklearn.base import BaseEstimator from sklearn.decomposition import PCA from sklearn.preprocessing import StandardScaler from sklearn.svm import SVC -from julearn.inspect import _EstimatorInspector, PipelineInspector +from julearn.inspect import PipelineInspector, _EstimatorInspector from julearn.pipeline import PipelineCreator from julearn.transformers import JuColumnTransformer +if TYPE_CHECKING: + import pandas as pd + + class TestEst(BaseEstimator): - def __init__(self, hype_0=0, hype_1=1): + """Class for estimator tests. + + Parameters + ---------- + hype_0 : int + First hyperparameter. + hype_1 : int + Second hyperparameter. + + """ + + def __init__(self, hype_0: int = 0, hype_1: int = 1) -> None: self.hype_0 = hype_0 self.hype_1 = hype_1 - def fit(self, X, y=None, **fit_params): + def fit( + self, + X: List[str], # noqa: N803 + y: Optional[str] = None, + **fit_params: Any, + ) -> "TestEst": + """Fit the estimator. + + Parameters + ---------- + X : list of str + The features to use. + y : str, optional + The target to use (default None). 
+ **fit_params : dict + Parameters for fitting the estimator. + + Returns + ------- + TestEst + The fitted estimator. + + """ self.param_0_ = 0 self.param_1_ = 1 return self - def transform(self, X): + def transform(self, X: List[str]) -> List[str]: # noqa: N803 + """Transform the estimator. + + Parameters + ---------- + X : list of str + The features to use. + + Returns + ------- + list of str + The transformed estimator. + + """ return X @pytest.mark.parametrize( - "steps", [ + "steps", + [ ["svm"], ["zscore", "svm"], ["pca", "svm"], ["zscore", "pca", "svm"], - ]) -def test_get_stepnames(steps, df_iris): - pipe = (PipelineCreator(problem_type="classification") - .from_list(steps, model_params={}, problem_type="classification") - .to_pipeline() - ) + ], +) +def test_get_stepnames(steps: List[str], df_iris: "pd.DataFrame") -> None: + """Test step names fetch. + + Parameters + ---------- + steps : list of str + The parametrized step names. + df_iris : pd.DataFrame + The iris dataset. + + """ + pipe = ( + PipelineCreator(problem_type="classification") + .from_list(steps, model_params={}, problem_type="classification") + .to_pipeline() + ) pipe.fit(df_iris.iloc[:, :-1], df_iris.species) - assert (["set_column_types"] + steps - == PipelineInspector(pipe).get_step_names() - ) + assert ["set_column_types", *steps] == PipelineInspector( + pipe + ).get_step_names() @pytest.mark.parametrize( - "steps,as_estimator,returns", [ + "steps,as_estimator,returns", + [ (["svm"], True, [SVC()]), (["zscore", "pca", "svm"], True, [StandardScaler(), PCA(), SVC()]), (["svm"], False, [_EstimatorInspector(SVC())]), - - ]) -def test_steps(steps, as_estimator, returns, df_iris): - - pipe = (PipelineCreator(problem_type="classification") - .from_list(steps, model_params={}, problem_type="classification") - .to_pipeline() - ) + ], +) +def test_steps( + steps: List[str], + as_estimator: bool, + returns: List[Type], + df_iris: "pd.DataFrame", +) -> None: + """Test steps. 
+ + Parameters + ---------- + steps : list of str + The parametrized step names. + as_estimator : bool + The parametrized flag to indicate whether to use as estimator. + returns : list + The parametrized list of object instances to return. + df_iris : pd.DataFrame + The iris dataset. + + """ + pipe = ( + PipelineCreator(problem_type="classification") + .from_list(steps, model_params={}, problem_type="classification") + .to_pipeline() + ) pipe.fit(df_iris.iloc[:, :-1], df_iris.species) inspector = PipelineInspector(pipe) for i, _ in enumerate(steps): @@ -62,13 +153,30 @@ def test_steps(steps, as_estimator, returns, df_iris): @pytest.mark.parametrize( - "est,fitted_params", [ + "est,fitted_params", + [ [TestEst(), {"param_0_": 0, "param_1_": 1}], - [JuColumnTransformer("test", TestEst(), "continuous"), - {"param_0_": 0, "param_1_": 1}], - ]) -def test_inspect_estimator(est, fitted_params, df_iris): - + [ + JuColumnTransformer("test", TestEst(), "continuous"), + {"param_0_": 0, "param_1_": 1}, + ], + ], +) +def test_inspect_estimator( + est: Type, fitted_params: Dict[str, int], df_iris: "pd.DataFrame" +) -> None: + """Test estimator inspector. + + Parameters + ---------- + est : Estimator-like + Estimator-like object. + fitted_params : dict of str and int + The fitted parameters for ``est``. + df_iris : pd.DataFrame + The iris dataset. + + """ est.fit(df_iris.iloc[:, :-1], df_iris.species) inspector = _EstimatorInspector(est) assert est.get_params() == inspector.get_params() @@ -77,37 +185,57 @@ def test_inspect_estimator(est, fitted_params, df_iris): assert fitted_params == inspect_params -def test_inspect_pipeline(df_iris): +def test_inspect_pipeline(df_iris: "pd.DataFrame") -> None: + """Test pipeline inspector. + Parameters + ---------- + df_iris : pd.DataFrame + The iris dataset. 
+ + """ expected_fitted_params = { - "jucolumntransformer__param_0_": 0, "jucolumntransformer__param_1_": 1} + "jucolumntransformer__param_0_": 0, + "jucolumntransformer__param_1_": 1, + } - pipe = (PipelineCreator(problem_type="classification") - .add(JuColumnTransformer("test", TestEst(), "continuous")) - .add(SVC()) - .to_pipeline() - ) + pipe = ( + PipelineCreator(problem_type="classification") + .add(JuColumnTransformer("test", TestEst(), "continuous")) + .add(SVC()) + .to_pipeline() + ) pipe.fit(df_iris.iloc[:, :-1], df_iris.species) inspector = PipelineInspector(pipe) inspect_params = inspector.get_fitted_params() inspect_params.pop("jucolumntransformer__column_transformer_", None) inspect_params = { - key: val for key, val in inspect_params.items() - if (not key.startswith("svc")) and ( - not key.startswith("set_column_types")) + key: val + for key, val in inspect_params.items() + if (not key.startswith("svc")) + and (not key.startswith("set_column_types")) } assert expected_fitted_params == inspect_params -def test_get_estimator(df_iris): - pipe = (PipelineCreator(problem_type="classification") - .add(JuColumnTransformer("test", TestEst(), "continuous")) - .add(SVC()) - .to_pipeline() - ) +def test_get_estimator(df_iris: "pd.DataFrame") -> None: + """Test estimator fetch from inspector. + + Parameters + ---------- + df_iris : pd.DataFrame + The iris dataset. 
+ + """ + pipe = ( + PipelineCreator(problem_type="classification") + .add(JuColumnTransformer("test", TestEst(), "continuous")) + .add(SVC()) + .to_pipeline() + ) pipe.fit(df_iris.iloc[:, :-1], df_iris.species) inspector = PipelineInspector(pipe) svc = inspector.get_step("svc").estimator assert isinstance(svc, SVC) - assert pipe.get_params() == inspector.get_params() + assert pipe.get_params() == inspector.get_params() diff --git a/julearn/inspect/tests/test_preprocess.py b/julearn/inspect/tests/test_preprocess.py index f32214c60..ec281c519 100644 --- a/julearn/inspect/tests/test_preprocess.py +++ b/julearn/inspect/tests/test_preprocess.py @@ -1,4 +1,4 @@ -"""Provide tests for inspecting preprocess module.""" +"""Provide tests for inspecting preprocessors.""" # Authors: Federico Raimondo # Sami Hamdan @@ -56,13 +56,13 @@ ], ) def test_preprocess_sklearn( - X_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 y_iris: pd.Series, pipeline: Pipeline, transformers: List[TransformerLike], until: Optional[str], ) -> None: - """Test the preprocess_sklearn function. + """Test the preprocess function. Parameters ---------- @@ -74,8 +74,9 @@ def test_preprocess_sklearn( The pipeline to test. transformers : list of TransformerLike The transformers to test. - until : str, optional + until : str or None The transformer to stop at. + """ X = list(X_iris.columns) X = cast(List[str], X) @@ -96,7 +97,7 @@ def test_preprocess_sklearn( def test_preprocess_sklearn_nodataframe( - X_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 y_iris: pd.Series, ) -> None: """Test preprocess with non-dataframe output and column types removal. @@ -120,21 +121,49 @@ def test_preprocess_sklearn_nodataframe( ) -def test_preprocess_no_step(X_iris, y_iris, df_iris): +def test_preprocess_no_step(X_iris, y_iris, df_iris) -> None: # noqa: N803 + """Test error for preprocess with no step. + Parameters + ---------- + X_iris : pd.DataFrame + The iris dataset features. 
+ y_iris : pd.Series + The iris dataset target. + df_iris : pd.DataFrame + The iris dataset. + + """ pipeline = Pipeline([("scaler", StandardScaler()), ("svm", SVC())]) pipeline.fit(X_iris, y=y_iris) with pytest.raises(ValueError, match="No step named"): - preprocess(pipeline, X=list(X_iris.columns), - data=df_iris, - until="non_existent") + preprocess( + pipeline, + X=list(X_iris.columns), + data=df_iris, + until="non_existent", + ) + +def test_preprocess_with_column_types(df_iris: pd.DataFrame) -> None: + """Test preprocess with column types. + + Parameters + ---------- + df_iris : pd.DataFrame + The iris dataset. + + """ -def test_preprocess_with_column_types(df_iris): X = list(df_iris.iloc[:, :-1].columns) y = "species" _, model = run_cross_validation( - X=X, y=y, data=df_iris, problem_type="classification", - model="rf", return_estimator="final") + X=X, + y=y, + data=df_iris, + problem_type="classification", + model="rf", + return_estimator="final", + ) X_t = preprocess(model, X=X, data=df_iris, with_column_types=False) - assert (list(X_t.columns) == X) + assert list(X_t.columns) == X diff --git a/julearn/model_selection/continuous_stratified_kfold.py b/julearn/model_selection/continuous_stratified_kfold.py index 02afe4ea2..d5f115a65 100644 --- a/julearn/model_selection/continuous_stratified_kfold.py +++ b/julearn/model_selection/continuous_stratified_kfold.py @@ -91,7 +91,10 @@ def __init__( ) def split( - self, X: np.ndarray, y: np.ndarray, groups: Optional[np.ndarray] = None + self, + X: np.ndarray, # noqa: N803 + y: np.ndarray, + groups: Optional[np.ndarray] = None, ): """Generate indices to split data into training and test set. @@ -231,7 +234,10 @@ def __init__( ) def split( - self, X: np.ndarray, y: np.ndarray, groups: Optional[np.ndarray] = None + self, + X: np.ndarray, # noqa: N803 + y: np.ndarray, + groups: Optional[np.ndarray] = None, ): """Generate indices to split data into training and test set. 
diff --git a/julearn/model_selection/stratified_bootstrap.py b/julearn/model_selection/stratified_bootstrap.py index 5ba9f762e..6ec0d160b 100644 --- a/julearn/model_selection/stratified_bootstrap.py +++ b/julearn/model_selection/stratified_bootstrap.py @@ -55,7 +55,10 @@ def __init__( ) def _iter_indices( - self, X: np.ndarray, y: np.ndarray, groups: Optional[np.ndarray] = None + self, + X: np.ndarray, # noqa: N803 + y: np.ndarray, + groups: Optional[np.ndarray] = None, ): """Generate (train, test) indices. @@ -99,7 +102,10 @@ def _iter_indices( yield train, test def split( - self, X: np.ndarray, y: np.ndarray, groups: Optional[np.ndarray] = None + self, + X: np.ndarray, # noqa: N803 + y: np.ndarray, + groups: Optional[np.ndarray] = None, ): """Generate indices to split data into training and test set. diff --git a/julearn/model_selection/tests/test_continous_stratified_kfold.py b/julearn/model_selection/tests/test_continous_stratified_kfold.py index 85172c701..fc9c7ca97 100644 --- a/julearn/model_selection/tests/test_continous_stratified_kfold.py +++ b/julearn/model_selection/tests/test_continous_stratified_kfold.py @@ -3,22 +3,22 @@ # Authors: Federico Raimondo # License: AGPL +# RepeatedStratifiedGroupKFold, # Need sklearn #24247 +from collections import Counter + import numpy as np from numpy.testing._private.utils import assert_array_equal from sklearn.model_selection import ( - StratifiedKFold, RepeatedStratifiedKFold, StratifiedGroupKFold, - # RepeatedStratifiedGroupKFold, # Need sklearn #24247 + StratifiedKFold, ) -from collections import Counter - from julearn.model_selection.continuous_stratified_kfold import ( - RepeatedContinuousStratifiedKFold, - ContinuousStratifiedKFold, # RepeatedContinuousStratifiedGroupKFold, # Need in sklearn #24247 ContinuousStratifiedGroupKFold, + ContinuousStratifiedKFold, + RepeatedContinuousStratifiedKFold, ) diff --git a/julearn/model_selection/tests/test_stratified_bootstrap.py 
b/julearn/model_selection/tests/test_stratified_bootstrap.py index 944c0e693..cbfd0ce6a 100644 --- a/julearn/model_selection/tests/test_stratified_bootstrap.py +++ b/julearn/model_selection/tests/test_stratified_bootstrap.py @@ -1,4 +1,4 @@ -"""Provides tests for the stratified bootstra CV generator.""" +"""Provides tests for the stratified bootstrap CV generator.""" # Authors: Federico Raimondo # License: AGPL diff --git a/julearn/models/available_models.py b/julearn/models/available_models.py index fa799b429..4778a3c23 100644 --- a/julearn/models/available_models.py +++ b/julearn/models/available_models.py @@ -134,7 +134,6 @@ "regression": DummyRegressor, "classification": DummyClassifier, }, - } _available_models_reset = deepcopy(_available_models) @@ -162,6 +161,8 @@ def get_model(name: str, problem_type: str, **kwargs: Any) -> ModelLike: The model name problem_type : str The type of problem. See :func:`.run_cross_validation`. + **kwargs : dict + Extra keyword arguments. Returns ------- diff --git a/julearn/models/dynamic.py b/julearn/models/dynamic.py index 4c9c068e2..d0b2dd5de 100644 --- a/julearn/models/dynamic.py +++ b/julearn/models/dynamic.py @@ -84,7 +84,9 @@ def __init__( self.random_state_algorithm = random_state_algorithm self._ds_params = kwargs - def fit(self, X: DataLike, y: DataLike) -> "DynamicSelection": + def fit( + self, X: DataLike, y: DataLike # noqa: N803 + ) -> "DynamicSelection": """Fit the model. Parameters @@ -113,7 +115,7 @@ def fit(self, X: DataLike, y: DataLike) -> "DynamicSelection": f"You tried to use {n_splits} splits." 
) - train, test = list(cv_split.split(X, y))[0] # type: ignore + train, test = next(iter(cv_split.split(X, y))) # type: ignore if isinstance(X, pd.DataFrame): X_train = X.iloc[train, :] X_dsel = X.iloc[test, :] @@ -134,7 +136,7 @@ def fit(self, X: DataLike, y: DataLike) -> "DynamicSelection": return self - def predict(self, X: DataLike) -> DataLike: + def predict(self, X: DataLike) -> DataLike: # noqa: N803 """Predict using the model. Parameters @@ -149,7 +151,7 @@ def predict(self, X: DataLike) -> DataLike: """ return self._dsmodel.predict(X) - def predict_proba(self, X: DataLike) -> np.ndarray: + def predict_proba(self, X: DataLike) -> np.ndarray: # noqa: N803 """Compute probabilities of possible outcomes for samples in X. Parameters @@ -170,7 +172,7 @@ def predict_proba(self, X: DataLike) -> np.ndarray: def score( self, - X: DataLike, + X: DataLike, # noqa: N803 y: DataLike, sample_weight: Optional[DataLike] = None, ) -> float: @@ -183,12 +185,13 @@ def score( y : DataLike The true target values. sample_weight : DataLike, optional - Sample weights to use when computing the score (default is None). + Sample weights to use when computing the score (default None). Returns ------- float The score. + """ return self._dsmodel.score(X, y, sample_weight) diff --git a/julearn/models/tests/test_models.py b/julearn/models/tests/test_models.py index c9602f315..3a45ba14f 100644 --- a/julearn/models/tests/test_models.py +++ b/julearn/models/tests/test_models.py @@ -215,7 +215,7 @@ def test_classificationestimators( # keep only two species X = ["sepal_length", "sepal_width", "petal_length"] - X_types = dict(continuous=X) + X_types = {"continuous": X} y = "species" ju_model_params = None @@ -311,7 +311,7 @@ def test_regression_estimators( Parameters to pass to the model. 
""" X = ["sepal_length", "sepal_width", "petal_length"] - X_types = dict(continuous=X) + X_types = {"continuous": X} y = "petal_width" ju_model_params = None diff --git a/julearn/pipeline/merger.py b/julearn/pipeline/merger.py index adb0bf335..72838170c 100644 --- a/julearn/pipeline/merger.py +++ b/julearn/pipeline/merger.py @@ -8,13 +8,13 @@ from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.pipeline import Pipeline +from ..prepare import prepare_search_params from ..utils.logging import raise_error from ..utils.typing import EstimatorLike from .pipeline_creator import _prepare_hyperparameter_tuning -from ..prepare import prepare_search_params -def merge_pipelines( +def merge_pipelines( # noqa: C901 *pipelines: EstimatorLike, search_params: Dict ) -> Pipeline: """Merge multiple pipelines into a single one. @@ -41,7 +41,8 @@ def merge_pipelines( if not isinstance(p, (Pipeline, GridSearchCV, RandomizedSearchCV)): raise_error( "Only pipelines and searchers are supported. " - f"Found {type(p)} instead.") + f"Found {type(p)} instead." 
+ ) if isinstance(p, GridSearchCV): if search_params["kind"] != "grid": raise_error( diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index 24d0144fa..263b844f9 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -14,14 +14,13 @@ from ..base import ColumnTypes, ColumnTypesLike, JuTransformer, WrapModel from ..model_selection.available_searchers import get_searcher, list_searchers from ..models import get_model, list_models +from ..prepare import prepare_search_params from ..transformers import ( JuColumnTransformer, SetColumnTypes, get_transformer, list_transformers, ) -from ..prepare import prepare_search_params - from ..transformers.target import JuTransformedTargetModel from ..utils import logger, raise_error, warn_with_log from ..utils.typing import ( @@ -35,7 +34,9 @@ def _params_to_pipeline( - param: Any, X_types: Dict[str, List], search_params: Optional[Dict] + param: Any, + X_types: Dict[str, List], # noqa: N803 + search_params: Optional[Dict], ): """Recursively convert params to pipelines. @@ -230,8 +231,8 @@ def add( logger.info(f"Adding step {name} that applies to {apply_to}") # Find which parameters should be set and which should be tuned. - params_to_set = dict() - params_to_tune = dict() + params_to_set = {} + params_to_tune = {} for param, vals in params.items(): # If we have more than 1 value, we will tune it. # If not, it will be set in the model. @@ -360,7 +361,7 @@ def from_list( To what should the transformers be applied to if not specified in the `add` method (default is continuous). - Returns + Returns ------- PipelineCreator The PipelineCreator with the steps added @@ -437,7 +438,7 @@ def split( def to_pipeline( self, - X_types: Optional[Dict[str, List]] = None, + X_types: Optional[Dict[str, List]] = None, # noqa: N803 search_params: Optional[Dict[str, Any]] = None, ) -> Pipeline: """Create a pipeline from the PipelineCreator. 
@@ -665,7 +666,7 @@ def _validate_step( Raises ------ ValueError - If the step is not a valid step, if the tranformer is added after + If the step is not a valid step, if the transformer is added after adding a model, or if a transformer is added after a target transformer. @@ -687,7 +688,7 @@ def _validate_step( raise_error(f"Cannot add a {step}. I don't know what it is.") def _check_X_types( - self, X_types: Optional[Dict] = None + self, X_types: Optional[Dict] = None # noqa: N803 ) -> Dict[str, List[str]]: """Check the X_types against the pipeline creator settings. @@ -744,9 +745,7 @@ def _check_X_types( # All available types are the ones in the X_types + wildcard types + # target + the ones that can be created by a transformer. # So far, we only know of transformers that output continuous - available_types = set( - [*all_X_types, "*", ".*", "target", "continuous"] - ) + available_types = {*all_X_types, "*", ".*", "target", "continuous"} for needed_type in needed_types: if needed_type not in available_types: warn_with_log( @@ -755,7 +754,7 @@ def _check_X_types( "this type." ) - self.wrap = needed_types != set(["continuous"]) + self.wrap = needed_types != {"continuous"} return X_types @staticmethod @@ -790,6 +789,13 @@ def _wrap_step( The step to wrap. column_types : ColumnTypesLike The types of the columns the step is applied to. + row_select_col_type : str or list of str or set of str or ColumnTypes + The column types needed to select rows (default None). + row_select_vals : str, int, bool or list of str, int, bool + The value(s) which should be selected in the + ``row_select_col_type`` to select the rows used for training + (default None). 
+ """ return JuColumnTransformer( name, diff --git a/julearn/pipeline/target_pipeline.py b/julearn/pipeline/target_pipeline.py index d9607333a..3f5aa0b16 100644 --- a/julearn/pipeline/target_pipeline.py +++ b/julearn/pipeline/target_pipeline.py @@ -40,7 +40,9 @@ def __init__( raise TypeError("steps must be a list") self.steps = steps - def fit_transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: + def fit_transform( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> DataLike: """Fit and transform the target. Parameters @@ -57,7 +59,9 @@ def fit_transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: """ return self.fit(X, y).transform(X, y) - def fit(self, X: pd.DataFrame, y: DataLike) -> "JuTargetPipeline": + def fit( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> "JuTargetPipeline": """Fit the target pipeline. Parameters @@ -81,7 +85,9 @@ def fit(self, X: pd.DataFrame, y: DataLike) -> "JuTargetPipeline": y = t_step.fit_transform(y[:, None])[:, 0] # type: ignore return self - def transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: + def transform( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> DataLike: """Transform the target. Parameters @@ -105,7 +111,9 @@ def transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: y = t_step.transform(y[:, None])[:, 0] # type: ignore return y - def inverse_transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: + def inverse_transform( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> DataLike: """Inverse transform the target. Parameters diff --git a/julearn/pipeline/target_pipeline_creator.py b/julearn/pipeline/target_pipeline_creator.py index b213e81d4..2175e2f8a 100644 --- a/julearn/pipeline/target_pipeline_creator.py +++ b/julearn/pipeline/target_pipeline_creator.py @@ -76,6 +76,8 @@ def _get_step_name( Parameters ---------- + name : str or None + The name of the step. step : EstimatorLike or str The step to get the name for. 
diff --git a/julearn/pipeline/test/test_merger.py b/julearn/pipeline/test/test_merger.py index a85a9db05..ebf6b997d 100644 --- a/julearn/pipeline/test/test_merger.py +++ b/julearn/pipeline/test/test_merger.py @@ -4,9 +4,10 @@ # License: AGPL import pytest -from sklearn.svm import SVC from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.pipeline import Pipeline +from sklearn.svm import SVC + from julearn.pipeline import PipelineCreator from julearn.pipeline.merger import merge_pipelines diff --git a/julearn/pipeline/test/test_pipeline_creator.py b/julearn/pipeline/test/test_pipeline_creator.py index 66d75fd01..545b37f7a 100644 --- a/julearn/pipeline/test/test_pipeline_creator.py +++ b/julearn/pipeline/test/test_pipeline_creator.py @@ -10,9 +10,9 @@ import pandas as pd import pytest from pytest_lazyfixture import lazy_fixture +from sklearn.ensemble import RandomForestClassifier from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from sklearn.preprocessing import RobustScaler, StandardScaler -from sklearn.ensemble import RandomForestClassifier from sklearn.svm import SVC from julearn.base import ColumnTypesLike, WrapModel @@ -84,7 +84,7 @@ def test_construction_working( ], ) def test_fit_and_transform_no_error( - X_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 y_iris: pd.Series, model: str, preprocess: List[str], @@ -123,7 +123,7 @@ def test_fit_and_transform_no_error( ], ) def test_hyperparameter_tuning( - X_types_iris: Dict[str, List[str]], + X_types_iris: Dict[str, List[str]], # noqa: N803 model: str, preprocess: List[str], problem_type: str, @@ -144,6 +144,9 @@ def test_hyperparameter_tuning( The problem type to test. get_tuning_params : Callable A function that returns the tuning hyperparameters for a given step. + search_params : dict of str and list + The parameters for the search. 
+ """ if isinstance(preprocess, str): preprocess = [preprocess] @@ -153,7 +156,7 @@ def test_hyperparameter_tuning( used_types = ( ["continuous"] - if X_types_iris in [None, dict()] + if X_types_iris in [None, {}] else list(X_types_iris.keys()) ) for step in preprocess: @@ -199,7 +202,9 @@ def test_hyperparameter_tuning( ], ) def test_X_types_to_pattern_warnings( - X_types: Dict[str, List[str]], apply_to: ColumnTypesLike, warns: bool + X_types: Dict[str, List[str]], # noqa: N803 + apply_to: ColumnTypesLike, + warns: bool, ) -> None: """Test that the X_types raises the expected warnings. @@ -236,7 +241,9 @@ def test_X_types_to_pattern_warnings( ], ) def test_X_types_to_pattern_errors( - X_types: Dict[str, List[str]], apply_to: ColumnTypesLike, error: bool + X_types: Dict[str, List[str]], # noqa: N803 + apply_to: ColumnTypesLike, + error: bool, ) -> None: """Test that the X_types raises the expected errors. @@ -309,7 +316,9 @@ def test_added_model_target_transform() -> None: assert pipeline_creator._added_model -def test_stacking(X_iris: pd.DataFrame, y_iris: pd.Series) -> None: +def test_stacking( + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 +) -> None: """Test that the stacking model works correctly.""" # Define our feature types X_types = { @@ -360,7 +369,7 @@ def test_added_repeated_transformers() -> None: # ) -def test_target_pipe(X_iris, y_iris) -> None: +def test_target_pipe(X_iris, y_iris) -> None: # noqa: N803 """Test that the target pipeline works correctly.""" X_types = { "continuous": ["sepal_length", "sepal_width", "petal_length"], @@ -375,7 +384,7 @@ def test_target_pipe(X_iris, y_iris) -> None: .add("svm", C=[1, 2]) ) pipe = pipeline_creator.to_pipeline( - X_types, search_params=dict(kind="random") + X_types, search_params={"kind": "random"} ) pipe.fit(X_iris, y_iris) @@ -438,7 +447,7 @@ def test_raise_pipe_wrong_searcher() -> None: match="The searcher no_search is not a valid julearn searcher", ): pipeline_creator.to_pipeline( - X_types, 
search_params=dict(kind="no_search") + X_types, search_params={"kind": "no_search"} ) diff --git a/julearn/pipeline/test/test_target_pipeline.py b/julearn/pipeline/test/test_target_pipeline.py index 6df57905f..784250c1c 100644 --- a/julearn/pipeline/test/test_target_pipeline.py +++ b/julearn/pipeline/test/test_target_pipeline.py @@ -4,21 +4,22 @@ # Sami Hamdan # License: AGPL +import warnings + import numpy as np import pandas as pd import pytest from numpy.testing import assert_array_equal from sklearn.preprocessing import StandardScaler -import warnings +from julearn import run_cross_validation +from julearn.pipeline import PipelineCreator, TargetPipelineCreator from julearn.pipeline.target_pipeline import JuTargetPipeline from julearn.transformers.target import JuTargetTransformer -from julearn.pipeline import PipelineCreator, TargetPipelineCreator -from julearn import run_cross_validation def test_target_pipeline_sklearn( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test the target pipeline using a scikit-learn transformer. @@ -44,7 +45,7 @@ def test_target_pipeline_sklearn( def test_target_pipeline_jutargettransformer( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test the target pipeline using a JuTargetTransformer. 
@@ -57,11 +58,15 @@ def test_target_pipeline_jutargettransformer( """ class MedianSplitter(JuTargetTransformer): - def fit(self, X: pd.DataFrame, y: pd.Series) -> "MedianSplitter": + def fit( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> "MedianSplitter": self.median = np.median(y) return self - def transform(self, X: pd.DataFrame, y: pd.Series) -> pd.Series: + def transform( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> pd.Series: return (y > self.median).astype(int) steps = [("splitter", MedianSplitter())] @@ -77,7 +82,7 @@ def transform(self, X: pd.DataFrame, y: pd.Series) -> pd.Series: def test_target_pipeline_multiple_ju_sk( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test the target pipeline using JuTargetTransformer and sklearn. @@ -90,11 +95,15 @@ def test_target_pipeline_multiple_ju_sk( """ class DeMeaner(JuTargetTransformer): - def fit(self, X: pd.DataFrame, y: pd.Series) -> "DeMeaner": + def fit( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> "DeMeaner": self.mean = np.mean(X.iloc[:, 0].values) # type: ignore return self - def transform(self, X: pd.DataFrame, y: pd.Series) -> pd.Series: + def transform( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> pd.Series: return y - self.mean # type: ignore steps = [("demeaner", DeMeaner()), ("scaler", StandardScaler())] @@ -114,7 +123,7 @@ def transform(self, X: pd.DataFrame, y: pd.Series) -> pd.Series: def test_target_pipeline_multiple_sk_ju( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test the target pipeline using sklearn and JuTargetTransformer. 
@@ -127,11 +136,15 @@ def test_target_pipeline_multiple_sk_ju( """ class DeMeaner(JuTargetTransformer): - def fit(self, X: pd.DataFrame, y: pd.Series) -> "DeMeaner": + def fit( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> "DeMeaner": self.mean = np.mean(X.iloc[:, 0].values) # type: ignore return self - def transform(self, X: pd.DataFrame, y: pd.Series) -> pd.Series: + def transform( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> pd.Series: return y - self.mean # type: ignore steps = [("scaler", StandardScaler()), ("demeaner", DeMeaner())] @@ -159,7 +172,9 @@ def test_target_pipeline_errors() -> None: JuTargetPipeline(steps) # type: ignore -def test_target_noninverse(df_iris, X_iris): +def test_target_noninverse(df_iris, X_iris): # noqa: N803 + """Test the target non-inverse.""" + X = list(X_iris.columns) df_iris["species"] = X_iris["petal_width"] target_pipeline_creator = TargetPipelineCreator() @@ -170,14 +185,18 @@ def test_target_noninverse(df_iris, X_iris): pipeline_creator.add(target_pipeline_creator, apply_to="target") pipeline_creator.add("linreg") - X_types = {"confounds": ["petal_width"], - "continuous": ['sepal_length', 'sepal_width', 'petal_length'] - } + X_types = { + "confounds": ["petal_width"], + "continuous": ["sepal_length", "sepal_width", "petal_length"], + } with warnings.catch_warnings(): warnings.simplefilter("error") run_cross_validation( - X=X, y="species", X_types=X_types, - model=pipeline_creator, data=df_iris, - scoring="r2" + X=X, + y="species", + X_types=X_types, + model=pipeline_creator, + data=df_iris, + scoring="r2", ) diff --git a/julearn/prepare.py b/julearn/prepare.py index 7ff760b87..dd8ed469e 100644 --- a/julearn/prepare.py +++ b/julearn/prepare.py @@ -21,16 +21,19 @@ ) from sklearn.model_selection._split import _RepeatedSplits +from .config import get_config from .model_selection import ( - RepeatedContinuousStratifiedGroupKFold, ContinuousStratifiedGroupKFold, + RepeatedContinuousStratifiedGroupKFold, ) 
from .utils import logger, raise_error, warn_with_log -from .config import get_config def _validate_input_data_df( - X: Union[str, List[str]], y: str, df: pd.DataFrame, groups: Optional[str] + X: Union[str, List[str]], # noqa: N803 + y: str, + df: pd.DataFrame, + groups: Optional[str], ) -> None: """Validate the input data types for the pipeline. @@ -70,7 +73,10 @@ def _validate_input_data_df( def _validate_input_data_df_ext( - X: Union[str, List[str]], y: str, df: pd.DataFrame, groups: Optional[str] + X: Union[str, List[str]], # noqa: N803 + y: str, + df: pd.DataFrame, + groups: Optional[str], ) -> None: """Validate the input dataframe for the pipeline. @@ -193,7 +199,7 @@ def _pick_columns( ) unmatched = [] for exp in regexes: - if not any([re.fullmatch(exp, col) for col in columns]): + if not any(re.fullmatch(exp, col) for col in columns): unmatched.append(exp) if len(unmatched) > 0: raise ValueError( @@ -204,12 +210,12 @@ def _pick_columns( def prepare_input_data( - X: Union[str, List[str]], + X: Union[str, List[str]], # noqa: N803 y: str, df: pd.DataFrame, pos_labels: Union[str, int, float, List, None], groups: Optional[str], - X_types: Optional[Dict], + X_types: Optional[Dict], # noqa: N803 ) -> Tuple[pd.DataFrame, pd.Series, Union[pd.Series, None], Dict]: """Prepare the input data and variables for the pipeline. @@ -232,6 +238,7 @@ def prepare_input_data( X_types : dict | None A dictionary containing keys with column type as a str and the columns of this column type as a list of str. + Returns ------- df_X : pandas.DataFrame @@ -410,7 +417,9 @@ def check_consistency( ) -def _check_x_types(X_types: Optional[Dict], X: List[str]) -> Dict[str, List]: +def _check_x_types( + X_types: Optional[Dict], X: List[str] # noqa: N803 +) -> Dict[str, List]: """Check validity of X_types with respect to X. 
Parameters @@ -464,12 +473,12 @@ def _check_x_types(X_types: Optional[Dict], X: List[str]) -> Dict[str, List]: t_columns = [ col for col in X - if any([re.fullmatch(exp, col) for exp in columns]) + if any(re.fullmatch(exp, col) for exp in columns) ] t_missing = [ exp for exp in columns - if not any([re.fullmatch(exp, col) for col in X]) + if not any(re.fullmatch(exp, col) for col in X) ] defined_columns.extend(t_columns) missing_columns.extend(t_missing) diff --git a/julearn/scoring/available_scorers.py b/julearn/scoring/available_scorers.py index 11d556610..104debb61 100644 --- a/julearn/scoring/available_scorers.py +++ b/julearn/scoring/available_scorers.py @@ -5,18 +5,19 @@ # License: AGPL import typing +import warnings from copy import deepcopy from typing import Callable, Dict, List, Optional, Union -import warnings -from sklearn.metrics import _scorer, make_scorer, get_scorer_names +from sklearn.metrics import _scorer, get_scorer_names, make_scorer from sklearn.metrics._scorer import _check_multimetric_scoring from sklearn.metrics._scorer import check_scoring as sklearn_check_scoring +from ..transformers.target.ju_transformed_target_model import ( + TransformedTargetWarning, +) from ..utils import logger, raise_error, warn_with_log from ..utils.typing import EstimatorLike, ScorerLike -from ..transformers.target.ju_transformed_target_model import ( - TransformedTargetWarning) from .metrics import r2_corr, r_corr @@ -127,7 +128,7 @@ def reset_scorer_register(): def check_scoring( estimator: EstimatorLike, scoring: Union[ScorerLike, str, Callable, List[str], None], - wrap_score: bool + wrap_score: bool, ) -> Union[None, ScorerLike, Callable, Dict[str, ScorerLike]]: """Check the scoring. 
@@ -147,12 +148,14 @@ def check_scoring( scoring = _extend_scorer(get_scorer(scoring), wrap_score) if callable(scoring): return _extend_scorer( - sklearn_check_scoring(estimator, scoring=scoring), - wrap_score) + sklearn_check_scoring(estimator, scoring=scoring), wrap_score + ) if isinstance(scoring, list): scorer_names = typing.cast(List[str], scoring) - scoring_dict = {score: _extend_scorer(get_scorer(score), wrap_score) - for score in scorer_names} + scoring_dict = { + score: _extend_scorer(get_scorer(score), wrap_score) + for score in scorer_names + } return _check_multimetric_scoring( # type: ignore estimator, scoring_dict ) @@ -164,25 +167,23 @@ def _extend_scorer(scorer, extend): return scorer -class _ExtendedScorer(): +class _ExtendedScorer: def __init__(self, scorer): self.scorer = scorer - def __call__(self, estimator, X, y): + def __call__(self, estimator, X, y): # noqa: N803 if hasattr(estimator, "best_estimator_"): estimator = estimator.best_estimator_ X_trans = X for _, transform in estimator.steps[:-1]: X_trans = transform.transform(X_trans) - y_true = ( - estimator - .steps[-1][-1] # last est - .transform_target(X_trans, y) + y_true = estimator.steps[-1][-1].transform_target( # last est + X_trans, y ) with warnings.catch_warnings(): warnings.filterwarnings( - action='ignore', category=TransformedTargetWarning + action="ignore", category=TransformedTargetWarning ) scores = self.scorer(estimator, X, y_true) return scores diff --git a/julearn/scoring/tests/test_available_scorers.py b/julearn/scoring/tests/test_available_scorers.py index 019601397..4a8479406 100644 --- a/julearn/scoring/tests/test_available_scorers.py +++ b/julearn/scoring/tests/test_available_scorers.py @@ -11,7 +11,9 @@ from julearn.utils.typing import DataLike, EstimatorLike -def _return_1(estimator: EstimatorLike, X: DataLike, y: DataLike) -> float: +def _return_1( + estimator: EstimatorLike, X: DataLike, y: DataLike # noqa: N803 +) -> float: """Return 1.""" return 1 diff --git 
a/julearn/stats/corrected_ttest.py b/julearn/stats/corrected_ttest.py index 820b345f1..24a343756 100644 --- a/julearn/stats/corrected_ttest.py +++ b/julearn/stats/corrected_ttest.py @@ -1,3 +1,5 @@ +"""Provide functions for corrected t-test.""" + # Author: Authors of scikit-learn # Martina G. Vilas # Federico Raimondo @@ -11,8 +13,8 @@ import scipy.special as special from statsmodels.stats.multitest import multipletests -from ..utils.logging import raise_error, warn_with_log from ..utils.checks import check_scores_df +from ..utils.logging import raise_error, warn_with_log def _corrected_std( @@ -182,7 +184,9 @@ def corrected_ttest( to_skip = ["cv_mdsum", "n_train", "n_test", "model"] to_keep = [ - x for x in i_scores.columns if x not in to_skip + x + for x in i_scores.columns + if x not in to_skip and (x.startswith("test_") or x.startswith("train_")) ] df1 = i_scores[to_keep] diff --git a/julearn/stats/tests/test_corrected_ttest.py b/julearn/stats/tests/test_corrected_ttest.py index 800efea7c..9aec2fd18 100644 --- a/julearn/stats/tests/test_corrected_ttest.py +++ b/julearn/stats/tests/test_corrected_ttest.py @@ -1,3 +1,8 @@ +"""Provide tests for corrected t-test.""" + +# Author: Federico Raimondo +# License: AGPL + import warnings import numpy as np diff --git a/julearn/tests/test_api.py b/julearn/tests/test_api.py index 01898a44b..67a5a81d1 100644 --- a/julearn/tests/test_api.py +++ b/julearn/tests/test_api.py @@ -4,9 +4,11 @@ # Sami Hamdan # License: AGPL +from pathlib import Path + +import joblib import numpy as np import pandas as pd -import joblib import pytest from sklearn.base import clone from sklearn.datasets import make_regression @@ -36,8 +38,8 @@ from julearn import run_cross_validation from julearn.api import _compute_cvmdsum from julearn.model_selection import ( - RepeatedContinuousStratifiedGroupKFold, ContinuousStratifiedGroupKFold, + RepeatedContinuousStratifiedGroupKFold, ) from julearn.pipeline import PipelineCreator from julearn.utils.testing 
import compare_models, do_scoring_test @@ -186,7 +188,7 @@ def test_run_cv_simple_binary_groups(df_iris: pd.DataFrame) -> None: def test_run_cv_simple_binary_errors( df_binary: pd.DataFrame, df_iris: pd.DataFrame ) -> None: - """Test a simple classification problem errors + """Test a simple classification problem errors. Parameters ---------- @@ -431,10 +433,8 @@ def test_tune_hyperparam_gridsearch(df_iris: pd.DataFrame) -> None: assert len(actual.columns) == len(expected) + 5 assert len(actual["test_accuracy"]) == len(expected["test_accuracy"]) assert all( - [ - a == b - for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) - ] + a == b + for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) ) # Compare the models @@ -511,10 +511,8 @@ def test_tune_hyperparam_gridsearch_groups(df_iris: pd.DataFrame) -> None: assert len(actual.columns) == len(expected) + 5 assert len(actual["test_accuracy"]) == len(expected["test_accuracy"]) assert all( - [ - a == b - for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) - ] + a == b + for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) ) # Compare the models @@ -587,10 +585,8 @@ def test_tune_hyperparam_randomsearch(df_iris: pd.DataFrame) -> None: assert len(actual.columns) == len(expected) + 5 assert len(actual["test_accuracy"]) == len(expected["test_accuracy"]) assert all( - [ - a == b - for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) - ] + a == b + for a, b in zip(actual["test_accuracy"], expected["test_accuracy"]) ) # Compare the models @@ -694,20 +690,12 @@ def test_tune_hyperparams_multiple_grid(df_iris: pd.DataFrame) -> None: assert len(actual1["test_accuracy"]) == len(expected["test_accuracy"]) assert len(actual2["test_accuracy"]) == len(expected["test_accuracy"]) assert all( - [ - a == b - for a, b in zip( - actual1["test_accuracy"], expected["test_accuracy"] - ) - ] + a == b + for a, b in zip(actual1["test_accuracy"], expected["test_accuracy"]) 
) assert all( - [ - a == b - for a, b in zip( - actual2["test_accuracy"], expected["test_accuracy"] - ) - ] + a == b + for a, b in zip(actual2["test_accuracy"], expected["test_accuracy"]) ) # Compare the models clf1 = actual_estimator1.best_estimator_.steps[-1][1] @@ -832,8 +820,8 @@ def test_return_train_scores(df_iris: pd.DataFrame) -> None: train_scores = [f"train_{s}" for s in scoring] test_scores = [f"test_{s}" for s in scoring] - assert all([s not in scores.columns for s in train_scores]) - assert all([s in scores.columns for s in test_scores]) + assert all(s not in scores.columns for s in train_scores) + assert all(s in scores.columns for s in test_scores) with pytest.warns(RuntimeWarning, match="treated as continuous"): scores = run_cross_validation( @@ -850,8 +838,8 @@ def test_return_train_scores(df_iris: pd.DataFrame) -> None: train_scores = [f"train_{s}" for s in scoring] test_scores = [f"test_{s}" for s in scoring] - assert all([s in scores.columns for s in train_scores]) - assert all([s in scores.columns for s in test_scores]) + assert all(s in scores.columns for s in train_scores) + assert all(s in scores.columns for s in test_scores) @pytest.mark.parametrize( @@ -1122,7 +1110,7 @@ def test__compute_cvmdsum(cv1, cv2, expected): def test_api_stacking_models() -> None: - """ "Test API of stacking models.""" + """Test API of stacking models.""" # prepare data X, y = make_regression(n_features=6, n_samples=50) @@ -1171,7 +1159,15 @@ def test_api_stacking_models() -> None: assert isinstance(final.steps[1][1].model.estimators[0][1], GridSearchCV) -def test_inspection_error(df_iris): +def test_inspection_error(df_iris: pd.DataFrame) -> None: + """Test error for inspector. + + Parameters + ---------- + df_iris : pd.DataFrame + The iris dataset. 
+ + """ X = ["sepal_length", "sepal_width", "petal_length"] y = "species" with pytest.raises(ValueError, match="return_inspector=True requires"): @@ -1196,7 +1192,19 @@ def test_inspection_error(df_iris): assert len(res) == 3 -def test_final_estimator_picklable(tmp_path, df_iris) -> None: +def test_final_estimator_picklable( + tmp_path: Path, df_iris: pd.DataFrame +) -> None: + """Test if final estimator is picklable. + + Parameters + ---------- + tmp_path : pathlib.Path + The path to the test directory. + df_iris : pd.DataFrame + The iris dataset. + + """ X = ["sepal_length", "sepal_width", "petal_length"] y = "species" pickled_file = tmp_path / "final_estimator.joblib" @@ -1213,7 +1221,17 @@ def test_final_estimator_picklable(tmp_path, df_iris) -> None: joblib.load(pickled_file) -def test_inspector_picklable(tmp_path, df_iris) -> None: +def test_inspector_picklable(tmp_path: Path, df_iris: pd.DataFrame) -> None: + """Test if inspector is picklable. + + Parameters + ---------- + tmp_path : pathlib.Path + The path to the test directory. + df_iris : pd.DataFrame + The iris dataset. 
+ + """ X = ["sepal_length", "sepal_width", "petal_length"] y = "species" pickled_file = tmp_path / "inspector.joblib" diff --git a/julearn/tests/test_config.py b/julearn/tests/test_config.py index e3495f13a..9942bfc1e 100644 --- a/julearn/tests/test_config.py +++ b/julearn/tests/test_config.py @@ -3,9 +3,10 @@ # Authors: Federico Raimondo # License: AGPL -from julearn.config import set_config, get_config import pytest +from julearn.config import get_config, set_config + def test_set_config_wrong_keys() -> None: """Test that set_config raises an error when the key does not exist.""" diff --git a/julearn/tests/test_prepare.py b/julearn/tests/test_prepare.py index 319e8e735..75ec13bee 100644 --- a/julearn/tests/test_prepare.py +++ b/julearn/tests/test_prepare.py @@ -24,9 +24,10 @@ StratifiedShuffleSplit, ) +from julearn.config import set_config from julearn.model_selection import ( - RepeatedContinuousStratifiedGroupKFold, ContinuousStratifiedGroupKFold, + RepeatedContinuousStratifiedGroupKFold, ) from julearn.prepare import ( _check_x_types, @@ -34,10 +35,9 @@ check_consistency, prepare_input_data, ) -from julearn.config import set_config -def _check_df_input(prepared, X, y, groups, df): +def _check_df_input(prepared, X, y, groups, df): # noqa: N803 df_X, df_y, df_groups, _ = prepared assert_array_equal(df[X].values, df_X[X].values) @@ -233,7 +233,7 @@ def test_prepare_input_data_erors() -> None: prepared = prepare_input_data( X=X, y=y, - df=dict(), # type: ignore + df={}, # type: ignore pos_labels=None, groups=None, X_types=None, # type: ignore @@ -512,7 +512,7 @@ def test_prepare_data_pick_regexp(): df_X, df_y, _, prep_X_types = prepared - assert all([x in df_X.columns for x in X]) + assert all(x in df_X.columns for x in X) assert y not in df_X.columns assert df_y.name == y assert X_types == prep_X_types @@ -523,7 +523,7 @@ def test_prepare_data_pick_regexp(): df_X, df_y, _, prep_X_types = prepared - assert all([x in df_X.columns for x in X]) + assert all(x in 
df_X.columns for x in X) assert y not in df_X.columns assert df_y.name == y assert X_types == prep_X_types @@ -536,7 +536,7 @@ def test_prepare_data_pick_regexp(): df_X, df_y, _, prep_X_types = prepared - assert all([x in df_X.columns for x in X]) + assert all(x in df_X.columns for x in X) assert y not in df_X.columns assert df_y.name == y assert X_types == prep_X_types @@ -550,7 +550,7 @@ def test_prepare_data_pick_regexp(): df_X, df_y, df_groups, prep_X_types = prepared - assert all([x in df_X.columns for x in X]) + assert all(x in df_X.columns for x in X) assert y not in df_X.columns assert groups not in df_X.columns assert df_y.name == y @@ -566,7 +566,7 @@ def test_prepare_data_pick_regexp(): df_X, df_y, _, prep_X_types = prepared - assert all([x in df_X.columns for x in X]) + assert all(x in df_X.columns for x in X) assert y not in df_X.columns assert df_y.name == y assert X_types == prep_X_types @@ -585,7 +585,7 @@ def test_prepare_data_pick_regexp(): df_X, df_y, _, prep_X_types = prepared - assert all([x in df_X.columns for x in X]) + assert all(x in df_X.columns for x in X) assert y not in df_X.columns assert df_y.name == y assert X_types == prep_X_types diff --git a/julearn/transformers/available_transformers.py b/julearn/transformers/available_transformers.py index ca266e6b1..6ee1a305c 100644 --- a/julearn/transformers/available_transformers.py +++ b/julearn/transformers/available_transformers.py @@ -70,8 +70,9 @@ def list_transformers() -> List[str]: Returns ------- - list o str + list of str A list will all the available transformer names. + """ return list(_available_transformers.keys()) @@ -82,12 +83,15 @@ def get_transformer(name: str, **params: Any) -> TransformerLike: Parameters ---------- name : str - The transformer name + The transformer name. + **params : dict + Parameters to get transformer. Returns ------- - out : scikit-learn compatible transformer + scikit-learn compatible transformer The transformer object. 
+ """ out = None if name not in _available_transformers: diff --git a/julearn/transformers/cbpm.py b/julearn/transformers/cbpm.py index 3bc7ac325..851dff868 100644 --- a/julearn/transformers/cbpm.py +++ b/julearn/transformers/cbpm.py @@ -1,6 +1,8 @@ +"""Provide scikit-learn-compatible transformer for CBPM.""" + # Authors: Federico Raimondo # Sami Hamdan -# Kaustubh, Patil +# Kaustubh Patil # License: AGPL from typing import Callable, Optional @@ -9,27 +11,24 @@ from joblib import Parallel, delayed from scipy.stats import pearsonr from sklearn.base import BaseEstimator, TransformerMixin - from sklearn.utils.validation import check_is_fitted -from ..utils.versions import _joblib_parallel_args + from ..utils import warn_with_log +from ..utils.versions import _joblib_parallel_args class CBPM(BaseEstimator, TransformerMixin): - '''Transformer that aggregates together all features significantly - correlated to the target. - - Significant negative and positive correlations are aggregateed separately. - Non-significant ones are dropped. + """Transformer for connectome-based predictive modeling. - User can choose to use negative, positive or both correlations. + It aggregates all features significantly correlated to the target. + The significant negative and positive correlations are aggregateed + separately and non-significant ones are dropped. - In case that there are no significant correlations the mean of the + The user can choose to use negative, positive or both correlations. + In case that there are no significant correlations and the mean of the target will be returned as the only feature. - This transformer implements the procedure described in : - Shen, X., Finn, E., Scheinost, D. et al. 2016 - https://doi.org/10.1038/nprot.2016.178 + This transformer implements the procedure described in [1]_. 
Parameters ---------- @@ -85,7 +84,16 @@ class CBPM(BaseEstimator, TransformerMixin): used_significant_mask_ : np.array of bools Array of bools showing which of the original features will be used by this transformer. - ''' + + References + ---------- + .. [1] Shen, X., Finn, E., Scheinost, D. et al. + Using connectome-based predictive modeling to predict individual + behavior from brain connectivity. + Nat Protoc 12, 506-518 (2017). + https://doi.org/10.1038/nprot.2016.178 + + """ def __init__( self, @@ -108,7 +116,7 @@ def __init__( self.n_jobs = n_jobs self.verbose = verbose - def fit(self, X: np.ndarray, y: np.ndarray) -> "CBPM": + def fit(self, X: np.ndarray, y: np.ndarray) -> "CBPM": # noqa: N803 """Fit the transformer. Compute the correlations of each feature to the target, threhsold and @@ -148,7 +156,7 @@ def fit(self, X: np.ndarray, y: np.ndarray) -> "CBPM": return self - def transform(self, X: np.ndarray) -> np.ndarray: + def transform(self, X: np.ndarray) -> np.ndarray: # noqa: N803 """Transform the data. 
Replace each of the features that had a significant correlation on @@ -172,25 +180,20 @@ def transform(self, X: np.ndarray) -> np.ndarray: out = np.ones(X.shape[0]) * self.y_mean_ return out - elif self.used_corr_sign_ == 'posneg': - X_meaned_pos = self.aggregate( - X, mask=self.pos_significant_mask_ - ) + elif self.used_corr_sign_ == "posneg": + X_meaned_pos = self.aggregate(X, mask=self.pos_significant_mask_) - X_meaned_neg = self.aggregate( - X, mask=self.neg_significant_mask_ - ) + X_meaned_neg = self.aggregate(X, mask=self.neg_significant_mask_) X_meaned = np.concatenate( - [ - X_meaned_pos.reshape(-1, 1), - X_meaned_neg.reshape(-1, 1)], - axis=1) + [X_meaned_pos.reshape(-1, 1), X_meaned_neg.reshape(-1, 1)], + axis=1, + ) - elif self.used_corr_sign_ == 'pos': + elif self.used_corr_sign_ == "pos": X_meaned = self.aggregate(X, self.pos_significant_mask_) - elif self.used_corr_sign_ == 'neg': + elif self.used_corr_sign_ == "neg": X_meaned = self.aggregate(X, self.neg_significant_mask_) return X_meaned @@ -269,13 +272,18 @@ def _create_masks(self) -> None: self.used_corr_sign_ = "posneg" self.used_significant_mask_ = self.significant_mask_ - def aggregate(self, X, mask): + def aggregate(self, X, mask): # noqa: N803 + """Aggregate.""" return self.agg_method(X[:, mask], axis=1) def get_feature_names_out(self, input_features=None): + """Get output feature names.""" check_is_fitted(self) - cols = (["positive"] if self.used_corr_sign_ == "pos" else - ["negative"] if self.used_corr_sign_ == "neg" else - ["positive", "negative"] - ) + cols = ( + ["positive"] + if self.used_corr_sign_ == "pos" + else ["negative"] + if self.used_corr_sign_ == "neg" + else ["positive", "negative"] + ) return np.array(cols, dtype=object) diff --git a/julearn/transformers/confound_remover.py b/julearn/transformers/confound_remover.py index 4810fc559..e8db76a24 100644 --- a/julearn/transformers/confound_remover.py +++ b/julearn/transformers/confound_remover.py @@ -61,8 +61,8 @@ def __init__( 
confounds: ColumnTypesLike = "confound", threshold: Optional[float] = None, keep_confounds: bool = False, - row_select_col_type: Optional[ColumnTypesLike] = None, - row_select_vals: Optional[Union[str, int, List, bool]] = None, + row_select_col_type: Optional[ColumnTypesLike] = None, + row_select_vals: Optional[Union[str, int, List, bool]] = None, ): if model_confound is None: model_confound = LinearRegression() # type: ignore @@ -74,11 +74,11 @@ def __init__( apply_to=apply_to, needed_types=confounds, row_select_col_type=row_select_col_type, - row_select_vals=row_select_vals + row_select_vals=row_select_vals, ) def _fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None + self, X: pd.DataFrame, y: Optional[DataLike] = None # noqa: N803 ) -> "ConfoundRemover": """Fit ConfoundRemover. @@ -104,7 +104,7 @@ def _fit( self.support_mask_[output_X.columns] = True self.support_mask_ = self.support_mask_.values - def fit_confound_models(X: Scalar) -> ModelLike: + def fit_confound_models(X: Scalar) -> ModelLike: # noqa: N803 _model = clone(self.model_confound) _model.fit(ser_confound.values, X) # type: ignore return _model # type: ignore @@ -114,7 +114,7 @@ def fit_confound_models(X: Scalar) -> ModelLike: ) return self - def transform(self, X: pd.DataFrame) -> pd.DataFrame: + def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Remove confounds from data. Parameters @@ -197,7 +197,7 @@ def get_feature_names_out( return out # type: ignore def _split_into_X_confound( - self, X: pd.DataFrame + self, X: pd.DataFrame # noqa: N803 ) -> Tuple[pd.DataFrame, pd.DataFrame]: """Split the original X into the features (X) and confounds. 
diff --git a/julearn/transformers/dataframe/change_column_types.py b/julearn/transformers/dataframe/change_column_types.py index 321ffc6ba..010f87c6a 100644 --- a/julearn/transformers/dataframe/change_column_types.py +++ b/julearn/transformers/dataframe/change_column_types.py @@ -34,22 +34,21 @@ class ChangeColumnTypes(JuTransformer): def __init__( self, - X_types_renamer: Dict[str, str], + X_types_renamer: Dict[str, str], # noqa: N803 apply_to: ColumnTypesLike, - row_select_col_type: Optional[ColumnTypesLike] = None, - row_select_vals: Optional[Union[ - str, int, list, bool]] = None, + row_select_col_type: Optional[ColumnTypesLike] = None, + row_select_vals: Optional[Union[str, int, list, bool]] = None, ): self.X_types_renamer = X_types_renamer super().__init__( - apply_to=apply_to, needed_types=None, + apply_to=apply_to, + needed_types=None, row_select_col_type=row_select_col_type, - row_select_vals=row_select_vals - + row_select_vals=row_select_vals, ) def _fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None + self, X: pd.DataFrame, y: Optional[DataLike] = None # noqa: N803 ) -> "ChangeColumnTypes": """Fit the transformer. @@ -80,7 +79,7 @@ def _fit( self._renamer = to_rename return self - def transform(self, X: pd.DataFrame) -> pd.DataFrame: + def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Change the column types. 
Parameters diff --git a/julearn/transformers/dataframe/drop_columns.py b/julearn/transformers/dataframe/drop_columns.py index f4842491a..822929472 100644 --- a/julearn/transformers/dataframe/drop_columns.py +++ b/julearn/transformers/dataframe/drop_columns.py @@ -33,19 +33,19 @@ class DropColumns(JuTransformer): def __init__( self, - apply_to: ColumnTypesLike, - row_select_col_type: Optional[ColumnTypesLike] = None, - row_select_vals: Optional[Union[str, - int, list, bool]] = None, + apply_to: ColumnTypesLike, + row_select_col_type: Optional[ColumnTypesLike] = None, + row_select_vals: Optional[Union[str, int, list, bool]] = None, ): super().__init__( - apply_to=apply_to, needed_types=None, + apply_to=apply_to, + needed_types=None, row_select_col_type=row_select_col_type, - row_select_vals=row_select_vals + row_select_vals=row_select_vals, ) def _fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None + self, X: pd.DataFrame, y: Optional[DataLike] = None # noqa: N803 ) -> "DropColumns": """Fit the transformer. @@ -73,7 +73,7 @@ def _fit( self.support_mask_ = self.support_mask_.values return self - def transform(self, X: pd.DataFrame) -> pd.DataFrame: + def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Drop the columns. 
Parameters diff --git a/julearn/transformers/dataframe/filter_columns.py b/julearn/transformers/dataframe/filter_columns.py index bd9b3bbdc..f3f92fbbb 100644 --- a/julearn/transformers/dataframe/filter_columns.py +++ b/julearn/transformers/dataframe/filter_columns.py @@ -38,22 +38,21 @@ class FilterColumns(JuTransformer): def __init__( self, keep: Optional[ColumnTypesLike] = None, - row_select_col_type: Optional[ColumnTypesLike] = None, - row_select_vals: Optional[Union[str, - int, list, bool]] = None, + row_select_col_type: Optional[ColumnTypesLike] = None, + row_select_vals: Optional[Union[str, int, list, bool]] = None, ): if keep is None: keep = "continuous" self.keep: ColumnTypes = ensure_column_types(keep) super().__init__( - apply_to="*", needed_types=keep, + apply_to="*", + needed_types=keep, row_select_col_type=row_select_col_type, - row_select_vals=row_select_vals - + row_select_vals=row_select_vals, ) def _fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None + self, X: pd.DataFrame, y: Optional[DataLike] = None # noqa: N803 ) -> "FilterColumns": """Fit the transformer. @@ -78,7 +77,7 @@ def _fit( self.filter_columns_.fit(X, y) return self - def transform(self, X: pd.DataFrame) -> pd.DataFrame: + def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Transform the data. 
Parameters diff --git a/julearn/transformers/dataframe/set_column_types.py b/julearn/transformers/dataframe/set_column_types.py index 95f8706ac..c197ad510 100644 --- a/julearn/transformers/dataframe/set_column_types.py +++ b/julearn/transformers/dataframe/set_column_types.py @@ -34,7 +34,7 @@ class SetColumnTypes(JuTransformer): def __init__( self, - X_types: Optional[Dict[str, List[str]]] = None, + X_types: Optional[Dict[str, List[str]]] = None, # noqa: N803 row_select_col_type: Optional[ColumnTypesLike] = None, row_select_vals: Optional[Union[str, int, list, bool]] = None, ): @@ -57,7 +57,7 @@ def __init__( ) def _fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None + self, X: pd.DataFrame, y: Optional[DataLike] = None # noqa: N803 ) -> "SetColumnTypes": """Fit the transformer. @@ -99,7 +99,7 @@ def _fit( t_columns = [ col for col in X.columns - if any([re.fullmatch(exp, col) for exp in columns]) + if any(re.fullmatch(exp, col) for exp in columns) ] column_mapper_.update( {col: change_column_type(col, X_type) for col in t_columns} @@ -109,7 +109,7 @@ def _fit( self.column_mapper_ = column_mapper_ return self - def transform(self, X: pd.DataFrame) -> pd.DataFrame: + def transform(self, X: pd.DataFrame) -> pd.DataFrame: # noqa: N803 """Transform the data. 
Parameters diff --git a/julearn/transformers/dataframe/tests/test_change_column_types.py b/julearn/transformers/dataframe/tests/test_change_column_types.py index 8fb396d9b..bd36a0dca 100644 --- a/julearn/transformers/dataframe/tests/test_change_column_types.py +++ b/julearn/transformers/dataframe/tests/test_change_column_types.py @@ -1,16 +1,37 @@ +"""Provide tests for the ChangeColumnTypes transformer.""" + +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +from typing import TYPE_CHECKING + from julearn.transformers.dataframe.change_column_types import ( ChangeColumnTypes, ) -def test_change_column_types(df_typed_iris): +if TYPE_CHECKING: + import pandas as pd + + +def test_change_column_types(df_typed_iris: "pd.DataFrame") -> None: + """Test ChangeColumnTypes transformer. + + Parameters + ---------- + df_typed_iris : pd.DataFrame + The iris dataset with typed features. + + """ X = df_typed_iris.iloc[:, :-1] y = df_typed_iris.loc[:, "species"] ct = ChangeColumnTypes( - X_types_renamer=dict(continuous="chicken"), apply_to="*") + X_types_renamer={"continuous": "chicken"}, apply_to="*" + ) ct.fit(X, y) Xt = ct.transform(X) Xt_colnames = [x.split("__:")[0] for x in list(ct.get_feature_names_out())] - assert all([col.endswith("__:type:__chicken") for col in list(Xt.columns)]) + assert all(col.endswith("__:type:__chicken") for col in list(Xt.columns)) assert all(X == Xt_colnames) diff --git a/julearn/transformers/dataframe/tests/test_set_column_types.py b/julearn/transformers/dataframe/tests/test_set_column_types.py index 63161b351..b5aea2151 100644 --- a/julearn/transformers/dataframe/tests/test_set_column_types.py +++ b/julearn/transformers/dataframe/tests/test_set_column_types.py @@ -13,7 +13,7 @@ def test_SetColumnTypes( - X_iris: pd.DataFrame, X_types_iris: Optional[Dict] + X_iris: pd.DataFrame, X_types_iris: Optional[Dict] # noqa: N803 ) -> None: """Test SetColumnTypes. 
@@ -45,7 +45,9 @@ def test_SetColumnTypes( assert_frame_equal(Xt_iris_with_types, X_iris_with_types) -def test_SetColumnTypes_input_validation(X_iris: pd.DataFrame) -> None: +def test_SetColumnTypes_input_validation( + X_iris: pd.DataFrame, # noqa: N803 +) -> None: """Test SetColumnTypes input validation. Parameters @@ -61,7 +63,7 @@ def test_SetColumnTypes_input_validation(X_iris: pd.DataFrame) -> None: def test_SetColumnTypes_array( - X_iris: pd.DataFrame, X_types_iris: Optional[Dict] + X_iris: pd.DataFrame, X_types_iris: Optional[Dict] # noqa: N803 ) -> None: """Test SetColumnTypes. diff --git a/julearn/transformers/ju_column_transformer.py b/julearn/transformers/ju_column_transformer.py index 012b289a7..07034b916 100644 --- a/julearn/transformers/ju_column_transformer.py +++ b/julearn/transformers/ju_column_transformer.py @@ -1,4 +1,4 @@ -"""Provide julearn speicif column transformer.""" +"""Provide julearn specific column transformer.""" # Authors: Federico Raimondo # Sami Hamdan @@ -17,10 +17,10 @@ class JuColumnTransformer(JuTransformer): - """Column transformer that can be used in a Junifer pipeline. + """Column transformer that can be used in a julearn pipeline. This column transformer is a wrapper around the sklearn column transformer, - so it can be used directly with Junifer pipelines. + so it can be used directly with julearn pipelines. Parameters ---------- @@ -33,10 +33,13 @@ class JuColumnTransformer(JuTransformer): needed_types : ColumnTypesLike, optional Which feature types are needed for the transformer to work. row_select_col_type : str or list of str or set of str or ColumnTypes - The column types needed to select rows (default is None) + The column types needed to select rows (default is None). row_select_vals : str, int, bool or list of str, int, bool The value(s) which should be selected in the row_select_col_type - to select the rows used for training (default is None) + to select the rows used for training (default is None). 
+ **params : dict + Extra keyword arguments for the transformer. + """ def __init__( @@ -45,8 +48,8 @@ def __init__( transformer: EstimatorLike, apply_to: ColumnTypesLike, needed_types: Optional[ColumnTypesLike] = None, - row_select_col_type: Optional[ColumnTypesLike] = None, - row_select_vals: Optional[Union[str, int, List, bool]] = None, + row_select_col_type: Optional[ColumnTypesLike] = None, + row_select_vals: Optional[Union[str, int, List, bool]] = None, **params: Any, ): self.name = name @@ -58,7 +61,10 @@ def __init__( self.set_params(**params) def _fit( - self, X: pd.DataFrame, y: Optional[DataLike] = None, **fit_params: Any + self, + X: pd.DataFrame, # noqa: N803 + y: Optional[DataLike] = None, + **fit_params: Any, ) -> "JuColumnTransformer": """Fit the transformer. @@ -70,11 +76,14 @@ def _fit( Input features. y : np.array Target. + **fit_params : dict + Parameters for fitting the transformer. Returns ------- - self : JuColumnTransformer + JuColumnTransformer The fitted transformer. + """ verbose_feature_names_out = isinstance( self.transformer, ClassNamePrefixFeaturesOutMixin @@ -89,7 +98,7 @@ def _fit( return self - def transform(self, X: pd.DataFrame) -> DataLike: + def transform(self, X: pd.DataFrame) -> DataLike: # noqa: N803 """Apply the transformer. Parameters @@ -99,8 +108,9 @@ def transform(self, X: pd.DataFrame) -> DataLike: Returns ------- - out : pd.DataFrame + pd.DataFrame Transformed data. + """ check_is_fitted(self) return self.column_transformer_.transform(X) # type: ignore @@ -113,18 +123,21 @@ def get_feature_names_out( Parameters ---------- input_features : array-like of str or None, default=None + Input features to use. - * If `input_features` is `None`, then `feature_names_in_` is - used as feature names in. If `feature_names_in_` is not defined, - then the following input feature names are generated: - `["x0", "x1", ..., "x(n_features_in_ - 1)"]`. 
- * If `input_features` is an array-like, then `input_features` must - match `feature_names_in_` if `feature_names_in_` is defined. + * If ``None``, then ``feature_names_in_`` is + used as input feature names if it's defined. If + ``feature_names_in_`` is undefined, then the following input + feature names are generated: + ``["x0", "x1", ..., "x(n_features_in_ - 1)"]``. + * If ``array-like``, then ``input_features`` must + match ``feature_names_in_`` if it's defined. Returns ------- - list + list of str Names of features to be kept in the output pd.DataFrame. + """ out = None try: @@ -158,8 +171,9 @@ def get_params(self, deep: bool = True) -> Dict[str, Any]: Returns ------- - params : dict + dict Parameter names mapped to their values. + """ return dict( **self.transformer.get_params(True), @@ -181,13 +195,14 @@ def set_params(self, **kwargs: Any) -> "JuColumnTransformer": Parameters ---------- - **params : dict + **kwargs : dict Estimator parameters. Returns ------- - self : estimator instance - Estimator instance. + JuColumnTransformer + JuColumnTransformer instance with params set. + """ transformer_params = list(self.transformer.get_params(True).keys()) diff --git a/julearn/transformers/target/ju_target_transformer.py b/julearn/transformers/target/ju_target_transformer.py index 80068eaf2..0275651b8 100644 --- a/julearn/transformers/target/ju_target_transformer.py +++ b/julearn/transformers/target/ju_target_transformer.py @@ -22,7 +22,9 @@ class JuTargetTransformer: are not dropped after the transformation. """ - def fit_transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: + def fit_transform( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> DataLike: """Fit and transform the target. Parameters @@ -34,13 +36,16 @@ def fit_transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: Returns ------- - y : DataLike + DataLike The transformed target. 
+ """ return self.fit(X, y).transform(X, y) - def fit(self, X: pd.DataFrame, y: DataLike) -> "JuTargetTransformer": - """Fit and transform the target. + def fit( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> "JuTargetTransformer": + """Fit the target. Parameters ---------- @@ -51,12 +56,15 @@ def fit(self, X: pd.DataFrame, y: DataLike) -> "JuTargetTransformer": Returns ------- - self : JuTargetTransformer + JuTargetTransformer The fitted transformer. + """ - raise NotImplementedError("fit method not implemented") + raise NotImplementedError("fit() not implemented") - def transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: + def transform( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> DataLike: """Transform the target. Parameters @@ -68,7 +76,8 @@ def transform(self, X: pd.DataFrame, y: DataLike) -> DataLike: Returns ------- - y : DataLike + DataLike The transformed target. + """ - raise NotImplementedError("fitransform method not implemented") + raise NotImplementedError("transform() not implemented") diff --git a/julearn/transformers/target/ju_transformed_target_model.py b/julearn/transformers/target/ju_transformed_target_model.py index 26d014d2a..56bf0ca55 100644 --- a/julearn/transformers/target/ju_transformed_target_model.py +++ b/julearn/transformers/target/ju_transformed_target_model.py @@ -74,7 +74,7 @@ def __init__(self, model: ModelLike, transformer: "JuTargetPipeline"): self.transformer = transformer def fit( - self, X: pd.DataFrame, y: DataLike, **fit_params: Any + self, X: pd.DataFrame, y: DataLike, **fit_params: Any # noqa: N803 ) -> "JuTransformedTargetModel": """Fit the model. @@ -84,20 +84,21 @@ def fit( The input data. y : DataLike The target. - fit_params : Any + **fit_params : dict Additional parameters to be passed to the model fit method. Returns ------- - self : JuTransformedTargetModel + JuTransformedTargetModel The fitted model. 
+ """ y = self.transformer.fit_transform(X, y) self.model_ = clone(self.model) self.model_.fit(X, y, **fit_params) # type: ignore return self - def predict(self, X: pd.DataFrame) -> DataLike: + def predict(self, X: pd.DataFrame) -> DataLike: # noqa: N803 """Predict using the model. Parameters @@ -109,6 +110,7 @@ def predict(self, X: pd.DataFrame) -> DataLike: ------- DataLike The predictions. + """ if not hasattr(self, "model_"): raise_error("Model not fitted yet.") @@ -128,8 +130,22 @@ def predict(self, X: pd.DataFrame) -> DataLike: ) return y_pred - def score(self, X, y): + def score(self, X: pd.DataFrame, y: DataLike) -> float: # noqa: N803 + """Score the model. + Parameters + ---------- + X : pd.DataFrame + The input data. + y : DataLike + The target. + + Returns + ------- + float + Score for the model. + + """ if not hasattr(self, "model_"): raise_error("Model not fitted yet.") self.model_ = typing.cast(ModelLike, self.model_) @@ -137,7 +153,7 @@ def score(self, X, y): return self.model_.score(X, y_trans) @available_if(_wrapped_model_has("predict_proba")) - def predict_proba(self, X: pd.DataFrame) -> np.ndarray: + def predict_proba(self, X: pd.DataFrame) -> np.ndarray: # noqa: N803 """Compute probabilities of possible outcomes for samples in X. Parameters @@ -158,7 +174,7 @@ def predict_proba(self, X: pd.DataFrame) -> np.ndarray: return self.model_.predict_proba(X) # type: ignore @available_if(_wrapped_model_has("decision_function")) - def decision_function(self, X: pd.DataFrame) -> np.ndarray: + def decision_function(self, X: pd.DataFrame) -> np.ndarray: # noqa: N803 """Evaluate the decision function for the samples in X. 
Parameters @@ -177,7 +193,24 @@ def decision_function(self, X: pd.DataFrame) -> np.ndarray: self.model_ = typing.cast(ModelLike, self.model_) return self.model_.decision_function(X) # type: ignore - def transform_target(self, X, y) -> np.ndarray: + def transform_target( + self, X: pd.DataFrame, y: DataLike # noqa: N803 + ) -> DataLike: + """Transform target. + + Parameters + ---------- + X : pd.DataFrame + The input data. + y : DataLike + The target. + + Returns + ------- + DataLike + The transformed target. + + """ return self.transformer.transform(X, y) @property diff --git a/julearn/transformers/target/target_confound_remover.py b/julearn/transformers/target/target_confound_remover.py index 2b882730f..66632a7db 100644 --- a/julearn/transformers/target/target_confound_remover.py +++ b/julearn/transformers/target/target_confound_remover.py @@ -52,7 +52,9 @@ def needed_types(self) -> ColumnTypesLike: """Get the needed column types.""" return self.confounds - def fit(self, X: pd.DataFrame, y: pd.Series) -> "TargetConfoundRemover": + def fit( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> "TargetConfoundRemover": """Fit ConfoundRemover. Parameters @@ -73,7 +75,9 @@ def fit(self, X: pd.DataFrame, y: pd.Series) -> "TargetConfoundRemover": self.model_confounds_.fit(X_confounds.values, y) # type: ignore return self - def transform(self, X: pd.DataFrame, y: pd.Series) -> pd.Series: + def transform( + self, X: pd.DataFrame, y: pd.Series # noqa: N803 + ) -> pd.Series: """Remove confounds from the target. 
Parameters diff --git a/julearn/transformers/target/tests/test_ju_transformed_target_model.py b/julearn/transformers/target/tests/test_ju_transformed_target_model.py index 4f9fbfaf9..727797479 100644 --- a/julearn/transformers/target/tests/test_ju_transformed_target_model.py +++ b/julearn/transformers/target/tests/test_ju_transformed_target_model.py @@ -8,7 +8,7 @@ import pytest from numpy.testing import assert_array_equal from sklearn.preprocessing import Normalizer, StandardScaler -from sklearn.svm import SVR, SVC +from sklearn.svm import SVC, SVR from julearn.pipeline import JuTargetPipeline from julearn.transformers.target import ( @@ -18,7 +18,7 @@ def test_JuTransformedTargetModel( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test JuTransformedTargetModel.""" @@ -43,7 +43,7 @@ def test_JuTransformedTargetModel( def test_JuTransformedTargetModel_noinverse( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test JuTransformedTargetModel.""" steps = [("quantile", Normalizer())] @@ -70,8 +70,8 @@ def test_JuTransformedTargetModel_noinverse( assert_array_equal(y_pred, y_pred_sk) -def test_not_fitted(X_iris, y_iris): - +def test_JuTransformedTargetModel_not_fitted(X_iris, y_iris): # noqa: N803 + """Test unfitted model for JuTransformedTargetModel.""" steps = [("scaler", StandardScaler())] transformer = JuTargetPipeline(steps) # type: ignore model = SVC(probability=True) @@ -79,13 +79,13 @@ def test_not_fitted(X_iris, y_iris): target_model = JuTransformedTargetModel( transformer=transformer, model=model # type: ignore ) - with pytest.raises(ValueError, match='Model not fitted '): + with pytest.raises(ValueError, match="Model not fitted "): target_model.score(X_iris, y_iris) - with pytest.raises(ValueError, match='Model not fitted '): + with pytest.raises(ValueError, match="Model not fitted "): target_model.predict(X_iris) - with 
pytest.raises(ValueError, match='Model not fitted '): + with pytest.raises(ValueError, match="Model not fitted "): target_model.predict_proba(X_iris) - with pytest.raises(ValueError, match='Model not fitted '): + with pytest.raises(ValueError, match="Model not fitted "): target_model.decision_function(X_iris) - with pytest.raises(ValueError, match='Model not fitted '): - target_model.classes_ + with pytest.raises(ValueError, match="Model not fitted "): + _ = target_model.classes_ diff --git a/julearn/transformers/target/tests/test_target_confound_remover.py b/julearn/transformers/target/tests/test_target_confound_remover.py index 9b3641b7d..16c5e25c5 100644 --- a/julearn/transformers/target/tests/test_target_confound_remover.py +++ b/julearn/transformers/target/tests/test_target_confound_remover.py @@ -8,7 +8,7 @@ def test_TargetConfoundRemover( - X_iris: pd.DataFrame, y_iris: pd.Series + X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 ) -> None: """Test target confound remover. diff --git a/julearn/transformers/tests/test_available_transformers.py b/julearn/transformers/tests/test_available_transformers.py index 922403f23..2d727d893 100644 --- a/julearn/transformers/tests/test_available_transformers.py +++ b/julearn/transformers/tests/test_available_transformers.py @@ -31,7 +31,9 @@ class Fish(BaseEstimator, TransformerMixin): def __init__(self, can_it_fly: bool): self.can_it_fly = can_it_fly - def fit(self, X: DataLike, y: Optional[DataLike] = None) -> "Fish": + def fit( + self, X: DataLike, y: Optional[DataLike] = None # noqa: N803 + ) -> "Fish": """Fit the fish. Parameters @@ -48,7 +50,7 @@ def fit(self, X: DataLike, y: Optional[DataLike] = None) -> "Fish": """ return self - def transform(self, X: DataLike) -> DataLike: + def transform(self, X: DataLike) -> DataLike: # noqa: N803 """Transform the data. 
Parameters diff --git a/julearn/transformers/tests/test_cbpm.py b/julearn/transformers/tests/test_cbpm.py index f3fa93c62..f9f126ec0 100644 --- a/julearn/transformers/tests/test_cbpm.py +++ b/julearn/transformers/tests/test_cbpm.py @@ -8,14 +8,14 @@ import pandas as pd import pytest from numpy.testing import assert_array_equal -from scipy.stats import spearmanr from pandas.testing import assert_frame_equal +from scipy.stats import spearmanr from julearn.transformers import CBPM def test_CBPM_posneg_correlated_features( - X_iris: pd.DataFrame, y_iris: pd.DataFrame + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 ) -> None: """Test the CBPM transformer with posneg correlated features. @@ -30,8 +30,9 @@ def test_CBPM_posneg_correlated_features( X_pos = ["sepal_length", "petal_length", "petal_width"] X_neg = ["sepal_width"] - trans_X_posneg = CBPM(corr_sign="posneg", agg_method=np.mean - ).fit_transform(X_iris, y_iris) + trans_X_posneg = CBPM( + corr_sign="posneg", agg_method=np.mean + ).fit_transform(X_iris, y_iris) trans_man_pos = X_iris[X_pos].values.mean(axis=1) trans_man_neg = X_iris[X_neg].values.mean(axis=1) trans_man = np.concatenate( @@ -41,7 +42,7 @@ def test_CBPM_posneg_correlated_features( def test_CBPM_pos_correlated_features( - X_iris: pd.DataFrame, y_iris: pd.DataFrame + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 ) -> None: """Test the CBPM transformer with positive correlated features. 
@@ -55,11 +56,13 @@ def test_CBPM_pos_correlated_features( X_pos = ["sepal_length", "petal_length", "petal_width"] - trans_X_pos = CBPM(corr_sign="pos", agg_method=np.mean - ).fit_transform(X_iris[X_pos], y_iris) + trans_X_pos = CBPM(corr_sign="pos", agg_method=np.mean).fit_transform( + X_iris[X_pos], y_iris + ) - trans_X_pos_neg = CBPM(corr_sign="pos", agg_method=np.mean - ).fit_transform(X_iris, y_iris) + trans_X_pos_neg = CBPM(corr_sign="pos", agg_method=np.mean).fit_transform( + X_iris, y_iris + ) trans_man = X_iris[X_pos].values.mean(axis=1) @@ -68,7 +71,7 @@ def test_CBPM_pos_correlated_features( def test_CBPM_neg_correlated_features( - X_iris: pd.DataFrame, y_iris: pd.DataFrame + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 ) -> None: """Test the CBPM transformer with positive correlated features. @@ -82,11 +85,13 @@ def test_CBPM_neg_correlated_features( X_neg = ["sepal_width"] - trans_X_neg = CBPM(corr_sign="neg", agg_method=np.mean - ).fit_transform(X_iris[X_neg], y_iris) + trans_X_neg = CBPM(corr_sign="neg", agg_method=np.mean).fit_transform( + X_iris[X_neg], y_iris + ) - trans_X_neg_neg = CBPM(corr_sign="neg", agg_method=np.mean - ).fit_transform(X_iris, y_iris) + trans_X_neg_neg = CBPM(corr_sign="neg", agg_method=np.mean).fit_transform( + X_iris, y_iris + ) trans_man = X_iris[X_neg].values.mean(axis=1) @@ -94,7 +99,9 @@ def test_CBPM_neg_correlated_features( assert_array_equal(trans_X_neg, trans_man) -def test_CBPM_warnings(X_iris: pd.DataFrame, y_iris: pd.DataFrame) -> None: +def test_CBPM_warnings( + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 +) -> None: """Test the CBPM transformer warnings. 
Parameters @@ -111,8 +118,9 @@ def test_CBPM_warnings(X_iris: pd.DataFrame, y_iris: pd.DataFrame) -> None: with pytest.warns( RuntimeWarning, match="No feature with significant positive" ): - trans = CBPM(corr_sign="pos", agg_method=np.mean - ).fit_transform(X_iris[X_neg], y_iris) + trans = CBPM(corr_sign="pos", agg_method=np.mean).fit_transform( + X_iris[X_neg], y_iris + ) assert (trans == y_iris.values.mean()).all() @@ -120,30 +128,35 @@ def test_CBPM_warnings(X_iris: pd.DataFrame, y_iris: pd.DataFrame) -> None: with pytest.warns( RuntimeWarning, match="No feature with significant negative" ): - trans = CBPM(corr_sign="neg", agg_method=np.mean - ).fit_transform(X_iris[X_pos], y_iris) + trans = CBPM(corr_sign="neg", agg_method=np.mean).fit_transform( + X_iris[X_pos], y_iris + ) assert (trans == y_iris.values.mean()).all() # Use posneg, but only positive present - trans_pos = CBPM(corr_sign="pos", agg_method=np.mean - ).fit_transform(X_iris[X_pos], y_iris) + trans_pos = CBPM(corr_sign="pos", agg_method=np.mean).fit_transform( + X_iris[X_pos], y_iris + ) with pytest.warns( RuntimeWarning, match="Only features with positive correlations" ): - trans = CBPM(corr_sign="posneg", agg_method=np.mean - ).fit_transform(X_iris[X_pos], y_iris) + trans = CBPM(corr_sign="posneg", agg_method=np.mean).fit_transform( + X_iris[X_pos], y_iris + ) assert_array_equal(trans, trans_pos) # Use posneg, but only negative present - trans_neg = CBPM(corr_sign="neg", agg_method=np.mean - ).fit_transform(X_iris[X_neg], y_iris) + trans_neg = CBPM(corr_sign="neg", agg_method=np.mean).fit_transform( + X_iris[X_neg], y_iris + ) with pytest.warns( RuntimeWarning, match="Only features with negative correlations" ): - trans = CBPM(corr_sign="posneg", agg_method=np.mean - ).fit_transform(X_iris[X_neg], y_iris) + trans = CBPM(corr_sign="posneg", agg_method=np.mean).fit_transform( + X_iris[X_neg], y_iris + ) assert_array_equal(trans, trans_neg) @@ -153,13 +166,14 @@ def test_CBPM_warnings(X_iris: 
pd.DataFrame, y_iris: pd.DataFrame) -> None: RuntimeWarning, match="No feature with significant negative or positive", ): - trans = CBPM(corr_sign="posneg", agg_method=np.mean - ).fit_transform(df_shuffled_X, y_iris) + trans = CBPM(corr_sign="posneg", agg_method=np.mean).fit_transform( + df_shuffled_X, y_iris + ) assert (trans == y_iris.values.mean()).all() def test_CBPM_lower_sign_threshhold( - X_iris: pd.DataFrame, y_iris: pd.DataFrame + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 ) -> None: """Test the CBPM transformer with lower significance threshold. @@ -181,7 +195,7 @@ def test_CBPM_lower_sign_threshhold( def test_CBPM_lower_sign_threshhold_no_sig( - X_iris: pd.DataFrame, y_iris: pd.DataFrame + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 ) -> None: """Test the CBPM transformer with an even lower significance threshold. @@ -199,13 +213,16 @@ def test_CBPM_lower_sign_threshhold_no_sig( match="No feature with significant negative or positive", ): trans_posneg = CBPM( - corr_sign="posneg", significance_threshold=1e-100, - agg_method=np.mean + corr_sign="posneg", + significance_threshold=1e-100, + agg_method=np.mean, ).fit_transform(X_iris, y_iris) assert (trans_posneg == y_iris.values.mean()).all() -def test_CBPM_spearman(X_iris: pd.DataFrame, y_iris: pd.DataFrame) -> None: +def test_CBPM_spearman( + X_iris: pd.DataFrame, y_iris: pd.DataFrame # noqa: N803 +) -> None: """Test the CBPM transformer with spearman correlation. 
Parameters @@ -220,9 +237,9 @@ def test_CBPM_spearman(X_iris: pd.DataFrame, y_iris: pd.DataFrame) -> None: X_neg = ["sepal_width"] # I have checked before all are still significant with spearman - trans_posneg = CBPM(corr_method=spearmanr, - agg_method=np.mean - ).fit_transform(X_iris, y_iris) + trans_posneg = CBPM( + corr_method=spearmanr, agg_method=np.mean + ).fit_transform(X_iris, y_iris) trans_man_pos = X_iris[X_pos].values.mean(axis=1) trans_man_neg = X_iris[X_neg].values.mean(axis=1) @@ -233,29 +250,29 @@ def test_CBPM_spearman(X_iris: pd.DataFrame, y_iris: pd.DataFrame) -> None: def test_CBPM_set_output_posneg( - X_iris: pd.DataFrame, y_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 + y_iris: pd.DataFrame, ) -> None: - """ + """Test the CBPM transformer for setting posneg output. Parameters ---------- X_iris : pd.DataFrame - The iris dataset features + The iris dataset features. y_iris : pd.Series - The iris dataset target + The iris dataset target. + """ X_pos = ["sepal_length", "petal_length", "petal_width"] X_neg = ["sepal_width"] # I have checked before all are still significant with spearman - trans_posneg = (CBPM(corr_method=spearmanr, - agg_method=np.mean, - corr_sign="posneg" - ) - .set_output(transform="pandas") - .fit_transform(X_iris, y_iris) - ) + trans_posneg = ( + CBPM(corr_method=spearmanr, agg_method=np.mean, corr_sign="posneg") + .set_output(transform="pandas") + .fit_transform(X_iris, y_iris) + ) trans_man_pos = X_iris[X_pos].values.mean(axis=1) trans_man_neg = X_iris[X_neg].values.mean(axis=1) @@ -267,28 +284,28 @@ def test_CBPM_set_output_posneg( def test_CBPM_set_output_pos( - X_iris: pd.DataFrame, y_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 + y_iris: pd.DataFrame, ) -> None: - """ + """Test the CBPM transformer for setting pos output. Parameters ---------- X_iris : pd.DataFrame - The iris dataset features + The iris dataset features. y_iris : pd.Series - The iris dataset target + The iris dataset target. 
+ """ X_pos = ["sepal_length", "petal_length", "petal_width"] # I have checked before all are still significant with spearman - trans_pos = (CBPM(corr_method=spearmanr, - agg_method=np.mean, - corr_sign="pos" - ) - .set_output(transform="pandas") - .fit_transform(X_iris, y_iris) - ) + trans_pos = ( + CBPM(corr_method=spearmanr, agg_method=np.mean, corr_sign="pos") + .set_output(transform="pandas") + .fit_transform(X_iris, y_iris) + ) trans_man_pos = X_iris[X_pos].values.mean(axis=1) df_trans_man = pd.DataFrame(trans_man_pos, columns=["positive"]) @@ -296,28 +313,28 @@ def test_CBPM_set_output_pos( def test_CBPM_set_output_neg( - X_iris: pd.DataFrame, y_iris: pd.DataFrame, + X_iris: pd.DataFrame, # noqa: N803 + y_iris: pd.DataFrame, ) -> None: - """ + """Test the CBPM transformer for setting neg output. Parameters ---------- X_iris : pd.DataFrame - The iris dataset features + The iris dataset features. y_iris : pd.Series - The iris dataset target + The iris dataset target. + """ X_neg = ["sepal_width"] # I have checked before all are still significant with spearman - trans_neg = (CBPM(corr_method=spearmanr, - agg_method=np.mean, - corr_sign="neg" - ) - .set_output(transform="pandas") - .fit_transform(X_iris, y_iris) - ) + trans_neg = ( + CBPM(corr_method=spearmanr, agg_method=np.mean, corr_sign="neg") + .set_output(transform="pandas") + .fit_transform(X_iris, y_iris) + ) trans_man_neg = X_iris[X_neg].values.mean(axis=1) df_trans_man = pd.DataFrame(trans_man_neg, columns=["negative"]) diff --git a/julearn/transformers/tests/test_confounds.py b/julearn/transformers/tests/test_confounds.py index 3de05381f..5ec465c8f 100644 --- a/julearn/transformers/tests/test_confounds.py +++ b/julearn/transformers/tests/test_confounds.py @@ -97,7 +97,7 @@ def test_ConfoundRemover__apply_threshold() -> None: ], ) def test_ConfoundRemover_confound_auto_find_conf( - df_X_confounds: pd.DataFrame, + df_X_confounds: pd.DataFrame, # noqa: N803 drop: Optional[List[str]], confounds: 
Optional[List[str]], models_confound_remover: str, @@ -183,7 +183,7 @@ def test_ConfoundRemover_confound_auto_find_conf( def test_confound_set_confounds( model_class: Type[ModelLike], confounds: List[str], - df_X_confounds: pd.DataFrame, + df_X_confounds: pd.DataFrame, # noqa: N803 ): """Test confound removal confounds parameter setting. @@ -207,7 +207,7 @@ def test_confound_set_confounds( df_cofound_removed = confound_remover.fit_transform(df_X_confounds) np.random.seed(42) - conf_as_feat = confounds if type(confounds) is list else [confounds] + conf_as_feat = confounds if isinstance(confounds, list) else [confounds] df_confounds = df_X_confounds.loc[:, conf_as_feat] # type: ignore confound_regressions = [ model_class().fit( @@ -246,7 +246,7 @@ def test_confound_set_confounds( assert_frame_equal(df_cofound_removed, df_confound_removed_manual) -def test_return_confound(df_X_confounds: pd.DataFrame) -> None: +def test_return_confound(df_X_confounds: pd.DataFrame) -> None: # noqa: N803 """Test that the confound is returned if keep_confounds is True. Parameters @@ -264,7 +264,7 @@ def test_return_confound(df_X_confounds: pd.DataFrame) -> None: def test_no_confound_found() -> None: """Test that an error is raised if no confound is found.""" - _X = pd.DataFrame(dict(a=np.arange(10))) + _X = pd.DataFrame({"a": np.arange(10)}) remover = ConfoundRemover() with pytest.raises(ValueError, match="No confound was found"): remover.fit_transform(_X) diff --git a/julearn/transformers/tests/test_jucolumntransformers.py b/julearn/transformers/tests/test_jucolumntransformers.py index 12debf6b2..4c4ca83b9 100644 --- a/julearn/transformers/tests/test_jucolumntransformers.py +++ b/julearn/transformers/tests/test_jucolumntransformers.py @@ -33,6 +33,7 @@ def df_X_confounds() -> pd.DataFrame: ------- pd.DataFrame A dataframe with confounds. 
+ """ X = pd.DataFrame( { @@ -59,11 +60,11 @@ def df_X_confounds() -> pd.DataFrame: ("scaler_power", PowerTransformer, {}), ], ) -def test_jucolumntransformer( +def test_JuColumnTransformer( name: str, klass: Type[EstimatorLike], params: Dict, - df_X_confounds: pd.DataFrame, + df_X_confounds: pd.DataFrame, # noqa: N803 ): """Test JuColumnTransformer class.""" @@ -87,16 +88,14 @@ def test_jucolumntransformer( X_transformed, columns=transformer.get_feature_names_out() ) # Check that the columns are as expected - assert set(df_X_transformed.columns) == set( - [ - "a__:type:__continuous", - "b__:type:__continuous", - "c__:type:__confound", - "d__:type:__confound", - "e__:type:__categorical", - "f__:type:__categorical", - ] - ) + assert set(df_X_transformed.columns) == { + "a__:type:__continuous", + "b__:type:__continuous", + "c__:type:__confound", + "d__:type:__confound", + "e__:type:__categorical", + "f__:type:__categorical", + } kept = [ "c__:type:__confound", @@ -113,18 +112,21 @@ def test_jucolumntransformer( assert_array_equal(df_X_transformed[trans].values, manual) -def test_row_select(): - X = pd.DataFrame({ - "a__:type:__continuous": [0, 0, 1, 1], - "b__:type:__healthy": [1, 1, 0, 0], - }) +def test_JuColumnTransformer_row_select(): + """Test row selection for JuColumnTransformer.""" + X = pd.DataFrame( + { + "a__:type:__continuous": [0, 0, 1, 1], + "b__:type:__healthy": [1, 1, 0, 0], + } + ) transformer_healthy = JuColumnTransformer( name="zscore", transformer=StandardScaler(), apply_to="continuous", row_select_col_type=["healthy"], - row_select_vals=1 + row_select_vals=1, ) transformer_unhealthy = JuColumnTransformer( @@ -132,7 +134,7 @@ def test_row_select(): transformer=StandardScaler(), apply_to="continuous", row_select_col_type=["healthy"], - row_select_vals=0 + row_select_vals=0, ) transformer_both = JuColumnTransformer( @@ -140,30 +142,32 @@ def test_row_select(): transformer=StandardScaler(), apply_to="continuous", row_select_col_type=["healthy"], - 
row_select_vals=[0, 1] + row_select_vals=[0, 1], ) mean_healthy = ( transformer_healthy.fit(X) - .column_transformer_.transformers_[0][1].mean_ + .column_transformer_.transformers_[0][1] + .mean_ ) mean_unhealthy = ( transformer_unhealthy.fit(X) - .column_transformer_.transformers_[0][1].mean_ + .column_transformer_.transformers_[0][1] + .mean_ ) - mean_both = (transformer_both.fit(X) - .column_transformer_.transformers_[0][1].mean_ - ) + mean_both = ( + transformer_both.fit(X).column_transformer_.transformers_[0][1].mean_ + ) assert_almost_equal( - transformer_healthy._select_rows(X, y=None)["X"].index.values, - [0, 1]) + transformer_healthy._select_rows(X, y=None)["X"].index.values, [0, 1] + ) assert_almost_equal( - transformer_unhealthy._select_rows(X, None)["X"].index.values, - [2, 3]) + transformer_unhealthy._select_rows(X, None)["X"].index.values, [2, 3] + ) assert_almost_equal( - transformer_both._select_rows(X, None)["X"].index.values, - [0, 1, 2, 3]) + transformer_both._select_rows(X, None)["X"].index.values, [0, 1, 2, 3] + ) assert_almost_equal(mean_unhealthy, [1]) assert_almost_equal(mean_healthy, [0]) diff --git a/julearn/utils/_cv.py b/julearn/utils/_cv.py index 686ae14a6..880fc7a86 100644 --- a/julearn/utils/_cv.py +++ b/julearn/utils/_cv.py @@ -5,11 +5,11 @@ # License: AGPL -import numpy as np import hashlib import inspect import json +import numpy as np from sklearn.model_selection import ( GroupKFold, KFold, @@ -17,13 +17,14 @@ LeaveOneOut, RepeatedKFold, RepeatedStratifiedKFold, - StratifiedKFold, StratifiedGroupKFold, + StratifiedKFold, ) from sklearn.model_selection._split import PredefinedSplit, _CVIterableWrapper + from ..model_selection import ( - RepeatedContinuousStratifiedGroupKFold, ContinuousStratifiedGroupKFold, + RepeatedContinuousStratifiedGroupKFold, ) @@ -39,7 +40,7 @@ def _recurse_to_list(a): def _compute_cvmdsum(cv): """Compute the sum of the CV generator.""" - params = {k: v for k, v in vars(cv).items()} + params = 
dict(vars(cv).items()) params["class"] = cv.__class__.__name__ out = None diff --git a/julearn/utils/checks.py b/julearn/utils/checks.py index 5ccaa9338..0bdffa896 100644 --- a/julearn/utils/checks.py +++ b/julearn/utils/checks.py @@ -2,8 +2,8 @@ # Author: Federico Raimondo # License: BSD 3 clause -import pandas as pd import numpy as np +import pandas as pd from .logging import raise_error diff --git a/julearn/utils/logging.py b/julearn/utils/logging.py index 25a58ee58..5d0faffd1 100644 --- a/julearn/utils/logging.py +++ b/julearn/utils/logging.py @@ -106,12 +106,12 @@ def log_versions() -> None: logger.info("========================") -_logging_types = dict( - DEBUG=logging.DEBUG, - INFO=logging.INFO, - WARNING=logging.WARNING, - ERROR=logging.ERROR, -) +_logging_types = { + "DEBUG": logging.DEBUG, + "INFO": logging.INFO, + "WARNING": logging.WARNING, + "ERROR": logging.ERROR, +} def configure_logging( @@ -156,9 +156,10 @@ def configure_logging( fname = Path(fname) if fname.exists() and overwrite is None: warn( - f"File ({str(fname.absolute())}) exists. " + f"File ({fname.absolute()!s}) exists. " "Messages will be appended. Use overwrite=True to " - "overwrite or overwrite=False to avoid this message." + "overwrite or overwrite=False to avoid this message.", + stacklevel=2, ) overwrite = False mode = "w" if overwrite else "a" @@ -235,12 +236,11 @@ def warn_with_log( """ logger.warning(msg) - warn(msg, category=category) + warn(msg, category=category, stacklevel=2) class WrapStdOut(logging.StreamHandler): - """ - Dynamically wrap to sys.stdout. + """Dynamically wrap to sys.stdout. This makes packages that monkey-patch sys.stdout (e.g.doctest, sphinx-gallery) work properly. 
diff --git a/julearn/utils/testing.py b/julearn/utils/testing.py index b164532a4..223399f59 100644 --- a/julearn/utils/testing.py +++ b/julearn/utils/testing.py @@ -54,8 +54,9 @@ from julearn.utils.typing import DataLike, EstimatorLike -def compare_models( # pragma: no cover - clf1: EstimatorLike, clf2: EstimatorLike +def compare_models( # noqa: C901, pragma: no cover + clf1: EstimatorLike, + clf2: EstimatorLike, ) -> None: """Compare two models. @@ -174,14 +175,14 @@ def compare_models( # pragma: no cover def do_scoring_test( - X: List[str], + X: List[str], # noqa: N803 y: str, data: pd.DataFrame, api_params: Dict[str, Any], sklearn_model: EstimatorLike, scorers: List[str], groups: Optional[str] = None, - X_types: Optional[Dict[str, List[str]]] = None, + X_types: Optional[Dict[str, List[str]]] = None, # noqa: N803 cv: int = 5, sk_y: Optional[np.ndarray] = None, decimal: int = 5, @@ -217,7 +218,7 @@ def do_scoring_test( if sk_y is None: sk_y = data[y].values # type: ignore - params_dict = {k: v for k, v in api_params.items()} + params_dict = dict(api_params.items()) if isinstance(cv, int): jucv = KFold(n_splits=cv, random_state=42, shuffle=True) sk_cv = KFold(n_splits=cv, random_state=42, shuffle=True) @@ -274,7 +275,7 @@ def __init__(self): pass def fit( - self, X: DataLike, y: Optional[DataLike] = None + self, X: DataLike, y: Optional[DataLike] = None # noqa: N803 ) -> "PassThroughTransformer": """Fit the transformer. @@ -293,7 +294,7 @@ def fit( """ return self - def transform(self, X: DataLike) -> DataLike: + def transform(self, X: DataLike) -> DataLike: # noqa: N803 """Transform the data. Parameters @@ -316,7 +317,9 @@ def __init__(self): super().__init__() def transform( - self, X: Optional[DataLike] = None, y: Optional[DataLike] = None + self, + X: Optional[DataLike] = None, # noqa: N803 + y: Optional[DataLike] = None, ) -> Optional[DataLike]: """Transform the data. 
@@ -335,7 +338,9 @@ def transform( return y def fit_transform( - self, X: Optional[DataLike] = None, y: Optional[DataLike] = None + self, + X: Optional[DataLike] = None, # noqa: N803 + y: Optional[DataLike] = None, ) -> Optional[DataLike]: """Fit the model and transform the data. diff --git a/julearn/utils/tests/test_logging.py b/julearn/utils/tests/test_logging.py index b9d8ea413..c2453cc69 100644 --- a/julearn/utils/tests/test_logging.py +++ b/julearn/utils/tests/test_logging.py @@ -130,10 +130,10 @@ def test_log() -> None: def test_lib_logging() -> None: """Test logging versions.""" - import numpy as np # noqa - import pandas # noqa - import scipy # noqa - import sklearn # noqa + import numpy as np # noqa: F401 + import pandas # noqa: F401 + import scipy # noqa: F401 + import sklearn # noqa: F401 with tempfile.TemporaryDirectory() as tmp: tmpdir = Path(tmp) diff --git a/julearn/utils/tests/test_version.py b/julearn/utils/tests/test_version.py index e0740f1c2..33552080d 100644 --- a/julearn/utils/tests/test_version.py +++ b/julearn/utils/tests/test_version.py @@ -81,7 +81,7 @@ def test_joblib_args_lower_1( m.setattr( sklearn.utils.fixes, # type: ignore[attr-defined] "_joblib_parallel_args", - lambda prefer: dict(backend="threads"), + lambda prefer: {"backend": "threads"}, raising=False, ) kwargs = _joblib_parallel_args(prefer="threads") diff --git a/julearn/utils/typing.py b/julearn/utils/typing.py index 52da1c23d..2ef6baa08 100644 --- a/julearn/utils/typing.py +++ b/julearn/utils/typing.py @@ -1,6 +1,18 @@ """Protocols for type checking.""" -from typing import Any, Dict, Optional, Protocol, Union, runtime_checkable +# Authors: Federico Raimondo +# Sami Hamdan +# License: AGPL + +from typing import ( + Any, + Dict, + List, + Optional, + Protocol, + Union, + runtime_checkable, +) import numpy as np import pandas as pd @@ -20,37 +32,168 @@ @runtime_checkable class EstimatorLikeFit1(Protocol): - def fit(self, X, y, **kwargs: Any) -> "EstimatorLikeFit1": + """Class 
for estimator-like fit 1.""" + + def fit( + self, X: List[str], y: str, **kwargs: Any # noqa: N803 + ) -> "EstimatorLikeFit1": + """Fit estimator. + + Parameters + ---------- + X : list of str + The features to use. + y : str + The target to use. + **kwargs : dict + Extra keyword arguments. + + Returns + ------- + EstimatorLikeFit1 + The fitted estimator. + + """ return self - def get_params(self, deep=True) -> Dict: + def get_params(self, deep: bool = True) -> Dict: + """Get params. + + Parameters + ---------- + deep : bool, optional + Whether to get in a deep fashion (default True). + + Returns + ------- + dict + The parameters. + + """ return {} - def set_params(self, **params) -> "EstimatorLikeFit1": + def set_params(self, **params: Any) -> "EstimatorLikeFit1": + """Set params. + + Parameters + ---------- + **params : dict + The parameters to set. + + Returns + ------- + EstimatorLikeFit1 + Estimator with set parameters. + + """ return self @runtime_checkable class EstimatorLikeFit2(Protocol): - def fit(self, X, y) -> "EstimatorLikeFit2": + """Class for estimator-like fit 2.""" + + def fit(self, X: List[str], y: str) -> "EstimatorLikeFit2": # noqa: N803 + """Fit estimator. + + Parameters + ---------- + X : list of str + The features to use. + y : str + The target to use. + + Returns + ------- + EstimatorLikeFit2 + The fitted estimator. + + """ return self - def get_params(self, deep=True) -> Dict: + def get_params(self, deep: bool = True) -> Dict: + """Get params. + + Parameters + ---------- + deep : bool, optional + Whether to get in a deep fashion (default True). + + Returns + ------- + dict + The parameters. + + """ return {} - def set_params(self, **params) -> "EstimatorLikeFit2": + def set_params(self, **params: Any) -> "EstimatorLikeFit2": + """Set params. + + Parameters + ---------- + **params : dict + The parameters to set. + + Returns + ------- + EstimatorLikeFit2 + Estimator with set parameters. 
+ + """ return self @runtime_checkable class EstimatorLikeFity(Protocol): - def fit(self, y) -> "EstimatorLikeFity": + """Class for estimator-like fit y.""" + + def fit(self, y: str) -> "EstimatorLikeFity": + """Fit estimator. + + Parameters + ---------- + y : str + The target to use. + + Returns + ------- + EstimatorLikeFity + The fitted estimator. + + """ return self - def get_params(self, deep=True) -> Dict: + def get_params(self, deep: bool = True) -> Dict: + """Get params. + + Parameters + ---------- + deep : bool, optional + Whether to get in a deep fashion (default True). + + Returns + ------- + dict + The parameters. + + """ return {} - def set_params(self, **params) -> "EstimatorLikeFity": + def set_params(self, **params: Any) -> "EstimatorLikeFity": + """Set params. + + Parameters + ---------- + **params : dict + The parameters to set. + + Returns + ------- + EstimatorLikeFity + Estimator with set parameters. + + """ return self @@ -59,42 +202,162 @@ def set_params(self, **params) -> "EstimatorLikeFity": @runtime_checkable class TransformerLike(EstimatorLikeFit1, Protocol): - def fit(self, X, y=None, **fit_params): + """Class for transformer-like.""" + + def fit( + self, + X: List[str], # noqa: N803 + y: Optional[str] = None, + **fit_params: Any, + ) -> None: + """Fit transformer. + + Parameters + ---------- + X : list of str + The features to use. + y : str, optional + The target to use (default None). + **fit_params : dict + Fit parameters. + + """ pass - def transform(self, X: DataLike) -> DataLike: + def transform(self, X: DataLike) -> DataLike: # noqa: N803 + """Transform. + + Parameters + ---------- + X : DataLike + The features to use. + + Returns + ------- + DataLike + The transformed data. + + """ return X def fit_transform( - self, X: DataLike, y: Optional[DataLike] = None + self, X: DataLike, y: Optional[DataLike] = None # noqa: N803 ) -> DataLike: + """Fit and transform. + + Parameters + ---------- + X : DataLike + The features to use. 
+ y : DataLike, optional + The target to use (default None). + + Returns + ------- + DataLike + The fit and transformed object. + + """ return X @runtime_checkable class ModelLike(EstimatorLikeFit1, Protocol): + """Class for model-like.""" + classes_: np.ndarray - def predict(self, X) -> DataLike: + def predict(self, X: pd.DataFrame) -> DataLike: # noqa: N803 + """Predict using the model. + + Parameters + ---------- + X : pd.DataFrame + The data to predict on. + + Returns + ------- + DataLike + The predictions. + + """ return np.zeros(1) - def score(self, X, y, sample_weight=None) -> float: + def score( + self, + X: pd.DataFrame, # noqa: N803 + y: DataLike, + sample_weight: Optional[DataLike] = None, + ) -> float: + """Score the model. + + Parameters + ---------- + X : pd.DataFrame + The data to predict on. + y : DataLike + The true target values. + sample_weight : DataLike, optional + Sample weights to use when computing the score (default None). + + Returns + ------- + float + The score. + + """ return 0.0 @runtime_checkable class JuEstimatorLike(EstimatorLikeFit1, Protocol): + """Class for juestimator-like.""" + def get_needed_types(self) -> ColumnTypes: + """Get the column types needed by the estimator. + + Returns + ------- + ColumnTypes + The column types needed by the estimator. + + """ return ColumnTypes("placeholder") def get_apply_to(self) -> ColumnTypes: + """Get the column types the estimator applies to. + + Returns + ------- + ColumnTypes + The column types the estimator applies to. + + """ return ColumnTypes("placeholder") @runtime_checkable class JuModelLike(ModelLike, Protocol): + """Class for jumodel-like.""" + def get_needed_types(self) -> ColumnTypes: + """Get the column types needed by the estimator. + + Returns + ------- + ColumnTypes + The column types needed by the estimator. + + """ return ColumnTypes("placeholder") def get_apply_to(self) -> ColumnTypes: + """Get the column types the estimator applies to. 
+ + Returns + ------- + ColumnTypes + The column types the estimator applies to. + + """ return ColumnTypes("placeholder") diff --git a/julearn/viz/_scores.py b/julearn/viz/_scores.py index 164b39f2a..1e8c12c01 100644 --- a/julearn/viz/_scores.py +++ b/julearn/viz/_scores.py @@ -8,27 +8,26 @@ import param from bokeh.models import ( ColumnDataSource, - FactorRange, - Whisker, DataTable, + FactorRange, + Label, ScientificFormatter, TableColumn, - Label, + Whisker, ) from bokeh.palettes import Colorblind from bokeh.plotting import figure from bokeh.transform import factor_cmap, jitter -from ..utils.checks import check_scores_df from ..stats import corrected_ttest +from ..utils.checks import check_scores_df SCORE_PLOT_TOOLS = "hover,save,pan,box_zoom,reset,wheel_zoom" class _JulearnScoresViewer(param.Parameterized): - """ - A class to visualize the scores for model comparison. + """A class to visualize the scores for model comparison. Parameters ---------- @@ -69,6 +68,7 @@ def set_data(self, scores): scores : list of pd.DataFrame DataFrames containing the scores of the models. 
The DataFrames must be the output of `run_cross_validation` + Returns ------- self : _JulearnScoresViewer @@ -145,7 +145,7 @@ def plot_scores(self): "x", palette=Colorblind[3], factors=self.sets, start=1, end=3 ) else: - x = [m for m in self.models] + x = list(self.models) t_df = t_df[t_df["set"] == self.sets[0]] x_values = list(t_df["model"].values) if self.sets[0] == "test": @@ -215,7 +215,7 @@ def plot_scores(self): upper = g.score.quantile(ci_upper) lower = g.score.quantile(ci_lower) source = ColumnDataSource( - data=dict(base=upper.index.values, upper=upper, lower=lower) + data={"base": upper.index.values, "upper": upper, "lower": lower} ) error = Whisker( base="base", @@ -232,11 +232,11 @@ def plot_scores(self): # Add whiskers for mean mean_score = g.score.mean() source = ColumnDataSource( - data=dict( - base=mean_score.index.values, - upper=mean_score, - lower=mean_score, - ) + data={ + "base": mean_score.index.values, + "upper": mean_score, + "lower": mean_score, + } ) mean_bar = Whisker( base="base", @@ -255,7 +255,7 @@ def plot_scores(self): if len(self.sets) > 1: grp_pad = p.x_range.group_padding span_x = [ - tuple(list(t_x) + [1 + (grp_pad - 1.0) / 2.0]) + (*list(t_x), 1 + (grp_pad - 1.0) / 2.0) for t_x in x[1 : -1 : len(self.sets)] ] else: diff --git a/pyproject.toml b/pyproject.toml index d38c5c0e8..fcaee378f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,8 +13,8 @@ readme = "README.md" requires-python = ">=3.8" license = {text = "AGPL-3.0-only"} authors = [ - {name = "Fede Raimondo", email = "f.raimondo@fz-juelich.de"}, - {name = "Sami Hamdan", email = "s.hamdan@fz-juelich.de"}, + { name = "Fede Raimondo", email = "f.raimondo@fz-juelich.de" }, + { name = "Sami Hamdan", email = "s.hamdan@fz-juelich.de" }, ] maintainers = [ {name = "Sami Hamdan", email = "s.hamdan@fz-juelich.de"}, @@ -81,16 +81,95 @@ write_to = "julearn/_version.py" [tool.black] line-length = 79 -target-version = ["py38"] +target-version = ["py38", "py39", "py310", "py311"] 
[tool.codespell] -skip = "*/auto_examples/*,*.html,.git/,*.pyc,*/_build/*" +skip = "*/auto_examples/*,*.html,.git/,*.pyc,*/_build/*,*/api/generated/*.examples" count = "" quiet-level = 3 ignore-words = "ignore_words.txt" interactive = 0 builtin = "clear,rare,informal,names,usage,code" +[tool.ruff] +line-length = 79 +select = [ + # flake8-bugbear + "B", + # flake8-blind-except + "BLE", + # flake8-comprehensions + "C4", + # mccabe + "C90", + # pydocstyle + "D", + # pycodestyle errors + "E", + # pyflakes + "F", + # isort + "I", + # pep8-naming + "N", + # pygrep-hooks + "PGH", + # ruff + "RUF", + # flake8-type-checking + "TCH", + # pyupgrade + "UP", + # pycodestyle warnings + "W", + # flake8-2020 + "YTT", +] +extend-exclude = [ + "__init__.py", + "docs", + "examples", +] +extend-ignore = [ + # Use of `functools.lru_cache` or `functools.cache` on methods can lead to + # memory leaks. The cache may retain instance references, preventing garbage + # collection. + "B019", + # abstract class with no abstract methods + "B024", + "D202", + # missing docstring in __init__, incompatible with numpydoc + "D107", + # use r""" if any backslashes in a docstring + "D301", + # class names should use CapWords convention + "N801", + # function name should be lowercase + "N802", + # variable in function should be lowercase + "N806", + # use specific rule codes when ignoring type issues + "PGH003", +] + +[tool.ruff.isort] +lines-after-imports = 2 +known-first-party = ["julearn"] +known-third-party =[ + "numpy", + "pandas", + "sklearn", + "statsmodels", + "bokeh", + "panel", + "param", + "deslib", + "pytest", +] + +[tool.ruff.mccabe] +max-complexity = 20 + [tool.towncrier] directory = "docs/changes/newsfragments" filename = "docs/whats_new.rst" diff --git a/tox.ini b/tox.ini index d7e0207f5..7e2607b94 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = isort, black, flake8, test, coverage, codespell, py3{8,9,10,11} +envlist = ruff, black, test, coverage, codespell, 
py3{8,9,10,11} isolated_build = true [gh-actions] @@ -11,9 +11,6 @@ python = [testenv] skip_install = false -# Required for git-annex -passenv = - HOME deps = pytest pytest-lazy-fixture @@ -25,12 +22,12 @@ deps = commands = pytest -[testenv:isort] +[testenv:ruff] skip_install = true deps = - isort + ruff commands = - isort --check-only --diff {toxinidir}/julearn {toxinidir}/setup.py + ruff check {toxinidir} [testenv:black] skip_install = true @@ -39,19 +36,8 @@ deps = commands = black --check --diff {toxinidir}/julearn {toxinidir}/setup.py -[testenv:flake8] -skip_install = true -deps = - flake8 - # flake8-docstrings - # flake8-bugbear -commands = - flake8 {toxinidir}/julearn {toxinidir}/setup.py - [testenv:test] skip_install = false -passenv = - HOME deps = pytest pytest-lazy-fixture @@ -89,54 +75,6 @@ commands = # Tool configs # ################ -[isort] -skip = - __init__.py -profile = black -line_length = 79 -lines_after_imports = 2 -known_first_party = julearn -known_third_party = - numpy - pandas - sklearn - pytest - bokeh - -[flake8] -exclude = - __init__.py - julearn/utils/typing.py -max-line-length = 79 -extend-ignore = - ; abstract class with no abstract methods - B024 - D202 - ; missing docstring in __init__, incompatible with numpydoc - D107 - ; use r""" if any backslashes in a docstring - D301 - ; whitespace after ‘(’ - E201 - ; whitespace before ‘)’ - E202 - ; whitespace before ‘,’, ‘;’, or ‘:’ - E203 - ; multiple spaces before operator - E221 - ; multiple spaces after operator - E222 - ; multiple spaces after ‘,’ - E241 - I100 - I101 - I201 - N806 - ; line break before binary operator - W503 - ; line break after binary operator - W504 - [coverage:paths] source = julearn