diff --git a/docs/changes/newsfragments/260.enh b/docs/changes/newsfragments/260.enh new file mode 100644 index 000000000..61395e6b2 --- /dev/null +++ b/docs/changes/newsfragments/260.enh @@ -0,0 +1 @@ +Add :class:`~skopt.BayesSearchCV` to the list of available searchers as 'bayes' by `Fede Raimondo`_ diff --git a/docs/changes/newsfragments/260.misc b/docs/changes/newsfragments/260.misc new file mode 100644 index 000000000..2fbb5b4d1 --- /dev/null +++ b/docs/changes/newsfragments/260.misc @@ -0,0 +1 @@ +Add ``all`` as optional dependencies to install all functional dependencies by `Fede Raimondo`_ \ No newline at end of file diff --git a/docs/conf.py b/docs/conf.py index 8ac7c076e..da1a0edbb 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -160,6 +160,7 @@ # "sqlalchemy": ("https://docs.sqlalchemy.org/en/20/", None), "joblib": ("https://joblib.readthedocs.io/en/latest/", None), "scipy": ("https://docs.scipy.org/doc/scipy/", None), + "skopt": ("https://scikit-optimize.readthedocs.io/en/latest", None), } diff --git a/docs/getting_started.rst b/docs/getting_started.rst index 6f4ae04f3..f4f3ac6ef 100644 --- a/docs/getting_started.rst +++ b/docs/getting_started.rst @@ -86,4 +86,8 @@ The following optional dependencies are available: * ``viz``: Visualization tools for ``julearn``. This includes the :mod:`.viz` module. -* ``deslib``: The :mod:`.dynamic` module requires the `deslib`_ package. +* ``deslib``: The :mod:`.dynamic` module requires the `deslib`_ package. This + module is not compatible with newer Python versions and it is unmaintained. +* ``skopt``: Using the ``"bayes"`` searcher (:class:`~skopt.BayesSearchCV`) + requires the `scikit-optimize`_ package. +* ``all``: Install all optional functional dependencies (except ``deslib``). diff --git a/docs/links.inc b/docs/links.inc index 1e6020fba..c4be5e9c0 100644 --- a/docs/links.inc +++ b/docs/links.inc @@ -40,3 +40,4 @@ .. _`DESlib`: https://github.com/scikit-learn-contrib/DESlib +.. _`scikit-optimize`: https://scikit-optimize.readthedocs.io/en/stable/ diff --git a/examples/03_complex_models/run_hyperparameter_tuning_bayessearch.py b/examples/03_complex_models/run_hyperparameter_tuning_bayessearch.py new file mode 100644 index 000000000..05cf4cdad --- /dev/null +++ b/examples/03_complex_models/run_hyperparameter_tuning_bayessearch.py @@ -0,0 +1,95 @@ +""" +Tuning Hyperparameters using Bayesian Search +============================================ + +This example uses the ``fmri`` dataset, performs simple binary classification +using a Support Vector Machine classifier and analyzes the model. + +References +---------- + + Waskom, M.L., Frank, M.C., Wagner, A.D. (2016). Adaptive engagement of + cognitive control in context-dependent decision-making. Cerebral Cortex. + +.. include:: ../../links.inc +""" + +# Authors: Federico Raimondo +# License: AGPL + +import numpy as np +from seaborn import load_dataset + +from julearn import run_cross_validation +from julearn.utils import configure_logging, logger +from julearn.pipeline import PipelineCreator + + +############################################################################### +# Set the logging level to info to see extra information. +configure_logging(level="INFO") + +############################################################################### +# Set the random seed to always have the same example. +np.random.seed(42) + +############################################################################### +# Load the dataset. 
+df_fmri = load_dataset("fmri")
+df_fmri.head()
+
+###############################################################################
+# Set the dataframe in the right format.
+df_fmri = df_fmri.pivot(
+    index=["subject", "timepoint", "event"], columns="region", values="signal"
+)
+
+df_fmri = df_fmri.reset_index()
+df_fmri.head()
+
+###############################################################################
+# Following the hyperparameter tuning example, we will now use a Bayesian
+# search to find the best hyperparameters for the SVM model.
+X = ["frontal", "parietal"]
+y = "event"
+
+creator1 = PipelineCreator(problem_type="classification")
+creator1.add("zscore")
+creator1.add(
+    "svm",
+    kernel=["linear"],
+    C=(1e-6, 1e3, "log-uniform"),
+)
+
+creator2 = PipelineCreator(problem_type="classification")
+creator2.add("zscore")
+creator2.add(
+    "svm",
+    kernel=["rbf"],
+    C=(1e-6, 1e3, "log-uniform"),
+    gamma=(1e-6, 1e1, "log-uniform"),
+)
+
+search_params = {
+    "kind": "bayes",
+    "cv": 2,  # to speed up the example
+    "n_iter": 10,  # 10 iterations of Bayesian search to speed up the example
+}
+
+
+scores, estimator = run_cross_validation(
+    X=X,
+    y=y,
+    data=df_fmri,
+    model=[creator1, creator2],
+    cv=2,  # to speed up the example
+    search_params=search_params,
+    return_estimator="final",
+)
+
+print(scores["test_score"].mean())
+
+
+###############################################################################
+# It seems that we might have found a better model, but which one is it?
+print(estimator.best_params_)
diff --git a/examples/99_docs/run_hyperparameters_docs.py b/examples/99_docs/run_hyperparameters_docs.py
index 086a800aa..4b7372abb 100644
--- a/examples/99_docs/run_hyperparameters_docs.py
+++ b/examples/99_docs/run_hyperparameters_docs.py
@@ -243,22 +243,132 @@
 # tries to find the best combination of values for the hyperparameters using
 # cross-validation.
 #
-# By default, ``julearn`` uses a :class:`~sklearn.model_selection.GridSearchCV`.
-# This searcher is very simple. First, it construct the "grid" of
-# hyperparameters to try. As we see above, we have 3 hyperparameters to tune.
-# So it constructs a 3-dimentional grid with all the possible combinations of
-# the hyperparameters values. The second step is to perform cross-validation
-# on each of the possible combinations of hyperparameters values.
+# By default, ``julearn`` uses a
+# :class:`~sklearn.model_selection.GridSearchCV`.
+# This searcher, specified as ``"grid"``, is very simple. First, it constructs
+# the *grid* of hyperparameters to try. As we see above, we have 3
+# hyperparameters to tune. So it constructs a 3-dimensional grid with all the
+# possible combinations of the hyperparameter values. The second step is to
+# perform cross-validation on each of the possible combinations of
+# hyperparameter values.
 #
-# Another searcher that ``julearn`` provides is the
-# :class:`~sklearn.model_selection.RandomizedSearchCV`. This searcher is
-# similar to the :class:`~sklearn.model_selection.GridSearchCV`, but instead
-# of trying all the possible combinations of hyperparameters values, it tries
+# Other searchers that ``julearn`` provides are the
+# :class:`~sklearn.model_selection.RandomizedSearchCV` and
+# :class:`~skopt.BayesSearchCV`.
+#
+# The randomized searcher
+# (:class:`~sklearn.model_selection.RandomizedSearchCV`) is similar to the
+# :class:`~sklearn.model_selection.GridSearchCV`, but instead
+# of trying all the possible combinations of hyperparameter values, it tries
 # a random subset of them.
This is useful when we have a lot of hyperparameters -# to tune, since it can be very time consuming to try all the possible, as well -# as continuous parameters that can be sampled out of a distribution. For -# more information, see the +# to tune, since it can be very time consuming to try all the possible +# combinations, as well as continuous parameters that can be sampled out of a +# distribution. For more information, see the # :class:`~sklearn.model_selection.RandomizedSearchCV` documentation. +# +# The Bayesian searcher (:class:`~skopt.BayesSearchCV`) is a bit more +# complex. It uses Bayesian optimization to find the best hyperparameter set. +# As with the randomized search, it is useful when we have many +# hyperparameters to tune, and we don't want to try all the possible +# combinations due to computational constraints. For more information, see the +# :class:`~skopt.BayesSearchCV` documentation, including how to specify +# the prior distributions of the hyperparameters. +# +# We can specify the kind of searcher and its parametrization, by setting the +# ``search_params`` parameter in the :func:`.run_cross_validation` function. +# For example, we can use the +# :class:`~sklearn.model_selection.RandomizedSearchCV` searcher with +# 10 iterations of random search. + +search_params = { + "kind": "random", + "n_iter": 10, +} + +scores_tuned, model_tuned = run_cross_validation( + X=X, + y=y, + data=df, + X_types=X_types, + model=creator, + return_estimator="all", + search_params=search_params, +) + +print( + "Scores with best hyperparameter using 10 iterations of " + f"randomized search: {scores_tuned['test_score'].mean()}" +) +pprint(model_tuned.best_params_) + +############################################################################### +# We can now see that the best hyperparameter might be different from the grid +# search. This is because it tried only 10 combinations and not the whole grid. +# Furthermore, the :class:`~sklearn.model_selection.RandomizedSearchCV` +# searcher can sample hyperparameters from distributions, which can be useful +# when we have continuous hyperparameters. +# Let's set both ``C`` and ``gamma`` to be sampled from log-uniform +# distributions. We can do this by setting the hyperparameter values as a +# tuple with the following format: ``(low, high, distribution)``. The +# distribution can be either ``"log-uniform"`` or ``"uniform"``. + +creator = PipelineCreator(problem_type="classification") +creator.add("zscore") +creator.add("select_k", k=[2, 3, 4]) +creator.add( + "svm", + C=(0.01, 10, "log-uniform"), + gamma=(1e-3, 1e-1, "log-uniform"), +) + +print(creator) + +scores_tuned, model_tuned = run_cross_validation( + X=X, + y=y, + data=df, + X_types=X_types, + model=creator, + return_estimator="all", + search_params=search_params, +) + +print( + "Scores with best hyperparameter using 10 iterations of " + f"randomized search: {scores_tuned['test_score'].mean()}" +) +pprint(model_tuned.best_params_) + + +############################################################################### +# We can also control the number of cross-validation folds used by the searcher +# by setting the ``cv`` parameter in the ``search_params`` dictionary. For +# example, we can use a bayesian search with 3 folds. Fortunately, the +# :class:`~skopt.BayesSearchCV` searcher also accepts distributions for the +# hyperparameters. 
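+#
+# As a side note, the prior of each hyperparameter can also be spelled out
+# explicitly with ``skopt.space`` objects instead of the
+# ``(low, high, distribution)`` tuple shorthand. The snippet below is only a
+# minimal sketch: it assumes ``scikit-optimize`` is installed, and the
+# ``creator_skopt`` name is purely illustrative and not reused afterwards.
+# The Bayesian search with 3 folds then follows, using the creator defined
+# above.
+from skopt.space import Real
+
+# Illustrative only: this creator is not used by the searches below.
+creator_skopt = PipelineCreator(problem_type="classification")
+creator_skopt.add("zscore")
+creator_skopt.add("select_k", k=[2, 3, 4])
+creator_skopt.add(
+    "svm",
+    C=Real(0.01, 10, prior="log-uniform"),
+    gamma=Real(1e-3, 1e-1, prior="log-uniform"),
+)
+
+print(creator_skopt)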
+ +search_params = { + "kind": "bayes", + "n_iter": 10, + "cv": 3, +} + +scores_tuned, model_tuned = run_cross_validation( + X=X, + y=y, + data=df, + X_types=X_types, + model=creator, + return_estimator="all", + search_params=search_params, +) + +print( + "Scores with best hyperparameter using 10 iterations of " + f"bayesian search and 3-fold CV: {scores_tuned['test_score'].mean()}" +) +pprint(model_tuned.best_params_) + ############################################################################### # diff --git a/julearn/api.py b/julearn/api.py index 0ffb786d5..b89b42ba7 100644 --- a/julearn/api.py +++ b/julearn/api.py @@ -4,12 +4,16 @@ # Sami Hamdan # License: AGPL -from typing import Dict, List, Optional, Union +from typing import Dict, Iterable, List, Optional, Union import numpy as np import pandas as pd from sklearn.base import BaseEstimator -from sklearn.model_selection import check_cv, cross_validate +from sklearn.model_selection import ( + BaseCrossValidator, + check_cv, + cross_validate, +) from sklearn.model_selection._search import BaseSearchCV from sklearn.pipeline import Pipeline @@ -25,14 +29,14 @@ def run_cross_validation( # noqa: C901 X: List[str], # noqa: N803 y: str, model: Union[str, PipelineCreator, BaseEstimator, List[PipelineCreator]], + data: pd.DataFrame, X_types: Optional[Dict] = None, # noqa: N803 - data: Optional[pd.DataFrame] = None, problem_type: Optional[str] = None, preprocess: Union[None, str, List[str]] = None, return_estimator: Optional[str] = None, return_inspector: bool = False, return_train_score: bool = False, - cv: Optional[int] = None, + cv: Optional[Union[int, BaseCrossValidator, Iterable]] = None, groups: Optional[str] = None, scoring: Union[str, List[str], None] = None, pos_labels: Union[str, List[str], None] = None, @@ -54,12 +58,11 @@ def run_cross_validation( # noqa: C901 See :ref:`data_usage` for details. model : str or scikit-learn compatible model. If string, it will use one of the available models. + data : pandas.DataFrame + DataFrame with the data. See :ref:`data_usage` for details. X_types : dict[str, list of str] A dictionary containing keys with column type as a str and the columns of this column type as a list of str. - data : pandas.DataFrame | None - DataFrame with the data (optional). - See :ref:`data_usage` for details. problem_type : str The kind of problem to model. @@ -132,8 +135,8 @@ def run_cross_validation( # noqa: C901 the following keys: * 'kind': The kind of search algorithm to use, e.g.: - 'grid' or 'random'. Can be any valid julearn searcher name or - scikit-learn compatible searcher. + 'grid', 'random' or 'bayes'. Can be any valid julearn searcher name + or scikit-learn compatible searcher. * 'cv': If a searcher is going to be used, the cross-validation splitting strategy to use. Defaults to same CV as for the model evaluation. @@ -196,7 +199,7 @@ def run_cross_validation( # noqa: C901 np.random.seed(seed) # Interpret the input data and prepare it to be used with the library - df_X, y, df_groups, X_types = prepare_input_data( + df_X, df_y, df_groups, X_types = prepare_input_data( X=X, y=y, df=data, @@ -267,7 +270,7 @@ def run_cross_validation( # noqa: C901 if has_target_transformer: if isinstance(pipeline, BaseSearchCV): - last_step = pipeline.estimator[-1] + last_step = pipeline.estimator[-1] # type: ignore else: last_step = pipeline[-1] if not last_step.can_inverse_transform(): @@ -313,7 +316,7 @@ def run_cross_validation( # noqa: C901 "Cannot use model_params with a model object. 
Use either " "a string or a PipelineCreator" ) - pipeline_creator.add(step=model, **t_params) + pipeline_creator.add(step=model, **t_params) # type: ignore # Check for extra model_params that are not used unused_params = [] @@ -346,17 +349,19 @@ def run_cross_validation( # noqa: C901 logger.info("") if problem_type == "classification": - logger.info(f"\tNumber of classes: {len(np.unique(y))}") - logger.info(f"\tTarget type: {y.dtype}") - logger.info(f"\tClass distributions: {y.value_counts()}") + logger.info(f"\tNumber of classes: {len(np.unique(df_y))}") + logger.info(f"\tTarget type: {df_y.dtype}") + logger.info(f"\tClass distributions: {df_y.value_counts()}") elif problem_type == "regression": - logger.info(f"\tTarget type: {y.dtype}") + logger.info(f"\tTarget type: {df_y.dtype}") # Prepare cross validation - cv_outer = check_cv(cv, classifier=problem_type == "classification") + cv_outer = check_cv( + cv, classifier=problem_type == "classification" # type: ignore + ) logger.info(f"Using outer CV scheme {cv_outer}") - check_consistency(y, cv, groups, problem_type) + check_consistency(df_y, cv, groups, problem_type) # type: ignore cv_return_estimator = return_estimator in ["cv", "all"] scoring = check_scoring(pipeline, scoring, wrap_score=wrap_score) @@ -369,14 +374,14 @@ def run_cross_validation( # noqa: C901 scores = cross_validate( pipeline, df_X, - y, + df_y, cv=cv_outer, scoring=scoring, groups=df_groups, return_estimator=cv_return_estimator, n_jobs=n_jobs, return_train_score=return_train_score, - verbose=verbose, + verbose=verbose, # type: ignore fit_params=fit_params, ) @@ -387,7 +392,10 @@ def run_cross_validation( # noqa: C901 folds = np.tile(np.arange(n_folds), n_repeats) fold_sizes = np.array( - [list(map(len, x)) for x in cv_outer.split(df_X, y, groups=df_groups)] + [ + list(map(len, x)) + for x in cv_outer.split(df_X, df_y, groups=df_groups) + ] ) scores["n_train"] = fold_sizes[:, 0] scores["n_test"] = fold_sizes[:, 1] @@ -398,7 +406,8 @@ def run_cross_validation( # noqa: C901 scores_df = pd.DataFrame(scores) out = scores_df if return_estimator in ["final", "all"]: - pipeline.fit(df_X, y, **fit_params) + logger.info("Fitting final model") + pipeline.fit(df_X, df_y, **fit_params) out = scores_df, pipeline if return_inspector: @@ -406,7 +415,7 @@ def run_cross_validation( # noqa: C901 scores=scores_df, model=pipeline, X=df_X, - y=y, + y=df_y, groups=df_groups, cv=cv_outer, ) diff --git a/julearn/conftest.py b/julearn/conftest.py index a331f6388..9da707fc4 100644 --- a/julearn/conftest.py +++ b/julearn/conftest.py @@ -8,10 +8,77 @@ from typing import Callable, Dict, List, Optional, Union import pandas as pd -from pytest import FixtureRequest, fixture +import pytest +from pytest import FixtureRequest, fixture, mark from seaborn import load_dataset +_filter_keys = { + "nodeps": "Test that runs without conditional dependencies only", +} + + +def pytest_configure(config: pytest.Config) -> None: + """Add a new marker to pytest. + + Parameters + ---------- + config : pytest.Config + The pytest configuration object. + + """ + # register your new marker to avoid warnings + for k, v in _filter_keys.items(): + config.addinivalue_line("markers", f"{k}: {v}") + + +def pytest_addoption(parser: pytest.Parser) -> None: + """Add a new filter option to pytest. + + Parameters + ---------- + parser : pytest.Parser + The pytest parser object. 
+ + """ + # add your new filter option (you can name it whatever you want) + parser.addoption( + "--filter", + action="store", + help="Select tests based on markers.", + ) + + +def pytest_collection_modifyitems( + config: pytest.Config, items: List[pytest.Item] +) -> None: + """Filter tests based on the key marker. + + Parameters + ---------- + config : pytest.Config + The pytest configuration object. + items : list + The list of items. + + """ + filter = config.getoption("--filter", None) # type: ignore + if filter is None: + for k in _filter_keys.keys(): + skip_keys = mark.skip( + reason=f"Filter not specified for this test: {k}" + ) + for item in items: + if k in item.keywords: + item.add_marker(skip_keys) # skip the test + else: + new_items = [] + for item in items: + if filter in item.keywords: + new_items.append(item) + items[:] = new_items + + @fixture(scope="function") def df_typed_iris() -> pd.DataFrame: """Return a typed iris dataset. @@ -191,6 +258,32 @@ def search_params(request: FixtureRequest) -> Optional[Dict]: A dictionary with the search_params argument. """ + + return request.param + + +@fixture( + params=[ + {"kind": "bayes", "n_iter": 2, "cv": 3}, + {"kind": "bayes", "n_iter": 2}, + ], + scope="function", +) +def bayes_search_params(request: FixtureRequest) -> Optional[Dict]: + """Return different search_params argument for BayesSearchCV. + + Parameters + ---------- + request : pytest.FixtureRequest + The request object. + + Returns + ------- + dict or None + A dictionary with the search_params argument. + + """ + return request.param @@ -234,6 +327,46 @@ def get(step: str) -> Dict: return get +_tuning_distributions = { + "zscore": {"with_mean": [True, False]}, + "pca": {"n_components": (0.2, 0.7, "uniform")}, + "select_univariate": {"mode": ["k_best", "percentile"]}, + "rf": {"n_estimators": [2, 5]}, + "svm": {"C": (1, 10, "log-uniform")}, + "ridge": {"alpha": (1, 3, "uniform")}, +} + + +@fixture(scope="function") +def get_tuning_distributions() -> Callable: + """Return a function that returns the distributions to tune. + + Returns + ------- + get : callable + A function that returns the distributions to tune for a given step. + + """ + + def get(step: str) -> Dict: + """Return the distributions to tune for a given step. + + Parameters + ---------- + step : str + The name of the step. + + Returns + ------- + dict + The distributions to tune for the given step. 
+ + """ + return copy(_tuning_distributions.get(step, {})) + + return get + + @fixture( params=[ "zscore", diff --git a/julearn/inspect/inspector.py b/julearn/inspect/inspector.py index ccb80388f..3421eff1f 100644 --- a/julearn/inspect/inspector.py +++ b/julearn/inspect/inspector.py @@ -6,13 +6,16 @@ from typing import TYPE_CHECKING, List, Optional, Union +import pandas as pd +from sklearn.model_selection import BaseCrossValidator + from ..utils.logging import raise_error from ._cv import FoldsInspector from ._pipeline import PipelineInspector if TYPE_CHECKING: - import pandas as pd + from sklearn.base import BaseEstimator from ..pipeline.pipeline_creator import PipelineCreator @@ -48,10 +51,10 @@ def __init__( "BaseEstimator", None, ] = None, - X: Optional[List[str]] = None, # noqa: N803 - y: Optional[str] = None, - groups: Optional[str] = None, - cv: Optional[int] = None, + X: Optional[pd.DataFrame] = None, # noqa: N803 + y: Optional[pd.Series] = None, + groups: Optional[pd.Series] = None, + cv: Optional[Union[int, BaseCrossValidator]] = None, ) -> None: self._scores = scores self._model = model diff --git a/julearn/inspect/tests/test_pipeline.py b/julearn/inspect/tests/test_pipeline.py index 54cf36773..afbe631c4 100644 --- a/julearn/inspect/tests/test_pipeline.py +++ b/julearn/inspect/tests/test_pipeline.py @@ -21,7 +21,7 @@ import pandas as pd -class TestEst(BaseEstimator): +class MockTestEst(BaseEstimator): """Class for estimator tests. Parameters @@ -42,7 +42,7 @@ def fit( X: List[str], # noqa: N803 y: Optional[str] = None, **fit_params: Any, - ) -> "TestEst": + ) -> "MockTestEst": """Fit the estimator. Parameters @@ -56,7 +56,7 @@ def fit( Returns ------- - TestEst + MockTestEst The fitted estimator. """ @@ -155,9 +155,9 @@ def test_steps( @pytest.mark.parametrize( "est,fitted_params", [ - [TestEst(), {"param_0_": 0, "param_1_": 1}], + [MockTestEst(), {"param_0_": 0, "param_1_": 1}], [ - JuColumnTransformer("test", TestEst(), "continuous"), + JuColumnTransformer("test", MockTestEst(), "continuous"), {"param_0_": 0, "param_1_": 1}, ], ], @@ -201,7 +201,7 @@ def test_inspect_pipeline(df_iris: "pd.DataFrame") -> None: pipe = ( PipelineCreator(problem_type="classification") - .add(JuColumnTransformer("test", TestEst(), "continuous")) + .add(JuColumnTransformer("test", MockTestEst(), "continuous")) .add(SVC()) .to_pipeline() ) @@ -230,7 +230,7 @@ def test_get_estimator(df_iris: "pd.DataFrame") -> None: """ pipe = ( PipelineCreator(problem_type="classification") - .add(JuColumnTransformer("test", TestEst(), "continuous")) + .add(JuColumnTransformer("test", MockTestEst(), "continuous")) .add(SVC()) .to_pipeline() ) diff --git a/julearn/model_selection/__init__.py b/julearn/model_selection/__init__.py index 83f410c43..2e1eab839 100644 --- a/julearn/model_selection/__init__.py +++ b/julearn/model_selection/__init__.py @@ -15,3 +15,7 @@ register_searcher, reset_searcher_register, ) + +from ._skopt_searcher import register_bayes_searcher +register_bayes_searcher() + diff --git a/julearn/model_selection/_skopt_searcher.py b/julearn/model_selection/_skopt_searcher.py new file mode 100644 index 000000000..e0904b10c --- /dev/null +++ b/julearn/model_selection/_skopt_searcher.py @@ -0,0 +1,32 @@ +"""Module for registering the BayesSearchCV class from scikit-optimize.""" + +# Authors: Federico Raimondo +# License: AGPL + +from .available_searchers import _recreate_reset_copy, register_searcher + + +try: + from skopt import BayesSearchCV +except ImportError: + from sklearn.model_selection._search 
import BaseSearchCV + + class BayesSearchCV(BaseSearchCV): + """Dummy class for BayesSearchCV that raises ImportError. + + This class is used to raise an ImportError when BayesSearchCV is + requested but scikit-optimize is not installed. + + """ + + def __init__(*args, **kwargs): + raise ImportError( + "BayesSearchCV requires scikit-optimize to be installed." + ) + + +def register_bayes_searcher(): + register_searcher("bayes", BayesSearchCV, "search_spaces") + + # Update the "reset copy" of available searchers + _recreate_reset_copy() diff --git a/julearn/model_selection/available_searchers.py b/julearn/model_selection/available_searchers.py index f3386db78..499ba2edc 100644 --- a/julearn/model_selection/available_searchers.py +++ b/julearn/model_selection/available_searchers.py @@ -5,19 +5,35 @@ # License: AGPL from copy import deepcopy -from typing import List, Optional +from typing import List, Optional, Type, Union from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from julearn.utils.logging import logger, raise_error, warn_with_log -_available_searchers = {"grid": GridSearchCV, "random": RandomizedSearchCV} +_available_searchers = { + "grid": { + "class": GridSearchCV, + "params_attr": "param_grid", + }, + "random": { + "class": RandomizedSearchCV, + "params_attr": "param_distributions", + }, +} + # Keep a copy for reset _available_searchers_reset = deepcopy(_available_searchers) +def _recreate_reset_copy() -> None: + """Recreate the reset copy of available searchers.""" + global _available_searchers_reset + _available_searchers_reset = deepcopy(_available_searchers) + + def list_searchers() -> List[str]: """List all available searching algorithms. @@ -54,12 +70,15 @@ def get_searcher(name: str) -> object: f"The specified searcher ({name}) is not available. " f"Valid options are: {list(_available_searchers.keys())}" ) - out = _available_searchers[name] + out = _available_searchers[name]["class"] return out def register_searcher( - searcher_name: str, searcher: object, overwrite: Optional[bool] = None + searcher_name: str, + searcher: object, + params_attr: str, + overwrite: Optional[bool] = None, ) -> None: """Register searcher to julearn. @@ -73,6 +92,9 @@ def register_searcher( Name by which the searcher will be referenced by. searcher : obj The searcher class by which the searcher can be initialized. + params_attr : str + The name of the attribute that holds the hyperparameter space to + search. overwrite : bool | None, optional decides whether overwrite should be allowed, by default None. Options are: @@ -104,10 +126,49 @@ def register_searcher( "overwrite existing searchers." ) logger.info(f"Registering new searcher: {searcher_name}") - _available_searchers[searcher_name] = searcher + _available_searchers[searcher_name] = { + "class": searcher, + "params_attr": params_attr, + } def reset_searcher_register() -> None: """Reset the searcher register to its initial state.""" global _available_searchers _available_searchers = deepcopy(_available_searchers_reset) + + +def get_searcher_params_attr(searcher: Union[str, Type]) -> Optional[str]: + """Get the name of the attribute that holds the search space. + + Parameters + ---------- + searcher: + The searcher class or name. + + Returns + ------- + str or None + The name of the attribute that holds the search space. If the searcher + did not register any search space attribute, None is returned. + + Raises + ------ + ValueError + If the specified searcher is not available. 
+ + """ + out = None + if isinstance(searcher, str): + if searcher not in _available_searchers: + raise_error( + f"The specified searcher ({searcher}) is not available. " + f"Valid options are: {list(_available_searchers.keys())}" + ) + out = _available_searchers[searcher]["params_attr"] + else: + for _, v in _available_searchers.items(): + if searcher == v["class"]: + out = v["params_attr"] + + return out diff --git a/julearn/model_selection/tests/test_available_searchers.py b/julearn/model_selection/tests/test_available_searchers.py index 65bd71532..f88348867 100644 --- a/julearn/model_selection/tests/test_available_searchers.py +++ b/julearn/model_selection/tests/test_available_searchers.py @@ -5,40 +5,79 @@ # License: AGPL import pytest -from sklearn.model_selection import GridSearchCV +from sklearn.model_selection import GridSearchCV, RandomizedSearchCV from julearn.model_selection import ( get_searcher, register_searcher, reset_searcher_register, ) +from julearn.model_selection.available_searchers import ( + get_searcher_params_attr, +) def test_register_searcher() -> None: """Test registering a searcher.""" with pytest.raises(ValueError, match="The specified searcher "): get_searcher("custom_grid") - register_searcher("custom_grid", GridSearchCV) + register_searcher("custom_grid", GridSearchCV, "param_grid") assert get_searcher("custom_grid") == GridSearchCV with pytest.warns( RuntimeWarning, match="searcher named custom_grid already exists." ): - register_searcher("custom_grid", GridSearchCV) + register_searcher("custom_grid", GridSearchCV, "param_grid") - register_searcher("custom_grid", GridSearchCV, overwrite=True) + register_searcher( + "custom_grid", GridSearchCV, "param_grid", overwrite=True + ) with pytest.raises( ValueError, match="searcher named custom_grid already exists and " ): - register_searcher("custom_grid", GridSearchCV, overwrite=False) + register_searcher( + "custom_grid", GridSearchCV, "param_grid", overwrite=False + ) reset_searcher_register() def test_reset_searcher() -> None: """Test resetting the searcher registry.""" - register_searcher("custom_grid", GridSearchCV) + register_searcher("custom_grid", GridSearchCV, "param_grid") get_searcher("custom_grid") reset_searcher_register() with pytest.raises(ValueError, match="The specified searcher "): get_searcher("custom_grid") + + +def test_get_searcher() -> None: + """Test getting a searcher.""" + out = get_searcher("grid") + assert out == GridSearchCV + + out = get_searcher("random") + assert out == RandomizedSearchCV + + out = get_searcher("bayes") + assert out.__name__ == "BayesSearchCV" + + +def test_get_searcher_params_attr() -> None: + """Test getting the params attribute of a searcher.""" + out = get_searcher_params_attr("grid") + assert out == "param_grid" + + out = get_searcher_params_attr("random") + assert out == "param_distributions" + + out = get_searcher_params_attr("bayes") + assert out == "search_spaces" + + +@pytest.mark.nodeps +def test_get_searchers_noskopt() -> None: + """Test getting a searcher without skopt.""" + out = get_searcher("bayes") + with pytest.raises(ImportError, match="BayesSearchCV requires"): + out() # type: ignore diff --git a/julearn/pipeline/merger.py b/julearn/pipeline/merger.py index 2f86e2e9e..1ac4bdc04 100644 --- a/julearn/pipeline/merger.py +++ b/julearn/pipeline/merger.py @@ -5,16 +5,20 @@ from typing import Dict -from sklearn.model_selection import GridSearchCV, RandomizedSearchCV +from sklearn.model_selection._search import BaseSearchCV from sklearn.pipeline import 
Pipeline +from ..model_selection.available_searchers import ( + get_searcher, + get_searcher_params_attr, +) from ..prepare import prepare_search_params from ..utils.logging import raise_error from ..utils.typing import EstimatorLike from .pipeline_creator import _prepare_hyperparameter_tuning -def merge_pipelines( # noqa: C901 +def merge_pipelines( *pipelines: EstimatorLike, search_params: Dict ) -> Pipeline: """Merge multiple pipelines into a single one. @@ -37,41 +41,35 @@ def merge_pipelines( # noqa: C901 # searcher, they are all of the same kind and match the search params. search_params = prepare_search_params(search_params) - + t_searcher = get_searcher(search_params["kind"]) for p in pipelines: - if not isinstance(p, (Pipeline, GridSearchCV, RandomizedSearchCV)): + if not isinstance(p, (Pipeline, BaseSearchCV)): raise_error( "Only pipelines and searchers are supported. " f"Found {type(p)} instead." ) - if isinstance(p, GridSearchCV): - if search_params["kind"] != "grid": - raise_error( - "At least one of the pipelines to merge is a " - "GridSearchCV, but the search params do not specify a " - "grid search. These pipelines cannot be merged." - ) - elif isinstance(p, RandomizedSearchCV): - if search_params["kind"] != "random": + + if isinstance(p, BaseSearchCV): + if not isinstance(p, t_searcher): # type: ignore raise_error( - "At least one of the pipelines to merge is a " - "RandomizedSearchCV, but the search params do not specify " - "a random search. These pipelines cannot be merged." + "One of the pipelines to merge is a " + f"{p.__class__.__name__}, but the search params specify a " + f"{search_params['kind']} search. " + "These pipelines cannot be merged." ) - # Check that all estimators have the same named steps in their pipelines. reference_pipeline = pipelines[0] - if isinstance(reference_pipeline, (GridSearchCV, RandomizedSearchCV)): - reference_pipeline = reference_pipeline.estimator + if isinstance(reference_pipeline, BaseSearchCV): + reference_pipeline = reference_pipeline.estimator # type: ignore - step_names = reference_pipeline.named_steps.keys() + step_names = reference_pipeline.named_steps.keys() # type: ignore for p in pipelines: - if isinstance(p, (GridSearchCV, RandomizedSearchCV)): - p = p.estimator + if isinstance(p, BaseSearchCV): + p = p.estimator # type: ignore if not isinstance(p, Pipeline): raise_error("All searchers must use a pipeline.") - if step_names != p.named_steps.keys(): + if step_names != p.named_steps.keys(): # type: ignore raise_error("All pipelines must have the same named steps.") # The idea behind the merge is to create a list of parameter @@ -85,17 +83,20 @@ def merge_pipelines( # noqa: C901 different_steps = [] for t_step_name in step_names: # Get the transformer/model of the first element - t = reference_pipeline.named_steps[t_step_name] + t = reference_pipeline.named_steps[t_step_name] # type: ignore # Check that all searchers have the same transformer/model. # TODO: Fix this comparison, as it always returns False. for s in pipelines[1:]: - if isinstance(s, (GridSearchCV, RandomizedSearchCV)): - if s.estimator.named_steps[t_step_name] != t: + if isinstance(s, BaseSearchCV): + if ( + s.estimator.named_steps[t_step_name] # type: ignore + != t + ): different_steps.append(t_step_name) break else: - if s.named_steps[t_step_name] != t: + if s.named_steps[t_step_name] != t: # type: ignore different_steps.append(t_step_name) break @@ -103,21 +104,29 @@ def merge_pipelines( # noqa: C901 # transformer/model. 
all_grids = [] for s in pipelines: - if isinstance(s, GridSearchCV): - t_grid = s.param_grid.copy() - elif isinstance(s, RandomizedSearchCV): - t_grid = s.param_distributions.copy() + if isinstance(s, BaseSearchCV): + params_attr = get_searcher_params_attr(s.__class__) + if params_attr is None: + raise_error( + f"Searcher {s.__class__.__name__} is not registered " + "in the searcher registry. Merging of these kinds of " + "searchers is not supported. If you register the " + "searcher, you can merge it." + ) + t_grid = getattr(s, params_attr).copy() else: t_grid = {} for t_name in different_steps: - if isinstance(s, (GridSearchCV, RandomizedSearchCV)): - t_grid[t_name] = [s.estimator.named_steps[t_name]] + if isinstance(s, BaseSearchCV): + t_grid[t_name] = [ + s.estimator.named_steps[t_name] # type: ignore + ] else: - t_grid[t_name] = [s.named_steps[t_name]] + t_grid[t_name] = [s.named_steps[t_name]] # type: ignore all_grids.append(t_grid) # Finally, we will concatenate the grids and create a new searcher. new_searcher = _prepare_hyperparameter_tuning( - all_grids, search_params, reference_pipeline + all_grids, search_params, reference_pipeline # type: ignore ) return new_searcher diff --git a/julearn/pipeline/pipeline_creator.py b/julearn/pipeline/pipeline_creator.py index 4794f2df0..77f54189e 100644 --- a/julearn/pipeline/pipeline_creator.py +++ b/julearn/pipeline/pipeline_creator.py @@ -8,7 +8,8 @@ from typing import Any, Dict, List, Optional, Tuple, Union import numpy as np -from sklearn.model_selection import check_cv +from scipy import stats +from sklearn.model_selection import RandomizedSearchCV, check_cv from sklearn.pipeline import Pipeline from ..base import ColumnTypes, ColumnTypesLike, JuTransformer, WrapModel @@ -251,6 +252,10 @@ def add( else: logger.info(f"Setting hyperparameter {param} = {vals[0]}") params_to_set[param] = vals[0] + elif hasattr(vals, "rvs"): + # If it is a distribution, we will tune it. + logger.info(f"Tuning hyperparameter {param} = {vals}") + params_to_tune[param] = vals else: logger.info(f"Setting hyperparameter {param} = {vals}") params_to_set[param] = vals @@ -701,7 +706,8 @@ def _validate_step( raise_error(f"Cannot add a {step}. I don't know what it is.") def _check_X_types( - self, X_types: Optional[Dict] = None # noqa: N803 + self, + X_types: Optional[Dict] = None, # noqa: N803 ) -> Dict[str, List[str]]: """Check the X_types against the pipeline creator settings. @@ -785,7 +791,9 @@ def _is_model_step(step: Union[EstimatorLike, str]) -> bool: """Check if a step is a model.""" if step in list_models(): return True - if hasattr(step, "fit") and hasattr(step, "predict"): + if hasattr(step, "fit") and ( + hasattr(step, "predict") or hasattr(step, "score") + ): return True return False @@ -855,6 +863,41 @@ def _get_estimator_from( ) +def _prepare_hyperparameters_distributions( + params_to_tune: Dict[str, Any], +) -> Dict[str, Any]: + """Prepare hyperparameters distributions for RandomizedSearchCV. + + This method replaces tuples with distributions for RandomizedSearchCV + following the skopt convention. That is, if a parameter is a tuple + with 3 elements, the first two elements are the bounds of the + distribution and the third element is the type of distribution. + + Parameters + ---------- + params_to_tune : dict + The parameters to tune. + + Returns + ------- + dict + The modified parameters to tune. 
+ + """ + mod_params_to_tune = {} + for k, v in params_to_tune.items(): + if isinstance(v, tuple) and len(v) == 3: + if v[2] == "uniform": + mod_params_to_tune[k] = stats.uniform(v[0], v[1]) + elif v[2] in ("loguniform", "log-uniform"): + mod_params_to_tune[k] = stats.loguniform(v[0], v[1]) + else: + mod_params_to_tune[k] = v + else: + mod_params_to_tune[k] = v + return mod_params_to_tune + + def _prepare_hyperparameter_tuning( params_to_tune: Union[Dict[str, Any], List[Dict[str, Any]]], search_params: Optional[Dict[str, Any]], @@ -876,7 +919,8 @@ def _prepare_hyperparameter_tuning( The parameters for the search. The following keys are accepted: * 'kind': The kind of search algorithm to use e.g.: - 'grid' or 'random'. All valid julearn searchers can be entered. + 'grid', 'random' or 'bayes'. All valid julearn searchers can be + entered. * 'cv': If search is going to be used, the cross-validation splitting strategy to use. Defaults to same CV as for the model evaluation. @@ -929,6 +973,20 @@ def _prepare_hyperparameter_tuning( for k, v in t_params.items(): logger.info(f"\t\t{k}: {v}") + if search == RandomizedSearchCV: + # If we are using RandomizedSearchCV, we can adopt the + # skopt convention of using a 3-element tuple to define + # the distributions. + if isinstance(params_to_tune, dict): + params_to_tune = _prepare_hyperparameters_distributions( + params_to_tune + ) + else: + params_to_tune = [ + _prepare_hyperparameters_distributions(p) + for p in params_to_tune + ] + cv_inner = check_cv(cv_inner) # type: ignore logger.info(f"Using inner CV scheme {cv_inner}") search_params["cv"] = cv_inner diff --git a/julearn/pipeline/test/test_merger.py b/julearn/pipeline/tests/test_merger.py similarity index 89% rename from julearn/pipeline/test/test_merger.py rename to julearn/pipeline/tests/test_merger.py index ebf6b997d..96caaca17 100644 --- a/julearn/pipeline/test/test_merger.py +++ b/julearn/pipeline/tests/test_merger.py @@ -74,7 +74,7 @@ def test_merger_errors() -> None: with pytest.raises( ValueError, - match="At least one of the pipelines to merge is a GridSearchCV", + match="One of the pipelines to merge is a GridSearchCV", ): merge_pipelines(pipe1, pipe2, search_params=search_params) @@ -83,7 +83,7 @@ def test_merger_errors() -> None: with pytest.raises( ValueError, - match="one of the pipelines to merge is a RandomizedSearchCV", + match="One of the pipelines to merge is a RandomizedSearchCV", ): merge_pipelines(pipe1, pipe2, search_params=search_params) @@ -104,3 +104,12 @@ def test_merger_errors() -> None: match="must have the same named steps.", ): merge_pipelines(pipe1, pipe4, search_params=None) + + search_params = {"kind": "grid"} + pipe5 = creator2.to_pipeline(search_params={"kind": "bayes"}) + + with pytest.raises( + ValueError, + match="One of the pipelines to merge is a BayesSearchCV", + ): + merge_pipelines(pipe1, pipe5, search_params=search_params) diff --git a/julearn/pipeline/test/test_pipeline_creator.py b/julearn/pipeline/tests/test_pipeline_creator.py similarity index 75% rename from julearn/pipeline/test/test_pipeline_creator.py rename to julearn/pipeline/tests/test_pipeline_creator.py index f0918aa17..e431a6264 100644 --- a/julearn/pipeline/test/test_pipeline_creator.py +++ b/julearn/pipeline/tests/test_pipeline_creator.py @@ -5,7 +5,7 @@ # License: AGPL import warnings -from typing import Callable, Dict, List, Union +from typing import TYPE_CHECKING, Callable, Dict, List, Tuple, Union import pandas as pd import pytest @@ -22,6 +22,10 @@ from julearn.transformers 
import get_transformer +if TYPE_CHECKING: + from sklearn.pipeline import Pipeline + + def test_construction_working( model: str, preprocess: Union[str, List[str]], problem_type: str ) -> None: @@ -100,14 +104,14 @@ def test_fit_and_transform_no_error( pipeline[:-1].transform(X_iris) -def test_hyperparameter_tuning( +def _hyperparam_tuning_base_test( X_types_iris: Dict[str, List[str]], # noqa: N803 model: str, preprocess: Union[str, List[str]], problem_type: str, get_tuning_params: Callable, search_params: Dict[str, List], -) -> None: +) -> Tuple["Pipeline", Dict]: """Test that the pipeline hyperparameter tuning works as expected. Parameters @@ -125,6 +129,14 @@ def test_hyperparameter_tuning( search_params : dict of str and list The parameters for the search. + Returns + ------- + pipeline : Pipeline + The pipeline created. + param_grid : dict + The parameter grid for the search, using scikit-learn's + convention. + """ if isinstance(preprocess, str): preprocess = [preprocess] @@ -155,15 +167,218 @@ def test_hyperparameter_tuning( X_types=X_types_iris, search_params=search_params ) + return pipeline, param_grid + + +def test_hyperparameter_tuning( + X_types_iris: Dict[str, List[str]], # noqa: N803 + model: str, + preprocess: Union[str, List[str]], + problem_type: str, + get_tuning_params: Callable, + search_params: Dict[str, List], +) -> None: + """Test that the pipeline hyperparameter tuning works as expected. + + Parameters + ---------- + X_types_iris : dict + The iris dataset features types. + model : str + The model to test. + preprocess : str or list of str + The preprocessing steps to test. + problem_type : str + The problem type to test. + get_tuning_params : Callable + A function that returns the tuning hyperparameters for a given step. + search_params : dict of str and list + The parameters for the search. + + + """ + + pipeline, param_grid = _hyperparam_tuning_base_test( + X_types_iris, + model, + preprocess, + problem_type, + get_tuning_params, + search_params, + ) kind = "grid" if search_params is not None: kind = search_params.get("kind", "grid") + if kind == "grid": assert isinstance(pipeline, GridSearchCV) - assert pipeline.param_grid == param_grid + assert pipeline.param_grid == param_grid # type: ignore else: + assert kind == "random" assert isinstance(pipeline, RandomizedSearchCV) - assert pipeline.param_distributions == param_grid + assert pipeline.param_distributions == param_grid # type: ignore + + +def test_hyperparameter_tuning_bayes( + X_types_iris: Dict[str, List[str]], # noqa: N803 + model: str, + preprocess: Union[str, List[str]], + problem_type: str, + get_tuning_params: Callable, + bayes_search_params: Dict[str, List], +) -> None: + """Test that the pipeline hyperparameter tuning works as expected. + + Parameters + ---------- + X_types_iris : dict + The iris dataset features types. + model : str + The model to test. + preprocess : str or list of str + The preprocessing steps to test. + problem_type : str + The problem type to test. + get_tuning_params : Callable + A function that returns the tuning hyperparameters for a given step. + bayes_search_params : dict of str and list + The parameters for the search. 
+ + """ + BayesSearchCV = pytest.importorskip("skopt.BayesSearchCV") + + pipeline, param_grid = _hyperparam_tuning_base_test( + X_types_iris, + model, + preprocess, + problem_type, + get_tuning_params, + bayes_search_params, + ) + assert isinstance(pipeline, BayesSearchCV) + assert pipeline.search_spaces == param_grid # type: ignore + + +def _compare_param_grids(a: Dict, b: Dict) -> None: + """Compare two param grids. + + Parameters + ---------- + a : dict + The first param grid (processed). + b : dict + The second param grid (raw). + + Raises + ------ + AssertionError + If the param grids are not equal. + + """ + for key, val in a.items(): + assert key in b + if hasattr(val, "rvs"): + assert val.args[0] == b[key][0] + assert val.args[1] == b[key][1] + if b[key][2] in ["log-uniform", "loguniform"]: + assert val.dist.name == "loguniform" + elif b[key][2] == "uniform": + assert val.dist.name == "uniform" + else: + pytest.fail( + f"Unknown distributions {val.dist.name} or {b[key][2]}" + ) + else: + assert val == b[key] + + +def test_hyperparameter_tuning_distributions( + X_types_iris: Dict[str, List[str]], # noqa: N803 + model: str, + preprocess: Union[str, List[str]], + problem_type: str, + get_tuning_distributions: Callable, + search_params: Dict[str, List], +) -> None: + """Test hyperparameter tuning using distributions. + + Parameters + ---------- + X_types_iris : dict + The iris dataset features types. + model : str + The model to test. + preprocess : str or list of str + The preprocessing steps to test. + problem_type : str + The problem type to test. + get_tuning_distributions : Callable + A function that returns the tuning hyperparameters for a given step. + search_params : dict of str and list + The parameters for the search. + + """ + kind = "grid" + if search_params is not None: + kind = search_params.get("kind", "grid") + if kind != "random": + return # No sense to test distributions for other than gridsearch + + pipeline, param_grid = _hyperparam_tuning_base_test( + X_types_iris, + model, + preprocess, + problem_type, + get_tuning_distributions, + search_params, + ) + + assert isinstance(pipeline, RandomizedSearchCV) + _compare_param_grids( + pipeline.param_distributions, # type: ignore + param_grid, + ) + + +def test_hyperparameter_tuning_distributions_bayes( + X_types_iris: Dict[str, List[str]], # noqa: N803 + model: str, + preprocess: Union[str, List[str]], + problem_type: str, + get_tuning_distributions: Callable, + bayes_search_params: Dict[str, List], +) -> None: + """Test BayesSearchCV hyperparameter tuning using distributions. + + Parameters + ---------- + X_types_iris : dict + The iris dataset features types. + model : str + The model to test. + preprocess : str or list of str + The preprocessing steps to test. + problem_type : str + The problem type to test. + get_tuning_distributions : Callable + A function that returns the tuning hyperparameters for a given step. + bayes_search_params : dict of str and list + The parameters for the search. 
+ + """ + BayesSearchCV = pytest.importorskip("skopt.BayesSearchCV") + + pipeline, param_grid = _hyperparam_tuning_base_test( + X_types_iris, + model, + preprocess, + problem_type, + get_tuning_distributions, + bayes_search_params, + ) + + assert isinstance(pipeline, BayesSearchCV) + _compare_param_grids(pipeline.search_spaces, param_grid) @pytest.mark.parametrize( @@ -297,7 +512,8 @@ def test_added_model_target_transform() -> None: def test_stacking( - X_iris: pd.DataFrame, y_iris: pd.Series # noqa: N803 + X_iris: pd.DataFrame, # noqa: N803 + y_iris: pd.Series, ) -> None: """Test that the stacking model works correctly.""" # Define our feature types @@ -578,14 +794,21 @@ def test_PipelineCreator_set_hyperparameter() -> None: assert model1.steps[-1][1].get_params()["strategy"] == "uniform" creator2 = PipelineCreator(problem_type="classification", apply_to="*") - creator2.add(DummyClassifier(strategy="uniform"), name="dummy") + creator2.add( + DummyClassifier(strategy="uniform"), # type: ignore + name="dummy", + ) model2 = creator2.to_pipeline() assert model2.steps[-1][1].get_params()["strategy"] == "uniform" creator3 = PipelineCreator(problem_type="classification", apply_to="*") - creator3.add(DummyClassifier(), strategy="uniform", name="dummy") + creator3.add( + DummyClassifier(), # type: ignore + strategy="uniform", + name="dummy", + ) model3 = creator3.to_pipeline() diff --git a/julearn/pipeline/test/test_target_pipeline.py b/julearn/pipeline/tests/test_target_pipeline.py similarity index 100% rename from julearn/pipeline/test/test_target_pipeline.py rename to julearn/pipeline/tests/test_target_pipeline.py diff --git a/julearn/pipeline/test/test_target_pipeline_creator.py b/julearn/pipeline/tests/test_target_pipeline_creator.py similarity index 100% rename from julearn/pipeline/test/test_target_pipeline_creator.py rename to julearn/pipeline/tests/test_target_pipeline_creator.py diff --git a/pyproject.toml b/pyproject.toml index 1c49aaefa..b6215fac8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -59,6 +59,7 @@ docs = [ "sphinx_copybutton>=0.5.0,<0.6", "numpydoc>=1.5.0,<1.6", "towncrier<24", + "scikit-optimize>=0.10.0,<0.11", ] deslib = ["deslib>=0.3.5,<0.4"] viz = [ @@ -67,6 +68,12 @@ viz = [ "param>=2.0.0", ] +skopt = ["scikit-optimize>=0.10.0,<0.11"] + +# Add all optional functional dependencies (skip deslib until its fixed) +# This does not include dev/docs building dependencies +all = ["julearn[viz,skopt]"] + ################ # Tool configs # ################ @@ -212,3 +219,7 @@ showcontent = true [tool.towncrier.fragment.change] name = "API Changes" showcontent = true + +## Configure pyright to ignore assigment types until scikit-learn stubs are updated +[tool.pyright] +reportAssignmentType = "none" \ No newline at end of file diff --git a/tox.ini b/tox.ini index a17da50a4..017e63fcf 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = ruff, black, test, coverage, codespell, py3{8,9,10,11} +envlist = ruff, black, test, coverage, codespell, py3{8,9,10,11}, nodeps isolated_build = true [gh-actions] @@ -7,15 +7,16 @@ python = 3.8: py38 3.9: py39 3.10: coverage - 3.11: py311 + 3.11: py311, nodeps [testenv] skip_install = false deps = pytest seaborn + scikit-optimize>=0.10.0,<0.11 commands = - pytest + pytest {toxinidir}/julearn [testenv:ruff] skip_install = true @@ -40,8 +41,17 @@ deps = panel>=1.0.0b1 bokeh>=3.0.0 param + scikit-optimize>=0.10.0,<0.11 +commands = + pytest -vv {toxinidir}/julearn + +[testenv:nodeps] +skip_install = false +deps = + pytest + seaborn 
commands = - pytest -vv + pytest -vv --filter=nodeps {toxinidir}/julearn [testenv:coverage] skip_install = false @@ -53,6 +63,7 @@ deps = panel>=1.0.0b1 bokeh>=3.0.0 param + scikit-optimize>=0.10.0,<0.11 commands = pytest --cov={envsitepackagesdir}/julearn --cov=./julearn --cov-report=xml --cov-report=term -vv
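Note on the changed ``register_searcher`` signature above: the third argument now names the searcher attribute that holds the hyperparameter space. A minimal sketch of registering and selecting a custom searcher, mirroring the new tests (the ``custom_grid`` name and the 3-fold ``search_params`` are illustrative only):

    from sklearn.model_selection import GridSearchCV

    from julearn.model_selection import register_searcher

    # The third argument names the attribute that holds the hyperparameter
    # space: "param_grid" for grid searchers, "param_distributions" for
    # randomized ones and "search_spaces" for BayesSearchCV.
    register_searcher("custom_grid", GridSearchCV, "param_grid")

    # The registered name can then be selected through ``search_params``
    # in ``run_cross_validation``, e.g.:
    search_params = {"kind": "custom_grid", "cv": 3}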