diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 36572483..3198d403 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -53,9 +53,15 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" # Caching pip dependencies. - name: Install dependencies + if: matrix.python-version != '3.7' run: | pip install --upgrade pip pip install .[dev] + - name: Install dependencies (python 3.7, use legacy resolver) + if: matrix.python-version == '3.7' + run: | + pip install --upgrade pip + pip install .[dev] --use-deprecated=legacy-resolver - name: Test with pytest (xdist) if: matrix.os != 'ubuntu-latest' run: pytest -vvvx -n auto -m "not slow and not extra and not skipci" --durations=50 --cov diff --git a/.github/workflows/test_full.yml b/.github/workflows/test_full.yml index 4d6d5cce..95e9eb68 100644 --- a/.github/workflows/test_full.yml +++ b/.github/workflows/test_full.yml @@ -49,9 +49,15 @@ jobs: python-version: ${{ matrix.python-version }} cache: "pip" # Caching pip dependencies. - name: Install dependencies + if: matrix.python-version != '3.7' run: | pip install --upgrade pip pip install .[dev] + - name: Install dependencies (python 3.7, use legacy resolver) + if: matrix.python-version == '3.7' + run: | + pip install --upgrade pip + pip install .[dev] --use-deprecated=legacy-resolver - name: Test with pytest (xdist) if: matrix.os != 'ubuntu-latest' run: pytest -vvvx -n auto -m "not skipci" --durations=50 --cov diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index aedcfb17..2500f5e0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: '^docs/conf.py' +# exclude: '^docs/conf.py' # If need global exclude. repos: - repo: https://github.com/pre-commit/pre-commit-hooks @@ -22,6 +22,7 @@ repos: exclude: 'docs/user_guide' # Auto-generated, leave as is. - id: pretty-format-json args: ['--autofix', '--indent=4', '--no-sort-keys'] + exclude: '.*.ipynb' # ipynb may get treated as JSON, don't want that. - id: requirements-txt-fixer exclude: docs/requirements.txt - id: mixed-line-ending @@ -33,39 +34,34 @@ repos: - id: isort - repo: https://github.com/psf/black - rev: 22.12.0 + rev: 23.7.0 hooks: - id: black-jupyter language_version: python3 - args: [ - "--force-exclude=docs/conf.py" - ] # If like to embrace black styles even in the docs: - repo: https://github.com/asottile/blacken-docs - rev: v1.12.1 + rev: 1.16.0 hooks: - id: blacken-docs additional_dependencies: [black] exclude: 'docs/user_guide' # Auto-generated, leave as is. - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 - ## You can add flake8 plugins via `additional_dependencies`: - # additional_dependencies: [flake8-bugbear] - repo: https://github.com/pre-commit/mirrors-mypy - rev: v1.1.1 + rev: v1.5.1 hooks: - id: mypy files: "^src/" args: [--config-file=mypy.ini] - additional_dependencies: [pydantic] + additional_dependencies: [pydantic<2] - repo: https://github.com/PyCQA/bandit - rev: 1.7.4 + rev: 1.7.5 hooks: - id: bandit args: ["-c", "pyproject.toml", "-q"] diff --git a/README.md b/README.md index f74bb012..ca079791 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ --> -[![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial04_prediction.ipynb) +[![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial04_prediction.ipynb) [![Documentation Status](https://readthedocs.org/projects/temporai/badge/?version=latest)](https://temporai.readthedocs.io/en/latest/?badge=latest) [![Python 3.7+](https://img.shields.io/badge/python-3.7+-blue.svg)](https://www.python.org/downloads/release/python-370/) @@ -423,15 +423,15 @@ Prediction where targets are temporal (time series). - [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/data/tutorial04_data_splitting.ipynb) - [Data Splitting](./tutorials/data/tutorial04_data_splitting.ipynb) ### User Guide -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial01_plugins.ipynb) - [Plugins](./tutorials/user_guide/tutorial01_plugins.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial02_imputation.ipynb) - [Imputation](./tutorials/user_guide/tutorial02_imputation.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial03_scaling.ipynb) - [Scaling](./tutorials/user_guide/tutorial03_scaling.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial04_prediction.ipynb) - [Prediction](./tutorials/user_guide/tutorial04_prediction.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial05_time_to_event.ipynb) - [Time-to-event Analysis](./tutorials/user_guide/tutorial05_time_to_event.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial06_treatments.ipynb) - [Treatment Effects](./tutorials/user_guide/tutorial06_treatments.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial07_pipeline.ipynb) - [Pipeline](./tutorials/user_guide/tutorial07_pipeline.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial08_benchmarks.ipynb) - [Benchmarks](./tutorials/user_guide/tutorial08_benchmarks.ipynb) -- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/user_guide/tutorial09_automl.ipynb) - [AutoML](./tutorials/user_guide/tutorial09_automl.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial01_plugins.ipynb) - [Plugins](./tutorials/usage/tutorial01_plugins.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial02_imputation.ipynb) - [Imputation](./tutorials/usage/tutorial02_imputation.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial03_scaling.ipynb) - [Scaling](./tutorials/usage/tutorial03_scaling.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial04_prediction.ipynb) - [Prediction](./tutorials/usage/tutorial04_prediction.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial05_time_to_event.ipynb) - [Time-to-event Analysis](./tutorials/usage/tutorial05_time_to_event.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial06_treatments.ipynb) - [Treatment Effects](./tutorials/usage/tutorial06_treatments.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial07_pipeline.ipynb) - [Pipeline](./tutorials/usage/tutorial07_pipeline.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial08_benchmarks.ipynb) - [Benchmarks](./tutorials/usage/tutorial08_benchmarks.ipynb) +- [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/usage/tutorial09_automl.ipynb) - [AutoML](./tutorials/usage/tutorial09_automl.ipynb) ### Extending TemporAI - [![Test In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/vanderschaarlab/temporai/blob/main/tutorials/extending/tutorial01_custom_plugin.ipynb) - [Writing a Custom Plugin](./tutorials/extending/tutorial01_custom_plugin.ipynb) diff --git a/pytest.ini b/pytest.ini index 52b512e8..85e69723 100644 --- a/pytest.ini +++ b/pytest.ini @@ -24,6 +24,14 @@ filterwarnings = ignore:.*rsyncdir.*:DeprecationWarning # Jupyter platformdirs-related warning, ignore: ignore:.*platformdirs.*:DeprecationWarning + # Some pandas FutureWarnings that can be ignored for now: + ignore:.*is_sparse.*:FutureWarning + ignore:.*is_categorical_dtype.*:FutureWarning + ignore:.*swapaxes.*:FutureWarning + # Some numpy DeprecationWarnings: + ignore:.*bool8.*:DeprecationWarning + # DeprecationWarning about pkg_resources (triggered by lifelines): + ignore:.*pkg_resources.*:DeprecationWarning # Use pytest markers to select/deselect specific tests markers = internet: requires internet access diff --git a/setup.cfg b/setup.cfg index b58b2529..870e745a 100644 --- a/setup.cfg +++ b/setup.cfg @@ -56,8 +56,6 @@ python_requires = >=3.7 # new major versions. This works if the required packages follow Semantic Versioning. # For more information, check out https://semver.org/. install_requires = - # catboost 1.2+ has build errors on "macos-latest" GH runners: https://github.com/catboost/catboost/issues/2371 - catboost < 1.2 clairvoyance2 >=0.0.2 cloudpickle geomloss>=0.2.6 @@ -82,8 +80,10 @@ install_requires = torchdiffeq torchlaplace >= 0.0.4 tsai - typing-extensions + typing-extensions >= 4.7.1 xgbse + # joblib has a bug with py37 & Windows: https://github.com/joblib/loky/issues/411 + joblib < 1.3.0; python_version=="3.7" and platform_system=="Windows" [options.packages.find] diff --git a/src/tempor/benchmarks/benchmark.py b/src/tempor/benchmarks/benchmark.py index 1405266c..49d7307e 100644 --- a/src/tempor/benchmarks/benchmark.py +++ b/src/tempor/benchmarks/benchmark.py @@ -132,7 +132,6 @@ def benchmark_models( @pydantic.validate_arguments(config=dict(arbitrary_types_allowed=True)) def visualize_benchmark(results: Dict[str, pd.DataFrame], palette: str = "viridis") -> Any: - # Pre-format DF for plotting. for k, v in results.items(): v["method"] = k diff --git a/src/tempor/benchmarks/evaluation.py b/src/tempor/benchmarks/evaluation.py index a66bda21..af427c61 100644 --- a/src/tempor/benchmarks/evaluation.py +++ b/src/tempor/benchmarks/evaluation.py @@ -332,7 +332,6 @@ def roc_auc_score(self, y_test: np.ndarray, y_pred_proba: np.ndarray) -> float: return utils.evaluate_auc_multiclass(y_test, y_pred_proba)[0] def average_precision_score(self, y_test: np.ndarray, y_pred_proba: np.ndarray) -> float: - return utils.evaluate_auc_multiclass(y_test, y_pred_proba)[1] diff --git a/src/tempor/benchmarks/utils.py b/src/tempor/benchmarks/utils.py index 187973b8..bb387260 100644 --- a/src/tempor/benchmarks/utils.py +++ b/src/tempor/benchmarks/utils.py @@ -38,7 +38,6 @@ def evaluate_auc_multiclass( y_pred_proba_tmp = get_y_pred_proba_hlpr(y_pred_proba, n_classes) if n_classes > 2: - log.debug(f"+evaluate_auc {y_test.shape} {y_pred_proba_tmp.shape}") fpr = dict() @@ -60,7 +59,6 @@ def evaluate_auc_multiclass( aucroc = roc_auc["micro"] aucprc = average_precision["micro"] else: - aucroc = roc_auc_score(np.ravel(y_test), y_pred_proba_tmp, multi_class="ovr") aucprc = average_precision_score(np.ravel(y_test), y_pred_proba_tmp) diff --git a/src/tempor/data/samples.py b/src/tempor/data/samples.py index 42e896de..e8025c7b 100644 --- a/src/tempor/data/samples.py +++ b/src/tempor/data/samples.py @@ -3,12 +3,14 @@ # pylint: disable=unnecessary-ellipsis import abc +import contextlib from typing import TYPE_CHECKING, Any, List, Optional, Tuple import numpy as np import pandas as pd import pandera as pa import pydantic +from packaging.version import Version from typing_extensions import Self import tempor.exc @@ -292,6 +294,35 @@ def __getitem__(self, key: data_typing.GetItemKey) -> Self: ) +@contextlib.contextmanager +def workaround_pandera_pd2_1_0_multiindex_compatibility(schema: pa.DataFrameSchema, data: pd.DataFrame): + """A version compatibility issue exists between pandera and pandas 2.1.0, as reported here: + https://github.com/unionai-oss/pandera/issues/1328 + + The error pertains to multiindex uniqueness validation giving an unexpected error. + + This is a workaround that will "manually" throw an error that is expected from pandera. + """ + + def problem_versions() -> bool: + return Version(pd.__version__) >= Version("2.1.0") + # TODO: When/if fixed in pandera, add the below condition: + # and Version(pa.__version__) < Version("0.XX.YY") + + try: + yield + + except ValueError as ex: + if problem_versions() and "Columns with duplicate values are not supported in stack" in str(ex): + cols = data.index.names + raise pa.errors.SchemaError(schema=schema, data=data, message=f"columns {cols} not unique") + else: + raise + + finally: + pass + + class TimeSeriesSamples(DataSamples): _data: pd.DataFrame _schema: pa.DataFrameSchema @@ -397,7 +428,8 @@ def _validate(self) -> None: coerce=False, unique=multiindex_unique_def, ) - self._data = schema.validate(data) + with workaround_pandera_pd2_1_0_multiindex_compatibility(schema, data): + self._data = schema.validate(data) logger.debug(f"Final schema:\n{schema}") self._schema = schema diff --git a/src/tempor/models/ddh.py b/src/tempor/models/ddh.py index 666d5135..18a19b84 100644 --- a/src/tempor/models/ddh.py +++ b/src/tempor/models/ddh.py @@ -80,7 +80,6 @@ def __init__( clipping_value: int = 1, output_mode: str = "MLP", ) -> None: - self.split = split self.split_time = None diff --git a/src/tempor/utils/dataloaders/sine.py b/src/tempor/utils/dataloaders/sine.py index e6c50103..c79791ae 100644 --- a/src/tempor/utils/dataloaders/sine.py +++ b/src/tempor/utils/dataloaders/sine.py @@ -89,7 +89,6 @@ def load(self, **kwargs) -> dataset.OneOffPredictionDataset: # Generate sine data. for i in range(self.no): - # Initialize each time-series local = list() @@ -97,7 +96,6 @@ def load(self, **kwargs) -> dataset.OneOffPredictionDataset: seq_len = self.seq_len for k in range(self.temporal_dim): # pylint: disable=unused-variable - # Randomly drawn frequency and phase: freq = np.random.beta(2, 2) phase = np.random.normal() diff --git a/tests/automl/test_pipeline_selector.py b/tests/automl/test_pipeline_selector.py index 793223c1..17a1f967 100644 --- a/tests/automl/test_pipeline_selector.py +++ b/tests/automl/test_pipeline_selector.py @@ -185,7 +185,6 @@ def assert_actual_params(pipe, sample, method): assert getattr(estimator.params, k) == v -@pytest.mark.filterwarnings("ignore:.*bool8.*:DeprecationWarning") # Expected. @pytest.mark.parametrize("task_type,predictor", TEST_PREDICTOR_CASES) @pytest.mark.parametrize("static_imputers", TEST_STATIC_IMPUTERS_CASES) @pytest.mark.parametrize("static_scalers", TEST_STATIC_SCALERS_CASES) diff --git a/tests/automl/test_seeker.py b/tests/automl/test_seeker.py index a2e049a8..9509e255 100644 --- a/tests/automl/test_seeker.py +++ b/tests/automl/test_seeker.py @@ -73,7 +73,6 @@ def patched_transform(self, *args, **kwargs): # pylint: disable=unused-argument np.random.seed(12345) def patched_evaluate(*args, **kwargs): - # Since "ensure reproducibility" may affect seeding, and we want this function to return different values, # seed it from input hash manually as below. seed_from = str(args[0]) @@ -610,7 +609,6 @@ def test_init_fails_wrong_overrides(self, get_dataset: Callable): override_hp_space=override_hp_space, # type: ignore ) - @pytest.mark.filterwarnings("ignore:.*bool8.*:DeprecationWarning") # Expected. @pytest.mark.parametrize( "task_type,estimator_names,metric", [ diff --git a/tests/conftest.py b/tests/conftest.py index d10e75e1..1d73db67 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -191,6 +191,7 @@ def func(ds: "dataset.PredictiveDataset") -> "dataset.CovariatesDataset": # --- Reusable datasets. --- + # Sine data: full. @pytest.fixture(scope="session") def _sine_data_full(): diff --git a/tests/data/test_samples.py b/tests/data/test_samples.py index 211ddaa0..68202bd8 100644 --- a/tests/data/test_samples.py +++ b/tests/data/test_samples.py @@ -29,7 +29,6 @@ class DFsTest: def set_up_dfs_test(): - # --- Static. --- categories = ["A", "B", "C"] diff --git a/tests/log/test_custom_logger.py b/tests/log/test_custom_logger.py index af758d86..a0e250d7 100644 --- a/tests/log/test_custom_logger.py +++ b/tests/log/test_custom_logger.py @@ -73,7 +73,6 @@ def as_loguru_logs(records: List[Any]) -> str: def test_console_logging_at_trace(caplog): # noqa: F811 - config = tempor.get_config() config.logging.level = "TRACE" tempor.configure(config) @@ -97,7 +96,6 @@ def test_console_logging_at_trace(caplog): # noqa: F811 def test_console_logging_at_info(caplog): # noqa: F811 - config = tempor.get_config() config.logging.level = "INFO" tempor.configure(config) @@ -121,7 +119,6 @@ def test_console_logging_at_info(caplog): # noqa: F811 def test_console_logging_at_error(caplog): # noqa: F811 - config = tempor.get_config() config.logging.level = "ERROR" tempor.configure(config) diff --git a/tests/plugins/preprocessing/imputation/static/test_static_tabular_imputer.py b/tests/plugins/preprocessing/imputation/static/test_static_tabular_imputer.py index 6e48aadf..b4625d52 100644 --- a/tests/plugins/preprocessing/imputation/static/test_static_tabular_imputer.py +++ b/tests/plugins/preprocessing/imputation/static/test_static_tabular_imputer.py @@ -91,8 +91,6 @@ def test_transform( assert output.static.dataframe().isna().sum().sum() == 0 -@pytest.mark.filterwarnings("ignore:.*Jupyter.*platformdirs.*:DeprecationWarning") # Expected. -@pytest.mark.filterwarnings("ignore:.*bool8.*:DeprecationWarning") # Expected. @pytest.mark.filterwarnings("ignore:.*nonzero.*0d.*:DeprecationWarning") # Expected for EM imputer. @pytest.mark.filterwarnings("ignore::RuntimeWarning") # Expected for EM imputer. @pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning") # May happen in some cases. diff --git a/tests/plugins/preprocessing/imputation/temporal/test_ts_tabular_imputer.py b/tests/plugins/preprocessing/imputation/temporal/test_ts_tabular_imputer.py index d78fc9af..3c68ee77 100644 --- a/tests/plugins/preprocessing/imputation/temporal/test_ts_tabular_imputer.py +++ b/tests/plugins/preprocessing/imputation/temporal/test_ts_tabular_imputer.py @@ -84,8 +84,6 @@ def test_transform( assert output.time_series.dataframe().isna().sum().sum() == 0 -@pytest.mark.filterwarnings("ignore:.*Jupyter.*platformdirs.*:DeprecationWarning") # Expected. -@pytest.mark.filterwarnings("ignore:.*bool8.*:DeprecationWarning") # Expected. @pytest.mark.filterwarnings("ignore:.*nonzero.*0d.*:DeprecationWarning") # Expected for EM imputer. @pytest.mark.filterwarnings("ignore::RuntimeWarning") # Expected for EM imputer. @pytest.mark.filterwarnings("ignore::sklearn.exceptions.ConvergenceWarning") # May happen in some cases. diff --git a/tests/plugins/test_all_plugins.py b/tests/plugins/test_all_plugins.py index b804da23..699104bf 100644 --- a/tests/plugins/test_all_plugins.py +++ b/tests/plugins/test_all_plugins.py @@ -16,7 +16,6 @@ def test_init_success(plugin_fqn): # DeprecationWarning expected for preprocessing.imputation.temporal.ts_tabular_imputer: -@pytest.mark.filterwarnings("ignore:.*bool8.*:DeprecationWarning") @pytest.mark.parametrize("plugin_fqn", PLUGIN_FQNS) def test_sample_hyperparameters(plugin_fqn): PluginCls = plugin_loader.get_class(plugin_fqn)