From f2bde65e10338aac85c0cc201520d8c915edee63 Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 25 Oct 2022 14:39:08 -0500 Subject: [PATCH 1/5] feat: first version progress bar --- nbs/core.ipynb | 35 +++++++++++++++++++++++------------ statsforecast/core.py | 21 ++++++++++++++------- 2 files changed, 37 insertions(+), 19 deletions(-) diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 525d57e52..c6a762179 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -61,7 +61,8 @@ "from typing import Any, List, Optional\n", "\n", "import numpy as np\n", - "import pandas as pd" + "import pandas as pd\n", + "from tqdm.autonotebook import tqdm" ] }, { @@ -206,7 +207,7 @@ " fcsts, cols = self.predict(fm=fm, h=h, X=X, level=level)\n", " return fm, fcsts, cols\n", " \n", - " def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=tuple()):\n", + " def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=tuple(), verbose=False):\n", " fcsts, cuts, has_level_models = self._output_fcst(\n", " models=models, attr='forecast', \n", " h=h, X=X, level=level\n", @@ -221,7 +222,7 @@ " fitted_vals[:, 0] = self.data\n", " else:\n", " fitted_vals[:, 0] = self.data[:, 0]\n", - " for i, grp in enumerate(self):\n", + " for i, grp in tqdm(enumerate(self), disable=(not verbose)):\n", " y_train = grp[:, 0] if grp.ndim == 2 else grp\n", " X_train = grp[:, 1:] if (grp.ndim == 2 and grp.shape[1] > 1) else None\n", " if X is not None:\n", @@ -263,7 +264,8 @@ " result['fitted']['cols'] = ['y'] + cols_fitted\n", " return result\n", " \n", - " def cross_validation(self, models, h, test_size, step_size=1, input_size=None, fitted=False, level=tuple()):\n", + " def cross_validation(self, models, h, test_size, step_size=1, input_size=None, fitted=False, level=tuple(), \n", + " verbose=False):\n", " # output of size: (ts, window, h)\n", " if (test_size - h) % step_size:\n", " raise Exception('`test_size - h` should be module `step_size`')\n", @@ -278,7 +280,7 @@ " last_fitted_idxs = np.full_like(fitted_idxs, False, dtype=bool)\n", " matches = ['mean', 'lo', 'hi']\n", " for i_ts, grp in enumerate(self):\n", - " for i_window, cutoff in enumerate(range(-test_size, -h + 1, step_size), start=0):\n", + " for i_window, cutoff in tqdm(enumerate(range(-test_size, -h + 1, step_size), start=0), desc=f'CV Time Series {i_ts + 1}', disable=(not verbose)):\n", " end_cutoff = cutoff + h\n", " in_size_disp = cutoff if input_size is None else input_size \n", " y = grp[(cutoff - in_size_disp):cutoff]\n", @@ -793,7 +795,8 @@ " ray_address: Optional[str] = None,\n", " df: Optional[pd.DataFrame] = None,\n", " sort_df: bool = True,\n", - " fallback_model: Any = None\n", + " fallback_model: Any = None,\n", + " verbose: bool = False\n", " ):\n", " \"\"\"core.StatsForecast.\n", " [Source code](https://github.com/Nixtla/statsforecast/blob/main/statsforecast/core.py).\n", @@ -814,6 +817,7 @@ " `n_jobs`: int, number of jobs used in the parallel processing, use -1 for all cores.
\n", " `sort_df`: bool, if True, sort `df` by [`unique_id`,`ds`].
\n", " `fallback_model`: Any, Model to be used if a model fails. Only works with the `forecast` method.
\n", + " `verbose`: bool, Wether print progress bar. Only used when `n_jobs=1`.
\n", "\n", " **Notes:**
\n", " The `core.StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends.
\n", @@ -826,6 +830,7 @@ " self.ray_address = ray_address\n", " self.fallback_model = fallback_model\n", " self._prepare_fit(df=df, sort_df=sort_df)\n", + " self.verbose = verbose and self.n_jobs == 1\n", " \n", " def _prepare_fit(self, df, sort_df):\n", " if df is not None:\n", @@ -984,7 +989,8 @@ " if self.n_jobs == 1:\n", " res_fcsts = self.ga.forecast(models=self.models, \n", " h=h, fallback_model=self.fallback_model, \n", - " fitted=fitted, X=X, level=level)\n", + " fitted=fitted, X=X, level=level, \n", + " verbose=self.verbose)\n", " else:\n", " res_fcsts = self._forecast_parallel(h=h, fitted=fitted, X=X, level=level)\n", " if fitted:\n", @@ -1074,7 +1080,8 @@ " step_size=step_size, \n", " input_size=input_size, \n", " fitted=fitted,\n", - " level=level\n", + " level=level,\n", + " verbose=self.verbose\n", " )\n", " else:\n", " res_fcsts = self._cross_validation_parallel(\n", @@ -1315,7 +1322,9 @@ "# Instantiate StatsForecast class\n", "fcst = StatsForecast(df=panel_df,\n", " models=models,\n", - " freq='D', n_jobs=1)\n", + " freq='D', \n", + " n_jobs=1, \n", + " verbose=True)\n", "\n", "# Efficiently predict\n", "fcsts_df = fcst.forecast(h=4, fitted=True)\n", @@ -1390,7 +1399,8 @@ " df=series,\n", " models=models,\n", " freq='D',\n", - " n_jobs=1\n", + " n_jobs=1,\n", + " verbose=True\n", ")\n", "\n", "res = fcst.forecast(h=14)\n", @@ -1582,7 +1592,7 @@ "# Instantiate StatsForecast class\n", "fcst = StatsForecast(df=panel_df,\n", " models=[Naive()],\n", - " freq='D', n_jobs=1)\n", + " freq='D', n_jobs=1, verbose=True)\n", "\n", "# Access insample predictions\n", "rolled_fcsts_df = fcst.cross_validation(14, n_windows=2)\n", @@ -1613,7 +1623,8 @@ "fcst = StatsForecast(\n", " df=series_cv,\n", " models=[SumAhead(), Naive()],\n", - " freq='D'\n", + " freq='D',\n", + " verbose=True\n", ")\n", "res_cv = fcst.cross_validation(h=2, test_size=5, n_windows=None, level=(50, 60))\n", "test_eq(0., np.mean(res_cv['y'] - res_cv['SumAhead']))\n", diff --git a/statsforecast/core.py b/statsforecast/core.py index 9b72ab4c8..8208a7696 100644 --- a/statsforecast/core.py +++ b/statsforecast/core.py @@ -11,6 +11,7 @@ import numpy as np import pandas as pd +from tqdm.autonotebook import tqdm # %% ../nbs/core.ipynb 5 logging.basicConfig( @@ -116,7 +117,7 @@ def fit_predict(self, models, h, X=None, level=tuple()): fcsts, cols = self.predict(fm=fm, h=h, X=X, level=level) return fm, fcsts, cols - def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=tuple()): + def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=tuple(), verbose=False): fcsts, cuts, has_level_models = self._output_fcst( models=models, attr='forecast', h=h, X=X, level=level @@ -131,7 +132,7 @@ def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=t fitted_vals[:, 0] = self.data else: fitted_vals[:, 0] = self.data[:, 0] - for i, grp in enumerate(self): + for i, grp in tqdm(enumerate(self), disable=(not verbose)): y_train = grp[:, 0] if grp.ndim == 2 else grp X_train = grp[:, 1:] if (grp.ndim == 2 and grp.shape[1] > 1) else None if X is not None: @@ -173,7 +174,8 @@ def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=t result['fitted']['cols'] = ['y'] + cols_fitted return result - def cross_validation(self, models, h, test_size, step_size=1, input_size=None, fitted=False, level=tuple()): + def cross_validation(self, models, h, test_size, step_size=1, input_size=None, fitted=False, level=tuple(), + verbose=False): # output of size: (ts, window, h) if (test_size - h) % step_size: raise Exception('`test_size - h` should be module `step_size`') @@ -188,7 +190,7 @@ def cross_validation(self, models, h, test_size, step_size=1, input_size=None, f last_fitted_idxs = np.full_like(fitted_idxs, False, dtype=bool) matches = ['mean', 'lo', 'hi'] for i_ts, grp in enumerate(self): - for i_window, cutoff in enumerate(range(-test_size, -h + 1, step_size), start=0): + for i_window, cutoff in tqdm(enumerate(range(-test_size, -h + 1, step_size), start=0), desc=f'CV Time Series {i_ts + 1}', disable=(not verbose)): end_cutoff = cutoff + h in_size_disp = cutoff if input_size is None else input_size y = grp[(cutoff - in_size_disp):cutoff] @@ -313,7 +315,8 @@ def __init__( ray_address: Optional[str] = None, df: Optional[pd.DataFrame] = None, sort_df: bool = True, - fallback_model: Any = None + fallback_model: Any = None, + verbose: bool = False ): """core.StatsForecast. [Source code](https://github.com/Nixtla/statsforecast/blob/main/statsforecast/core.py). @@ -334,6 +337,7 @@ def __init__( `n_jobs`: int, number of jobs used in the parallel processing, use -1 for all cores.
`sort_df`: bool, if True, sort `df` by [`unique_id`,`ds`].
`fallback_model`: Any, Model to be used if a model fails. Only works with the `forecast` method.
+ `verbose`: bool, Wether print progress bar. Only used when `n_jobs=1`.
**Notes:**
The `core.StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends.
@@ -346,6 +350,7 @@ def __init__( self.ray_address = ray_address self.fallback_model = fallback_model self._prepare_fit(df=df, sort_df=sort_df) + self.verbose = verbose and self.n_jobs == 1 def _prepare_fit(self, df, sort_df): if df is not None: @@ -504,7 +509,8 @@ def forecast( if self.n_jobs == 1: res_fcsts = self.ga.forecast(models=self.models, h=h, fallback_model=self.fallback_model, - fitted=fitted, X=X, level=level) + fitted=fitted, X=X, level=level, + verbose=self.verbose) else: res_fcsts = self._forecast_parallel(h=h, fitted=fitted, X=X, level=level) if fitted: @@ -594,7 +600,8 @@ def cross_validation( step_size=step_size, input_size=input_size, fitted=fitted, - level=level + level=level, + verbose=self.verbose ) else: res_fcsts = self._cross_validation_parallel( From 79b23559038c042ed88b423768dab20cf8f4c519 Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 25 Oct 2022 14:47:12 -0500 Subject: [PATCH 2/5] fix: improve progress bar --- nbs/core.ipynb | 13 +++++++++++-- statsforecast/core.py | 13 +++++++++++-- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/nbs/core.ipynb b/nbs/core.ipynb index c6a762179..f5eefa21d 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -222,7 +222,11 @@ " fitted_vals[:, 0] = self.data\n", " else:\n", " fitted_vals[:, 0] = self.data[:, 0]\n", - " for i, grp in tqdm(enumerate(self), disable=(not verbose)):\n", + " iterable = tqdm(enumerate(self), \n", + " disable=(not verbose), \n", + " total=len(self),\n", + " desc='Forecast')\n", + " for i, grp in iterable:\n", " y_train = grp[:, 0] if grp.ndim == 2 else grp\n", " X_train = grp[:, 1:] if (grp.ndim == 2 and grp.shape[1] > 1) else None\n", " if X is not None:\n", @@ -279,8 +283,13 @@ " fitted_idxs = np.full((self.data.shape[0], n_windows), False, dtype=bool)\n", " last_fitted_idxs = np.full_like(fitted_idxs, False, dtype=bool)\n", " matches = ['mean', 'lo', 'hi']\n", + " steps = list(range(-test_size, -h + 1, step_size))\n", " for i_ts, grp in enumerate(self):\n", - " for i_window, cutoff in tqdm(enumerate(range(-test_size, -h + 1, step_size), start=0), desc=f'CV Time Series {i_ts + 1}', disable=(not verbose)):\n", + " iterable = tqdm(enumerate(steps, start=0), \n", + " desc=f'Cross Validation Time Series {i_ts + 1}', \n", + " disable=(not verbose),\n", + " total=len(steps))\n", + " for i_window, cutoff in iterable:\n", " end_cutoff = cutoff + h\n", " in_size_disp = cutoff if input_size is None else input_size \n", " y = grp[(cutoff - in_size_disp):cutoff]\n", diff --git a/statsforecast/core.py b/statsforecast/core.py index 8208a7696..d20185aaf 100644 --- a/statsforecast/core.py +++ b/statsforecast/core.py @@ -132,7 +132,11 @@ def forecast(self, models, h, fallback_model=None, fitted=False, X=None, level=t fitted_vals[:, 0] = self.data else: fitted_vals[:, 0] = self.data[:, 0] - for i, grp in tqdm(enumerate(self), disable=(not verbose)): + iterable = tqdm(enumerate(self), + disable=(not verbose), + total=len(self), + desc='Forecast') + for i, grp in iterable: y_train = grp[:, 0] if grp.ndim == 2 else grp X_train = grp[:, 1:] if (grp.ndim == 2 and grp.shape[1] > 1) else None if X is not None: @@ -189,8 +193,13 @@ def cross_validation(self, models, h, test_size, step_size=1, input_size=None, f fitted_idxs = np.full((self.data.shape[0], n_windows), False, dtype=bool) last_fitted_idxs = np.full_like(fitted_idxs, False, dtype=bool) matches = ['mean', 'lo', 'hi'] + steps = list(range(-test_size, -h + 1, step_size)) for i_ts, grp in enumerate(self): - for i_window, cutoff in tqdm(enumerate(range(-test_size, -h + 1, step_size), start=0), desc=f'CV Time Series {i_ts + 1}', disable=(not verbose)): + iterable = tqdm(enumerate(steps, start=0), + desc=f'Cross Validation Time Series {i_ts + 1}', + disable=(not verbose), + total=len(steps)) + for i_window, cutoff in iterable: end_cutoff = cutoff + h in_size_disp = cutoff if input_size is None else input_size y = grp[(cutoff - in_size_disp):cutoff] From 015833682823491a172ce7f071ca12026c99a484 Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 25 Oct 2022 15:01:28 -0500 Subject: [PATCH 3/5] fix: add tqdm as requirement --- environment.yml | 1 + settings.ini | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/environment.yml b/environment.yml index 64107f34b..780ec9644 100644 --- a/environment.yml +++ b/environment.yml @@ -14,3 +14,4 @@ dependencies: - tabulate - pip: - nbdev + - tqdm diff --git a/settings.ini b/settings.ini index 342142377..320387dbc 100644 --- a/settings.ini +++ b/settings.ini @@ -15,7 +15,7 @@ language = English custom_sidebar = True license = apache2 status = 2 -requirements = numba>=0.55.0 numpy>=1.21.6 pandas>=1.3.5 scipy>=1.7.3 statsmodels>=0.13.2 +requirements = numba>=0.55.0 numpy>=1.21.6 pandas>=1.3.5 scipy>=1.7.3 statsmodels>=0.13.2 tqdm ray_requirements = ray protobuf>=3.15.3,<4.0.0 fugue_requirements = fugue[ray]>=0.7.0 dev_requirements = nbdev black mypy flake8 ray protobuf>=3.15.3,<4.0.0 fugue>=0.7.0 matplotlib neuralforecast pmdarima prophet sklearn dask[distributed] From c0e9b1ff582034ba962f960d29df25a68ef524a6 Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 25 Oct 2022 15:27:14 -0500 Subject: [PATCH 4/5] fix: docstring typo --- nbs/core.ipynb | 2 +- statsforecast/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nbs/core.ipynb b/nbs/core.ipynb index f5eefa21d..2e68d546b 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -826,7 +826,7 @@ " `n_jobs`: int, number of jobs used in the parallel processing, use -1 for all cores.
\n", " `sort_df`: bool, if True, sort `df` by [`unique_id`,`ds`].
\n", " `fallback_model`: Any, Model to be used if a model fails. Only works with the `forecast` method.
\n", - " `verbose`: bool, Wether print progress bar. Only used when `n_jobs=1`.
\n", + " `verbose`: bool, Whether print progress bar. Only used when `n_jobs=1`.
\n", "\n", " **Notes:**
\n", " The `core.StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends.
\n", diff --git a/statsforecast/core.py b/statsforecast/core.py index d20185aaf..8a6fe4f50 100644 --- a/statsforecast/core.py +++ b/statsforecast/core.py @@ -346,7 +346,7 @@ def __init__( `n_jobs`: int, number of jobs used in the parallel processing, use -1 for all cores.
`sort_df`: bool, if True, sort `df` by [`unique_id`,`ds`].
`fallback_model`: Any, Model to be used if a model fails. Only works with the `forecast` method.
- `verbose`: bool, Wether print progress bar. Only used when `n_jobs=1`.
+ `verbose`: bool, Whether print progress bar. Only used when `n_jobs=1`.
**Notes:**
The `core.StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends.
From c2e915c18286524dfcf9b42b179a228d5ce4bbc2 Mon Sep 17 00:00:00 2001 From: FedericoGarza Date: Tue, 25 Oct 2022 15:38:49 -0500 Subject: [PATCH 5/5] fix: improve progressbar docstrings --- nbs/core.ipynb | 2 +- statsforecast/core.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/nbs/core.ipynb b/nbs/core.ipynb index 2e68d546b..fb9ef3aac 100644 --- a/nbs/core.ipynb +++ b/nbs/core.ipynb @@ -826,7 +826,7 @@ " `n_jobs`: int, number of jobs used in the parallel processing, use -1 for all cores.
\n", " `sort_df`: bool, if True, sort `df` by [`unique_id`,`ds`].
\n", " `fallback_model`: Any, Model to be used if a model fails. Only works with the `forecast` method.
\n", - " `verbose`: bool, Whether print progress bar. Only used when `n_jobs=1`.
\n", + " `verbose`: bool, Prints TQDM progress bar when `n_jobs=1`.
\n", "\n", " **Notes:**
\n", " The `core.StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends.
\n", diff --git a/statsforecast/core.py b/statsforecast/core.py index 8a6fe4f50..a4f6f5132 100644 --- a/statsforecast/core.py +++ b/statsforecast/core.py @@ -346,7 +346,7 @@ def __init__( `n_jobs`: int, number of jobs used in the parallel processing, use -1 for all cores.
`sort_df`: bool, if True, sort `df` by [`unique_id`,`ds`].
`fallback_model`: Any, Model to be used if a model fails. Only works with the `forecast` method.
- `verbose`: bool, Whether print progress bar. Only used when `n_jobs=1`.
+ `verbose`: bool, Prints TQDM progress bar when `n_jobs=1`.
**Notes:**
The `core.StatsForecast` class offers parallelization utilities with Dask, Spark and Ray back-ends.