From e6cacae53e301216c9b059e3392cd4bc2e0607ad Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jos=C3=A9=20Morales?= Date: Mon, 16 Dec 2024 20:17:55 -0600 Subject: [PATCH] enh: migrate theta to c++ (#955) --- include/statsforecast/nelder_mead.h | 17 +- nbs/src/theta.ipynb | 845 +++++++++------------------- python/statsforecast/_modidx.py | 13 +- python/statsforecast/theta.py | 542 ++++-------------- src/statsforecast.cpp | 18 +- src/theta.cpp | 195 +++++++ 6 files changed, 572 insertions(+), 1058 deletions(-) create mode 100644 src/theta.cpp diff --git a/include/statsforecast/nelder_mead.h b/include/statsforecast/nelder_mead.h index e79b41314..513da7171 100644 --- a/include/statsforecast/nelder_mead.h +++ b/include/statsforecast/nelder_mead.h @@ -8,15 +8,16 @@ using Eigen::VectorXd; using RowMajorMatrixXd = Eigen::Matrix; -auto Clamp(const VectorXd &x, const VectorXd &lower, const VectorXd &upper) { +inline auto Clamp(const VectorXd &x, const VectorXd &lower, + const VectorXd &upper) { return x.cwiseMax(lower).cwiseMin(upper); } -double StandardDeviation(const VectorXd &x) { +inline double StandardDeviation(const VectorXd &x) { return std::sqrt((x.array() - x.mean()).square().mean()); } -Eigen::VectorX ArgSort(const VectorXd &v) { +inline Eigen::VectorX ArgSort(const VectorXd &v) { Eigen::VectorX indices(v.size()); std::iota(indices.begin(), indices.end(), 0); std::sort(indices.begin(), indices.end(), @@ -25,11 +26,11 @@ Eigen::VectorX ArgSort(const VectorXd &v) { } template -std::tuple NelderMead(Func F, const VectorXd &x, const VectorXd &lower, - const VectorXd upper, double init_step, - double zero_pert, double alpha, double gamma, - double rho, double sigma, int max_iter, double tol_std, - bool adaptive, Args &&...args) { +std::tuple +NelderMead(Func F, const VectorXd &x, const VectorXd &lower, + const VectorXd upper, double init_step, double zero_pert, + double alpha, double gamma, double rho, double sigma, int max_iter, + double tol_std, bool adaptive, Args &&...args) { auto x0 = Clamp(x, lower, upper); auto n = x0.size(); if (adaptive) { diff --git a/nbs/src/theta.ipynb b/nbs/src/theta.ipynb index f9bc07259..bd86b92b4 100644 --- a/nbs/src/theta.ipynb +++ b/nbs/src/theta.ipynb @@ -19,22 +19,15 @@ "source": [ "#| export\n", "import math\n", - "from typing import Tuple\n", "\n", "import numpy as np\n", - "from numba import njit\n", "from scipy.stats import norm\n", "from statsmodels.tsa.seasonal import seasonal_decompose\n", "from statsmodels.tsa.stattools import acf\n", "\n", - "from statsforecast.utils import (\n", - " CACHE,\n", - " NOGIL,\n", - " _repeat_val_seas,\n", - " _seasonal_naive,\n", - " restrict_to_bounds,\n", - " results,\n", - ")" + "from statsforecast._lib import theta as _theta\n", + "from statsforecast.arima import is_constant\n", + "from statsforecast.utils import _repeat_val_seas, _seasonal_naive, results" ] }, { @@ -45,33 +38,6 @@ "# Theta Model" ] }, - { - "cell_type": "markdown", - "id": "03e43d2e-6341-4efd-81d6-b4ee8cf268e4", - "metadata": {}, - "source": [ - "## thetacalc" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "1b54e61d-3ddc-44c4-9b6e-6b3bd2aa3bd7", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "# Global variables \n", - "STM = 0\n", - "OTM = 1\n", - "DSTM = 2\n", - "DOTM = 3\n", - "TOL = 1.0e-10\n", - "HUGEN = 1.0e10\n", - "NA = -99999.0\n", - "smalno = np.finfo(float).eps" - ] - }, { "cell_type": "code", "execution_count": null, @@ -80,44 +46,13 @@ "outputs": [], "source": [ "#| hide\n", - "from fastcore.test import test_eq\n", "from statsforecast.utils import AirPassengers as ap" ] }, { "cell_type": "code", "execution_count": null, - "id": "b45ce636-f541-4f8b-bb6d-b27a6586d65f", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def initstate(y, modeltype, initial_smoothed, alpha, theta):\n", - " states = np.zeros((1, 5), dtype=np.float32)\n", - " states[0, 0] = alpha * y[0] + (1 - alpha) * initial_smoothed # level\n", - " states[0, 1] = y[0] #mean y\n", - " if modeltype in [DSTM, DOTM]:\n", - " # dynamic models\n", - " states[0, 2] = y[0] # An\n", - " states[0, 3] = 0 # Bn\n", - " states[0, 4] = y[0] # mu\n", - " else:\n", - " # nodynamic models\n", - " n = len(y)\n", - " Bn = 6 * (2 * np.mean(np.arange(1, n + 1) * y) - (1 + n) * np.mean(y)) / ( n ** 2 - 1)\n", - " An = np.mean(y) - (n + 1) * Bn / 2\n", - " states[0, 2] = An\n", - " states[0, 3] = Bn\n", - " states[0, 4] = initial_smoothed + (1 - 1 / theta) * (An + Bn)\n", - " \n", - " return states" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "04033d29-19eb-4006-8886-79a701deaf8b", + "id": "7c3e561e-cffc-4d21-ab04-f142c82b84a1", "metadata": {}, "outputs": [], "source": [ @@ -125,150 +60,16 @@ "initial_smoothed = ap[0] / 2\n", "alpha = 0.5\n", "theta = 2\n", - "initstate(ap, modeltype=STM, initial_smoothed=initial_smoothed, alpha=alpha, theta=theta)\n", - "initstate(ap, modeltype=OTM, initial_smoothed=initial_smoothed, alpha=alpha, theta=theta)\n", - "initstate(ap, modeltype=DSTM, initial_smoothed=initial_smoothed, alpha=alpha, theta=theta)\n", - "initstate(ap, modeltype=DOTM, initial_smoothed=initial_smoothed, alpha=alpha, theta=theta)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "467c7ef6-267d-4d6a-9268-55a0c1ca24f6", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def thetacalc(y: np.ndarray,\n", - " states: np.ndarray, # states\n", - " modeltype: int, \n", - " initial_smoothed: float, \n", - " alpha: float,\n", - " theta: float, \n", - " e: np.ndarray, \n", - " amse: np.ndarray, \n", - " nmse: int) -> float:\n", - " denom = np.zeros(nmse)\n", - " f = np.zeros(nmse)\n", - " # update first state\n", - " states[0, :] = initstate(y=y, modeltype=modeltype, \n", - " initial_smoothed=initial_smoothed, \n", - " alpha=alpha, theta=theta) \n", - " \n", - " amse[:nmse] = 0.\n", - " e[0] = y[0] - states[0, 4]\n", - " n = len(y)\n", - " for i in range(1, n):\n", - " # one step forecast \n", - " thetafcst(states=states, i=i, modeltype=modeltype, f=f, h=nmse, alpha=alpha, theta=theta)\n", - " if math.fabs(f[0] - NA) < TOL:\n", - " mse = NA\n", - " return mse\n", - " e[i] = y[i] - f[0]\n", - " for j in range(nmse):\n", - " if (i + j) < n:\n", - " denom[j] += 1.\n", - " tmp = y[i + j] - f[j]\n", - " amse[j] = (amse[j] * (denom[j] - 1.0) + (tmp * tmp)) / denom[j]\n", - " # update state\n", - " thetaupdate(states=states, i=i, modeltype=modeltype, \n", - " alpha=alpha, theta=theta, y=y[i], usemu=0)\n", - " mean_y = np.mean(np.abs(y))\n", - " if math.fabs(mean_y - 0.) < TOL:\n", - " mean_y = TOL\n", - " mse = np.sum(e[3:] ** 2) / mean_y\n", - " return mse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e2d34a68-2641-4eeb-b025-17f07472bbdc", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def thetafcst(states, i, \n", - " modeltype, \n", - " f, h, \n", - " alpha, theta):\n", - " # obs:\n", - " # forecast are obtained in a recursive manner\n", - " # this is not standard, for example in ets\n", - " #forecasts\n", - " new_states = np.zeros((i + h, states.shape[1]), dtype=np.float32)\n", - " new_states[:i] = states[:i]\n", - " for i_h in range(h):\n", - " thetaupdate(states=new_states, i=i + i_h, modeltype=modeltype, \n", - " alpha=alpha, theta=theta, y=0, usemu=1)\n", - " f[i_h] = new_states[i + i_h, 4] # mu is the forecast" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "5a88b4c7-fad0-489f-b7c3-eb0e6db34f12", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def thetaupdate(states, i,\n", - " modeltype, # kind of model \n", - " alpha, theta,\n", - " y, usemu):\n", - " # states\n", - " # level, meany, An, Bn, mu\n", - " # get params\n", - " level = states[i - 1, 0]\n", - " meany = states[i - 1, 1]\n", - " An = states[i - 1, 2]\n", - " Bn = states[i - 1, 3]\n", - " # update mu\n", - " states[i, 4] = level + (1 - 1 / theta) * (An * ((1 - alpha) ** i) + Bn * (1 - (1 - alpha)**(i + 1)) / alpha)\n", - " if usemu:\n", - " y = states[i, 4]\n", - " # update level\n", - " states[i, 0] = alpha * y + (1 - alpha) * level\n", - " # update meany\n", - " states[i, 1] = (i * meany + y) / (i + 1)\n", - " # update Bn and An\n", - " if modeltype in [DSTM, DOTM]:\n", - " # dynamic models\n", - " states[i, 3] = ((i - 1) * Bn + 6 * (y - meany) / (i + 1)) / (i + 2)\n", - " states[i, 2] = states[i, 1] - states[i, 3] * (i + 2) / 2\n", - " else:\n", - " states[i, 2] = An\n", - " states[i, 3] = Bn\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ff6588c-b9fd-4892-bf66-cac0ebe4a47c", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def thetaforecast(states, n, modeltype, \n", - " f, h, alpha, theta):\n", - " # compute forecasts\n", - " new_states = thetafcst(\n", - " states=states, i=n, modeltype=modeltype, \n", - " f=f, h=h, \n", - " alpha=alpha,\n", - " theta=theta\n", - " ) \n", - " return new_states" + "_theta.init_state(ap, _theta.ModelType.STM, initial_smoothed, alpha, theta)\n", + "_theta.init_state(ap, _theta.ModelType.OTM, initial_smoothed, alpha, theta)\n", + "_theta.init_state(ap, _theta.ModelType.DSTM, initial_smoothed, alpha, theta)\n", + "_theta.init_state(ap, _theta.ModelType.DOTM, initial_smoothed, alpha, theta)" ] }, { "cell_type": "code", "execution_count": null, - "id": "2585519c-6c14-45d3-9b28-ac4ef984d55e", + "id": "e92decc0-aa29-46ff-bf09-a6f7b2d8fa9e", "metadata": {}, "outputs": [], "source": [ @@ -280,19 +81,24 @@ "initial_smoothed = ap[0] / 2\n", "alpha = 0.5\n", "theta = 2.\n", - "init_states = np.zeros((len(ap), 5), dtype=np.float32)\n", - "mse = thetacalc(\n", - " y=ap,\n", - " states=init_states, \n", - " modeltype=STM, \n", - " initial_smoothed=initial_smoothed, alpha=alpha, theta=theta,\n", - " e=e_, amse=amse_, nmse=3\n", + "init_states = np.zeros((len(ap), 5))\n", + "mse = _theta.calc(\n", + " ap,\n", + " init_states, \n", + " _theta.ModelType.STM, \n", + " initial_smoothed,\n", + " alpha,\n", + " theta,\n", + " e_,\n", + " amse_,\n", + " 3,\n", ")\n", "#verify we recover the fitted values\n", "np.testing.assert_array_equal(\n", " ap - e_,\n", " init_states[:, -1]\n", ")\n", + "\n", "#verify we get same fitted values than R\n", "# use stm(AirPassengers, s=F, estimation=F, h = 12)\n", "# to recover\n", @@ -301,18 +107,20 @@ " np.array([101.1550, 107.9061, 449.1692]), \n", " decimal=2\n", ")\n", + "\n", "# recover mse\n", - "test_eq(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", + "assert math.isclose(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", "\n", "# test forecasts\n", "h = 5\n", - "fcsts = np.zeros(h, dtype=np.float32)\n", - "thetaforecast(\n", - " states=init_states, n=len(ap), \n", - " modeltype=STM, \n", - " f=fcsts, h=h, \n", - " alpha=alpha,\n", - " theta=theta\n", + "fcsts = np.zeros(h)\n", + "_theta.forecast(\n", + " init_states,\n", + " len(ap), \n", + " _theta.ModelType.STM, \n", + " fcsts,\n", + " alpha,\n", + " theta,\n", ")\n", "# test same forecast than R's\n", "np.testing.assert_array_almost_equal(\n", @@ -325,7 +133,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b4aa2268-6181-4870-813d-ef26e9c75605", + "id": "600c15d8-a49c-493e-9753-a8f59bfc84b3", "metadata": {}, "outputs": [], "source": [ @@ -337,13 +145,17 @@ "initial_smoothed = ap[0] / 2\n", "alpha = 0.5\n", "theta = 2.\n", - "init_states = np.zeros((len(ap), 5), dtype=np.float32)\n", - "mse = thetacalc(\n", - " y=ap,\n", - " states=init_states, \n", - " modeltype=OTM, \n", - " initial_smoothed=initial_smoothed, alpha=alpha, theta=theta,\n", - " e=e_, amse=amse_, nmse=3\n", + "init_states = np.zeros((len(ap), 5))\n", + "mse = _theta.calc(\n", + " ap,\n", + " init_states, \n", + " _theta.ModelType.OTM, \n", + " initial_smoothed,\n", + " alpha,\n", + " theta,\n", + " e_,\n", + " amse_,\n", + " 3,\n", ")\n", "#verify we recover the fitted values\n", "np.testing.assert_array_equal(\n", @@ -359,17 +171,18 @@ " decimal=2\n", ")\n", "# recover mse\n", - "test_eq(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", + "assert math.isclose(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", "\n", "# test forecasts\n", "h = 5\n", - "fcsts = np.zeros(h, dtype=np.float32)\n", - "thetaforecast(\n", - " states=init_states, n=len(ap), \n", - " modeltype=OTM, \n", - " f=fcsts, h=h, \n", - " alpha=alpha,\n", - " theta=theta\n", + "fcsts = np.zeros(h)\n", + "_theta.forecast(\n", + " init_states,\n", + " len(ap), \n", + " _theta.ModelType.OTM, \n", + " fcsts,\n", + " alpha,\n", + " theta,\n", ")\n", "# test same forecast than R's\n", "np.testing.assert_array_almost_equal(\n", @@ -382,7 +195,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e98a7f30-bffa-4f14-828b-af1da9fe5e47", + "id": "1ee39aac-0500-4207-b440-dd36807675a3", "metadata": {}, "outputs": [], "source": [ @@ -394,13 +207,17 @@ "initial_smoothed = ap[0] / 2\n", "alpha = 0.5\n", "theta = 2.\n", - "init_states = np.zeros((len(ap), 5), dtype=np.float32)\n", - "mse = thetacalc(\n", - " y=ap,\n", - " states=init_states, \n", - " modeltype=DSTM, \n", - " initial_smoothed=initial_smoothed, alpha=alpha, theta=theta,\n", - " e=e_, amse=amse_, nmse=3\n", + "init_states = np.zeros((len(ap), 5))\n", + "mse = _theta.calc(\n", + " ap,\n", + " init_states, \n", + " _theta.ModelType.DSTM, \n", + " initial_smoothed,\n", + " alpha,\n", + " theta,\n", + " e_,\n", + " amse_,\n", + " 3,\n", ")\n", "#verify we recover the fitted values\n", "np.testing.assert_array_equal(\n", @@ -416,17 +233,18 @@ " decimal=2\n", ")\n", "# recover mse\n", - "test_eq(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", + "assert math.isclose(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", "\n", "# test forecasts\n", "h = 5\n", - "fcsts = np.zeros(h, dtype=np.float32)\n", - "thetaforecast(\n", - " states=init_states, n=len(ap), \n", - " modeltype=DSTM, \n", - " f=fcsts, h=h, \n", - " alpha=alpha,\n", - " theta=theta\n", + "fcsts = np.zeros(h)\n", + "_theta.forecast(\n", + " init_states,\n", + " len(ap), \n", + " _theta.ModelType.DSTM, \n", + " fcsts,\n", + " alpha,\n", + " theta\n", ")\n", "# test same forecast than R's\n", "np.testing.assert_array_almost_equal(\n", @@ -439,7 +257,7 @@ { "cell_type": "code", "execution_count": null, - "id": "3f69faca-9587-4b81-b58a-4a6b0e7e2378", + "id": "ce095f83-4a1d-4c60-a931-5e3f366833a0", "metadata": {}, "outputs": [], "source": [ @@ -451,13 +269,17 @@ "initial_smoothed = ap[0] / 2\n", "alpha = 0.5\n", "theta = 2.\n", - "init_states = np.zeros((len(ap), 5), dtype=np.float32)\n", - "mse = thetacalc(\n", - " y=ap,\n", - " states=init_states, \n", - " modeltype=DOTM, \n", - " initial_smoothed=initial_smoothed, alpha=alpha, theta=theta,\n", - " e=e_, amse=amse_, nmse=3\n", + "init_states = np.zeros((len(ap), 5))\n", + "mse = _theta.calc(\n", + " ap,\n", + " init_states, \n", + " _theta.ModelType.DOTM, \n", + " initial_smoothed,\n", + " alpha,\n", + " theta,\n", + " e_,\n", + " amse_,\n", + " 3\n", ")\n", "#verify we recover the fitted values\n", "np.testing.assert_array_equal(\n", @@ -473,17 +295,18 @@ " decimal=2\n", ")\n", "# recover mse\n", - "test_eq(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", + "assert math.isclose(np.sum(e_[3:] ** 2) / np.mean(np.abs(ap)), mse)\n", "\n", "# test forecasts\n", "h = 5\n", - "fcsts = np.zeros(h, dtype=np.float32)\n", - "thetaforecast(\n", - " states=init_states, n=len(ap), \n", - " modeltype=DOTM, \n", - " f=fcsts, h=h, \n", - " alpha=alpha,\n", - " theta=theta\n", + "fcsts = np.zeros(h)\n", + "_theta.forecast(\n", + " init_states,\n", + " len(ap), \n", + " _theta.ModelType.DOTM, \n", + " fcsts,\n", + " alpha,\n", + " theta\n", ")\n", "# test same forecast than R's\n", "np.testing.assert_array_almost_equal(\n", @@ -501,42 +324,50 @@ "outputs": [], "source": [ "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def initparamtheta(initial_smoothed: float, alpha: float, theta: float,\n", - " y: np.ndarray,\n", - " modeltype: str):\n", - " if modeltype in ['STM', 'DSTM']:\n", - " if np.isnan(initial_smoothed):\n", + "def initparamtheta(\n", + " initial_smoothed: float,\n", + " alpha: float,\n", + " theta: float,\n", + " y: np.ndarray,\n", + " modeltype: _theta.ModelType,\n", + "):\n", + " if modeltype in [_theta.ModelType.STM, _theta.ModelType.DSTM]:\n", + " if math.isnan(initial_smoothed):\n", " initial_smoothed = y[0] / 2\n", - " optimize_level = 1\n", + " optimize_level = True\n", " else:\n", - " optimize_level = 0\n", - " if np.isnan(alpha):\n", + " optimize_level = False\n", + " if math.isnan(alpha):\n", " alpha = 0.5\n", - " optimize_alpha = 1\n", + " optimize_alpha = True\n", " else:\n", - " optimize_alpha = 0\n", - " theta = 2. # no optimize\n", - " optimize_theta = 0\n", - " elif modeltype in ['OTM', 'DOTM']:\n", - " if np.isnan(initial_smoothed):\n", + " optimize_alpha = False\n", + " theta = 2.0 # no optimize\n", + " optimize_theta = False\n", + " else:\n", + " if math.isnan(initial_smoothed):\n", " initial_smoothed = y[0] / 2\n", - " optimize_level = 1\n", + " optimize_level = True\n", " else:\n", - " optimize_level = 0\n", - " if np.isnan(alpha):\n", + " optimize_level = False\n", + " if math.isnan(alpha):\n", " alpha = 0.5\n", - " optimize_alpha = 1\n", + " optimize_alpha = True\n", " else:\n", - " optimize_alpha = 0\n", - " if np.isnan(theta):\n", - " theta = 2.\n", - " optimize_theta = 1\n", + " optimize_alpha = False\n", + " if math.isnan(theta):\n", + " theta = 2.0\n", + " optimize_theta = True\n", " else:\n", - " optimize_theta = 0\n", - " return {'initial_smoothed': initial_smoothed, 'optimize_initial_smoothed': optimize_level,\n", - " 'alpha': alpha, 'optimize_alpha': optimize_alpha,\n", - " 'theta': theta, 'optimize_theta': optimize_theta}" + " optimize_theta = False\n", + " return {\n", + " 'initial_smoothed': initial_smoothed,\n", + " 'optimize_initial_smoothed': optimize_level,\n", + " 'alpha': alpha,\n", + " 'optimize_alpha': optimize_alpha,\n", + " 'theta': theta,\n", + " 'optimize_theta': optimize_theta,\n", + " }" ] }, { @@ -547,28 +378,39 @@ "outputs": [], "source": [ "#| hide\n", - "initparamtheta(initial_smoothed=np.nan, alpha=np.nan, theta=np.nan,\n", - " y=ap,\n", - " modeltype='DOTM')" + "initparamtheta(\n", + " initial_smoothed=np.nan,\n", + " alpha=np.nan,\n", + " theta=np.nan,\n", + " y=ap,\n", + " modeltype=_theta.ModelType.DOTM,\n", + ")" ] }, { "cell_type": "code", "execution_count": null, - "id": "ca89edd3-1d38-4a8d-b931-cde5b8f192d8", + "id": "6c95a65d-0eff-4196-963d-648a8f1cf768", "metadata": {}, "outputs": [], "source": [ "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def switch_theta(x: str):\n", - " return {'STM': 0, 'OTM': 1, 'DSTM': 2, 'DOTM': 3}[x]" + "def switch_theta(model: str) -> _theta.ModelType:\n", + " if model == 'STM':\n", + " return _theta.ModelType.STM\n", + " if model == 'OTM':\n", + " return _theta.ModelType.OTM\n", + " if model == 'DSTM':\n", + " return _theta.ModelType.DSTM\n", + " if model == 'DOTM':\n", + " return _theta.ModelType.DOTM\n", + " raise ValueError(f'Invalid model type: {model}.')" ] }, { "cell_type": "code", "execution_count": null, - "id": "a3f204a3-15e4-4d37-a571-824f6855c9bd", + "id": "e5d7cb36-af32-4028-9be0-05274c7bcc00", "metadata": {}, "outputs": [], "source": [ @@ -576,209 +418,6 @@ "switch_theta('STM')" ] }, - { - "cell_type": "code", - "execution_count": null, - "id": "ad1e5d88-c650-4780-9926-41b58cf2710b", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def pegelsresid_theta(y: np.ndarray, \n", - " modeltype: str, \n", - " initial_smoothed: float, alpha: float,\n", - " theta: float, \n", - " nmse: int):\n", - " states = np.zeros((len(y), 5), dtype=np.float32)\n", - " e = np.full_like(y, fill_value=np.nan)\n", - " amse = np.full(nmse, fill_value=np.nan)\n", - " mse = thetacalc(y=y, states=states, \n", - " modeltype=switch_theta(modeltype), \n", - " initial_smoothed=initial_smoothed, alpha=alpha, theta=theta, \n", - " e=e, amse=amse, nmse=nmse)\n", - " if not np.isnan(mse):\n", - " if np.abs(mse + 99999) < 1e-7:\n", - " mse = np.nan\n", - " return amse, e, states, mse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ffc4052-c1d0-4350-a55d-1482c164907c", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def theta_target_fn(\n", - " optimal_param,\n", - " init_level,\n", - " init_alpha,\n", - " init_theta,\n", - " opt_level,\n", - " opt_alpha,\n", - " opt_theta,\n", - " y,\n", - " modeltype,\n", - " nmse\n", - " ):\n", - " states = np.zeros((len(y), 5), dtype=np.float32)\n", - " j = 0\n", - " if opt_level:\n", - " level = optimal_param[j]\n", - " j+=1\n", - " else:\n", - " level = init_level\n", - " \n", - " if opt_alpha:\n", - " alpha = optimal_param[j]\n", - " j+=1\n", - " else:\n", - " alpha = init_alpha\n", - " \n", - " if opt_theta:\n", - " theta = optimal_param[j]\n", - " j+=1\n", - " else:\n", - " theta = init_theta\n", - " \n", - " e = np.full_like(y, fill_value=np.nan)\n", - " amse = np.full(nmse, fill_value=np.nan)\n", - " mse = thetacalc(y=y, states=states, \n", - " modeltype=switch_theta(modeltype), \n", - " initial_smoothed=level, alpha=alpha, theta=theta, \n", - " e=e, amse=amse, nmse=nmse)\n", - " if mse < -1e10: \n", - " mse = -1e10 \n", - " if math.isnan(mse): \n", - " mse = -np.inf\n", - " if math.fabs(mse + 99999) < 1e-7: \n", - " mse = -np.inf\n", - " return mse" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "e11e1340-8cf9-4e2e-a76e-bcfb43885622", - "metadata": {}, - "outputs": [], - "source": [ - "#| exporti\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def nelder_mead_theta(\n", - " x0: np.ndarray, \n", - " args: Tuple = (), \n", - " lower: np.ndarray = np.empty(0), \n", - " upper: np.ndarray = np.empty(0), \n", - " init_step: float = 0.05,\n", - " zero_pert: float = 0.0001,\n", - " alpha: float = 1.,\n", - " gamma: float = 2.,\n", - " rho: float = 0.5,\n", - " sigma: float = 0.5,\n", - " max_iter: int = 2_000,\n", - " tol_std: float = 1e-10,\n", - " adaptive: bool = False,\n", - " ):\n", - " #We are trying to minimize the function fn(x, args)\n", - " #with initial point x0.\n", - " #Step 0:\n", - " # get x1, ..., x_{n+1}\n", - " # the original article suggested a simplex where an initial point is given as x0\n", - " # with the others generated with a fixed step along each dimension in turn.\n", - " bounds = len(lower) and len(upper)\n", - " if bounds:\n", - " x0 = restrict_to_bounds(x0, lower, upper)\n", - " \n", - " n = x0.size\n", - " if adaptive:\n", - " gamma = 1. + 2. / n\n", - " rho = 0.75 - 1. / (2. * n)\n", - " sigma = 1. - 1. / n\n", - " simplex = np.full((n + 1, n), fill_value=np.nan, dtype=np.float64) #each row is x_j\n", - " simplex[:] = x0\n", - " # perturb simplex using `init_step`\n", - " diag = np.copy(np.diag(simplex))\n", - " diag[diag == 0.] = zero_pert\n", - " diag[diag != 0.] *= (1 + init_step)\n", - " np.fill_diagonal(simplex, diag)\n", - " # restrict simplex to bounds if passed\n", - " if bounds:\n", - " for j in range(n + 1):\n", - " simplex[j] = restrict_to_bounds(simplex[j], lower, upper)\n", - " # array of the value of f\n", - " f_simplex = np.full(n + 1, fill_value=np.nan)\n", - " for j in range(n + 1):\n", - " f_simplex[j] = theta_target_fn(simplex[j], *args)\n", - " for it in range(max_iter):\n", - " #Step1: order of f_simplex\n", - " #print(simplex)\n", - " #print(f_simplex)\n", - " order_f = f_simplex.argsort()\n", - " best_idx = order_f[0]\n", - " worst_idx = order_f[-1]\n", - " second_worst_idx = order_f[-2]\n", - " #Check whether method should stop.\n", - " if np.std(f_simplex) < tol_std:\n", - " break\n", - " #calculate centroid except argmax f_simplex\n", - " x_o = simplex[np.delete(order_f, -1)].sum(axis=0) / n\n", - " #Step2: Reflection, Compute reflected point\n", - " x_r = x_o + alpha * (x_o - simplex[worst_idx])\n", - " # restrict x_r to bounds if passed\n", - " if bounds:\n", - " x_r = restrict_to_bounds(x_r, lower, upper)\n", - " f_r = theta_target_fn(x_r, *args)\n", - " if f_simplex[best_idx] <= f_r < f_simplex[second_worst_idx]:\n", - " simplex[worst_idx] = x_r\n", - " f_simplex[worst_idx] = f_r\n", - " continue\n", - " #Step3: Expansion, reflected point is the best point so far\n", - " if f_r < f_simplex[best_idx]:\n", - " x_e = x_o + gamma * (x_r - x_o)\n", - " # restrict x_e to bounds if passed\n", - " if bounds:\n", - " x_e = restrict_to_bounds(x_e, lower, upper)\n", - " f_e = theta_target_fn(x_e, *args)\n", - " if f_e < f_r:\n", - " simplex[worst_idx] = x_e\n", - " f_simplex[worst_idx] = f_e\n", - " else:\n", - " simplex[worst_idx] = x_r\n", - " f_simplex[worst_idx] = f_r\n", - " continue\n", - " #Step4: outside Contraction\n", - " if f_simplex[second_worst_idx] <= f_r < f_simplex[worst_idx]:\n", - " x_oc = x_o + rho * (x_r - x_o)\n", - " if bounds:\n", - " x_oc = restrict_to_bounds(x_oc, lower, upper)\n", - " f_oc = theta_target_fn(x_oc, *args)\n", - " if f_oc <= f_r:\n", - " simplex[worst_idx] = x_oc\n", - " f_simplex[worst_idx] = f_oc\n", - " continue\n", - " #step 5 inside contraction\n", - " else:\n", - " x_ic = x_o - rho * (x_r - x_o)\n", - " # restrict x_c to bounds if passed\n", - " if bounds:\n", - " x_ic = restrict_to_bounds(x_ic, lower, upper)\n", - " f_ic = theta_target_fn(x_ic, *args)\n", - " if f_ic < f_simplex[worst_idx]:\n", - " simplex[worst_idx] = x_ic\n", - " f_simplex[worst_idx] = f_ic\n", - " continue\n", - " #step 6: shrink\n", - " simplex[np.delete(order_f, 0)] = simplex[best_idx] + sigma * (simplex[np.delete(order_f, 0)] - simplex[best_idx])\n", - " for i in np.delete(order_f, 0):\n", - " simplex[i] = restrict_to_bounds(simplex[i], lower, upper)\n", - " f_simplex[i] = theta_target_fn(simplex[i], *args)\n", - " return results(simplex[best_idx], f_simplex[best_idx], it + 1, simplex)" - ] - }, { "cell_type": "code", "execution_count": null, @@ -788,14 +427,36 @@ "source": [ "#| exporti\n", "def optimize_theta_target_fn(\n", - " init_par, optimize_params, y, \n", - " modeltype, nmse\n", - " ):\n", - " x0 = [init_par[key] for key, val in optimize_params.items() if val]\n", - " x0 = np.array(x0, dtype=np.float32)\n", - " if not len(x0):\n", + " init_par: dict[str, float],\n", + " optimize_params: dict[str, bool],\n", + " y: np.ndarray, \n", + " modeltype: _theta.ModelType,\n", + " nmse: int,\n", + "):\n", + " x0 = []\n", + " lower = []\n", + " upper = []\n", + " lower_bounds = {\n", + " 'initial_smoothed': -1e10,\n", + " 'alpha': 0.1,\n", + " 'theta': 1.0,\n", + " }\n", + " upper_bounds = {\n", + " 'initial_smoothed': 1e10,\n", + " 'alpha': 0.99,\n", + " 'theta': 1e10,\n", + " }\n", + " for param, optim in optimize_params.items():\n", + " if optim:\n", + " x0.append(init_par[param])\n", + " lower.append(lower_bounds[param])\n", + " upper.append(upper_bounds[param])\n", + " if not x0:\n", " return\n", - " \n", + " x0 = np.array(x0)\n", + " lower = np.array(lower)\n", + " upper = np.array(upper)\n", + "\n", " init_level = init_par['initial_smoothed']\n", " init_alpha = init_par['alpha']\n", " init_theta = init_par['theta']\n", @@ -803,50 +464,22 @@ " opt_level = optimize_params['initial_smoothed']\n", " opt_alpha = optimize_params['alpha']\n", " opt_theta = optimize_params['theta']\n", - " \n", - " res = nelder_mead_theta(\n", - " x0, \n", - " args=(\n", - " init_level,\n", - " init_alpha,\n", - " init_theta,\n", - " opt_level,\n", - " opt_alpha,\n", - " opt_theta,\n", - " y,\n", - " modeltype,\n", - " nmse\n", - " ),\n", - " tol_std=1e-4, \n", - " lower=np.array([-1e10, 0.1, 1.0]),\n", - " upper=np.array([1e10, 0.99, 1e10]),\n", - " max_iter=1_000,\n", - " adaptive=True,\n", + "\n", + " opt_res = _theta.optimize(\n", + " x0,\n", + " lower,\n", + " upper,\n", + " init_level,\n", + " init_alpha,\n", + " init_theta,\n", + " opt_level,\n", + " opt_alpha,\n", + " opt_theta,\n", + " y,\n", + " modeltype,\n", + " nmse\n", " )\n", - " return res" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f9172367-0b40-4e54-9fbb-aaad181d865e", - "metadata": {}, - "outputs": [], - "source": [ - "#| export\n", - "@njit(nogil=NOGIL, cache=CACHE)\n", - "def is_constant(x):\n", - " return np.all(x[0] == x)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6ab84bc3-22be-4489-a140-dcd974313a94", - "metadata": {}, - "outputs": [], - "source": [ - "is_constant(ap)" + " return results(*opt_res, None) # type: ignore" ] }, { @@ -858,26 +491,39 @@ "source": [ "#| exporti\n", "def thetamodel(\n", - " y: np.ndarray, m: int, \n", - " modeltype: str, \n", - " initial_smoothed: float, alpha: float,\n", - " theta: float, nmse: int\n", - " ):\n", + " y: np.ndarray,\n", + " m: int, \n", + " modeltype: str, \n", + " initial_smoothed: float,\n", + " alpha: float,\n", + " theta: float,\n", + " nmse: int\n", + "):\n", + " y = y.astype(np.float64, copy=False)\n", + " model_type = switch_theta(modeltype)\n", " #initial parameters\n", - " par = initparamtheta(initial_smoothed=initial_smoothed, \n", - " alpha=alpha, theta=theta, \n", - " y=y, modeltype=modeltype)\n", + " par = initparamtheta(\n", + " initial_smoothed=initial_smoothed, \n", + " alpha=alpha,\n", + " theta=theta, \n", + " y=y,\n", + " modeltype=model_type,\n", + " )\n", " optimize_params = {key.replace('optimize_', ''): val for key, val in par.items() if 'optim' in key}\n", " par = {key: val for key, val in par.items() if 'optim' not in key}\n", " # parameter optimization\n", " fred = optimize_theta_target_fn(\n", - " init_par=par, optimize_params=optimize_params, y=y, \n", - " modeltype=modeltype, nmse=nmse\n", + " init_par=par,\n", + " optimize_params=optimize_params,\n", + " y=y, \n", + " modeltype=model_type,\n", + " nmse=nmse,\n", " )\n", " if fred is not None:\n", " fit_par = fred.x\n", " j = 0\n", " if optimize_params['initial_smoothed']:\n", + " par['initial_smoothed'] = fit_par[j]\n", " j += 1\n", " if optimize_params['alpha']:\n", " par['alpha'] = fit_par[j]\n", @@ -886,11 +532,15 @@ " par['theta'] = fit_par[j]\n", " j += 1\n", " \n", - " amse, e, states, mse = pegelsresid_theta(\n", - " y=y, modeltype=modeltype,\n", - " nmse=nmse, **par\n", + " amse, e, states, mse = _theta.pegels_resid(\n", + " y,\n", + " model_type,\n", + " par['initial_smoothed'],\n", + " par['alpha'],\n", + " par['theta'],\n", + " nmse,\n", " )\n", - " \n", + "\n", " return dict(mse=mse, amse=amse, fit=fred, residuals=e,\n", " m=m, states=states, par=par, n=len(y), \n", " modeltype=modeltype, mean_y=np.mean(y))" @@ -905,7 +555,9 @@ "source": [ "#| hide\n", "res = thetamodel(\n", - " y=ap, m=12, modeltype='STM',\n", + " y=ap,\n", + " m=12,\n", + " modeltype='STM',\n", " initial_smoothed=np.nan,\n", " alpha=np.nan,\n", " theta=np.nan, \n", @@ -943,19 +595,23 @@ "metadata": {}, "outputs": [], "source": [ - "#| exporti\n", + "#| export\n", "def forecast_theta(obj, h, level=None):\n", - " forecast = np.full(h, fill_value=np.nan)\n", + " forecast = np.empty(h)\n", " n = obj['n']\n", " states = obj['states']\n", " alpha=obj['par']['alpha']\n", " theta=obj['par']['theta']\n", - " thetaforecast(\n", - " states=states, n=n, modeltype=switch_theta(obj['modeltype']), \n", - " h=h, f=forecast, alpha=alpha, theta=theta\n", + " _theta.forecast(\n", + " states,\n", + " n,\n", + " switch_theta(obj['modeltype']), \n", + " forecast,\n", + " alpha,\n", + " theta,\n", " )\n", " res = {'mean': forecast}\n", - " \n", + "\n", " if level is not None:\n", " sigma = np.std(obj['residuals'][3:], ddof=1)\n", " mean_y = obj['mean_y']\n", @@ -966,7 +622,7 @@ " max_q = min_q + lv / 100\n", " res[f'lo-{lv}'] = np.quantile(samples, min_q, axis=1)\n", " res[f'hi-{lv}'] = np.quantile(samples, max_q, axis=1)\n", - " \n", + "\n", " if obj.get('decompose', False):\n", " seas_forecast = _repeat_val_seas(obj['seas_forecast']['mean'], h=h)\n", " for key in res:\n", @@ -994,14 +650,17 @@ "metadata": {}, "outputs": [], "source": [ - "#| exporti\n", + "#| export\n", "def auto_theta(\n", - " y, m, model=None, \n", - " initial_smoothed=None, alpha=None, \n", - " theta=None,\n", - " nmse=3,\n", - " decomposition_type='multiplicative'\n", - " ):\n", + " y,\n", + " m,\n", + " model=None, \n", + " initial_smoothed=None,\n", + " alpha=None, \n", + " theta=None,\n", + " nmse=3,\n", + " decomposition_type='multiplicative'\n", + "):\n", " # converting params to floats \n", " # to improve numba compilation\n", " if initial_smoothed is None:\n", @@ -1041,7 +700,7 @@ " \n", " # validate model\n", " if model not in [None, 'STM', 'OTM', 'DSTM', 'DOTM']:\n", - " raise ValueError('Invalid model type')\n", + " raise ValueError(f'Invalid model type: {model}.')\n", "\n", " n = len(y)\n", " npars = 3 \n", @@ -1064,7 +723,7 @@ " best_ic = fit_ic\n", " if np.isinf(best_ic):\n", " raise Exception('no model able to be fitted')\n", - " \n", + "\n", " if decompose:\n", " if decomposition_type == 'multiplicative':\n", " model['residuals'] = model['residuals'] * y_decompose\n", @@ -1395,7 +1054,7 @@ "metadata": {}, "outputs": [], "source": [ - "#| exporti\n", + "#| export\n", "def forward_theta(fitted_model, y):\n", " m = fitted_model['m']\n", " model = fitted_model['modeltype']\n", @@ -1417,7 +1076,7 @@ "source": [ "#| hide\n", "res = auto_theta(ap, m=12)\n", - "test_eq(\n", + "np.testing.assert_allclose(\n", " forecast_theta(forward_theta(res, ap), h=12)['mean'],\n", " forecast_theta(res, h=12)['mean']\n", ")\n", @@ -1425,7 +1084,7 @@ "forecast_theta(forward_theta(res, inttermitent_series), h=12, level=[80,90])\n", "res_transfer = forward_theta(res, inttermitent_series)\n", "for key in res_transfer['par']:\n", - " test_eq(res['par'][key], res_transfer['par'][key])" + " assert res['par'][key] == res_transfer['par'][key]" ] } ], diff --git a/python/statsforecast/_modidx.py b/python/statsforecast/_modidx.py index 857925a4f..d51e60e23 100644 --- a/python/statsforecast/_modidx.py +++ b/python/statsforecast/_modidx.py @@ -763,21 +763,10 @@ 'statsforecast.theta.forecast_theta': ('src/theta.html#forecast_theta', 'statsforecast/theta.py'), 'statsforecast.theta.forward_theta': ('src/theta.html#forward_theta', 'statsforecast/theta.py'), 'statsforecast.theta.initparamtheta': ('src/theta.html#initparamtheta', 'statsforecast/theta.py'), - 'statsforecast.theta.initstate': ('src/theta.html#initstate', 'statsforecast/theta.py'), - 'statsforecast.theta.is_constant': ('src/theta.html#is_constant', 'statsforecast/theta.py'), - 'statsforecast.theta.nelder_mead_theta': ( 'src/theta.html#nelder_mead_theta', - 'statsforecast/theta.py'), 'statsforecast.theta.optimize_theta_target_fn': ( 'src/theta.html#optimize_theta_target_fn', 'statsforecast/theta.py'), - 'statsforecast.theta.pegelsresid_theta': ( 'src/theta.html#pegelsresid_theta', - 'statsforecast/theta.py'), 'statsforecast.theta.switch_theta': ('src/theta.html#switch_theta', 'statsforecast/theta.py'), - 'statsforecast.theta.theta_target_fn': ('src/theta.html#theta_target_fn', 'statsforecast/theta.py'), - 'statsforecast.theta.thetacalc': ('src/theta.html#thetacalc', 'statsforecast/theta.py'), - 'statsforecast.theta.thetafcst': ('src/theta.html#thetafcst', 'statsforecast/theta.py'), - 'statsforecast.theta.thetaforecast': ('src/theta.html#thetaforecast', 'statsforecast/theta.py'), - 'statsforecast.theta.thetamodel': ('src/theta.html#thetamodel', 'statsforecast/theta.py'), - 'statsforecast.theta.thetaupdate': ('src/theta.html#thetaupdate', 'statsforecast/theta.py')}, + 'statsforecast.theta.thetamodel': ('src/theta.html#thetamodel', 'statsforecast/theta.py')}, 'statsforecast.utils': { 'statsforecast.utils.ConformalIntervals': ( 'src/utils.html#conformalintervals', 'statsforecast/utils.py'), 'statsforecast.utils.ConformalIntervals.__init__': ( 'src/utils.html#conformalintervals.__init__', diff --git a/python/statsforecast/theta.py b/python/statsforecast/theta.py index 9999ae6ef..545ed89b7 100644 --- a/python/statsforecast/theta.py +++ b/python/statsforecast/theta.py @@ -1,220 +1,57 @@ # AUTOGENERATED! DO NOT EDIT! File to edit: ../../nbs/src/theta.ipynb. # %% auto 0 -__all__ = ['theta_target_fn', 'is_constant'] +__all__ = ['forecast_theta', 'auto_theta', 'forward_theta'] # %% ../../nbs/src/theta.ipynb 1 import math -from typing import Tuple import numpy as np -from numba import njit from scipy.stats import norm from statsmodels.tsa.seasonal import seasonal_decompose from statsmodels.tsa.stattools import acf -from statsforecast.utils import ( - CACHE, - NOGIL, - _repeat_val_seas, - _seasonal_naive, - restrict_to_bounds, - results, -) +from ._lib import theta as _theta +from .arima import is_constant +from .utils import _repeat_val_seas, _seasonal_naive, results -# %% ../../nbs/src/theta.ipynb 4 -# Global variables -STM = 0 -OTM = 1 -DSTM = 2 -DOTM = 3 -TOL = 1.0e-10 -HUGEN = 1.0e10 -NA = -99999.0 -smalno = np.finfo(float).eps - -# %% ../../nbs/src/theta.ipynb 6 -@njit(nogil=NOGIL, cache=CACHE) -def initstate(y, modeltype, initial_smoothed, alpha, theta): - states = np.zeros((1, 5), dtype=np.float32) - states[0, 0] = alpha * y[0] + (1 - alpha) * initial_smoothed # level - states[0, 1] = y[0] # mean y - if modeltype in [DSTM, DOTM]: - # dynamic models - states[0, 2] = y[0] # An - states[0, 3] = 0 # Bn - states[0, 4] = y[0] # mu - else: - # nodynamic models - n = len(y) - Bn = ( - 6 - * (2 * np.mean(np.arange(1, n + 1) * y) - (1 + n) * np.mean(y)) - / (n**2 - 1) - ) - An = np.mean(y) - (n + 1) * Bn / 2 - states[0, 2] = An - states[0, 3] = Bn - states[0, 4] = initial_smoothed + (1 - 1 / theta) * (An + Bn) - - return states - -# %% ../../nbs/src/theta.ipynb 8 -@njit(nogil=NOGIL, cache=CACHE) -def thetacalc( - y: np.ndarray, - states: np.ndarray, # states - modeltype: int, +# %% ../../nbs/src/theta.ipynb 9 +def initparamtheta( initial_smoothed: float, alpha: float, theta: float, - e: np.ndarray, - amse: np.ndarray, - nmse: int, -) -> float: - denom = np.zeros(nmse) - f = np.zeros(nmse) - # update first state - states[0, :] = initstate( - y=y, - modeltype=modeltype, - initial_smoothed=initial_smoothed, - alpha=alpha, - theta=theta, - ) - - amse[:nmse] = 0.0 - e[0] = y[0] - states[0, 4] - n = len(y) - for i in range(1, n): - # one step forecast - thetafcst( - states=states, - i=i, - modeltype=modeltype, - f=f, - h=nmse, - alpha=alpha, - theta=theta, - ) - if math.fabs(f[0] - NA) < TOL: - mse = NA - return mse - e[i] = y[i] - f[0] - for j in range(nmse): - if (i + j) < n: - denom[j] += 1.0 - tmp = y[i + j] - f[j] - amse[j] = (amse[j] * (denom[j] - 1.0) + (tmp * tmp)) / denom[j] - # update state - thetaupdate( - states=states, - i=i, - modeltype=modeltype, - alpha=alpha, - theta=theta, - y=y[i], - usemu=0, - ) - mean_y = np.mean(np.abs(y)) - if math.fabs(mean_y - 0.0) < TOL: - mean_y = TOL - mse = np.sum(e[3:] ** 2) / mean_y - return mse - -# %% ../../nbs/src/theta.ipynb 9 -@njit(nogil=NOGIL, cache=CACHE) -def thetafcst(states, i, modeltype, f, h, alpha, theta): - # obs: - # forecast are obtained in a recursive manner - # this is not standard, for example in ets - # forecasts - new_states = np.zeros((i + h, states.shape[1]), dtype=np.float32) - new_states[:i] = states[:i] - for i_h in range(h): - thetaupdate( - states=new_states, - i=i + i_h, - modeltype=modeltype, - alpha=alpha, - theta=theta, - y=0, - usemu=1, - ) - f[i_h] = new_states[i + i_h, 4] # mu is the forecast - -# %% ../../nbs/src/theta.ipynb 10 -@njit(nogil=NOGIL, cache=CACHE) -def thetaupdate(states, i, modeltype, alpha, theta, y, usemu): # kind of model - # states - # level, meany, An, Bn, mu - # get params - level = states[i - 1, 0] - meany = states[i - 1, 1] - An = states[i - 1, 2] - Bn = states[i - 1, 3] - # update mu - states[i, 4] = level + (1 - 1 / theta) * ( - An * ((1 - alpha) ** i) + Bn * (1 - (1 - alpha) ** (i + 1)) / alpha - ) - if usemu: - y = states[i, 4] - # update level - states[i, 0] = alpha * y + (1 - alpha) * level - # update meany - states[i, 1] = (i * meany + y) / (i + 1) - # update Bn and An - if modeltype in [DSTM, DOTM]: - # dynamic models - states[i, 3] = ((i - 1) * Bn + 6 * (y - meany) / (i + 1)) / (i + 2) - states[i, 2] = states[i, 1] - states[i, 3] * (i + 2) / 2 - else: - states[i, 2] = An - states[i, 3] = Bn - -# %% ../../nbs/src/theta.ipynb 11 -@njit(nogil=NOGIL, cache=CACHE) -def thetaforecast(states, n, modeltype, f, h, alpha, theta): - # compute forecasts - new_states = thetafcst( - states=states, i=n, modeltype=modeltype, f=f, h=h, alpha=alpha, theta=theta - ) - return new_states - -# %% ../../nbs/src/theta.ipynb 16 -@njit(nogil=NOGIL, cache=CACHE) -def initparamtheta( - initial_smoothed: float, alpha: float, theta: float, y: np.ndarray, modeltype: str + y: np.ndarray, + modeltype: _theta.ModelType, ): - if modeltype in ["STM", "DSTM"]: - if np.isnan(initial_smoothed): + if modeltype in [_theta.ModelType.STM, _theta.ModelType.DSTM]: + if math.isnan(initial_smoothed): initial_smoothed = y[0] / 2 - optimize_level = 1 + optimize_level = True else: - optimize_level = 0 - if np.isnan(alpha): + optimize_level = False + if math.isnan(alpha): alpha = 0.5 - optimize_alpha = 1 + optimize_alpha = True else: - optimize_alpha = 0 + optimize_alpha = False theta = 2.0 # no optimize - optimize_theta = 0 - elif modeltype in ["OTM", "DOTM"]: - if np.isnan(initial_smoothed): + optimize_theta = False + else: + if math.isnan(initial_smoothed): initial_smoothed = y[0] / 2 - optimize_level = 1 + optimize_level = True else: - optimize_level = 0 - if np.isnan(alpha): + optimize_level = False + if math.isnan(alpha): alpha = 0.5 - optimize_alpha = 1 + optimize_alpha = True else: - optimize_alpha = 0 - if np.isnan(theta): + optimize_alpha = False + if math.isnan(theta): theta = 2.0 - optimize_theta = 1 + optimize_theta = True else: - optimize_theta = 0 + optimize_theta = False return { "initial_smoothed": initial_smoothed, "optimize_initial_smoothed": optimize_level, @@ -224,217 +61,49 @@ def initparamtheta( "optimize_theta": optimize_theta, } -# %% ../../nbs/src/theta.ipynb 18 -@njit(nogil=NOGIL, cache=CACHE) -def switch_theta(x: str): - return {"STM": 0, "OTM": 1, "DSTM": 2, "DOTM": 3}[x] - -# %% ../../nbs/src/theta.ipynb 20 -@njit(nogil=NOGIL, cache=CACHE) -def pegelsresid_theta( +# %% ../../nbs/src/theta.ipynb 11 +def switch_theta(model: str) -> _theta.ModelType: + if model == "STM": + return _theta.ModelType.STM + if model == "OTM": + return _theta.ModelType.OTM + if model == "DSTM": + return _theta.ModelType.DSTM + if model == "DOTM": + return _theta.ModelType.DOTM + raise ValueError(f"Invalid model type: {model}.") + +# %% ../../nbs/src/theta.ipynb 13 +def optimize_theta_target_fn( + init_par: dict[str, float], + optimize_params: dict[str, bool], y: np.ndarray, - modeltype: str, - initial_smoothed: float, - alpha: float, - theta: float, + modeltype: _theta.ModelType, nmse: int, ): - states = np.zeros((len(y), 5), dtype=np.float32) - e = np.full_like(y, fill_value=np.nan) - amse = np.full(nmse, fill_value=np.nan) - mse = thetacalc( - y=y, - states=states, - modeltype=switch_theta(modeltype), - initial_smoothed=initial_smoothed, - alpha=alpha, - theta=theta, - e=e, - amse=amse, - nmse=nmse, - ) - if not np.isnan(mse): - if np.abs(mse + 99999) < 1e-7: - mse = np.nan - return amse, e, states, mse - -# %% ../../nbs/src/theta.ipynb 21 -@njit(nogil=NOGIL, cache=CACHE) -def theta_target_fn( - optimal_param, - init_level, - init_alpha, - init_theta, - opt_level, - opt_alpha, - opt_theta, - y, - modeltype, - nmse, -): - states = np.zeros((len(y), 5), dtype=np.float32) - j = 0 - if opt_level: - level = optimal_param[j] - j += 1 - else: - level = init_level - - if opt_alpha: - alpha = optimal_param[j] - j += 1 - else: - alpha = init_alpha - - if opt_theta: - theta = optimal_param[j] - j += 1 - else: - theta = init_theta - - e = np.full_like(y, fill_value=np.nan) - amse = np.full(nmse, fill_value=np.nan) - mse = thetacalc( - y=y, - states=states, - modeltype=switch_theta(modeltype), - initial_smoothed=level, - alpha=alpha, - theta=theta, - e=e, - amse=amse, - nmse=nmse, - ) - if mse < -1e10: - mse = -1e10 - if math.isnan(mse): - mse = -np.inf - if math.fabs(mse + 99999) < 1e-7: - mse = -np.inf - return mse - -# %% ../../nbs/src/theta.ipynb 22 -@njit(nogil=NOGIL, cache=CACHE) -def nelder_mead_theta( - x0: np.ndarray, - args: Tuple = (), - lower: np.ndarray = np.empty(0), - upper: np.ndarray = np.empty(0), - init_step: float = 0.05, - zero_pert: float = 0.0001, - alpha: float = 1.0, - gamma: float = 2.0, - rho: float = 0.5, - sigma: float = 0.5, - max_iter: int = 2_000, - tol_std: float = 1e-10, - adaptive: bool = False, -): - # We are trying to minimize the function fn(x, args) - # with initial point x0. - # Step 0: - # get x1, ..., x_{n+1} - # the original article suggested a simplex where an initial point is given as x0 - # with the others generated with a fixed step along each dimension in turn. - bounds = len(lower) and len(upper) - if bounds: - x0 = restrict_to_bounds(x0, lower, upper) - - n = x0.size - if adaptive: - gamma = 1.0 + 2.0 / n - rho = 0.75 - 1.0 / (2.0 * n) - sigma = 1.0 - 1.0 / n - simplex = np.full( - (n + 1, n), fill_value=np.nan, dtype=np.float64 - ) # each row is x_j - simplex[:] = x0 - # perturb simplex using `init_step` - diag = np.copy(np.diag(simplex)) - diag[diag == 0.0] = zero_pert - diag[diag != 0.0] *= 1 + init_step - np.fill_diagonal(simplex, diag) - # restrict simplex to bounds if passed - if bounds: - for j in range(n + 1): - simplex[j] = restrict_to_bounds(simplex[j], lower, upper) - # array of the value of f - f_simplex = np.full(n + 1, fill_value=np.nan) - for j in range(n + 1): - f_simplex[j] = theta_target_fn(simplex[j], *args) - for it in range(max_iter): - # Step1: order of f_simplex - # print(simplex) - # print(f_simplex) - order_f = f_simplex.argsort() - best_idx = order_f[0] - worst_idx = order_f[-1] - second_worst_idx = order_f[-2] - # Check whether method should stop. - if np.std(f_simplex) < tol_std: - break - # calculate centroid except argmax f_simplex - x_o = simplex[np.delete(order_f, -1)].sum(axis=0) / n - # Step2: Reflection, Compute reflected point - x_r = x_o + alpha * (x_o - simplex[worst_idx]) - # restrict x_r to bounds if passed - if bounds: - x_r = restrict_to_bounds(x_r, lower, upper) - f_r = theta_target_fn(x_r, *args) - if f_simplex[best_idx] <= f_r < f_simplex[second_worst_idx]: - simplex[worst_idx] = x_r - f_simplex[worst_idx] = f_r - continue - # Step3: Expansion, reflected point is the best point so far - if f_r < f_simplex[best_idx]: - x_e = x_o + gamma * (x_r - x_o) - # restrict x_e to bounds if passed - if bounds: - x_e = restrict_to_bounds(x_e, lower, upper) - f_e = theta_target_fn(x_e, *args) - if f_e < f_r: - simplex[worst_idx] = x_e - f_simplex[worst_idx] = f_e - else: - simplex[worst_idx] = x_r - f_simplex[worst_idx] = f_r - continue - # Step4: outside Contraction - if f_simplex[second_worst_idx] <= f_r < f_simplex[worst_idx]: - x_oc = x_o + rho * (x_r - x_o) - if bounds: - x_oc = restrict_to_bounds(x_oc, lower, upper) - f_oc = theta_target_fn(x_oc, *args) - if f_oc <= f_r: - simplex[worst_idx] = x_oc - f_simplex[worst_idx] = f_oc - continue - # step 5 inside contraction - else: - x_ic = x_o - rho * (x_r - x_o) - # restrict x_c to bounds if passed - if bounds: - x_ic = restrict_to_bounds(x_ic, lower, upper) - f_ic = theta_target_fn(x_ic, *args) - if f_ic < f_simplex[worst_idx]: - simplex[worst_idx] = x_ic - f_simplex[worst_idx] = f_ic - continue - # step 6: shrink - simplex[np.delete(order_f, 0)] = simplex[best_idx] + sigma * ( - simplex[np.delete(order_f, 0)] - simplex[best_idx] - ) - for i in np.delete(order_f, 0): - simplex[i] = restrict_to_bounds(simplex[i], lower, upper) - f_simplex[i] = theta_target_fn(simplex[i], *args) - return results(simplex[best_idx], f_simplex[best_idx], it + 1, simplex) - -# %% ../../nbs/src/theta.ipynb 23 -def optimize_theta_target_fn(init_par, optimize_params, y, modeltype, nmse): - x0 = [init_par[key] for key, val in optimize_params.items() if val] - x0 = np.array(x0, dtype=np.float32) - if not len(x0): + x0 = [] + lower = [] + upper = [] + lower_bounds = { + "initial_smoothed": -1e10, + "alpha": 0.1, + "theta": 1.0, + } + upper_bounds = { + "initial_smoothed": 1e10, + "alpha": 0.99, + "theta": 1e10, + } + for param, optim in optimize_params.items(): + if optim: + x0.append(init_par[param]) + lower.append(lower_bounds[param]) + upper.append(upper_bounds[param]) + if not x0: return + x0 = np.array(x0) + lower = np.array(lower) + upper = np.array(upper) init_level = init_par["initial_smoothed"] init_alpha = init_par["alpha"] @@ -444,33 +113,23 @@ def optimize_theta_target_fn(init_par, optimize_params, y, modeltype, nmse): opt_alpha = optimize_params["alpha"] opt_theta = optimize_params["theta"] - res = nelder_mead_theta( + opt_res = _theta.optimize( x0, - args=( - init_level, - init_alpha, - init_theta, - opt_level, - opt_alpha, - opt_theta, - y, - modeltype, - nmse, - ), - tol_std=1e-4, - lower=np.array([-1e10, 0.1, 1.0]), - upper=np.array([1e10, 0.99, 1e10]), - max_iter=1_000, - adaptive=True, + lower, + upper, + init_level, + init_alpha, + init_theta, + opt_level, + opt_alpha, + opt_theta, + y, + modeltype, + nmse, ) - return res - -# %% ../../nbs/src/theta.ipynb 24 -@njit(nogil=NOGIL, cache=CACHE) -def is_constant(x): - return np.all(x[0] == x) + return results(*opt_res, None) # type: ignore -# %% ../../nbs/src/theta.ipynb 26 +# %% ../../nbs/src/theta.ipynb 14 def thetamodel( y: np.ndarray, m: int, @@ -480,13 +139,15 @@ def thetamodel( theta: float, nmse: int, ): + y = y.astype(np.float64, copy=False) + model_type = switch_theta(modeltype) # initial parameters par = initparamtheta( initial_smoothed=initial_smoothed, alpha=alpha, theta=theta, y=y, - modeltype=modeltype, + modeltype=model_type, ) optimize_params = { key.replace("optimize_", ""): val for key, val in par.items() if "optim" in key @@ -497,13 +158,14 @@ def thetamodel( init_par=par, optimize_params=optimize_params, y=y, - modeltype=modeltype, + modeltype=model_type, nmse=nmse, ) if fred is not None: fit_par = fred.x j = 0 if optimize_params["initial_smoothed"]: + par["initial_smoothed"] = fit_par[j] j += 1 if optimize_params["alpha"]: par["alpha"] = fit_par[j] @@ -512,7 +174,14 @@ def thetamodel( par["theta"] = fit_par[j] j += 1 - amse, e, states, mse = pegelsresid_theta(y=y, modeltype=modeltype, nmse=nmse, **par) + amse, e, states, mse = _theta.pegels_resid( + y, + model_type, + par["initial_smoothed"], + par["alpha"], + par["theta"], + nmse, + ) return dict( mse=mse, @@ -527,7 +196,7 @@ def thetamodel( mean_y=np.mean(y), ) -# %% ../../nbs/src/theta.ipynb 28 +# %% ../../nbs/src/theta.ipynb 16 def compute_pi_samples( n, h, states, sigma, alpha, theta, mean_y, seed=0, n_samples=200 ): @@ -546,21 +215,20 @@ def compute_pi_samples( A = mean_y - B * (i + 2) / 2 return samples -# %% ../../nbs/src/theta.ipynb 29 +# %% ../../nbs/src/theta.ipynb 17 def forecast_theta(obj, h, level=None): - forecast = np.full(h, fill_value=np.nan) + forecast = np.empty(h) n = obj["n"] states = obj["states"] alpha = obj["par"]["alpha"] theta = obj["par"]["theta"] - thetaforecast( - states=states, - n=n, - modeltype=switch_theta(obj["modeltype"]), - h=h, - f=forecast, - alpha=alpha, - theta=theta, + _theta.forecast( + states, + n, + switch_theta(obj["modeltype"]), + forecast, + alpha, + theta, ) res = {"mean": forecast} @@ -591,7 +259,7 @@ def forecast_theta(obj, h, level=None): res[key] = res[key] + seas_forecast return res -# %% ../../nbs/src/theta.ipynb 31 +# %% ../../nbs/src/theta.ipynb 19 def auto_theta( y, m, @@ -650,7 +318,7 @@ def auto_theta( # validate model if model not in [None, "STM", "OTM", "DSTM", "DOTM"]: - raise ValueError("Invalid model type") + raise ValueError(f"Invalid model type: {model}.") n = len(y) npars = 3 @@ -691,7 +359,7 @@ def auto_theta( model["seas_forecast"] = dict(seas_forecast) return model -# %% ../../nbs/src/theta.ipynb 41 +# %% ../../nbs/src/theta.ipynb 29 def forward_theta(fitted_model, y): m = fitted_model["m"] model = fitted_model["modeltype"] diff --git a/src/statsforecast.cpp b/src/statsforecast.cpp index edbbed6b5..1fd5b1003 100644 --- a/src/statsforecast.cpp +++ b/src/statsforecast.cpp @@ -2,18 +2,20 @@ namespace py = pybind11; -namespace ets -{ - void init(py::module_ &); +namespace ets { +void init(py::module_ &); } -namespace arima -{ - void init(py::module_ &); +namespace arima { +void init(py::module_ &); } -PYBIND11_MODULE(_lib, m) -{ +namespace theta { +void init(py::module_ &); +} + +PYBIND11_MODULE(_lib, m) { arima::init(m); ets::init(m); + theta::init(m); } diff --git a/src/theta.cpp b/src/theta.cpp new file mode 100644 index 000000000..cca71a3f8 --- /dev/null +++ b/src/theta.cpp @@ -0,0 +1,195 @@ +#include + +#include +#include +#include + +#include "nelder_mead.h" + +namespace theta { +namespace py = pybind11; +using Eigen::VectorXd; +using RowMajorMatrixXd = + Eigen::Matrix; + +enum class ModelType { STM, OTM, DSTM, DOTM }; +constexpr double HUGE_N = 1e10; +constexpr double NA = -99999.0; +constexpr double TOL = 1e-10; + +Eigen::Vector init_state(const Eigen::Ref &y, + ModelType model_type, + double initial_smoothed, double alpha, + double theta) { + double An, Bn, mu; + if (model_type == ModelType::DSTM || model_type == ModelType::DOTM) { + An = y[0]; + Bn = double{}; + mu = y[0]; + } else { + size_t n = y.size(); + double y_mean = y.array().mean(); + double weighted_avg = y.dot(VectorXd::LinSpaced(y.size(), 1, y.size())) / n; + Bn = (6 * (2 * weighted_avg - (n + 1) * y_mean)) / (n * n - 1); + An = y_mean - (n + 1) * Bn / 2; + mu = initial_smoothed + (1 - 1 / theta) * (An + Bn); + } + return {alpha * y[0] + (1 - alpha) * initial_smoothed, y[0], An, Bn, mu}; +} + +void update(Eigen::Ref states, size_t i, ModelType model_type, + double alpha, double theta, double y, bool usemu) { + double level = states(i - 1, 0); + double meany = states(i - 1, 1); + double An = states(i - 1, 2); + double Bn = states(i - 1, 3); + states(i, 4) = + level + (1 - 1 / theta) * (An * std::pow(1 - alpha, i) + + Bn * (1 - std::pow(1 - alpha, i + 1)) / alpha); + if (usemu) { + y = states(i, 4); + } + states(i, 0) = alpha * y + (1 - alpha) * level; + states(i, 1) = (i * meany + y) / (i + 1); + if (model_type == ModelType::DSTM || model_type == ModelType::DOTM) { + states(i, 3) = ((i - 1) * Bn + 6 * (y - meany) / (i + 1)) / (i + 2); + states(i, 2) = states(i, 1) - states(i, 3) * (i + 2) / 2; + } else { + states(i, 2) = An; + states(i, 3) = Bn; + } +} + +void forecast(const Eigen::Ref &states, size_t i, + ModelType model_type, Eigen::Ref f, double alpha, + double theta) { + size_t h = f.size(); + RowMajorMatrixXd new_states = RowMajorMatrixXd::Zero(i + h, states.cols()); + std::copy(states.data(), states.data() + i * states.cols(), + new_states.data()); + for (size_t j = 0; j < h; ++j) { + update(new_states, i + j, model_type, alpha, theta, double{}, true); + f[j] = new_states(i + j, 4); + } +} + +double calc(const Eigen::Ref &y, + Eigen::Ref states, ModelType model_type, + double initial_smoothed, double alpha, double theta, + Eigen::Ref e, Eigen::Ref amse, size_t nmse) { + VectorXd denom = VectorXd::Zero(nmse); + VectorXd f = VectorXd::Zero(nmse); + auto init_states = init_state(y, model_type, initial_smoothed, alpha, theta); + std::ranges::copy(init_states, states.row(0).begin()); + std::fill_n(amse.begin(), nmse, double{}); + e[0] = y[0] - states(0, 4); + size_t n = y.size(); + for (size_t i = 1; i < n; ++i) { + forecast(states, i, model_type, f, alpha, theta); + if (std::abs(f[0] - NA) < TOL) { + return NA; + } + e[i] = y[i] - f[0]; + for (size_t j = 0; j < nmse; ++j) { + if (i + j < n) { + denom[j] += 1.0; + double tmp = y[i + j] - f[j]; + amse[j] = (amse[j] * (denom[j] - 1.0) + tmp * tmp) / denom[j]; + } + } + update(states, i, model_type, alpha, theta, y[i], false); + } + double mean_y = y.array().abs().mean(); + if (mean_y < TOL) { + mean_y = TOL; + } + return e.tail(e.size() - 3).array().square().sum() / mean_y; +} + +std::tuple +pegels_resid(const Eigen::Ref &y, ModelType model_type, + double initial_smoothed, double alpha, double theta, size_t nmse) { + RowMajorMatrixXd states = RowMajorMatrixXd::Zero(y.size(), 5); + VectorXd e = VectorXd::Zero(y.size()); + VectorXd amse = VectorXd::Zero(nmse); + double mse = calc(y, states, model_type, initial_smoothed, alpha, theta, e, + amse, nmse); + if (!std::isnan(mse) && std::abs(mse + 99999) < 1e-7) { + mse = std::numeric_limits::quiet_NaN(); + } + return {amse, e, states, mse}; +} + +double theta_target_fn(const VectorXd ¶ms, double init_level, + double init_alpha, double init_theta, bool opt_level, + bool opt_alpha, bool opt_theta, const VectorXd &y, + ModelType model_type, size_t nmse) { + RowMajorMatrixXd states = RowMajorMatrixXd::Zero(y.size(), 5); + size_t j = 0; + double level, alpha, theta; + if (opt_level) { + level = params[j++]; + } else { + level = init_level; + } + if (opt_alpha) { + alpha = params[j++]; + } else { + alpha = init_alpha; + } + if (opt_theta) { + theta = params[j++]; + } else { + theta = init_theta; + } + VectorXd e = VectorXd::Zero(y.size()); + VectorXd amse = VectorXd::Zero(nmse); + double mse = calc(y, states, model_type, level, alpha, theta, e, amse, nmse); + mse = std::max(mse, -1e10); + if (std::isnan(mse) || std::abs(mse + 99999) < 1e-7) { + mse = -std::numeric_limits::infinity(); + } + return mse; +} + +std::tuple +optimize(const Eigen::Ref &x0, + const Eigen::Ref &lower, + const Eigen::Ref &upper, double init_level, + double init_alpha, double init_theta, bool opt_level, bool opt_alpha, + bool opt_theta, const Eigen::Ref &y, + ModelType model_type, size_t nmse) { + double init_step = 0.05; + double zero_pert = 1e-4; + double alpha = 1.0; + double gamma = 2.0; + double rho = 0.5; + double sigma = 0.5; + int max_iter = 1'000; + double tol_std = 1e-4; + bool adaptive = true; + return nm::NelderMead(theta_target_fn, x0, lower, upper, init_step, zero_pert, + alpha, gamma, rho, sigma, max_iter, tol_std, adaptive, + init_level, init_alpha, init_theta, opt_level, + opt_alpha, opt_theta, y, model_type, nmse); +} + +void init(py::module_ &m) { + py::module_ theta = m.def_submodule("theta"); + theta.attr("HUGE_N") = HUGE_N; + theta.attr("NA") = NA; + theta.attr("TOL") = TOL; + py::enum_(theta, "ModelType") + .value("STM", ModelType::STM) + .value("OTM", ModelType::OTM) + .value("DSTM", ModelType::DSTM) + .value("DOTM", ModelType::DOTM); + theta.def("init_state", &init_state); + theta.def("calc", &calc); + theta.def("forecast", &forecast); + theta.def("update", &update); + theta.def("optimize", &optimize); + theta.def("pegels_resid", &pegels_resid); +} + +} // namespace theta