From 2d56e71fedb0451fd1aaebc56c353daad04594eb Mon Sep 17 00:00:00 2001 From: Cuong Duong Date: Wed, 3 Mar 2021 03:30:00 +1100 Subject: [PATCH] Add regressor_coefficients function for R (#1803) * function code * add tests for regressor coefficients utility * add documentation for regressor_coefficients util function * generate Rd docs * add regressor_coefficients to R namespace * minor formatting nit * fix bugs --- R/NAMESPACE | 1 + R/R/utilities.R | 67 +++++++++++++++++++ R/man/regressor_coefficients.Rd | 36 ++++++++++ R/tests/testthat/test_utilities.R | 48 +++++++++++++ ...nality,_holiday_effects,_and_regressors.md | 51 +++++++------- ...ity,_holiday_effects,_and_regressors.ipynb | 8 ++- 6 files changed, 185 insertions(+), 26 deletions(-) create mode 100644 R/R/utilities.R create mode 100644 R/man/regressor_coefficients.Rd create mode 100644 R/tests/testthat/test_utilities.R diff --git a/R/NAMESPACE b/R/NAMESPACE index 2597f77e2..fa2b6a3c6 100644 --- a/R/NAMESPACE +++ b/R/NAMESPACE @@ -16,6 +16,7 @@ export(plot_forecast_component) export(predictive_samples) export(prophet) export(prophet_plot_components) +export(regressor_coefficients) import(Rcpp) import(RcppParallel, except = LdFlags) import(rlang) diff --git a/R/R/utilities.R b/R/R/utilities.R new file mode 100644 index 000000000..1c66d3afb --- /dev/null +++ b/R/R/utilities.R @@ -0,0 +1,67 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +#' Summarise the coefficients of the extra regressors used in the model. +#' For additive regressors, the coefficient represents the incremental impact +#' on \code{y} of a unit increase in the regressor. For multiplicative regressors, +#' the incremental impact is equal to \code{trend(t)} multiplied by the coefficient. +#' +#' \textbf{Coefficients are measured on the original scale of the training data.} +#' +#' @param m Prophet model object, after fitting. +#' +#' @return Dataframe with one row per regressor. +#' @details Output dataframe columns: +#' \itemize{ +#' \item{regressor: Name of the regressor} +#' \item{regressor_mode: Whether the regressor has an additive or multiplicative +#' effect on \code{y}.} +#' \item{center: The mean of the regressor if it was standardized. Otherwise 0.} +#' \item{coef_lower: Lower bound for the coefficient, estimated from the MCMC samples. +#' Only different to \code{coef} if \code{mcmc_samples > 0}. +#' } +#' \item{coef: Expected value of the coefficient.} +#' \item{coef_upper: Upper bound for the coefficient, estimated from MCMC samples. +#' Only to different to \code{coef} if \code{mcmc_samples > 0}. +#' } +#' } +#' +#' @export +regressor_coefficients <- function(m){ + if (length(m$extra_regressors) == 0) { + stop("No extra regressors found.") + } + regr_names <- names(m$extra_regressors) + regr_modes <- unlist(lapply(m$extra_regressors, function(x) x$mode)) + regr_mus <- unlist(lapply(m$extra_regressors, function (x) x$mu)) + regr_stds <- unlist(lapply(m$extra_regressors, function(x) x$std)) + + beta_indices <- which(m$train.component.cols[, regr_names] == 1, arr.ind = TRUE)[, "row"] + betas <- m$params$beta[, beta_indices, drop = FALSE] + # If regressor is additive, multiply by the scale factor to put coefficients on the original training data scale. + y_scale_indicator <- matrix( + data = ifelse(regr_modes == "additive", m$y.scale, 1), + nrow = nrow(betas), + ncol = ncol(betas), + byrow = TRUE + ) + coefs <- betas * y_scale_indicator / regr_stds + + percentiles = c((1 - m$interval.width) / 2, 1 - (1 - m$interval.width) / 2) + bounds <- apply(betas, 2, quantile, probs = percentiles) + + df <- data.frame( + regressor = regr_names, + regressor_mode = regr_modes, + center = regr_mus, + coef_lower = bounds[1, ], + coef = apply(betas, 2, mean), + coef_upper = bounds[2, ], + stringsAsFactors = FALSE, + row.names = NULL + ) + + return(df) +} diff --git a/R/man/regressor_coefficients.Rd b/R/man/regressor_coefficients.Rd new file mode 100644 index 000000000..642889811 --- /dev/null +++ b/R/man/regressor_coefficients.Rd @@ -0,0 +1,36 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/utilities.R +\name{regressor_coefficients} +\alias{regressor_coefficients} +\title{Summarise the coefficients of the extra regressors used in the model. +For additive regressors, the coefficient represents the incremental impact +on \code{y} of a unit increase in the regressor. For multiplicative regressors, +the incremental impact is equal to \code{trend(t)} multiplied by the coefficient.} +\usage{ +regressor_coefficients(m) +} +\arguments{ +\item{m}{Prophet model object, after fitting.} +} +\value{ +Dataframe with one row per regressor. +} +\description{ +\textbf{Coefficients are measured on the original scale of the training data.} +} +\details{ +Output dataframe columns: +\itemize{ + \item{regressor: Name of the regressor} + \item{regressor_mode: Whether the regressor has an additive or multiplicative +effect on \code{y}.} + \item{center: The mean of the regressor if it was standardized. Otherwise 0.} + \item{coef_lower: Lower bound for the coefficient, estimated from the MCMC samples. + Only different to \code{coef} if \code{mcmc_samples > 0}. + } + \item{coef: Expected value of the coefficient.} + \item{coef_upper: Upper bound for the coefficient, estimated from MCMC samples. + Only to different to \code{coef} if \code{mcmc_samples > 0}. + } +} +} diff --git a/R/tests/testthat/test_utilities.R b/R/tests/testthat/test_utilities.R new file mode 100644 index 000000000..089222c9f --- /dev/null +++ b/R/tests/testthat/test_utilities.R @@ -0,0 +1,48 @@ +# Copyright (c) Facebook, Inc. and its affiliates. + +# This source code is licensed under the MIT license found in the +# LICENSE file in the root directory of this source tree. + +library(prophet) +context("Prophet utilities tests") + +DATA <- read.csv('data.csv') +DATA$ds <- as.Date(DATA$ds) + +build_model_with_regressors <- function(data, mcmc.samples = 0) { + m <- prophet(mcmc.samples = mcmc.samples) + m <- add_regressor(m, 'binary_feature', prior.scale=0.2) + m <- add_regressor(m, 'numeric_feature', prior.scale=0.5) + m <- add_regressor( + m, 'numeric_feature2', prior.scale=0.5, mode = 'multiplicative') + m <- add_regressor(m, 'binary_feature2', standardize=TRUE) + + df <- data + df$binary_feature <- c(rep(0, 255), rep(1, 255)) + df$numeric_feature <- 0:509 + df$numeric_feature2 <- 0:509 + df$binary_feature2 <- c(rep(1, 100), rep(0, 410)) + m <- fit.prophet(m, df) + + return(m) +} + +test_that("regressor_coefficients_no_uncertainty", { + skip_if_not(Sys.getenv('R_ARCH') != '/i386') + m <- build_model_with_regressors(DATA, mcmc.samples = 0) + coefs <- regressor_coefficients(m) + + expect_equal(dim(coefs), c(4, 6)) + expect_equal(coefs[, "coef_lower"], coefs[, "coef"]) + expect_equal(coefs[, "coef_upper"], coefs[, "coef"]) +}) + +test_that("regressor_coefficients_with_uncertainty", { + skip_if_not(Sys.getenv('R_ARCH') != '/i386') + suppressWarnings(m <- build_model_with_regressors(DATA, mcmc.samples = 100)) + coefs <- regressor_coefficients(m) + + expect_equal(dim(coefs), c(4, 6)) + expect_true(all(coefs[, "coef_lower"] < coefs[, "coef"])) + expect_true(all(coefs[, "coef_upper"] > coefs[, "coef"])) +}) diff --git a/docs/_docs/seasonality,_holiday_effects,_and_regressors.md b/docs/_docs/seasonality,_holiday_effects,_and_regressors.md index 9ac63b425..aa88a31e6 100644 --- a/docs/_docs/seasonality,_holiday_effects,_and_regressors.md +++ b/docs/_docs/seasonality,_holiday_effects,_and_regressors.md @@ -97,8 +97,8 @@ The holiday effect can be seen in the `forecast` dataframe: ```R # R -forecast %>% - select(ds, playoff, superbowl) %>% +forecast %>% + select(ds, playoff, superbowl) %>% filter(abs(playoff + superbowl) > 0) %>% tail(10) ``` @@ -211,8 +211,8 @@ prophet_plot_components(m, forecast) # Python fig = m.plot_components(forecast) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_13_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_13_0.png) Individual holidays can be plotted using the `plot_forecast_component` function (imported from `fbprophet.plot` in Python) like `plot_forecast_component(m, forecast, 'superbowl')` to plot just the superbowl holiday component. @@ -247,14 +247,14 @@ You can see which holidays were included by looking at the `train_holiday_names` m$train.holiday.names ``` - [1] "playoff" "superbowl" + [1] "playoff" "superbowl" [3] "New Year's Day" "Martin Luther King, Jr. Day" - [5] "Washington's Birthday" "Memorial Day" - [7] "Independence Day" "Labor Day" - [9] "Columbus Day" "Veterans Day" - [11] "Veterans Day (Observed)" "Thanksgiving" + [5] "Washington's Birthday" "Memorial Day" + [7] "Independence Day" "Labor Day" + [9] "Columbus Day" "Veterans Day" + [11] "Veterans Day (Observed)" "Thanksgiving" [13] "Christmas Day" "Independence Day (Observed)" - [15] "Christmas Day (Observed)" "New Year's Day (Observed)" + [15] "Christmas Day (Observed)" "New Year's Day (Observed)" @@ -306,8 +306,8 @@ prophet_plot_components(m, forecast) forecast = m.predict(future) fig = m.plot_components(forecast) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_23_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_23_0.png) @@ -330,8 +330,8 @@ from fbprophet.plot import plot_yearly m = Prophet().fit(df) a = plot_yearly(m) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_26_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_26_0.png) The default values are often appropriate, but they can be increased when the seasonality needs to fit higher-frequency changes, and generally be less smooth. The Fourier order can be specified for each built-in seasonality when instantiating the model, here it is increased to 20: @@ -348,8 +348,8 @@ from fbprophet.plot import plot_yearly m = Prophet(yearly_seasonality=20).fit(df) a = plot_yearly(m) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_29_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_29_0.png) Increasing the number of Fourier terms allows the seasonality to fit faster changing cycles, but can also lead to overfitting: N Fourier terms corresponds to 2N variables used for modeling the cycle @@ -388,8 +388,8 @@ m.add_seasonality(name='monthly', period=30.5, fourier_order=5) forecast = m.fit(df).predict(future) fig = m.plot_components(forecast) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_32_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_32_0.png) @@ -452,8 +452,8 @@ future['off_season'] = ~future['ds'].apply(is_nfl_season) forecast = m.fit(df).predict(future) fig = m.plot_components(forecast) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_38_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_38_0.png) Both of the seasonalities now show up in the components plots above. We can see that during the on-season when games are played every Sunday, there are large increases on Sunday and Monday that are completely absent during the off-season. @@ -470,8 +470,8 @@ If you find that the holidays are overfitting, you can adjust their prior scale # R m <- prophet(df, holidays = holidays, holidays.prior.scale = 0.05) forecast <- predict(m, future) -forecast %>% - select(ds, playoff, superbowl) %>% +forecast %>% + select(ds, playoff, superbowl) %>% filter(abs(playoff + superbowl) > 0) %>% tail(10) ``` @@ -640,8 +640,8 @@ future['nfl_sunday'] = future['ds'].apply(nfl_sunday) forecast = m.predict(future) fig = m.plot_components(forecast) ``` - -![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_48_0.png) + +![png](/prophet/static/seasonality,_holiday_effects,_and_regressors_files/seasonality,_holiday_effects,_and_regressors_48_0.png) NFL Sundays could also have been handled using the "holidays" interface described above, by creating a list of past and future NFL Sundays. The `add_regressor` function provides a more general interface for defining extra linear regressors, and in particular does not require that the regressor be a binary indicator. Another time series could be used as a regressor, although its future values would have to be known. @@ -662,3 +662,6 @@ The extra regressor must be known for both the history and for future dates. It Extra regressors are put in the linear component of the model, so the underlying model is that the time series depends on the extra regressor as either an additive or multiplicative factor (see the next section for multiplicativity). +#### Coefficients of additional regressors + +To extract the beta coefficients of the extra regressors, use the utility function `regressor_coefficients` (`from fbprophet.utilities import regressor_coefficients` in Python, `prophet::regressor_coefficients` in R) on the fitted model. The estimated beta coefficient for each regressor roughly represents the increase in prediction value for a unit increase in the regressor value (note that the coefficients returned are always on the scale of the original data). If `mcmc_samples` is specified, a credible interval for each coefficient is also returned, which can help identify whether each regressor is "statistically significant". diff --git a/notebooks/seasonality,_holiday_effects,_and_regressors.ipynb b/notebooks/seasonality,_holiday_effects,_and_regressors.ipynb index 1dccebb40..c9a79b6bb 100644 --- a/notebooks/seasonality,_holiday_effects,_and_regressors.ipynb +++ b/notebooks/seasonality,_holiday_effects,_and_regressors.ipynb @@ -1199,7 +1199,11 @@ "\n", "The extra regressor must be known for both the history and for future dates. It thus must either be something that has known future values (such as `nfl_sunday`), or something that has separately been forecasted elsewhere. The weather regressors used in the notebook linked above is a good example of an extra regressor that has forecasts that can be used for future values. One can also use as a regressor another time series that has been forecasted with a time series model, such as Prophet. For instance, if `r(t)` is included as a regressor for `y(t)`, Prophet can be used to forecast `r(t)` and then that forecast can be plugged in as the future values when forecasting `y(t)`. A note of caution around this approach: This will probably not be useful unless `r(t)` is somehow easier to forecast then `y(t)`. This is because error in the forecast of `r(t)` will produce error in the forecast of `y(t)`. One setting where this can be useful is in hierarchical time series, where there is top-level forecast that has higher signal-to-noise and is thus easier to forecast. Its forecast can be included in the forecast for each lower-level series.\n", "\n", - "Extra regressors are put in the linear component of the model, so the underlying model is that the time series depends on the extra regressor as either an additive or multiplicative factor (see the next section for multiplicativity)." + "Extra regressors are put in the linear component of the model, so the underlying model is that the time series depends on the extra regressor as either an additive or multiplicative factor (see the next section for multiplicativity).\n", + "\n", + "#### Coefficients of additional regressors\n", + "\n", + "To extract the beta coefficients of the extra regressors, use the utility function `regressor_coefficients` (`from fbprophet.utilities import regressor_coefficients` in Python, `prophet::regressor_coefficients` in R) on the fitted model. The estimated beta coefficient for each regressor roughly represents the increase in prediction value for a unit increase in the regressor value (note that the coefficients returned are always on the scale of the original data). If `mcmc_samples` is specified, a credible interval for each coefficient is also returned, which can help identify whether each regressor is \"statistically significant\"." ] } ], @@ -1225,4 +1229,4 @@ }, "nbformat": 4, "nbformat_minor": 1 -} +} \ No newline at end of file