From d0c4ba2e3da9575fb81f261470869d4664ac2244 Mon Sep 17 00:00:00 2001 From: Matt Dancho Date: Sat, 28 Oct 2023 20:56:32 -0400 Subject: [PATCH] plot_anomalies_cleaned --- NAMESPACE | 3 + NEWS.md | 15 +- R/anomalize-plotting.R | 364 ++++++++++++++++++++++++++++++++++++++++- man/plot_anomalies.Rd | 59 ++++++- 4 files changed, 432 insertions(+), 9 deletions(-) diff --git a/NAMESPACE b/NAMESPACE index b76432de..dfbbc52e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -73,6 +73,8 @@ S3method(plot_acf_diagnostics,data.frame) S3method(plot_acf_diagnostics,grouped_df) S3method(plot_anomalies,data.frame) S3method(plot_anomalies,grouped_df) +S3method(plot_anomalies_cleaned,data.frame) +S3method(plot_anomalies_cleaned,grouped_df) S3method(plot_anomalies_decomp,data.frame) S3method(plot_anomalies_decomp,grouped_df) S3method(plot_anomaly_diagnostics,data.frame) @@ -289,6 +291,7 @@ export(parse_date2) export(parse_datetime2) export(plot_acf_diagnostics) export(plot_anomalies) +export(plot_anomalies_cleaned) export(plot_anomalies_decomp) export(plot_anomaly_diagnostics) export(plot_seasonal_diagnostics) diff --git a/NEWS.md b/NEWS.md index f6699ee2..c4c750e3 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,4 +1,17 @@ -# timetk (development version) +# timetk 2.8.4.9000 + +### Anomalize Integration: + +`anomalize` R package is now available in `timetk`: + +1. `anomalize()`: 1 function that breaks down, identifies, and cleans anomalies +2. `plot_anomalies()`: Visualize the anomalies and anomaly bands +3. `plot_anomalies_decomp()`: Visualize the time series decomposition. Make adjustments as needed. +4. `plot_anomalies_cleaned()`: Visualize the before/after of cleaning anomalies. + +Note - `anomalize(.method)`: Only the `.method = "stl"` is supported at this time. The `"twitter"` method is also planned. + +### Other Changes: - Removed dependency on tidymodels. (#154, @olivroy). 
diff --git a/R/anomalize-plotting.R b/R/anomalize-plotting.R index dff40e5a..f1df07d3 100644 --- a/R/anomalize-plotting.R +++ b/R/anomalize-plotting.R @@ -47,7 +47,7 @@ #' #' #' @examples -#' # Plot Anomaly +#' # Plot Anomalies #' library(dplyr) #' #' walmart_sales_weekly %>% @@ -361,8 +361,9 @@ plot_anomalies.grouped_df <- function( # 2.0 PLOT ANOMALIES DECOMP ---- #' Visualize Anomaly Decomposition #' -#' `plot_anomalies_decomp` function takes in data from the `anomalize()` -#' function, and returns a plot of the anomaly decomposition. +#' `plot_anomalies_decomp()`: Takes in data from the `anomalize()` +#' function, and returns a plot of the anomaly decomposition. Useful for interpreting +#' how the `anomalize()` function is determining outliers from "remainder". #' #' @param .data A `tibble` or `data.frame` that has been anomalized by `anomalize()` #' @param .date_var A column containing either date or date-time values @@ -402,7 +403,7 @@ plot_anomalies.grouped_df <- function( #' - `as_plotly`: use `.interactive` #' #' @examples -#' # Plot Anomaly Decomposition +#' # Plot Anomalies Decomposition #' library(dplyr) #' #' walmart_sales_weekly %>% @@ -610,5 +611,360 @@ plot_anomalies_decomp.grouped_df <- function( } +# 3.0 PLOT ANOMALIES CLEANED ----- + +#' Visualize Anomalies for One or More Time Series +#' +#' `plot_anomalies_cleaned()` helps users visualize the before/after of +#' cleaning anomalies. +#' +#' @param .data A `tibble` or `data.frame` that has been anomalized by `anomalize()` +#' @param .date_var A column containing either date or date-time values +#' @param .facet_vars One or more grouping columns that are broken out into `ggplot2` facets. +#' These can be selected using `tidyselect()` helpers (e.g. `contains()`). +#' @param .facet_ncol Number of facet columns. +#' @param .facet_nrow Number of facet rows (only used for `.trelliscope = TRUE`) +#' @param .facet_scales Control facet x & y-axis ranges. 
Options include "fixed", "free", "free_y", "free_x" +#' @param .facet_dir The direction of faceting ("h" for horizontal, "v" for vertical). Default is "h". +#' @param .facet_collapse Multiple facets included on one facet strip instead of +#' multiple facet strips. +#' @param .facet_collapse_sep The separator used for collapsing facets. +#' @param .facet_strip_remove Whether or not to remove the strip and text label for each facet. +#' @param .line_color Line color. +#' @param .line_size Line size. +#' @param .line_type Line type. +#' @param .line_alpha Line alpha (opacity). Range: (0, 1). +#' @param .cleaned_line_color Line color. +#' @param .cleaned_line_size Line size. +#' @param .cleaned_line_type Line type. +#' @param .cleaned_line_alpha Line alpha (opacity). Range: (0, 1). +#' @param .legend_show Toggles on/off the Legend +#' @param .title Plot title. +#' @param .x_lab Plot x-axis label +#' @param .y_lab Plot y-axis label +#' @param .color_lab Plot label for the color legend +#' @param .interactive If TRUE, returns a `plotly` interactive plot. +#' If FALSE, returns a static `ggplot2` plot. +#' @param .trelliscope Returns either a normal plot or a trelliscopejs plot (great for many time series) +#' Must have `trelliscopejs` installed. +#' @param .trelliscope_params Pass parameters to the `trelliscopejs::facet_trelliscope()` function as a `list()`. 
+#' The only parameters that cannot be passed are:
+#'   - `ncol`: use `.facet_ncol`
+#'   - `nrow`: use `.facet_nrow`
+#'   - `scales`: use `.facet_scales`
+#'   - `as_plotly`: use `.interactive`
+#'
+#'
+#' @examples
+#' # Plot Anomalies Cleaned
+#' library(dplyr)
+#'
+#' walmart_sales_weekly %>%
+#'     filter(id %in% c("1_1", "1_3")) %>%
+#'     group_by(id) %>%
+#'     anomalize(Date, Weekly_Sales, .message = FALSE) %>%
+#'     plot_anomalies_cleaned(Date, .facet_ncol = 2, .interactive = FALSE)
+#'
+#' @name plot_anomalies
+#' @export
+plot_anomalies_cleaned <- function(
+    .data,
+    .date_var,
+
+    .facet_vars = NULL,
+
+    .facet_ncol = 1,
+    .facet_nrow = 1,
+    .facet_scales = "free",
+    .facet_dir = "h",
+    .facet_collapse = FALSE,
+    .facet_collapse_sep = " ",
+    .facet_strip_remove = FALSE,
+
+    .line_color = "#2c3e50",
+    .line_size = 0.5,
+    .line_type = 1,
+    .line_alpha = 1,
+
+    .cleaned_line_color = "#e31a1c",
+    .cleaned_line_size = 0.5,
+    .cleaned_line_type = 1,
+    .cleaned_line_alpha = 1,
+
+    .legend_show = TRUE,
+
+    .title = "Anomalies Cleaned Plot",
+    .x_lab = "",
+    .y_lab = "",
+    .color_lab = "Legend",
+
+    .interactive = TRUE,
+    .trelliscope = FALSE,
+    .trelliscope_params = list()
+) {
+    # Validate the inputs once here, then dispatch on the class of `.data`.
+    date_var_expr <- rlang::enquo(.date_var)
+
+    if (!is.data.frame(.data)) {
+        rlang::abort(".data is not a data-frame or tibble. Please supply a data.frame or tibble.")
+    }
+    if (rlang::quo_is_missing(date_var_expr)) {
+        rlang::abort(".date_var is missing. Please supply a date or date-time column.")
+    }
+
+    column_names <- names(.data)
+    check_names  <- c("observed", "observed_cleaned") %in% column_names
+    if (!all(check_names)) stop('Error in plot_anomalies_cleaned(): column names are missing. Run `anomalize()` and make sure: observed and observed_cleaned are present', call. = FALSE)
+
+    UseMethod("plot_anomalies_cleaned", .data)
+
+}
+
+#' @export
+plot_anomalies_cleaned.data.frame <- function(
+    .data,
+    .date_var,
+
+    .facet_vars = NULL,
+
+    .facet_ncol = 1,
+    .facet_nrow = 1,
+    .facet_scales = "free",
+    .facet_dir = "h",
+    .facet_collapse = FALSE,
+    .facet_collapse_sep = " ",
+    .facet_strip_remove = FALSE,
+
+    .line_color = "#2c3e50",
+    .line_size = 0.5,
+    .line_type = 1,
+    .line_alpha = 1,
+
+    .cleaned_line_color = "#e31a1c",
+    .cleaned_line_size = 0.5,
+    .cleaned_line_type = 1,
+    .cleaned_line_alpha = 1,
+
+    .legend_show = TRUE,
+
+    .title = "Anomalies Cleaned Plot",
+    .x_lab = "",
+    .y_lab = "",
+    .color_lab = "Legend",
+
+    .interactive = TRUE,
+    .trelliscope = FALSE,
+    .trelliscope_params = list()
+) {
+    # Draws `observed` and `observed_cleaned` as two lines against `.date_var`.
+    # Tidy Eval Setup
+    date_var_expr <- rlang::enquo(.date_var)
+    facets_expr   <- rlang::enquo(.facet_vars)
+
+    # Facet Names: resolve the tidyselect expression to column symbols
+    facets_expr <- rlang::syms(names(tidyselect::eval_select(facets_expr, .data)))
+
+    data_formatted <- tibble::as_tibble(.data)
+
+    # FACET SETUP ----
+    facet_names <- data_formatted %>% dplyr::select(!!! facets_expr) %>% colnames()
+
+    if (length(facet_names) > 0) {
+        if (.facet_collapse) {
+            # Paste all facet columns into one factor so a single strip is drawn
+            data_formatted <- data_formatted %>%
+                dplyr::ungroup() %>%
+                dplyr::mutate(.facets_collapsed = stringr::str_c(!!! rlang::syms(facet_names),
+                                                                 sep = .facet_collapse_sep)) %>%
+                dplyr::mutate(.facets_collapsed = forcats::as_factor(.facets_collapsed)) %>%
+                dplyr::group_by(.facets_collapsed)
+
+            facet_names <- ".facets_collapsed"
+
+        } else {
+            data_formatted <- data_formatted %>%
+                dplyr::group_by(!!! rlang::syms(facet_names))
+        }
+    }
+
+
+    # ---- VISUALIZATION ----
+
+    g <- data_formatted %>%
+        ggplot2::ggplot(ggplot2::aes(!! date_var_expr, observed)) +
+        ggplot2::labs(x = .x_lab, y = .y_lab, title = .title, color = .color_lab) +
+        theme_tq()
+
+    # Add facets
+    if (length(facet_names) > 0) {
+        g <- g +
+            ggplot2::facet_wrap(
+                ggplot2::vars(!!! rlang::syms(facet_names)),
+                ncol   = .facet_ncol,
+                scales = .facet_scales,
+                dir    = .facet_dir
+            )
+    }
+
+
+    # Add line - observed (constant color label creates the legend entry)
+    g <- g +
+        ggplot2::geom_line(
+            ggplot2::aes(color = "Observed"),
+            linewidth = .line_size,
+            linetype  = .line_type,
+            alpha     = .line_alpha
+        )
+
+    # Add color scale: named values tie each color to its legend key explicitly
+    g <- g +
+        ggplot2::scale_color_manual(values = c("Observed" = .line_color, "Observed Cleaned" = .cleaned_line_color))
+
+    # Add line - observed_cleaned
+    g <- g +
+        ggplot2::geom_line(
+            ggplot2::aes(y = observed_cleaned, color = "Observed Cleaned"),
+            linewidth = .cleaned_line_size,
+            linetype  = .cleaned_line_type,
+            alpha     = .cleaned_line_alpha
+        )
+
+    # Show Legend?
+    if (!.legend_show) {
+        g <- g +
+            ggplot2::theme(legend.position = "none")
+    }
+
+    # Remove the facet strip?
+    if (.facet_strip_remove) {
+        g <- g +
+            ggplot2::theme(
+                strip.background = ggplot2::element_blank(),
+                strip.text.x     = ggplot2::element_blank()
+            )
+    }
+
+    # Convert to trelliscope and/or plotly?
+    if (!.trelliscope) {
+
+        if (.interactive) {
+
+            g <- plotly::ggplotly(g)
+
+        }
+
+    } else {
+
+        trell <- do.call(trelliscopejs::facet_trelliscope, c(
+            list(
+                facets    = ggplot2::vars(!!! rlang::syms(facet_names)),
+                ncol      = .facet_ncol,
+                nrow      = .facet_nrow,
+                scales    = .facet_scales,
+                as_plotly = .interactive
+            ),
+            .trelliscope_params
+        ))
+
+        g <- g + trell
+
+    }
+
+    return(g)
+
+}
+
+#' @export
+plot_anomalies_cleaned.grouped_df <- function(
+    .data,
+    .date_var,
+
+    .facet_vars = NULL,
+
+    .facet_ncol = 1,
+    .facet_nrow = 1,
+    .facet_scales = "free",
+    .facet_dir = "h",
+    .facet_collapse = FALSE,
+    .facet_collapse_sep = " ",
+    .facet_strip_remove = FALSE,
+
+    .line_color = "#2c3e50",
+    .line_size = 0.5,
+    .line_type = 1,
+    .line_alpha = 1,
+
+    .cleaned_line_color = "#e31a1c",
+    .cleaned_line_size = 0.5,
+    .cleaned_line_type = 1,
+    .cleaned_line_alpha = 1,
+
+    .legend_show = TRUE,
+
+    .title = "Anomalies Cleaned Plot",
+    .x_lab = "",
+    .y_lab = "",
+    .color_lab = "Legend",
+
+    .interactive = TRUE,
+    .trelliscope = FALSE,
+    .trelliscope_params = list()
+) {
+    # Facets by the existing dplyr groups and delegates to the data.frame method.
+
+    # Tidy Eval Setup
+    group_names   <- dplyr::group_vars(.data)
+    facets_expr   <- rlang::enquos(.facet_vars)
+
+    # Checks: a supplied `.facet_vars` is superseded by the groups, so tell the user
+    facet_names <- .data %>% dplyr::ungroup() %>% dplyr::select(!!! facets_expr) %>% colnames()
+    if (length(facet_names) > 0) message("plot_anomalies_cleaned(...): Groups are previously detected. Grouping by: ",
+                                         stringr::str_c(group_names, collapse = ", "))
+
+    # ---- DATA SETUP ----
+
+    # Ungroup Data
+    data_formatted <- .data %>% dplyr::ungroup()
+
+    # ---- PLOT SETUP ----
+    g <- plot_anomalies_cleaned.data.frame(
+        .data               = data_formatted,
+        .date_var           = !! rlang::enquo(.date_var),
+
+        .facet_vars         = !! rlang::enquo(group_names),
+
+        .facet_ncol         = .facet_ncol,
+        .facet_nrow         = .facet_nrow,
+        .facet_scales       = .facet_scales,
+        .facet_dir          = .facet_dir,
+        .facet_collapse     = .facet_collapse,
+        .facet_collapse_sep = .facet_collapse_sep,
+        .facet_strip_remove = .facet_strip_remove,
+
+        .line_color         = .line_color,
+        .line_size          = .line_size,
+        .line_type          = .line_type,
+        .line_alpha         = .line_alpha,
+
+        .cleaned_line_color = .cleaned_line_color,
+        .cleaned_line_size  = .cleaned_line_size,
+        .cleaned_line_type  = .cleaned_line_type,
+        .cleaned_line_alpha = .cleaned_line_alpha,
+
+        .legend_show        = .legend_show,
+
+        .title              = .title,
+        .x_lab              = .x_lab,
+        .y_lab              = .y_lab,
+        .color_lab          = .color_lab,
+
+        .interactive        = .interactive,
+        .trelliscope        = .trelliscope,
+        .trelliscope_params = .trelliscope_params
+    )
+
+    return(g)
+
+}
diff --git a/man/plot_anomalies.Rd b/man/plot_anomalies.Rd
index 8c66de14..c0c7355c 100644
--- a/man/plot_anomalies.Rd
+++ b/man/plot_anomalies.Rd
@@ -3,6 +3,7 @@
 \name{plot_anomalies}
 \alias{plot_anomalies}
 \alias{plot_anomalies_decomp}
+\alias{plot_anomalies_cleaned}
 \title{Visualize Anomalies for One or More Time Series}
 \usage{
 plot_anomalies(
@@ -49,6 +50,35 @@ plot_anomalies_decomp(
   .y_lab = "",
   .interactive = TRUE
 )
+
+plot_anomalies_cleaned(
+  .data,
+  .date_var,
+  .facet_vars = NULL,
+  .facet_ncol = 1,
+  .facet_nrow = 1,
+  .facet_scales = "free",
+  .facet_dir = "h",
+  .facet_collapse = FALSE,
+  .facet_collapse_sep = " ",
+  .facet_strip_remove = FALSE,
+  .line_color = "#2c3e50",
+  .line_size = 0.5,
+  .line_type = 1,
+  .line_alpha = 1,
+  .cleaned_line_color = "#e31a1c",
+  .cleaned_line_size = 0.5,
+  .cleaned_line_type = 1,
+  .cleaned_line_alpha = 1,
+  .legend_show = TRUE,
+  .title = "Anomalies Cleaned Plot",
+  .x_lab = "",
+  .y_lab = "",
+  .color_lab = "Legend",
+  .interactive = TRUE,
+  .trelliscope = FALSE,
+  .trelliscope_params = list()
+)
 }
 \arguments{
 \item{.data}{A \code{tibble} or \code{data.frame} that has been anomalized by \code{anomalize()}}
@@ -115,6 +145,14 @@ The only parameters that cannot be passed are:
 \item \code{scales}: use
\code{facet_scales} \item \code{as_plotly}: use \code{.interactive} }} + +\item{.cleaned_line_color}{Line color.} + +\item{.cleaned_line_size}{Line size.} + +\item{.cleaned_line_type}{Line type.} + +\item{.cleaned_line_alpha}{Line alpha (opacity). Range: (0, 1).} } \value{ A \code{plotly} or \code{ggplot2} visualization @@ -123,11 +161,15 @@ A \code{plotly} or \code{ggplot2} visualization \code{plot_anomalies()} is an interactive and scalable function for visualizing anomalies in time series data. Plots are available in interactive \code{plotly} (default) and static \code{ggplot2} format. -\code{plot_anomalies_decomp} function takes in data from the \code{anomalize()} -function, and returns a plot of the anomaly decomposition. +\code{plot_anomalies_decomp()}: Takes in data from the \code{anomalize()} +function, and returns a plot of the anomaly decomposition. Useful for interpreting +how the \code{anomalize()} function is determining outliers from "remainder". + +\code{plot_anomalies_cleaned()} helps users visualize the before/after of +cleaning anomalies. } \examples{ -# Plot Anomaly +# Plot Anomalies library(dplyr) walmart_sales_weekly \%>\% @@ -136,7 +178,7 @@ walmart_sales_weekly \%>\% anomalize(Date, Weekly_Sales) \%>\% plot_anomalies(Date, .facet_ncol = 2, .ribbon_alpha = 0.25, .interactive = FALSE) -# Plot Anomaly Decomposition +# Plot Anomalies Decomposition library(dplyr) walmart_sales_weekly \%>\% @@ -145,4 +187,13 @@ walmart_sales_weekly \%>\% anomalize(Date, Weekly_Sales, .message = FALSE) \%>\% plot_anomalies_decomp(Date, .interactive = FALSE) +# Plot Anomalies Cleaned +library(dplyr) + +walmart_sales_weekly \%>\% + filter(id \%in\% c("1_1", "1_3")) \%>\% + group_by(id) \%>\% + anomalize(Date, Weekly_Sales, .message = FALSE) \%>\% + plot_anomalies_cleaned(Date, .facet_ncol = 2, .interactive = FALSE) + }