diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..e16977a --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,18 @@ +## Contribution guidelines: +We always welcome contributions to the package. If you would like to propose a new functionality and/or report an issue, please use GitHub's tracker called [Issues](https://github.com/bfast2/bfast/issues). + +For development we use the [GitHub Flow](https://guides.github.com/introduction/flow/) branching model. + +Key steps: +1. Create a GitHub issue in this repository with description of the work that you plan to do. +2. Assign yourself to the GitHub issue you are working on, to inform other developers that you are working on it. +3. Create your own working fork or branch based on the `dev` branch. +4. Make your changes in that branch. +5. Commit your changes to your working branch as long as you are not finished with your development. +6. Make sure that all tests pass (e.g., in Travis or Circle CI). +7. Once your work is finished, create a pull request so that another developer can review your changes before merging them with the `dev` (or `main`) branch. + +Additional steps for [preparing a new release](https://guide.esciencecenter.nl/best_practices/releases.html): +8. Update the `NEWS.Rd` file with most notable changes. +9. Add new contributors to the `DESCRIPTION` file if applicable. +10. Release the package and make it citable (add `CITATION.cff` including DOI). diff --git a/DESCRIPTION b/DESCRIPTION index 4eebf85..b471d6f 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: bfast -Version: 1.6.0 +Version: 1.6.0.9000 Date: 2017-03-09 Title: Breaks For Additive Season and Trend Authors@R: c(person(given = "Jan", family = "Verbesselt", role = c("aut", "cre"), email = "Jan.Verbesselt@wur.nl"), diff --git a/R/bfast01.R b/R/bfast01.R index aff1107..a51f64b 100644 --- a/R/bfast01.R +++ b/R/bfast01.R @@ -8,7 +8,7 @@ #' steps: #' #' 1. The data is preprocessed with bfastpp using the arguments -#' order/lag/slag/na.action/stl. +#' order/lag/slag/na.action/stl/sbins. #' #' 2. A linear model with the given formula is fitted. By default a suitable #' formula is guessed based on the preprocessing parameters. @@ -70,6 +70,7 @@ #' omitted. #' @param na.action arguments passed on to \code{\link[bfast]{bfastpp}} #' @param stl argument passed on to \code{\link[bfast]{bfastpp}} +#' @param sbins argument passed on to \code{\link[bfast]{bfastpp}} #' @return \code{bfast01} returns a list of class \code{"bfast01"} with the #' following elements: \item{call}{the original function call.} \item{data}{the #' data preprocessed by \code{"bfastpp"}.} \item{formula}{the model formulae.} @@ -132,7 +133,7 @@ bfast01 <- function(data, formula = NULL, test = "OLS-MOSUM", level = 0.05, aggregate = all, trim = NULL, bandwidth = 0.15, functional = "max", order = 3, lag = NULL, slag = NULL, na.action = na.omit, - reg = "lm", stl = "none") + reg = "lm", stl = "none", sbins = 1) { # Error catching if(!(reg %in% c("lm","rlm"))) stop("Regression method unknown. ?bfast01") @@ -141,7 +142,7 @@ bfast01 <- function(data, formula = NULL, stl <- match.arg(stl, c("none", "trend", "seasonal", "both")) if (!inherits(data, "data.frame")) data <- bfastpp(data, order = order, lag = lag, slag = slag, - na.action = na.action, stl = stl) + na.action = na.action, stl = stl, sbins = sbins) if (is.null(formula)) { formula <- c(trend = !(stl %in% c("trend", "both")), harmon = order > 0 & !(stl %in% c("seasonal", "both")), diff --git a/R/bfastmonitor.R b/R/bfastmonitor.R index d498f44..e09f91c 100644 --- a/R/bfastmonitor.R +++ b/R/bfastmonitor.R @@ -72,7 +72,7 @@ #' @param end numeric. Maximum time (relative to the history period) that will #' be monitored (in MOSUM/ME processes). Default is 10 times the history #' period. -#' @param level numeric. Significance level of the monitoring (and ROC, if +#' @param level numeric vector. Significance levels of the monitoring and ROC (if #' selected) procedure, i.e., probability of type I error. #' @param hpc character specifying the high performance computing support. #' Default is \code{"none"}, can be set to \code{"foreach"}. See @@ -80,6 +80,8 @@ #' @param verbose logical. Should information about the monitoring be printed #' during computation? #' @param plot logical. Should the result be plotted? +#' @param sbins numeric. Number of seasonal dummies, passed to +#' \code{\link{bfastpp}}. #' @return \code{bfastmonitor} returns an object of class #' \code{"bfastmonitor"}, i.e., a list with components as follows. #' \item{data}{original \code{"ts"} time series,} \item{tspp}{preprocessed @@ -194,16 +196,16 @@ bfastmonitor <- function(data, start, order = 3, lag = NULL, slag = NULL, history = c("ROC", "BP", "all"), type = "OLS-MOSUM", h = 0.25, end = 10, level = 0.05, - hpc = "none", verbose = FALSE, plot = FALSE) + hpc = "none", verbose = FALSE, plot = FALSE, sbins = 1) { if (getOption("bfast.prefer_matrix_methods", FALSE)) { return(.bfastmonitor.matrix(data,start,formula,order, lag, slag, - history, type, h, end, level, hpc, verbose, plot)) + history, type, h, end, level, hpc, verbose, plot, sbins)) } else { return(.bfastmonitor.default(data,start,formula,order, lag, slag, - history, type, h, end, level, hpc, verbose, plot)) + history, type, h, end, level, hpc, verbose, plot, sbins)) } } @@ -213,13 +215,14 @@ bfastmonitor <- function(data, start, formula = response ~ trend + harmon, order = 3, lag = NULL, slag = NULL, history = c("ROC", "BP", "all"), - type = "OLS-MOSUM", h = 0.25, end = 10, level = 0.05, + type = "OLS-MOSUM", h = 0.25, end = 10, level = c(0.05, 0.05), hpc = "none", verbose = FALSE, plot = FALSE) { ## PREPROCESSING ## two levels needed: 1. monitoring, 2. in ROC (if selected) - level <- rep(level, length.out = 2) - + if (length(level) == 1) # Backwards compatibility, assume both are the same + level <- rep(level, length.out = 2) + if(!is.ts(data)) data <- as.ts(data) ## frequency of data @@ -230,16 +233,20 @@ bfastmonitor <- function(data, start, ## full data - data_tspp <- bfastpp(data, order = order, lag = lag, slag = slag, formula = formula) - X = data_tspp$X - y = data_tspp$y - time = data_tspp$t + data_tspp <- bfastpp(data, order = order, lag = lag, slag = slag, sbins = sbins) + data_tsmat = model.matrix(formula, data_tspp) + X = data_tsmat + y = data_tspp$response + time = data_tspp$time + rm(data_tspp, data_tsmat) ## SELECT STABLE HISTORY ## full history period history_X = X[time < start,] history_y = y[time < start] history_time = time[time < start] + if (length(history_y) <= ncol(history_X)) + stop("Fewer observations in the history period than number of regressors") ## find start of history period ## (may be specified via character, function, or time index directly) @@ -365,7 +372,7 @@ bfastmonitor <- function(data, start, order = 3, lag = NULL, slag = NULL, history = c("ROC", "BP", "all"), type = "OLS-MOSUM", h = 0.25, end = 10, level = 0.05, - hpc = "none", verbose = FALSE, plot = FALSE) + hpc = "none", verbose = FALSE, plot = FALSE, sbins = 1) { ## PREPROCESSING ## two levels needed: 1. monitoring, 2. in ROC (if selected) @@ -380,7 +387,7 @@ bfastmonitor <- function(data, start, start <- time2num(start) ## full data - data_tspp <- bfastpp(data, order = order, lag = lag, slag = slag) + data_tspp <- bfastpp(data, order = order, lag = lag, slag = slag, sbins = sbins) ## SELECT STABLE HISTORY ## full history period diff --git a/R/bfastpp.R b/R/bfastpp.R index d594208..e0ffcea 100644 --- a/R/bfastpp.R +++ b/R/bfastpp.R @@ -31,12 +31,13 @@ #' and/or season-adjustment. The \code{"trend"} or \code{"seasonal"} component #' or both from \code{\link[stats]{stl}} are removed from each column in #' \code{data}. By default (\code{"none"}), no STL adjustment is used. -#' @param formula regression model to be used (see -#' \code{\link[bfast]{bfastmonitor}}). If given, only independent variables -#' that occur in the formula will be computed and the output will be a list of -#' the design matrix, the response vector, and the vector of times instead of a -#' single \code{data.frame}. Providing a formula may reduce relative expensive -#' calls of \code{model.matrix} and \code{model.frame} in following operations. +#' @param decomp "stlplus" or "stl": use the NA-tolerant decomposition package +#' or the reference package (which can make use of time series with 2-3 +#' observations per year) +#' @param sbins numeric. Controls the number of seasonal dummies. If integer +#' > 1, sets the number of seasonal dummies to use per year. +#' If <= 1, treated as a multiplier to the number of observations per year, i.e. +#' ndummies = nobs/year * sbins. #' @return If no formula is provided, \code{bfastpp} returns a #' \code{"data.frame"} with the following variables (some of which may be #' matrices). \item{time}{numeric vector of time stamps,} @@ -80,33 +81,21 @@ #' d2lm <- lm(response ~ lag, data = d2) #' summary(d2lm) #' -#' ## provide a formula and use the lower level lm.fit function -#' d3 <- bfastpp(ndvi, stl = "both", lag = 1:2, formula = response ~ lag) -#' d3lm <- lm.fit(d3$X, d3$y) +#' ## use the lower level lm.fit function +#' d3 <- bfastpp(ndvi, stl = "both", lag = 1:2) +#' d3mm <- model.matrix(response ~ lag, d3) +#' d3lm <- lm.fit(d3mm, d3$response) #' d3lm$coefficients #' #' @export bfastpp bfastpp<- function(data, order = 3, lag = NULL, slag = NULL, na.action = na.omit, stl = c("none", "trend", "seasonal", "both"), - formula = NULL) { - - if(!require("stlplus",quietly = T)) stop("Please install the stlplus package!") - if(!is.ts(data)) data <- as.ts(data) - - if (is.null(formula)) { - return(.bfastpp.full(data, order, lag, slag, na.action, stl)) - } - else { - return(.bfastpp.formula(data, order, lag, slag, na.action, stl, formula)) - } -} - - -.bfastpp.full <- function(data, order = 3, - lag = NULL, slag = NULL, na.action = na.omit, - stl = c("none", "trend", "seasonal", "both")) + decomp=c("stlplus", "stl"), sbins=1) { + decomp = match.arg(decomp) + if(decomp == "stlplus" && !require("stlplus",quietly = T)) stop("Please install the stlplus package or set decomp=stl.") + ## double check what happens with 29-02 if that happens... ## we should keep it simple an remove the datum if that happens @@ -116,7 +105,11 @@ bfastpp<- function(data, order = 3, stl <- match.arg(stl) if(stl != "none") { stl_adjust <- function(x) { - x_stl <- stlplus::stlplus(x, s.window = "periodic", ...)$data + x_stl <- if (decomp=="stlplus") { + stlplus::stlplus(x, s.window = "periodic")$data + } else { + stats::stl(x, s.window = "periodic")$time.series + } switch(stl, "trend" = x - x_stl[, "trend"], "seasonal" = x - x_stl[, "seasonal"], @@ -143,7 +136,7 @@ bfastpp<- function(data, order = 3, time = as.numeric(time(y)), response = y, trend = 1:NROW(y), - season = factor(cycle(y)) + season = cut(cycle(y), if (sbins > 1) sbins else frequency(y)*sbins, ordered_result = TRUE) ) ## set up harmonic trend matrix as well @@ -179,121 +172,3 @@ bfastpp<- function(data, order = 3, ## return everything return(rval) } - - - - - -# this function builds the design matrix X for bfast based on a given formula. Only terms -# given in the formula will be added to X. -.bfastpp.formula <- function(data, order = 3, - lag = NULL, slag = NULL, na.action = na.omit, - stl = c("none", "trend", "seasonal", "both"), - formula = NULL) -{ - ## double check what happens with 29-02 if that happens... - ## we should keep it simple an remove the datum if that happens - - if(!is.ts(data)) data <- as.ts(data) - - ## STL pre-processing to try to adjust for trend or season - stl <- match.arg(stl) - if(stl != "none") { - stl_adjust <- function(x) { - x_stl <- stats::stl(x, s.window = "periodic")$time.series - switch(stl, - "trend" = x - x_stl[, "trend"], - "seasonal" = x - x_stl[, "seasonal"], - "both" = x - x_stl[, "trend"] - x_stl[, "seasonal"]) - } - if(NCOL(data) > 1L) { - for(i in 1:NCOL(data)) data[,i] <- stl_adjust(data[,i]) - } else { - data <- stl_adjust(data) - } - } - - - ## check for covariates - if(NCOL(data) > 1L) { - x <- coredata(data)[, -1L] - y <- data[, 1L] - } else { - x <- NULL - y <- data - } - - - rval = as.numeric(y) # remove ts attrs in order to avoid cbind.ts - rval.names = "response" - - rval = cbind(rval, as.numeric(time(y))) - rval.names = c(rval.names, "time") - - - if (attr(terms(formula), "intercept")) { - rval = cbind(rval, 1) - rval.names = c(rval.names, "(Intercept)") - } - - - if ("trend" %in% attr(terms.formula(formula),"term.labels")){ - rval = cbind(rval,1:NROW(y)) - rval.names = c(rval.names, "trend") - } - - if ("season" %in% attr(terms.formula(formula),"term.labels")){ - rval = cbind(rval,1 + seq(as.numeric(cycle(y[1]))-1,length.out=max(length(y),nrow(y))) %% frequency(y)) - rval.names = c(rval.names, "season") - } - - - ## set up harmonic trend matrix as well - if ("harmon" %in% attr(terms.formula(formula),"term.labels")){ - - freq <- frequency(y) - order <- min(freq, order) - harmon <- outer(2 * pi * rval[,2], 1:order) # rval[,2] is as.numeric(time(y)) - harmon <- cbind(apply(harmon, 2, cos), apply(harmon, 2, sin)) - colnames(harmon) <- if(order == 1) { - c("harmoncos", "harmonsin") - } else { - c(paste("harmon.cos", 1:order, sep = ""), paste("harmon.sin", 1:order, sep = "")) - } - if((2 * order) == freq) harmon <- harmon[, -(2 * order)] - rval = cbind(rval,harmon) - rval.names = c(rval.names, colnames(harmon)) - } - nalag <- function(x, k) c(rep(NA, k), head(x, -k)) - if ("lag" %in% attr(terms.formula(formula),"term.labels")){ - if(!is.null(lag)) { - rval = cbind(rval,sapply(lag, function(k) nalag(y, k))) - rval.names = c(rval.names, paste0("lag",lag)) - } - } - if ("slag" %in% attr(terms.formula(formula),"term.labels")){ - if(!is.null(slag)) { - rval = cbind(rval, sapply(slag * frequency(y), function(k) nalag(as.vector(y), k))) - rval.names = c(rval.names, paste0("slag",slag)) - } - } - - if ("xreg" %in% attr(terms.formula(formula),"term.labels")){ - rval = cbind(rval, x) - rval.names = c(rval.names, paste0("xreg.",colnames(x))) - } - - ## omit missing values - #if (!is.ts(rval)) rval <- ts(rval,start = start(data), frequency = frequency(data)) - colnames(rval) <- rval.names - - #class(rval) <- "matrix" # prevent calling na.omit.ts - rval <- na.action(rval) - - return(list(y=rval[,1L],X=rval[,-c(1L,2L)], t=rval[,2L])) -} - - - - - diff --git a/R/history_roc.R b/R/history_roc.R index eda1ee9..5a5e46a 100644 --- a/R/history_roc.R +++ b/R/history_roc.R @@ -19,7 +19,8 @@ history_roc.matrix <- function(X, y,time, level = 0.05) { if (!is.ts(y)) y <- ts(y) # needed? y_rcus <- efp.matrix(X_rev,y_rev,time_rev, type = "Rec-CUSUM") ## TODO - y_start <- if(sctest(y_rcus)$p.value < level) { + pval = sctest(y_rcus)$p.value + y_start <- if(!is.na(pval) && pval < level) { length(y_rcus$process) - min(which(abs(y_rcus$process)[-1] > boundary(y_rcus)[-1])) + 1 } else { 1 @@ -37,10 +38,11 @@ history_roc.formula <- function(formula, data, level = 0.05) { data_rev$response <- ts(data_rev$response) y_rcus <- efp(formula, data = data_rev, type = "Rec-CUSUM") - y_start <- if(sctest(y_rcus)$p.value < level) { + pval = sctest(y_rcus)$p.value + y_start <- if(!is.na(pval) && pval < level) { length(y_rcus$process) - min(which(abs(y_rcus$process)[-1] > boundary(y_rcus)[-1])) + 1 } else { 1 } data$time[y_start] -} \ No newline at end of file +} diff --git a/README.md b/README.md index 70c7951..c262c16 100644 --- a/README.md +++ b/README.md @@ -10,8 +10,8 @@ Parts of the improvements rely on modifications of the [strucchange](https://cra ``` library(devtools) -install_github("appelmar/strucchange") -install_github("GreatEmerald/bfast") +install_github("bfast2/strucchange") +install_github("bfast2/bfast") ``` @@ -75,3 +75,23 @@ Most important performance modifications include: * avoiding expensive calls of `model.frame()` and `model.matrix` and using the design matrix and response vector instead of a data.frame and a formula * using `lm.fit()` instead of `lm()` when possible +## Contribution guidelines: +We always welcome contributions to the package. If you would like to propose a new functionality and/or report an issue, please use GitHub's tracker called [Issues](https://github.com/bfast2/bfast/issues). + +For development we use the [GitHub Flow](https://guides.github.com/introduction/flow/) branching model. + +Key steps: + +1. Create a GitHub issue in this repository with description of the work that you plan to do. +2. Assign yourself to the GitHub issue you are working on, to inform other developers that you are working on it. +3. Create your own working fork or branch based on the `dev` branch. +4. Make your changes in that branch. +5. Commit your changes to your working branch as long as you are not finished with your development. +6. Make sure that all tests pass (e.g., in Travis or Circle CI). +7. Once your work is finished, create a pull request so that another developer can review your changes before merging them with the `dev` (or `main`) branch. + +Additional steps for [preparing a new release](https://guide.esciencecenter.nl/best_practices/releases.html): + +8. Update the `NEWS.Rd` file with most notable changes. +9. Add new contributors to the `DESCRIPTION` file if applicable. +10. Release the package and make it citable (add `CITATION.cff` including DOI).