Skip to content

Commit

Permalink
Merge pull request #85 from NIEHS/main-sciome
Browse files Browse the repository at this point in the history
Minor bug fixes and usability improvements
  • Loading branch information
kyle-messier authored Dec 29, 2024
2 parents 39c0ffc + ec91a82 commit a611fc5
Show file tree
Hide file tree
Showing 15 changed files with 429 additions and 38 deletions.
4 changes: 2 additions & 2 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: PrestoGP
Type: Package
Title: Penalized Regression for Spatio-Temporal Outcomes via Gaussian Processes
Version: 0.2.0.9037
Version: 0.2.0.9039
Authors@R: c(
person(given = "Eric",
family = "Bair",
Expand Down Expand Up @@ -64,4 +64,4 @@ Suggests:
geoR,
testthat (>= 3.0.0)
Config/testthat/edition: 3
URL: https://niehs.github.io/PrestoGP/
URL: https://niehs.github.io/PrestoGP/, https://github.com/NIEHS/PrestoGP
3 changes: 3 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -12,17 +12,20 @@ export(create.param.sequence)
export(createUMultivariate)
export(enquo)
export(enquos)
export(get_Y)
export(max_min_ordering)
export(prestogp_predict)
export(vecchia_Mlikelihood)
export(vecchia_Mprediction)
export(vecchia_Mspecify)
exportMethods(get_beta)
exportMethods(get_converged)
exportMethods(get_linear_model)
exportMethods(get_neighbors)
exportMethods(get_pen_loglik)
exportMethods(get_scaling)
exportMethods(get_theta)
exportMethods(plot_beta)
exportMethods(prestogp_fit)
exportMethods(show)
import(GPvecchia)
Expand Down
42 changes: 42 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# PrestoGP 0.2.0.9039 (2024-12-26)

## MINOR IMPROVEMENTS

* Rewrote some tests that were failing on some verions of R

# PrestoGP 0.2.0.9038 (2024-12-23)

## NEW FEATURES

* Added the method `get_Y` to extract the Y values (including the imputed
Y values) from a PrestoGP model

* Added the method `get_linear_model` to extract the fitted cv.glmnet object
from a PrestoGP model

* Added the method `plot_beta` to plot the glide path of the
regression coefficients

## MINOR IMPROVEMENTS

* Added a NEWS.md file to track changes in each update

* Modified the `show` method to not print zero betas

* Modified `prestogp_predict` to retun predictions in the form of a
list for multivariate models

## BUG FIXES

* Fixed a bug in the intercept calculation in `get_beta`

* Fixed a bug where multivariate models with no variable names for X were
being assigned arbitrary variable names

## DOCUMENTATION FIXES

* Updated the documention in `PrestoGPModel-class` to reflect that the
Y_train slot is updated to contain imputed values during model
fitting

* Added documentation for all new methods
136 changes: 133 additions & 3 deletions R/PrestoGP_Model.R
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ setOldClass("cv.glmnet")
#' "super matrix" for multivariate models. See
#' \code{\link[psych]{superMatrix}}.
#' @slot Y_train A column matrix containing the original response values.
#' After fitting the model, missing values will be replaced with the imputed
#' values if impute.y is TRUE. See \code{link{prestogp_fit}}.
#' @slot X_ndx A vector used to find the elements of beta corresponding to
#' specific outcomes. The \emph{i}th element of X_ndx is the index of the
#' last element of beta corresponding to predictors for outcome \emph{i}.
Expand Down Expand Up @@ -117,12 +119,49 @@ setMethod("initialize", "PrestoGPModel", function(.Object, ...) {
.Object
})

#' Extract the Y values for a PrestoGP model.
#'
#' This method extracts the values of Y for a PrestoGP model. If imputation
#' was used when fitting the model, the missing Y's will be replaced with
#' their imputed values.
#'
#' @param model The PrestoGP model object
#'
#' @return A vector or list containing the values of Y. Any missing Y's will
#' be replaced with their imputed values.
#'
#' @seealso \code{\link{PrestoGPModel-class}}, \code{\link{prestogp_fit}}
#'
#' @references
#' \itemize{
#' \item Messier, K.P. and Katzfuss, M. "Scalable penalized spatiotemporal
#' land-use regression for ground-level nitrogen dioxide", The Annals of
#' Applied Statistics (2021) 15(2):688-710.
#' }
#'
#' @rdname get_Y
#' @export
#'
#' @examples
#' data(soil)
#' soil <- soil[!is.na(soil[,5]),] # remove rows with NA's
#' y <- soil[,4] # predict moisture content
#' X <- as.matrix(soil[,5:9])
#' locs <- as.matrix(soil[,1:2])
#'
#' soil.vm <- new("VecchiaModel", n_neighbors = 10)
#' soil.vm <- prestogp_fit(soil.vm, y, X, locs)
#' get_Y(soil.vm)
setGeneric("get_Y", function(model) standardGeneric("get_Y"))
setGeneric("get_theta", function(model) standardGeneric("get_theta"))
setGeneric("get_beta", function(model) standardGeneric("get_beta"))
setGeneric(
"get_linear_model", function(model) standardGeneric("get_linear_model"))
setGeneric("get_neighbors", function(model) standardGeneric("get_neighbors"))
setGeneric("get_scaling", function(model) standardGeneric("get_scaling"))
setGeneric("get_converged", function(model) standardGeneric("get_converged"))
setGeneric("get_pen_loglik", function(model) standardGeneric("get_pen_loglik"))
setGeneric("plot_beta", function(model, ...) standardGeneric("plot_beta"))
setGeneric(
"prestogp_fit",
function(model, Y, X, locs, Y.names = NULL, X.names = NULL, scaling = NULL,
Expand Down Expand Up @@ -262,7 +301,11 @@ setMethod(
cat("Matern covariance parameters (theta):", "\n")
print(get_theta(object))
cat("Regression coefficients (beta):", "\n")
print(get_beta(object))
cur.coef <- get_beta(object)
for (i in seq_along(cur.coef)) {
cur.coef[[i]] <- cur.coef[[i]][cur.coef[[i]] != 0]
}
print(cur.coef)
cat("Model type:", is(object)[1], "\n")
if (object@n_neighbors > 0) {
cat("Nearest neighbors:", object@n_neighbors, "\n")
Expand Down Expand Up @@ -411,13 +454,14 @@ setMethod(
for (i in seq_len(P)) {
if (i == 1) {
indx <- 1
junk[[P + 1]] <- model@beta[1]
junk[[P + 1]] <- model@beta[1] + model@Y_bar[1]
names(junk[[P + 1]]) <- "(Intercept)"
} else {
indx <- model@X_ndx[i - 1] + 1
new.names <- c(names(junk[[P + 1]]),
names(model@locs_train)[i])
junk[[P + 1]] <- c(junk[[P + 1]], model@beta[indx])
junk[[P + 1]] <- c(junk[[P + 1]], model@beta[indx] + model@Y_bar[i] -
model@Y_bar[1])
names(junk[[P + 1]]) <- new.names
}
pred.ndx <- (indx + 1):model@X_ndx[i]
Expand All @@ -433,6 +477,51 @@ setMethod(
}
)

#' Extract the fitted linear model for a PrestoGP model
#'
#' This method return the fitted linear model (of class cv.glmnet) for a
#' PrestoGP model.
#'
#' @param model The PrestoGP model object
#'
#' @details It is important to note that the model is fit to the
#' transformed data. The CV error rate and predicted values of Y will not be
#' correct for the original (untransformed) data. This method should be
#' used primarily for examining the coefficient path and generating plots.
#'
#' @return The fitted linear model (of class cv.glmnet).
#'
#' @seealso \code{\link{PrestoGPModel-class}}, \code{\link{prestogp_fit}},
#' \code{\link[glmnet]{cv.glmnet}}
#'
#' @references
#' \itemize{
#' \item Messier, K.P. and Katzfuss, M. "Scalable penalized spatiotemporal
#' land-use regression for ground-level nitrogen dioxide", The Annals of
#' Applied Statistics (2021) 15(2):688-710.
#' }
#'
#' @aliases get_linear_model
#' @export
#'
#' @examples
#' data(soil)
#' soil <- soil[!is.na(soil[,5]),] # remove rows with NA's
#' y <- soil[,4] # predict moisture content
#' X <- as.matrix(soil[,5:9])
#' locs <- as.matrix(soil[,1:2])
#'
#' soil.vm <- new("VecchiaModel", n_neighbors = 10)
#' soil.vm <- prestogp_fit(soil.vm, y, X, locs)
#' get_linear_model(soil.vm)

setMethod(
"get_linear_model", "PrestoGPModel",
function(model) {
return(model@linear_model)
}
)

#' Extract the number of neighbors for a PrestoGP model.
#'
#' This method extracts the number of nearest neighbors that were used to
Expand Down Expand Up @@ -590,6 +679,47 @@ setMethod(
}
)

#' Plots the glide path for the coefficients for a PrestoGP model.
#'
#' This method generates a plot showing the coefficients of the model for
#' different values of the tuning parameter. It is a wrapper for
#' \code{\link[glmnet]{plot.glmnet}}.
#'
#' @param model The PrestoGP model object
#'
#' @param ... Additional parameters to \code{\link[glmnet]{plot.glmnet}}
#'
#' @seealso \code{\link{PrestoGPModel-class}}, \code{\link{prestogp_fit}},
#' \code{\link[glmnet]{plot.glmnet}}
#'
#' @references
#' \itemize{
#' \item Messier, K.P. and Katzfuss, M. "Scalable penalized spatiotemporal
#' land-use regression for ground-level nitrogen dioxide", The Annals of
#' Applied Statistics (2021) 15(2):688-710.
#' }
#'
#' @aliases plot_beta
#' @export
#'
#' @examples
#' data(soil)
#' soil <- soil[!is.na(soil[,5]),] # remove rows with NA's
#' y <- soil[,4] # predict moisture content
#' X <- as.matrix(soil[,5:9])
#' locs <- as.matrix(soil[,1:2])
#'
#' soil.vm <- new("VecchiaModel", n_neighbors = 10)
#' soil.vm <- prestogp_fit(soil.vm, y, X, locs)
#' plot_beta(soil.vm)

setMethod(
"plot_beta", "PrestoGPModel",
function(model, ...) {
plot(model@linear_model$glmnet.fit, ...)
}
)

#' Train a PrestoGP model.
#'
#' This method fits a PrestoGP model given a set of locations and predictor
Expand Down
44 changes: 34 additions & 10 deletions R/PrestoGP_Multivariate_Vecchia.R
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,20 @@ setMethod("initialize", "MultivariateVecchiaModel", function(.Object, n_neighbor
.Object
})

#' @rdname get_Y
setMethod("get_Y", "MultivariateVecchiaModel",
function(model) {
Y.all <- model@Y_train
Y.out <- list()
for (i in seq_along(model@locs_train)) {
cur.y <- seq_len(nrow(model@locs_train[[i]]))
Y.out[[i]] <- Y.all[cur.y] + model@Y_bar[i]
Y.all <- Y.all[-cur.y]
}
return(Y.out)
}
)

#' @rdname prestogp_predict
setMethod("prestogp_predict", "MultivariateVecchiaModel",
function(model, X, locs, m = NULL, ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) {
Expand Down Expand Up @@ -93,18 +107,23 @@ setMethod("prestogp_predict", "MultivariateVecchiaModel",

# prediction function can return both mean and sds
# returns a list with elements mu.pred,mu.obs,var.pred,var.obs,V.ord
Vec.mean <- Y_bar + pred$mu.pred + Vecchia.Pred # residual + mean trend
Vec.mean.all <- Y_bar + pred$mu.pred + Vecchia.Pred # residual + mean trend
ndx.out <- NULL
for (i in seq_along(locs)) {
ndx.out <- c(ndx.out, rep(i, nrow(locs[[i]])))
}
if (return.values == "mean") {
Vec.mean <- list()
for (i in seq_along(locs)) {
Vec.mean[[i]] <- Vec.mean.all[ndx.out == i]
}
names(Vec.mean) <- names(model@locs_train)
return.list <- list(means = Vec.mean)
} else {
warning("Variance estimates do not include model fitting variance and are anticonservative. Use with caution.")
Vec.sds <- pred$var.pred
ndx.out <- NULL
Vec.sds <- list()
for (i in seq_along(locs)) {
ndx.out <- c(ndx.out, rep(i, nrow(locs[[i]])))
}
for (i in seq_along(locs)) {
Vec.sds[ndx.out == i] <- sqrt(Vec.sds[ndx.out == i] +
Vec.sds[[i]] <- sqrt(pred$var.pred[ndx.out == i] +
model@covparams[model@param_sequence[4, i]])
}
return.list <- list(means = Vec.mean, sds = Vec.sds)
Expand Down Expand Up @@ -141,9 +160,7 @@ setMethod("check_input", "MultivariateVecchiaModel", function(model, Y, X, locs,
}
}
if (is.null(names(locs))) {
for (i in seq_along(locs)) {
names(locs)[i] <- paste0("Y", i)
}
names(locs) <- paste0("Y", seq_along(locs))
}
if (!is.null(lod)) {
if (!is.list(lod)) {
Expand Down Expand Up @@ -241,8 +258,15 @@ setMethod("check_input", "MultivariateVecchiaModel", function(model, Y, X, locs,
if (length(X) == 1) {
model@X_train <- X[[1]]
} else {
if (is.null(colnames(X[[1]]))) {
colnames(X[[1]]) <- paste0(names(locs)[1], "_", seq_len(ncol(X[[1]])))
}
for (i in 2:length(X)) {
X[[i]] <- cbind(rep(1, nrow(X[[i]])), X[[i]])
if (is.null(colnames(X[[i]]))) {
colnames(X[[i]]) <- paste0(names(locs)[i], "_",
(seq_len(ncol(X[[i]])) - 1))
}
model@X_ndx <- c(model@X_ndx, model@X_ndx[i - 1] + ncol(X[[i]]))
}
model@X_train <- psych::superMatrix(X)
Expand Down
7 changes: 7 additions & 0 deletions R/PrestoGP_Vecchia.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,13 @@ setMethod("initialize", "VecchiaModel", function(.Object, n_neighbors = 25, ...)
.Object
})

#' @rdname get_Y
setMethod("get_Y", "VecchiaModel",
function(model) {
return(as.vector(model@Y_train + model@Y_bar))
}
)

#' @rdname prestogp_predict
setMethod("prestogp_predict", "VecchiaModel",
function(model, X, locs, m = NULL, ordering.pred = c("obspred", "general"), pred.cond = c("independent", "general"), return.values = c("mean", "meanvar")) {
Expand Down
1 change: 1 addition & 0 deletions man/PrestoGP-package.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion man/PrestoGPModel-class.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit a611fc5

Please sign in to comment.