From afc8a40bb7d408e2601ba5b6dd80627023df4734 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Tue, 2 May 2023 16:40:40 +0100
Subject: [PATCH 01/55] Start converting vignette to terra and sf

---
 vignettes/disaggregation.Rmd | 35 +++++++++++++++++++++++------------
 1 file changed, 23 insertions(+), 12 deletions(-)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 534e048..3fad507 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -53,27 +53,38 @@ library(raster, quietly = TRUE)
 library(disaggregation, quietly = TRUE)
 library(sf)
 
-map <- NYleukemia$spatial.polygon
+library(sf)
+library(terra)
+
+
+polygons <- sf::st_as_sf(NYleukemia$spatial.polygon)
+
 df <- NYleukemia$data
+df <- merge(polygons, df)
 
-polygon_data <- SpatialPolygonsDataFrame(map, df)
-polygon_data
 ```
 
 Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at a resolution of approximately 1 km^2.
 
 ```{r, fig.show='hold'}
-extent_in_km <- 111*(polygon_data@bbox[, 2] - polygon_data@bbox[, 1])
+
+bbox <- sf::st_bbox(df)
+
+extent_in_km <- 111*(bbox[c(3, 4)] - bbox[c(1, 2)])
 n_pixels_x <- floor(extent_in_km[[1]])
 n_pixels_y <- floor(extent_in_km[[2]])
-r <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
-r <- raster::setExtent(r, raster::extent(polygon_data))
-r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3))
-r2 <- raster::raster(ncol = n_pixels_x, nrow = n_pixels_y)
-r2 <- raster::setExtent(r2, raster::extent(polygon_data))
-r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
-cov_stack <- raster::stack(r, r2)
-cov_stack <- raster::scale(cov_stack)
+
+r <- terra::rast(ncols = n_pixels_x, nrows = n_pixels_y)
+terra::ext(r) <- terra::ext(df)
+values(r) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3))
+r2 <- terra::rast(ncol = n_pixels_x, nrow = n_pixels_y)
+terra::ext(r2) <- terra::ext(df)
+values(r2) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
+
+
+cov_stack <- terra::rast(list(r, r2))
+cov_stack <- terra::scale(cov_stack)
+
 ```
 
 We also create a population raster. This is to allow the model to correctly aggregate the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed.

From 281fb2f4b8a31429ecaf296932f44cdf4c3aa18f Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 22 May 2023 16:21:35 +0100
Subject: [PATCH 02/55] Vignette preamble now terra. Except gBuffer, which I'm
 not sure if I need to replace or not.

---
 vignettes/disaggregation.Rmd | 29 ++++++++++++++++++-----------
 1 file changed, 18 insertions(+), 11 deletions(-)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 3fad507..3dfeafa 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -48,19 +48,20 @@ We will demonstrate an example of the **disaggregation** package using areal dat
 ```{r}
 library(SpatialEpi, quietly = TRUE)
 library(dplyr, quietly = TRUE)
-library(sp, quietly = TRUE)
+library(sp, quietly = TRUE) # Don't need to read data. So just here while I learn sf.
 library(raster, quietly = TRUE)
 library(disaggregation, quietly = TRUE)
-library(sf)
-
+library(ggplot2)
 library(sf)
 library(terra)
 
 
 polygons <- sf::st_as_sf(NYleukemia$spatial.polygon)
 
-df <- NYleukemia$data
-df <- merge(polygons, df)
+df <- cbind(polygons, NYleukemia$data)
+
+ggplot() + geom_sf(data = df, aes(fill = cases / population))
+
 
 ```
@@ -76,10 +77,16 @@ n_pixels_y <- floor(extent_in_km[[2]])
 
 r <- terra::rast(ncols = n_pixels_x, nrows = n_pixels_y)
 terra::ext(r) <- terra::ext(df)
-values(r) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3))
+
+data_generate <- function(x){
+  rnorm(1, ifelse(x %% n_pixels_x != 0, x %% n_pixels_x, n_pixels_x), 3)
+}
+
+terra::values(r) <- sapply(seq(terra::ncell(r)), data_generate)
 r2 <- terra::rast(ncol = n_pixels_x, nrow = n_pixels_y)
 terra::ext(r2) <- terra::ext(df)
-values(r2) <- sapply(seq(terra::ncell(r)), function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
+terra::values(r2) <- sapply(seq(terra::ncell(r2)),
+                            function(x) rnorm(1, ceiling(x/n_pixels_y), 3))
 
 
 cov_stack <- terra::rast(list(r, r2))
@@ -90,10 +97,10 @@ cov_stack <- terra::scale(cov_stack)
 
 We also create a population raster. This is to allow the model to correctly aggregate the pixel values to the polygon level. For this simple example we assume that the population within each polygon is uniformly distributed.
 
 ```{r, fig.show='hold'}
-extracted <- raster::extract(r, polygon_data)
-n_cells <- sapply(extracted, length)
-polygon_data@data$pop_per_cell <- polygon_data@data$population/n_cells
-pop_raster <- rasterize(polygon_data, cov_stack, field = 'pop_per_cell')
+extracted <- terra::extract(r, terra::vect(df$geometry), fun = sum)
+n_cells <- terra::extract(r, terra::vect(df$geometry), fun = length)
+df$pop_per_cell <- df$population/n_cells$lyr.1
+pop_raster <- rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell')
 ```

From cfd56592b895df72dc2342ab18a431682655b560 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 22 May 2023 16:24:25 +0100
Subject: [PATCH 03/55] Replaced gBuffer with sf equivalent even though I'm not
 100% sure it's needed.

---
 vignettes/disaggregation.Rmd | 1 +
 1 file changed, 1 insertion(+)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index 3dfeafa..a089e70 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -112,6 +112,7 @@ switch the polygons to simple features and back again.
 
 ```{r, fig.show='hold'}
 polygon_data <- sf:::as_Spatial(st_buffer(st_as_sf(polygon_data), dist = 0))
+df <- sf::st_buffer(df, dist = 0)
 ```
 
 Now we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified.

From 7f8197ca1ab02f726d8017c1f0de3f571d3f8fd1 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 22 May 2023 17:00:07 +0100
Subject: [PATCH 04/55] Mid work, working through prepare_data making it terra

---
 R/extract.R                  | 16 ++++++-------
 R/prepare_data.R             | 44 ++++++++++++++++++------------------
 vignettes/disaggregation.Rmd |  3 ++-
 3 files changed, 32 insertions(+), 31 deletions(-)

diff --git a/R/extract.R b/R/extract.R
index 5a166f3..05eb555 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -57,17 +57,17 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
                  call. = FALSE)
   }
 
-  shape@data[, id] <- as.character(shape@data[, id])
+  shape[, id] <- as.character(shape[, id, drop = TRUE])
 
   i <- NULL
   # Run extract in parallel.
-  values <- foreach::foreach(i = seq_along(shape)) %dopar% {
-    raster::extract(raster, shape[i, ], fun = fun, na.rm = TRUE, cellnumbers = TRUE, ...)
+  values <- foreach::foreach(i = seq(nrow(shape))) %dopar% {
+    terra::extract(raster, terra::vect(shape[i, ]), fun = fun, na.rm = TRUE, cells = TRUE, ...)
   }
 
   if(!is.null(fun)){
     # If a summary function was given, just bind everything together and add ID column
     df <- data.frame(do.call(rbind, values))
-    if(inherits(shape, 'SpatialPolygonsDataFrame')){
+    if(inherits(shape, 'df')){
       df <- cbind(ID = as.data.frame(shape)[, id], df)
     } else{
       df <- cbind(ID = names(shape), df)
@@ -83,7 +83,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
     # Want to make covariates columns, rbind shapes, and add shape and cell id columns.
 
     # list of vectors, one for each covariate
-    values_id <- lapply(seq_along(values), function(x) data.frame(shape@data[, id][x], values[[x]][[1]]))
+    values_id <- lapply(seq_along(values), function(x) data.frame(shape[, id, drop = TRUE][x], values[[x]][[1]]))
 
     df <- do.call(rbind, values_id)
 
@@ -101,7 +101,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
 #' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
 #' not exist), this column will contain NAs.
 #'
-#' @param shape A SpatialPolygons object containing response data.
+#' @param shape An sf object containing response data.
 #' @param id_var Name of column in shape object with the polygon id. Default 'area_id'.
 #' @param response_var Name of column in shape object with the response data. Default 'response'.
 #' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.
@@ -132,10 +132,10 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
 getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) {
 
   if(is.null(sample_size_var)) {
-    polygon_df <- shape@data[, c(id_var, response_var)]
+    polygon_df <- shape[, c(id_var, response_var), drop = TRUE]
     polygon_df$N <- rep(NA, nrow(polygon_df))
   } else {
-    polygon_df <- shape@data[, c(id_var, response_var, sample_size_var)]
+    polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE]
   }
 
   names(polygon_df) <- c('area_id', 'response', 'N')

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 85af1bf..7c4d75f 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -26,7 +26,7 @@
 #' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
 #' and sets covariate NAs pixels to the median value for that covariate.
 #'
-#' @param polygon_shapefile SpatialPolygonDataFrame containing at least two columns: one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).
+#' @param x sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).
 #' @param covariate_rasters RasterStack of covariate rasters to be used in the model.
 #' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.
 #' @param id_var Name of column in SpatialPolygonDataFrame object with the polygon id.
@@ -40,7 +40,7 @@
 #' @return A list is returned of class \code{disag_data}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
 #' The list of class \code{disag_data} contains:
-#' \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.}
+#' \item{x }{The sf object used as an input.}
 #' \item{covariate_rasters }{The RasterStack used as an input.}
 #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.}
 #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.}
@@ -75,7 +75,7 @@
 #' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
 #' cov_rasters <- raster::stack(r, r2)
 #'
-#' test_data <- prepare_data(polygon_shapefile = spdf,
+#' test_data <- prepare_data(x = spdf,
 #'                           covariate_rasters = cov_rasters)
 #' }
 #'
@@ -83,7 +83,7 @@
 #'
 #'
-prepare_data <- function(polygon_shapefile,
+prepare_data <- function(x,
                          covariate_rasters,
                          aggregation_raster = NULL,
                          id_var = 'area_id',
@@ -94,24 +94,24 @@ prepare_data <- function(polygon_shapefile,
                          makeMesh = TRUE,
                          ncores = 2) {
 
-  stopifnot(inherits(polygon_shapefile, 'SpatialPolygonsDataFrame'))
-  stopifnot(inherits(covariate_rasters, 'Raster'))
-  if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'Raster'))
+  stopifnot(inherits(x, 'sf'))
+  stopifnot(inherits(covariate_rasters, 'SpatRaster'))
+  if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'SpatRaster'))
   stopifnot(inherits(id_var, 'character'))
   stopifnot(inherits(response_var, 'character'))
   if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list'))
 
   # Check for NAs in response data
-  na_rows <- is.na(polygon_shapefile@data[, response_var])
+  na_rows <- is.na(x[, response_var, drop = TRUE])
   if(sum(na_rows) != 0) {
     if(na.action) {
-      polygon_shapefile <- polygon_shapefile[!na_rows, ]
+      x <- x[!na_rows, ]
     } else {
       stop('There are NAs in the response data. Please deal with these, or set na.action = TRUE')
     }
   }
 
-  polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var)
+  polygon_data <- getPolygonData(x, id_var, response_var, sample_size_var)
 
   # Save raster layer names so we can reassign them to make sure the names don't change.
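[Editor's note, not part of the patch: the migration above replaces the raster/foreach extraction with terra. As a minimal, self-contained sketch of the terra idiom the series is moving to — all object names below are illustrative, not from the package:]

```r
library(terra)
library(sf)

# A toy 10 x 10 raster covering [0, 10] x [0, 10]
r <- terra::rast(ncols = 10, nrows = 10, xmin = 0, xmax = 10, ymin = 0, ymax = 10)
terra::values(r) <- seq_len(terra::ncell(r))

# A single toy polygon as an sf object
poly <- sf::st_sf(
  area_id = 1,
  geometry = sf::st_sfc(sf::st_polygon(list(
    rbind(c(1, 1), c(4, 1), c(4, 4), c(1, 4), c(1, 1))
  )))
)

# fun = NULL keeps one row per pixel; cells = TRUE appends the cell number,
# which is what a per-pixel covariate data.frame needs
vals <- terra::extract(r, terra::vect(poly), fun = NULL, cells = TRUE)
head(vals)
```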
@@ -120,15 +120,15 @@ prepare_data <- function(polygon_shapefile, # If no aggregation raster is given, use a 'unity' raster if(is.null(aggregation_raster)) { aggregation_raster <- covariate_rasters[[1]] - aggregation_raster <- raster::setValues(aggregation_raster, rep(1, raster::ncell(aggregation_raster))) + terra::values(aggregation_raster) <- rep(1, terra::ncell(aggregation_raster)) } names(aggregation_raster) <- 'aggregation_raster' - covariate_rasters <- raster::addLayer(covariate_rasters, aggregation_raster) + covariate_rasters <- c(covariate_rasters, aggregation_raster) cl <- parallel::makeCluster(ncores) doParallel::registerDoParallel(cl) - covariate_data <- parallelExtract(covariate_rasters, polygon_shapefile, fun = NULL, id = id_var) + covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) parallel::stopCluster(cl) foreach::registerDoSEQ() @@ -167,14 +167,14 @@ prepare_data <- function(polygon_shapefile, mesh <- NULL message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") } else { - mesh <- build_mesh(polygon_shapefile, mesh.args) + mesh <- build_mesh(x, mesh.args) } } else { mesh <- NULL message("A mesh is not being built. You will not be able to run a spatial model without a mesh.") } - disag_data <- list(polygon_shapefile = polygon_shapefile, + disag_data <- list(x = x, shapefile_names = list(id_var = id_var, response_var = response_var), covariate_rasters = covariate_rasters, polygon_data = polygon_data, @@ -193,13 +193,13 @@ prepare_data <- function(polygon_shapefile, #' Function to fit the disaggregation model #' -#' @param polygon_shapefile SpatialPolygonDataFrame containing the response data -#' @param shapefile_names List of 2: polygon id variable name and response variable name from polygon_shapefile +#' @param x SpatialPolygonDataFrame containing the response data +#' @param shapefile_names List of 2: polygon id variable name and response variable name from x #' @param covariate_rasters RasterStack of covariates #' @param polygon_data data.frame with two columns: polygon id and response #' @param covariate_data data.frame with cell id, polygon id and covariate columns #' @param aggregation_pixels vector with value of aggregation raster at each pixel -#' @param coordsForFit coordinates of the covariate data points within the polygons in polygon_shapefile +#' @param coordsForFit coordinates of the covariate data points within the polygons in x #' @param coordsForPrediction coordinates of the covariate data points in the whole raster extent #' @param startendindex matrix containing the start and end index for each polygon #' @param mesh inla.mesh object to use in the fit @@ -207,7 +207,7 @@ prepare_data <- function(polygon_shapefile, #' @return A list is returned of class \code{disag_data}. #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} +#' \item{x }{The SpatialPolygonDataFrame used as an input.} #' \item{covariate_rasters }{The RasterStack used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. 
Each row represents a pixel in a polygon.}
@@ -222,7 +222,7 @@
 #' @export
 
-as.disag_data <- function(polygon_shapefile,
+as.disag_data <- function(x,
                           shapefile_names,
                           covariate_rasters,
                           polygon_data,
@@ -233,7 +233,7 @@ as.disag_data <- function(polygon_shapefile,
                           startendindex,
                           mesh = NULL) {
 
-  stopifnot(inherits(polygon_shapefile, 'SpatialPolygonsDataFrame'))
+  stopifnot(inherits(x, 'SpatialPolygonsDataFrame'))
   stopifnot(inherits(shapefile_names, 'list'))
   stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack')))
   stopifnot(inherits(polygon_data, 'data.frame'))
@@ -246,7 +246,7 @@ as.disag_data <- function(polygon_shapefile,
     stopifnot(inherits(mesh, 'inla.mesh'))
   }
 
-  disag_data <- list(polygon_shapefile = polygon_shapefile,
+  disag_data <- list(x = x,
                      shapefile_names = shapefile_names,
                      covariate_rasters = covariate_rasters,
                      polygon_data = polygon_data,

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index a089e70..8d2944b 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -91,6 +91,7 @@ terra::values(r2) <- sapply(seq(terra::ncell(r2)),
 
 cov_stack <- terra::rast(list(r, r2))
 cov_stack <- terra::scale(cov_stack)
+names(cov_stack) <- c('layer1', 'layer2')
 
 ```
@@ -120,7 +121,7 @@ Now we have set up the data, we can use the `prepare_data` function to create the
 The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.nonconvex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html).
 
 ```{r, fig.show='hold', eval= isINLA}
-data_for_model <- prepare_data(polygon_data,
+data_for_model <- prepare_data(x = df,

From 93b65cd6614c011caa94ccfe5d531d8133693bb Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 22 May 2023 17:00:43 +0100
Subject: [PATCH 05/55] Old change, but adding what changes were made for 0.2.0.

---
 cran-comments.md | 71 ++++++++++++++++++++++++++++++------------------
 1 file changed, 45 insertions(+), 26 deletions(-)

diff --git a/cran-comments.md b/cran-comments.md
index 1892954..6d8f73b 100644
--- a/cran-comments.md
+++ b/cran-comments.md
@@ -1,15 +1,8 @@
 ## Update
-This is a package update (version 0.1.4). The changes in this version are:
+This is a package update (version 0.2.0). The only real change in this version
+is updating references to our Journal of Statistical Software paper that is in
+press.
 
-* Change maintainer. Anita Nandi has emailed to confirm. Anita has moved industry and no longer has time to maintain this package.
-
-* Fixed mistake in model definition. We were adjusting the jacobian for a change of variables incorrectly.
-
-* Fixed predictions in models with no field
-
-* Better documentation for priors.
-
-* redocument to fix html5 issues.
@@ -23,29 +16,55 @@ Ubuntu 20, R devel
 ## R CMD check results
 There were no ERRORs or WARNINGs.
 
-There were 3 NOTEs:
+There were 3 NOTEs.
+
+
+* checking CRAN incoming feasibility ... [14s] NOTE
+Maintainer: 'Tim Lucas '
+
+Possibly misspelled words in DESCRIPTION:
+  Nandi (15:28)
+
+Suggests or Enhances not in mainstream repositories:
+  INLA
+Availability using Additional_repositories specification:
+  INLA   yes   https://inla.r-inla-download.org/R/stable
+
+Found the following (possibly) invalid DOIs:
+  DOI: 10.18637/jss.v106.i11
+    From: DESCRIPTION
+          inst/CITATION
+    Status: 404
+    Message: Not Found
+
+
+Examples with CPU (user + system) or elapsed time > 10s
+                 user system elapsed
+getPolygonData   9.89   0.17   10.08
+
+
+
+Response: Anita Nandi's name is spelled correctly. The INLA availability
+issue is the same as in previous submissions. The DOI is for our new Journal
+of Statistical Software paper and has been reserved but not registered yet.
+
+
+* checking package dependencies ... NOTE
+Package suggested but not available for checking: 'INLA'
 
-  Suggests or Enhances not in mainstream repositories:
-    INLA
-  Availability using Additional_repositories specification:
-    INLA yes https://inla.r-inla-download.org/R/stable
+Response: Same as above.
 
-  The package uses INLA, my understanding of this NOTE is that it is fine.
 
-* checking installed package size ... NOTE
-    installed size is 12.8Mb
-    sub-directories of 1Mb or more:
-      libs 12.5Mb
+* checking examples ... [16s] NOTE
+Examples with CPU (user + system) or elapsed time > 10s
+                 user system elapsed
+getPolygonData   9.89   0.17   10.08
 
-  Packages based on C++ can have large compiled libraries. This is as small as it can be, hope that is ok. I got a similar, but slightly different note when using R CMD check compared to devtools::check(). The gist was the same though.
-* checking compilation flags used ... NOTE
-  Compilation used the following non-portable flag(s):
-    '-Wa,-mbig-obj'
-
-  To compile large C++ source files on Windows a compilation flag is needed
+Response: As this is only just over the 10 second limit we hope it is ok. We
+have done our best to make the examples small throughout.
 
 ## Downstream dependencies

From 7c3ee57551c1fc8d6f6c04ffc7fa88dd79cc7575 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Wed, 31 May 2023 15:53:45 +0100
Subject: [PATCH 06/55] Switch parallelExtract to 1 core terra. I can't get
 terra to work in parallel, so just doing this for now.

---
 R/extract.R      | 24 +++++-------------------
 R/prepare_data.R |  6 +-----
 2 files changed, 6 insertions(+), 24 deletions(-)

diff --git a/R/extract.R b/R/extract.R
index 05eb555..9137adf 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -52,21 +52,14 @@
 
 parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
 
-  if (!requireNamespace("foreach", quietly = TRUE)) {
-    stop("foreach needed for this function to work. Please install it.",
-         call. = FALSE)
-  }
-
   shape[, id] <- as.character(shape[, id, drop = TRUE])
 
-  i <- NULL
   # Run extract in parallel.
-  values <- foreach::foreach(i = seq(nrow(shape))) %dopar% {
-    terra::extract(raster, terra::vect(shape[i, ]), fun = fun, na.rm = TRUE, cells = TRUE, ...)
-  }
+  values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...)
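+  # With fun = NULL, terra::extract() returns a single data.frame with one row
+  # per pixel: an ID column indexing the polygons, one column per raster layer
+  # and, because cells = TRUE, a trailing 'cell' column of cell numbers.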
+
   if(!is.null(fun)){
     # If a summary function was given, just bind everything together and add ID column
-    df <- data.frame(do.call(rbind, values))
+    df <- values
     if(inherits(shape, 'df')){
@@ -66,13 +67,12 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
       df <- cbind(ID = names(shape), df)
       id <- 'id'
     }
-    names(df) <- c(id, names(raster))
-    return(df)
+
   } else {
-    # If no summary was given we get a list of length n.shapes
-    # each entry in the list is a dataframe with n.covariates columns
-    # Want to make covariates columns, rbind shapes, and add shape and cell id columns.
-
-    # list of vectors, one for each covariate
-    values_id <- lapply(seq_along(values), function(x) data.frame(shape[, id, drop = TRUE][x], values[[x]][[1]]))
-
-    df <- do.call(rbind, values_id)
+    df <- values[, 2:(ncol(values) - 1)]
+    df <- cbind(values$ID, values$cell, df)
 
     names(df) <- c(id, 'cellid', names(raster))
     return(df)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 7c4d75f..c04e46d 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -126,12 +126,8 @@ prepare_data <- function(x,
 
   covariate_rasters <- c(covariate_rasters, aggregation_raster)
 
-  cl <- parallel::makeCluster(ncores)
-  doParallel::registerDoParallel(cl)
   covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var)
-  parallel::stopCluster(cl)
-  foreach::registerDoSEQ()
-
+
   covariate_rasters <- raster::dropLayer(covariate_rasters, raster::nlayers(covariate_rasters))
   names(covariate_rasters) <- cov_names

From b8770df81bd24055d89e6de0f9241eb6a1cf4871 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Wed, 31 May 2023 16:13:01 +0100
Subject: [PATCH 07/55] Currently broken. raster extract wasn't adding a proper
 polygon id column.

---
 .Rbuildignore                                  |  18 +-
 .github/workflows/R-CMD-check-HTML5.yaml       | 116 ++--
 .github/workflows/R-CMD-check-no-suggests.yaml | 190 +++---
 .gitignore                                     |  20 +-
 NAMESPACE                                      |   1 +
 R/build_mesh.R                                 |  50 +-
 R/extract.R                                    |  14 +-
 R/fit_model.R                                  | 282 +++++-----
 R/matching.R                                   | 106 ++--
 R/plotting.R                                   | 127 ++---
 R/predict.R                                    | 225 ++++----
 R/prepare_data.R                               |   4 +-
 README.md                                      |   1 -
 cran-comments.md                               | 142 ++---
 man/build_mesh.Rd                              |   8 +-
 man/fit_model.Rd                               | 296 +++++-----
 man/getStartendindex.Rd                        |  84 +--
 man/make_model_object.Rd                       | 248 ++++-----
 man/plot.disag_data.Rd                         |  48 +-
 man/plot.disag_model.Rd                        |  44 +-
 man/plot.disag_prediction.Rd                   |  44 +-
 src/disaggregation.cpp                         | 524 +++++++++---------
 tests/testthat/test-fit-model.R                | 336 +++++------
 tests/testthat/test-predict-model.R            | 106 ++--
 vignettes/disaggregation.Rmd                   |   4 +-
 25 files changed, 1521 insertions(+), 1517 deletions(-)

diff --git a/.Rbuildignore b/.Rbuildignore
index 4808471..3ad7674 100644
--- a/.Rbuildignore
+++ b/.Rbuildignore
@@ -1,10 +1,10 @@
-^.*\.Rproj$
-^\.Rproj\.user$
-^\.git*
-README.md
-.travis.yml
-vignettes/disaggregation_cache/*
-cran-comments.md
-^\.github$
-.github/workflows/R-CMD-check-HTML5.archyaml
+^.*\.Rproj$
+^\.Rproj\.user$
+^\.git*
+README.md
+.travis.yml
+vignettes/disaggregation_cache/*
+cran-comments.md
+^\.github$
+.github/workflows/R-CMD-check-HTML5.archyaml
 vignettes/spatio_temporal_disaggregation.Rmd
\ No newline at end of file
diff --git a/.github/workflows/R-CMD-check-HTML5.yaml b/.github/workflows/R-CMD-check-HTML5.yaml
index e651e70..a4c492c 100644
--- a/.github/workflows/R-CMD-check-HTML5.yaml
+++ b/.github/workflows/R-CMD-check-HTML5.yaml
@@ -1,58 +1,58 @@
-# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag.
-# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -on: - push: - branches: [html5] - pull_request: - branches: [html5] - -name: R-CMD-check-html5 - - -jobs: - HTML5-check: - runs-on: ubuntu-latest - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/stable" - - - - name: Install pdflatex - run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@8 - brew install gdal - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"all"' - extra-packages: | - rcmdcheck - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - with: - args: '"--as-cran"' - build_args: 'character()' - #error-on: '"note"' +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +on: + push: + branches: [html5] + pull_request: + branches: [html5] + +name: R-CMD-check-html5 + + +jobs: + HTML5-check: + runs-on: ubuntu-latest + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/stable" + + + - name: Install pdflatex + run: sudo apt-get install texlive-latex-base texlive-fonts-recommended texlive-fonts-extra texlive-latex-extra + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@8 + brew install gdal + + - uses: r-lib/actions/setup-r-dependencies@v2 + with: + dependencies: '"all"' + extra-packages: | + rcmdcheck + + - name: Session info + run: | + options(width = 100) + pkgs <- installed.packages()[, "Package"] + sessioninfo::session_info(pkgs, include_base = TRUE) + shell: Rscript {0} + + - uses: r-lib/actions/check-r-package@v2 + with: + args: '"--as-cran"' + build_args: 'character()' + #error-on: '"note"' diff --git a/.github/workflows/R-CMD-check-no-suggests.yaml b/.github/workflows/R-CMD-check-no-suggests.yaml index e4db198..cea02e9 100644 --- a/.github/workflows/R-CMD-check-no-suggests.yaml +++ b/.github/workflows/R-CMD-check-no-suggests.yaml @@ -1,96 +1,96 @@ -# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. -# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions -# -# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml -# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. 
- -on: - push: - branches: - '**' - pull_request: - branches: - - devel - - master - -name: R-CMD-check-no-suggests - -jobs: - R-CMD-check: - runs-on: ${{ matrix.config.os }} - - name: ${{ matrix.config.os }} (${{ matrix.config.r }}) - - strategy: - fail-fast: false - matrix: - config: - - {os: windows-latest, r: 'release'} - # - {os: macOS-latest, r: 'release'} - - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} - - env: - R_REMOTES_NO_ERRORS_FROM_WARNINGS: true - RSPM: ${{ matrix.config.rspm }} - - steps: - - uses: actions/checkout@v2 - - - uses: r-lib/actions/setup-r@v2 - with: - r-version: ${{ matrix.config.r }} - extra-repositories: "https://inla.r-inla-download.org/R/testing" - - - uses: r-lib/actions/setup-pandoc@v2 - - - name: Install system dependencies on MacOS (X11, gdal) - if: runner.os == 'macOS' - run: | - brew install --cask xquartz - brew install pkg-config - brew install proj@9 - brew install gdal - - - name: Has inla? Check. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - shell: Rscript {0} - - - uses: r-lib/actions/setup-r-dependencies@v2 - with: - dependencies: '"hard"' - extra-packages: | - rcmdcheck - testthat - - - name: Has inla? Check, and remove. - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - "INLA" %in% pkgs - if ("INLA" %in% pkgs) { - remove.packages("INLA") - } - shell: Rscript {0} - - - name: Session info - run: | - options(width = 100) - pkgs <- installed.packages()[, "Package"] - sessioninfo::session_info(pkgs, include_base = TRUE) - shell: Rscript {0} - - - uses: r-lib/actions/check-r-package@v2 - env: - _R_CHECK_CRAN_INCOMING_REMOTE_: false - _R_CHECK_FORCE_SUGGESTS_: false - with: - build_args: 'c("--no-manual", "--no-build-vignettes")' - args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")' - - - +# For help debugging build failures open an issue on the RStudio community with the 'github-actions' tag. +# https://community.rstudio.com/new-topic?category=Package%20development&tags=github-actions +# +# Largely copied from: https://github.com/inlabru-org/inlabru/blob/devel/.github/workflows/R-CMD-check-no-suggests.yaml +# Want to test without suggests to ensure things don't fail on cran when INLA isn't there. + +on: + push: + branches: + '**' + pull_request: + branches: + - devel + - master + +name: R-CMD-check-no-suggests + +jobs: + R-CMD-check: + runs-on: ${{ matrix.config.os }} + + name: ${{ matrix.config.os }} (${{ matrix.config.r }}) + + strategy: + fail-fast: false + matrix: + config: + - {os: windows-latest, r: 'release'} + # - {os: macOS-latest, r: 'release'} + - {os: ubuntu-20.04, r: 'release', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + - {os: ubuntu-20.04, r: 'devel', rspm: "https://packagemanager.rstudio.com/cran/__linux__/focal/latest"} + + env: + R_REMOTES_NO_ERRORS_FROM_WARNINGS: true + RSPM: ${{ matrix.config.rspm }} + + steps: + - uses: actions/checkout@v2 + + - uses: r-lib/actions/setup-r@v2 + with: + r-version: ${{ matrix.config.r }} + extra-repositories: "https://inla.r-inla-download.org/R/testing" + + - uses: r-lib/actions/setup-pandoc@v2 + + - name: Install system dependencies on MacOS (X11, gdal) + if: runner.os == 'macOS' + run: | + brew install --cask xquartz + brew install pkg-config + brew install proj@9 + brew install gdal + + - name: Has inla? Check. 
+        run: |
+          options(width = 100)
+          pkgs <- installed.packages()[, "Package"]
+          "INLA" %in% pkgs
+        shell: Rscript {0}
+
+      - uses: r-lib/actions/setup-r-dependencies@v2
+        with:
+          dependencies: '"hard"'
+          extra-packages: |
+            rcmdcheck
+            testthat
+
+      - name: Has inla? Check, and remove.
+        run: |
+          options(width = 100)
+          pkgs <- installed.packages()[, "Package"]
+          "INLA" %in% pkgs
+          if ("INLA" %in% pkgs) {
+            remove.packages("INLA")
+          }
+        shell: Rscript {0}
+
+      - name: Session info
+        run: |
+          options(width = 100)
+          pkgs <- installed.packages()[, "Package"]
+          sessioninfo::session_info(pkgs, include_base = TRUE)
+        shell: Rscript {0}
+
+      - uses: r-lib/actions/check-r-package@v2
+        env:
+          _R_CHECK_CRAN_INCOMING_REMOTE_: false
+          _R_CHECK_FORCE_SUGGESTS_: false
+        with:
+          build_args: 'c("--no-manual", "--no-build-vignettes")'
+          args: 'c("--no-manual", "--ignore-vignettes", "--as-cran")'
+
+
+
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 260b964..eef5be0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,11 +1,11 @@
-inst/doc
-.Rproj.user
-.Rhistory
-.Rproj
-.RData
-*.o
-*.so
-vignettes/disaggregation_cache/*
-vignettes/disaggregation_files/*
-.github/workflows/R-CMD-check-HTML5.archyaml
+inst/doc
+.Rproj.user
+.Rhistory
+.Rproj
+.RData
+*.o
+*.so
+vignettes/disaggregation_cache/*
+vignettes/disaggregation_files/*
+.github/workflows/R-CMD-check-HTML5.archyaml
 vignettes/spatio_temporal_disaggregation.Rmd
\ No newline at end of file
diff --git a/NAMESPACE b/NAMESPACE
index 5669eb7..b298c2b 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -33,3 +33,4 @@ importFrom(stats,cor)
 importFrom(stats,quantile)
 importFrom(stats,sd)
 useDynLib(disaggregation)
+
diff --git a/R/build_mesh.R b/R/build_mesh.R
index 151bbdb..71946c9 100644
--- a/R/build_mesh.R
+++ b/R/build_mesh.R
@@ -1,28 +1,28 @@
 #' Build mesh for disaggregation model
-#' 
+#'
 #' \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field.
 #'
-#' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary 
-#' and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest 
+#' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary
+#' and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest
 #' and having a small region outside with a coarser mesh to avoid edge effects.
-#' 
+#'
 #' Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution},
 #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself,
 #' with the names meaning the same as used by INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}.
-#' 
+#'
 #' Defaults are:
 #' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)).
-#' 
+#'
 #' @param shapes shapefile covering the region under investigation.
 #' @param mesh.args list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution},
 #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself,
 #' with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}.
#' #' @return An inla.mesh object -#' +#' #' @name build_mesh #' -#' @examples +#' @examples #' \dontrun{ #' polygons <- list() #' for(i in 1:100) { @@ -31,11 +31,11 @@ #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) #' } -#' +#' #' polys <- do.call(raster::spPolygons, polygons) #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) #' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' +#' #' my_mesh <- build_mesh(spdf) #' } #' @@ -47,38 +47,38 @@ build_mesh <- function(shapes, mesh.args = NULL) { stopifnot(inherits(shapes, 'SpatialPolygons')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - + limits <- sp::bbox(shapes) hypotenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) maxedge <- hypotenuse/10 - - + + pars <- list(convex = -0.01, concave = -0.5, resolution = 300, - max.edge = c(maxedge, maxedge * 2), - cut = 0.1, + max.edge = c(maxedge, maxedge * 2), + cut = 0.1, offset = c(hypotenuse / 10, hypotenuse / 10)) - + pars[names(mesh.args)] <- mesh.args outline <- sf::st_union(sf::st_as_sf(shapes)) coords <- sf::st_coordinates(outline) - - - outline.hull <- INLA::inla.nonconvex.hull(coords, - convex = pars$convex, + + + outline.hull <- INLA::inla.nonconvex.hull(coords, + convex = pars$convex, concave = pars$concave, resolution = pars$resolution) - - mesh <- INLA::inla.mesh.2d( + + mesh <- INLA::inla.mesh.2d( boundary = outline.hull, - max.edge = pars$max.edge, - cut = pars$cut, + max.edge = pars$max.edge, + cut = pars$cut, offset = pars$offset) - + return(mesh) } diff --git a/R/extract.R b/R/extract.R index 9137adf..db6a302 100644 --- a/R/extract.R +++ b/R/extract.R @@ -58,6 +58,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...) 
   if(!is.null(fun)){
+    # If a summary function was given, just bind everything together and add ID column
     df <- values
     if(inherits(shape, 'df')){
@@ -66,13 +67,12 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
       df <- cbind(ID = names(shape), df)
       id <- 'id'
     }
-    names(df) <- c(id, names(raster))
-    return(df)
+
   } else {
     df <- values[, 2:(ncol(values) - 1)]
-    df <- cbind(values$ID, values$cell, df)
+    df <- cbind(as.data.frame(shape)[, id], values$cell, df)
 
     names(df) <- c(id, 'cellid', names(raster))
     return(df)
@@ -171,13 +171,13 @@
 
 extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) {
 
-  stopifnot(inherits(cov_rasters, c('RasterStack', 'RasterBrick')))
+  stopifnot(inherits(cov_rasters, 'SpatRaster'))
   if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric'))
 
   points_raster <- cov_rasters[[1]]
-  points_raster[is.na(raster::values(points_raster))] <- -9999
-  raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE)
-  coords <- raster_pts@coords
+  points_raster[is.na(terra::values(points_raster))] <- -9999
+  raster_pts <- terra::as.points(points_raster)
+  coords <- terra::crds(raster_pts)
 
   # If specified, only retain certain pixel ids
   if(!is.null(selectIds)) {
diff --git a/R/fit_model.R b/R/fit_model.R
index f74d5af..28df455 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -1,46 +1,46 @@
 #' Fit the disaggregation model
-#' 
-#' \emph{fit_model} function takes a \emph{disag_data} object created by 
+#'
+#' \emph{fit_model} function takes a \emph{disag_data} object created by
 #' \code{\link{prepare_data}} and performs a Bayesian disaggregation fit.
-#' 
+#'
 #' \strong{The model definition}
-#' 
+#'
 #' The disaggregation model makes predictions at the pixel level:
 #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u}
-#' 
+#'
 #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}):
 #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)}
 #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)}
-#' 
+#'
 #' The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
 #' \itemize{
-#'   \item Gaussian: 
+#'   \item Gaussian:
 #'    If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
 #'    \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
 #'    \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
-#'   \item Binomial: 
+#'   \item Binomial:
 #'    For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
 #'    \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
-#'   \item Poisson: 
+#'   \item Poisson:
 #'    \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
 #' }
-#' 
+#'
 #' Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field
 #' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
 #' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
 #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
 #'
 #' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
 #' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
 #' The link function can be one of \emph{logit}, \emph{log} or \emph{identity}.
 #' These are specified as strings.
 #'
 #' The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE.
 #'
 #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point.
 #'
 #' The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE.
 #'
 #'
 #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting
 #' @param priors list of prior values
 #' @param family likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}
 #' @param link link function: \emph{logit}, \emph{log} or \emph{identity}
 #' @param iterations number of iterations to run the optimisation for
 #' @param field logical. Flag the spatial field on or off
 #' @param iid logical. Flag the iid effect on or off
 #' @param hess_control_parscale Argument to scale parameters during the calculation of the Hessian.
 #' Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.
 #' @param hess_control_ndeps Argument to control step sizes during the calculation of the Hessian.
 #' Either length 1 (same step size applied to all parameters) or the same length as the number of parameters.
 #' Default is 1e-4, try setting a smaller value if you get NaNs in the standard error of the parameters.
 #' See \code{\link[stats]{optimHess}} for details.
 #' @param silent logical. Suppress verbose output.
 #'
 #' @return A list is returned of class \code{disag_model}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}.
 #' The list of class \code{disag_model} contains:
 #' \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.}
 #' \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.}
 #' \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.}
 #' \item{data }{The \emph{disag_data} object used as an input to the model.}
 #' \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).}
 #'
 #' @name fit_model
 #' @references Nandi et al. (2023) disaggregation: An R Package for Bayesian
 #' Spatial Disaggregation Modeling.
 #'
 #' @examples
 #' \dontrun{
 #' polygons <- list()
 #' for(i in 1:100) {
 #'   row <- ceiling(i/10)
 #'   col <- ifelse(i %% 10 != 0, i %% 10, 10)
 #'   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
 #'   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
 #' }
 #'
 #' polys <- do.call(raster::spPolygons, polygons)
 #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
 #' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
 #'
 #' r <- raster::raster(ncol=20, nrow=20)
 #' r <- raster::setExtent(r, raster::extent(spdf))
 #' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
 #' r2 <- raster::raster(ncol=20, nrow=20)
 #' r2 <- raster::setExtent(r2, raster::extent(spdf))
 #' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
 #' cov_rasters <- raster::stack(r, r2)
 #'
 #' cl <- parallel::makeCluster(2)
 #' doParallel::registerDoParallel(cl)
 #' test_data <- prepare_data(polygon_shapefile = spdf,
 #'                           covariate_rasters = cov_rasters)
 #' parallel::stopCluster(cl)
 #' foreach::registerDoSEQ()
 #'
 #' result <- fit_model(test_data, iterations = 2)
 #' }
 #'
 #' @export
 
 
 fit_model <- function(data,
                       priors = NULL,
                       family = 'gaussian',
                       link = 'identity',
                       iterations = 100,
                       field = TRUE,
                       iid = TRUE,
                       hess_control_parscale = NULL,
                       hess_control_ndeps = 1e-4,
                       silent = TRUE) {
 
   .Deprecated(new = 'disag_model', msg = "'fit_model' will be removed in the next version.
Please use 'disag_model' instead") - - model_output <- disag_model(data, - priors = priors, - family = family, - link = link, - iterations = iterations, - field = field, + + model_output <- disag_model(data, + priors = priors, + family = family, + link = link, + iterations = iterations, + field = field, iid = iid, hess_control_parscale = hess_control_parscale, hess_control_ndeps = hess_control_ndeps, silent = silent) - + return(model_output) - - + + } #' @export #' @rdname fit_model -disag_model <- function(data, - priors = NULL, - family = 'gaussian', - link = 'identity', - iterations = 100, - field = TRUE, +disag_model <- function(data, + priors = NULL, + family = 'gaussian', + link = 'identity', + iterations = 100, + field = TRUE, iid = TRUE, hess_control_parscale = NULL, hess_control_ndeps = 1e-4, silent = TRUE) { - - + + stopifnot(inherits(data, 'disag_data')) if(!is.null(priors)) stopifnot(inherits(priors, 'list')) stopifnot(inherits(iterations, 'numeric')) - - obj <- make_model_object(data = data, - priors = priors, - family = family, - link = link, - field = field, + + obj <- make_model_object(data = data, + priors = priors, + family = family, + link = link, + field = field, iid = iid, silent = silent) - + message('Fitting model. This may be slow.') opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = iterations, trace = 0)) - + if(opt$convergence != 0) warning('The model did not converge. Try increasing the number of iterations') - + # Get hess control parameters into a list. hess_control <- setup_hess_control(opt, hess_control_parscale, hess_control_ndeps) - + # Calculate the hessian hess <- stats::optimHess(opt$par, fn = obj$fn, gr = obj$gr, control = hess_control) - + # Calc uncertainty using the fixed hessian from above. sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE, hessian.fixed = hess) - + sd_out <- TMB::sdreport(obj, getJointPrecision = TRUE) - + # Rename parameters to match layers # Need to change in sd_out as well # names(opt$par)[names(opt$par) == 'slope'] <- names(data$covariate_rasters) - + model_output <- list(obj = obj, opt = opt, sd_out = sd_out, data = data, model_setup = list(family = family, link = link, field = field, iid = iid)) - + class(model_output) <- c('disag_model', 'list') - + return(model_output) } #' Create the TMB model object for the disaggregation model -#' -#' \emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} +#' +#' \emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} #' and creates a TMB model object to be used in fitting. 
-#' 
+#'
 #' \strong{The model definition}
-#' 
+#'
 #' The disaggregation model makes predictions at the pixel level:
 #' \deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u}
-#' 
+#'
 #' And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}):
 #' \deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)}
 #' \deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)}
-#' 
+#'
 #' The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
 #' \itemize{
-#'   \item Gaussian: 
+#'   \item Gaussian:
 #'    If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
 #'    \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
 #'    \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
-#'   \item Binomial: 
+#'   \item Binomial:
 #'    For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
 #'    \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
-#'   \item Poisson: 
+#'   \item Poisson:
 #'    \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
 #' }
-#' 
+#'
 #' Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field
 #' are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
 #' where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
 #' where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
-#' 
+#'
 #' The precise names and default values for these priors are:
 #' \itemize{
 #' \item priormean_intercept: 0
 #' \item priorsd_intercept: 10.0
 #' \item priormean_slope: 0.0
 #' \item priorsd_slope: 0.4
 #' \item prior_rho_min: 3
 #' \item prior_rho_prob: 0.1
 #' \item prior_sigma_max: 1
 #' \item prior_sigma_prob: 0.1
 #' \item prior_iideffect_sd_max: 0.1
 #' \item prior_iideffect_sd_prob: 0.01
 #' }
-#' 
+#'
 #' The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
 #' The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
 #' The link function can be one of \emph{logit}, \emph{log} or \emph{identity}.
 #' These are specified as strings.
-#' 
+#'
 #' The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE.
-#' 
+#'
 #' The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point.
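[Editor's note, not part of the patch: a worked numeric illustration of the weighted-sum aggregation defined in the docs above, with made-up values for a single polygon j:]

```r
pred_i <- c(0.20, 0.30, 0.25)  # pixel-level predictions inside polygon j
agg_i  <- c(100, 200, 50)      # aggregation raster values, e.g. population

cases_j <- sum(pred_i * agg_i)   # 20 + 60 + 12.5 = 92.5
rate_j  <- cases_j / sum(agg_i)  # 92.5 / 350 = 0.264 (approximately)
```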
-#' 
+#'
 #' The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE.
-#' 
+#'
 #'
 #' @param data disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting
 #' @param priors list of prior values
 #' @param family likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}
 #' @param link link function: \emph{logit}, \emph{log} or \emph{identity}
 #' @param field logical. Flag the spatial field on or off
 #' @param iid logical. Flag the iid effect on or off
 #' @param silent logical. Suppress verbose output.
-#' 
+#'
 #' @return The TMB model object returned by \code{\link[TMB]{MakeADFun}}.
-#' 
+#'
 #' @name make_model_object
 #'
 #' @examples
 #' \dontrun{
 #' polygons <- list()
 #' for(i in 1:100) {
 #'   row <- ceiling(i/10)
 #'   col <- ifelse(i %% 10 != 0, i %% 10, 10)
 #'   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
 #'   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
 #' }
 #'
 #' polys <- do.call(raster::spPolygons, polygons)
 #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
 #' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
 #'
 #' r <- raster::raster(ncol=20, nrow=20)
 #' r <- raster::setExtent(r, raster::extent(spdf))
 #' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
 #' r2 <- raster::raster(ncol=20, nrow=20)
 #' r2 <- raster::setExtent(r2, raster::extent(spdf))
 #' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
 #' cov_rasters <- raster::stack(r, r2)
 #'
 #' cl <- parallel::makeCluster(2)
 #' doParallel::registerDoParallel(cl)
 #' test_data <- prepare_data(polygon_shapefile = spdf,
 #'                           covariate_rasters = cov_rasters)
 #' parallel::stopCluster(cl)
 #' foreach::registerDoSEQ()
 #'
 #' result <- make_model_object(test_data)
 #' }
 #'
 #' @export
 #'
 
 make_model_object <- function(data,
                               priors = NULL,
                               family = 'gaussian',
                               link = 'identity',
                               field = TRUE,
                               iid = TRUE,
                               silent = TRUE) {
 
 
   # Check that binomial model has sample_size values supplied
   if(family == 'binomial') {
     if(sum(is.na(data$polygon_data$N)) != 0) {
       stop("There are NAs in the sample sizes. These must be supplied for a binomial likelihood")
     }
   }
 
   if(family == 'gaussian') {
     family_id = 0
   } else if(family == 'binomial') {
     family_id = 1
   } else if(family == 'poisson') {
     family_id = 2
   } else {
     stop(paste(family, "is not a valid likelihood"))
   }
 
   if(link == 'logit') {
     link_id = 0
   } else if(link == 'log') {
     link_id = 1
   } else if(link == 'identity') {
     link_id = 2
   } else {
     stop(paste(link, "is not a valid link function"))
   }
 
   if(family == 'gaussian' & iid) {
-    warning('You are using both a gaussian likelihood and an iid effect. Using both of these is redundant as they are 
+    warning('You are using both a gaussian likelihood and an iid effect. Using both of these is redundant as they are
             having the same effect on the model. Consider setting iid = FALSE.')
Consider setting iid = FALSE.') } - + if(is.null(data$mesh)) { stop('Your data object must contain an INLA mesh.') } - + nu = 1 # Sort out mesh bits - spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] + spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A n_s <- nrow(spde$M0) - + cov_matrix <- as.matrix(data$covariate_data[, -c(1:2)]) - # If we have exactly one column we don't have to transpose. Sure this + # If we have exactly one column we don't have to transpose. Sure this # this could be cleaner but I don't know how. if(ncol(cov_matrix) == 1){ cov_matrix <- as.matrix(apply(cov_matrix, 1, as.numeric)) } else { cov_matrix <- t(apply(cov_matrix, 1, as.numeric)) } - + # Construct sensible default field hyperpriors limits <- sp::bbox(data$polygon_shapefile) hypontenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) prior_rho <- hypontenuse/3 - + prior_sigma <- sd(data$polygon_data$response/mean(data$polygon_data$response)) - + # Default priors if they are not specified default_priors <- list(priormean_intercept = 0, priorsd_intercept = 10.0, @@ -375,7 +375,7 @@ make_model_object <- function(data, prior_sigma_prob = 0.1, prior_iideffect_sd_max = 0.1, prior_iideffect_sd_prob = 0.01) - + # Replace with any specified priors if(!is.null(priors)) { final_priors <- default_priors @@ -397,7 +397,7 @@ make_model_object <- function(data, } else { final_priors <- default_priors } - + parameters <- list(intercept = -5, slope = rep(0, ncol(cov_matrix)), log_tau_gaussian = 8, @@ -406,7 +406,7 @@ make_model_object <- function(data, log_sigma = 0, log_rho = 4, nodemean = rep(0, n_s)) - + input_data <- list(x = cov_matrix, aggregation_values = data$aggregation_pixels, Apixel = Apix, @@ -419,9 +419,9 @@ make_model_object <- function(data, nu = nu, field = as.integer(field), iid = as.integer(iid)) - + input_data <- c(input_data, final_priors) - + tmb_map <- list() if(!field) { tmb_map <- c(tmb_map, list(log_sigma = as.factor(NA), @@ -435,7 +435,7 @@ make_model_object <- function(data, if(family_id != 0) { # if not gaussian do not need a dispersion in likelihood tmb_map <- c(tmb_map, list(log_tau_gaussian = as.factor(NA))) } - + random_effects <- c() if(field) { random_effects <- c(random_effects, 'nodemean') @@ -443,15 +443,15 @@ make_model_object <- function(data, if(iid) { random_effects <- c(random_effects, 'iideffect') } - + obj <- TMB::MakeADFun( - data = input_data, + data = input_data, parameters = parameters, map = tmb_map, random = random_effects, silent = silent, DLL = "disaggregation") - + return(obj) } @@ -468,7 +468,7 @@ setup_hess_control <- function(opt,hess_control_parscale, hess_control_ndeps){ hess_control$parscale <- hess_control_parscale } # hess_control_ndeps can either be length 1 (default) or correct length vecot. - if(length(hess_control_ndeps) == 1){ + if(length(hess_control_ndeps) == 1){ hess_control$ndeps <- rep(hess_control_ndeps, length(opt$par)) } else { if(length(hess_control_ndeps) != length(opt$par)){ diff --git a/R/matching.R b/R/matching.R index f2b7825..18684c3 100644 --- a/R/matching.R +++ b/R/matching.R @@ -1,53 +1,53 @@ -#' Function to match pixels to their corresponding polygon -#' -#' From the covariate data and polygon data, the function matches the polygon id between the two to find -#' which pixels from the covariate data are contained in each of the polygons. 
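One practical note on the Hessian controls handled by `setup_hess_control` above: if the fitted standard errors come back as NaN, a smaller finite-difference step can be passed through `disag_model`, as its documentation suggests. A minimal sketch, assuming `test_data` is a prepared `disag_data` object:

```r
# A single hess_control_ndeps value is recycled to the number of
# parameters (see setup_hess_control above).
fit <- disag_model(test_data, hess_control_ndeps = 1e-5)
```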
-#' -#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned -#' by \code{getPolygonData} function). -#' -#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in -#' covariate data that corresponds to that polygon, the second column is the index of the last row in -#' covariate data that corresponds to that polygon. -#' -#' @param covariates data.frame with each covariate as a column an and id column. -#' @param polygon_data data.frame with polygon id and response data. -#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. -#' -#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in -#' covariate data that corresponds to that polygon, the second column is the index of the last row in -#' covariate data that corresponds to that polygon. -#' -#' @name getStartendindex -#' -#' @examples { -#' covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) -#' response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) -#' getStartendindex(covs, response, 'area_id') -#' } -#' -#' -#' @export - -getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { - - stopifnot(ncol(polygon_data) == 3) - stopifnot(ncol(covariates) >= 2) - stopifnot(nrow(covariates) > nrow(polygon_data)) - stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data)) - - # Create startendindex matrix - # This defines which pixels in the matrix are associated with which polygon. - startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x))) - - startendindex <- do.call(rbind, startendindex) - - whichindices <- match(polygon_data$area_id, unique(covariates[, id_var])) - - # c++ is zero indexed. - startendindex <- startendindex[whichindices, ] - 1L - - return(startendindex) -} - +#' Function to match pixels to their corresponding polygon +#' +#' From the covariate data and polygon data, the function matches the polygon id between the two to find +#' which pixels from the covariate data are contained in each of the polygons. +#' +#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +#' by \code{getPolygonData} function). +#' +#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in +#' covariate data that corresponds to that polygon, the second column is the index of the last row in +#' covariate data that corresponds to that polygon. +#' +#' @param covariates data.frame with each covariate as a column an and id column. +#' @param polygon_data data.frame with polygon id and response data. +#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id. +#' +#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in +#' covariate data that corresponds to that polygon, the second column is the index of the last row in +#' covariate data that corresponds to that polygon. 
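To make that return value concrete, here is a small worked example; the expected matrix is derived from the indexing logic in the function body below, including the shift to zero-indexing for the C++ side.

```r
covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3),
                   cov = rnorm(9))
response <- data.frame(area_id = c(1, 2, 3),
                       response = c(4, 7, 2),
                       N = c(NA, NA, NA))
getStartendindex(covs, response, 'area_id')
# Rows 1-3 belong to polygon 1, rows 4-5 to polygon 2 and rows 6-9 to
# polygon 3, so (zero-indexed) this should return:
#      [,1] [,2]
# [1,]    0    2
# [2,]    3    4
# [3,]    5    8
```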
+#' +#' @name getStartendindex +#' +#' @examples { +#' covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) +#' response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) +#' getStartendindex(covs, response, 'area_id') +#' } +#' +#' +#' @export + +getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') { + + stopifnot(ncol(polygon_data) == 3) + stopifnot(ncol(covariates) >= 2) + stopifnot(nrow(covariates) > nrow(polygon_data)) + stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data)) + + # Create startendindex matrix + # This defines which pixels in the matrix are associated with which polygon. + startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x))) + + startendindex <- do.call(rbind, startendindex) + + whichindices <- match(polygon_data$area_id, unique(covariates[, id_var])) + + # c++ is zero indexed. + startendindex <- startendindex[whichindices, ] - 1L + + return(startendindex) +} + diff --git a/R/plotting.R b/R/plotting.R index d6080b4..d1ed93b 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,90 +1,91 @@ +<<<<<<< HEAD #' Plot input data for disaggregation #' #' Plotting function for class \emph{disag_data} (the input data for disaggregation). -#' +#' #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. #' @param which If a subset of plots is required, specify a subset of the numbers 1:3 #' @param ... Further arguments to \emph{plot} function. -#' +#' #' @return A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) -#' +#' #' @import ggplot2 #' @method plot disag_data -#' +#' #' @export plot.disag_data <- function(x, which = c(1,2,3), ...) { - + plots <- list() titles <- c() - + if(1 %in% which) { plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) titles <- c(titles, 'Polygon response data') } - + if(2 %in% which) { stopifnot(inherits(x$covariate_rasters, c('RasterStack', 'RasterBrick'))) plots$covariates <- sp::spplot(x$covariate_rasters) titles <- c(titles, 'Covariate rasters') } - + if(3 %in% which & !is.null(x$mesh)) { stopifnot(inherits(x$mesh, 'inla.mesh')) plots$mesh <- plot_mesh(x$mesh) titles <- c(titles, 'INLA mesh for spatial field') } - + print(cowplot::plot_grid(plotlist = plots, labels = titles, label_size = 10)) - + return(invisible(plots)) } #' Plot results of fitted model #' #' Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). -#' +#' #' Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. -#' +#' #' @param x Object of class \emph{disag_model} to be plotted. #' @param ... Further arguments to \emph{plot} function. 
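A brief usage sketch for this plot method, assuming `fit` is a `disag_model` object: the method prints a combined grid and returns the individual ggplot objects invisibly.

```r
plots <- plot(fit)
plots[[1]]  # fixed-effect estimates with +/- one sd error bars
plots[[2]]  # in-sample observed vs predicted values
```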
-#' -#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -#' +#' +#' @return A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +#' #' @import ggplot2 #' @method plot disag_model -#' +#' #' @export plot.disag_model <- function(x, ...){ - + parameter <- sd <- obs <- pred <- NULL posteriors <- as.data.frame(summary(x$sd_out, select = 'fixed')) posteriors <- dplyr::mutate(posteriors, name = rownames(posteriors)) names(posteriors) <- c('mean', 'sd', 'parameter') posteriors$fixed <- grepl('slope', posteriors$parameter) posteriors$type <- ifelse(posteriors$fixed, 'Slope', 'Other') - + # Check name lengths match before substituting. lengths_match <- raster::nlayers(x$data$covariate_rasters) == sum(posteriors$fixed) if(lengths_match){ posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) } - - fixedeffects <- ggplot() + - geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, - ymax = mean + sd), - width = 0.2, color = "blue") + - geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + - facet_wrap( ~ type , scales = 'free') + + + fixedeffects <- ggplot() + + geom_errorbar(posteriors, mapping = aes(x = parameter, ymin = mean - sd, + ymax = mean + sd), + width = 0.2, color = "blue") + + geom_point(posteriors, mapping = aes(x = parameter, y = mean)) + + facet_wrap( ~ type , scales = 'free') + coord_flip() + ggtitle("Parameters (excluding random effects)") - + report <- x$obj$report() - + # Form of the observed and predicted results depends on the likelihood function used if(x$model_setup$family == 'gaussian') { observed_data = report$polygon_response_data/report$reportnormalisation @@ -99,33 +100,33 @@ plot.disag_model <- function(x, ...){ predicted_data = report$reportprediction_rate title <- 'In sample performance: incidence rate' } - + data <- data.frame(obs = observed_data, pred = predicted_data) - - obspred <- ggplot(data, aes(x = obs, y = pred)) + - geom_point() + - geom_abline(intercept = 0, slope = 1, color = 'blue') + + + obspred <- ggplot(data, aes(x = obs, y = pred)) + + geom_point() + + geom_abline(intercept = 0, slope = 1, color = 'blue') + ggtitle(title) - + plots <- list(fixedeffects, obspred) print(cowplot::plot_grid(plotlist = plots)) - + return(invisible(plots)) } #' Plot mean and uncertainty predictions from the disaggregation model results #' #' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). -#' +#' #' Produces raster plots of the mean prediction, and the lower and upper confidence intervals. #' #' @param x Object of class \emph{disag_prediction} to be plotted. #' @param ... Further arguments to \emph{plot} function. -#' +#' #' @return A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. -#' +#' #' @method plot disag_prediction -#' +#' #' @export @@ -133,11 +134,11 @@ plot.disag_prediction <- function(x, ...) { rasters_to_plot <- raster::stack(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - + plots <- sp::spplot(rasters_to_plot) - + print(plots) - + return(invisible(plots)) } @@ -146,9 +147,9 @@ plot.disag_prediction <- function(x, ...) 
{ # # @param x Object to be plotted # @param names list of 2 names: polygon id variable and response variable names -# +# # @return A ggplot of the polygon data -# +# # @name plot_polygon_data plot_polygon_data <- function(x, names) { @@ -157,18 +158,18 @@ plot_polygon_data <- function(x, names) { shp <- sf::st_as_sf(x) shp <- dplyr::rename(shp, 'response' = names$response_var) shp <- dplyr::rename(shp, 'area_id' = names$id_var) - + area_id <- long <- lat <- group <- response <- NULL stopifnot(inherits(shp, 'sf')) - - shp <- dplyr::mutate(shp, area_id = as.character(area_id)) - p <- ggplot(shp, aes(fill = response)) + + shp <- dplyr::mutate(shp, area_id = as.character(area_id)) + + p <- ggplot(shp, aes(fill = response)) + geom_sf() + #coord_equal() + scale_fill_viridis_c(trans = 'identity') - + return(invisible(p)) } @@ -179,24 +180,24 @@ plot_polygon_data <- function(x, names) { # @param lwd Line width # @param linecol The colour for the mesh edges # @param size size Size of data points -# +# # @name plot_mesh plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey', size = 1.2) { - + mesh <- x # extract point data d <- data.frame(x = mesh$loc[, 1], y = mesh$loc[, 2], type = 'evertices') levels(d$type) <- c('evertices', 'adata') d[mesh$idx$loc, 'type'] <- 'adata' - # extract lines data. + # extract lines data. # mesh$graph$tv column 1, 2, 3 are points in triangles. # Therefore need 1 to 2, 2 to 3 and 3 to 1. - idx = rbind(mesh$graph$tv[, 1:2, drop = FALSE], - mesh$graph$tv[, 2:3, drop = FALSE], + idx = rbind(mesh$graph$tv[, 1:2, drop = FALSE], + mesh$graph$tv[, 2:3, drop = FALSE], mesh$graph$tv[, c(3, 1), drop = FALSE]) segments <- data.frame(mesh$loc[idx[, 1], 1:2], mesh$loc[idx[, 2], 1:2], type = 'bsegments') - + innerouter <- data.frame(mesh$loc[mesh$segm$bnd$idx[, 1], 1:2], mesh$loc[mesh$segm$bnd$idx[, 2], 1:2], type = 'cbinding', stringsAsFactors = FALSE) @@ -211,19 +212,19 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey #innerouter[nrow(innerouter), 5] <- 'dinternal' innerouter$type = factor(innerouter$type, levels = c('dinternal', 'cbinding')) } - - + + names(segments) <- c('x1', 'y1', 'x2', 'y2', 'type') names(innerouter) <- c('x1', 'y1', 'x2', 'y2', 'type') - + segments <- rbind(segments, innerouter) - + #size = .data$type - p <- ggplot2::ggplot(data = d, - ggplot2::aes(.data$x, .data$y, + p <- ggplot2::ggplot(data = d, + ggplot2::aes(.data$x, .data$y, colour = .data$type)) + - ggplot2::geom_segment(data = segments, - ggplot2::aes(x = .data$x1, y = .data$y1, + ggplot2::geom_segment(data = segments, + ggplot2::aes(x = .data$x1, y = .data$y1, xend = .data$x2, yend = .data$y2, linewidth = .data$type)) + ggplot2::geom_point(aes(size = .data$type)) + @@ -235,6 +236,6 @@ plot_mesh <- function(x, main = '', col = 'blue', lwd = 0.5, linecol = 'darkgrey ggplot2::scale_size_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + ggplot2::scale_linewidth_manual(values = c(size, lwd, 1.3, 1.3, 0), drop = FALSE) + ggtitle(main) - + return(invisible(p)) } diff --git a/R/predict.R b/R/predict.R index 1b78aad..4e9c759 100644 --- a/R/predict.R +++ b/R/predict.R @@ -1,80 +1,81 @@ +<<<<<<< HEAD #' Predict mean and uncertainty from the disaggregation model result -#' -#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and +#' +#' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and #' predicts mean and 
uncertainty maps. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -#' -#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated -#' are given by the arguments \emph{N} and \emph{CI} respectively. -#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' +#' For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +#' are given by the arguments \emph{N} and \emph{CI} respectively. +#' #' @param object disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. -#' If this is a raster stack or brick, predictions will be made over this data. +#' @param newdata If NULL, predictions are made using the data in model_output. +#' If this is a raster stack or brick, predictions will be made over this data. #' @param predict_iid logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N Number of realisations. Default: 100. #' @param CI Confidence interval to be calculated from the realisations. Default: 0.95. #' @param ... Further arguments passed to or from other methods. #' -#' @return An object of class \emph{disag_prediction} which consists of a list of two objects: +#' @return An object of class \emph{disag_prediction} which consists of a list of two objects: #' \item{mean_prediction }{List of: #' \itemize{ #' \item \emph{prediction} Raster of mean predictions based. #' \item \emph{field} Raster of the field component of the linear predictor. #' \item \emph{iid} Raster of the iid component of the linear predictor. #' \item \emph{covariates} Raster of the covariate component of the linear predictor. -#' }} +#' }} #' \item{uncertainty_prediction: }{List of: #' \itemize{ #' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. #' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. -#' }} +#' }} #' #' #' @method predict disag_model #' -#' @examples +#' @examples #' \dontrun{ #' predict(fit_result) #' } -#' +#' #' @export predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95, ...) { - + mean_prediction <- predict_model(object, newdata = newdata, predict_iid) - + uncertainty_prediction <- predict_uncertainty(object, newdata = newdata, predict_iid, N, CI) - + prediction <- list(mean_prediction = mean_prediction, uncertainty_prediction = uncertainty_prediction) - + class(prediction) <- c('disag_prediction', 'list') - + return(prediction) } #' Function to predict mean from the model result -#' -#' \emph{predict_model} function takes a \emph{disag_model} object created by -#' \emph{disaggregation::disag_model} and predicts mean maps. 
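As a usage sketch for the `predict` method above (assuming `fit` is a fitted `disag_model` object):

```r
pred <- predict(fit, N = 100, CI = 0.95)

pred$mean_prediction$prediction             # raster of mean predictions
pred$uncertainty_prediction$predictions_ci  # lower and upper CI rasters
```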
-#' +#' +#' \emph{predict_model} function takes a \emph{disag_model} object created by +#' \emph{disaggregation::disag_model} and predicts mean maps. +#' #' Function returns rasters of the mean predictions as well as the covariate and field contributions #' to the linear predictor. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' #' @param model_output disag_model object returned by disag_model function -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param newdata If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @@ -85,84 +86,84 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' \item \emph{iid} Raster of the iid component of the linear predictor. #' \item \emph{covariates} Raster of the covariate component of the linear predictor. #' } -#' +#' #' @name predict_model #' -#' @examples +#' @examples #' \dontrun{ #' predict_model(result) #' } -#' +#' #' @export predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { - + objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) - + pars <- model_output$obj$env$last.par.best pars <- split(pars, names(pars)) - - prediction <- predict_single_raster(pars, + + prediction <- predict_single_raster(pars, objects_for_prediction, - link_function = model_output$model_setup$link) - + link_function = model_output$model_setup$link) + return(prediction) - + } #' Function to predict uncertainty from the model result -#' -#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by -#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. -#' +#' +#' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by +#' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. +#' #' Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. -#' -#' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' +#' To predict over a different spatial extent to that used in the model, +#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. -#' -#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. 
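For prediction over a different spatial extent, a hedged sketch: `new_covs` is assumed to be a RasterStack covering the wider region, with layer names matching the covariates used in fitting (a requirement enforced by `check_newdata` below).

```r
mean_pred <- predict_model(fit, newdata = new_covs)
unc_pred  <- predict_uncertainty(fit, newdata = new_covs, N = 100, CI = 0.95)
```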
-#' +#' +#' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +#' #' The number of the realisations and the size of the confidence interval to be calculated. -#' are given by the arguments \emph{N} and \emph{CI} respectively. -#' +#' are given by the arguments \emph{N} and \emph{CI} respectively. +#' #' @param model_output disag_model object returned by disag_model function. -#' @param newdata If NULL, predictions are made using the data in model_output. +#' @param newdata If NULL, predictions are made using the data in model_output. #' If this is a raster stack or brick, predictions will be made over this data. Default NULL. #' @param predict_iid If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE. #' @param N number of realisations. Default: 100. #' @param CI confidence interval. Default: 0.95. -#' +#' #' @return The uncertainty prediction, which is a list of: #' \itemize{ #' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. #' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. #' } -#' +#' #' @name predict_uncertainty #' -#' @examples +#' @examples #' \dontrun{ #' predict_uncertainty(result) #' } -#' +#' #' @export predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALSE, N = 100, CI = 0.95) { - + objects_for_prediction <- setup_objects(model_output, newdata = newdata, predict_iid) - + parameters <- model_output$obj$env$last.par.best - + # If we have either of the random effects, we have the jointPrecision matrix. # but if we have neither, we don't get that matrix and should use the # covariance matrix instead - + #CH <- Matrix::Cholesky(as(S, 'dsCMatrix')) #x <- rmvn.sparse(10, mu, CH, prec=FALSE) ## 10 random draws of x #d <- dmvn.sparse(x, mu, CH, prec=FALSE) ## densities of the 10 draws - - + + if(model_output$model_setup$iid | model_output$model_setup$field){ ch <- Matrix::Cholesky(model_output$sd_out$jointPrecision) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = TRUE) @@ -171,47 +172,47 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS ch <- Matrix::Cholesky(covariance_matrix) par_draws <- sparseMVN::rmvn.sparse(N, parameters, ch, prec = FALSE) } - + predictions <- list() - + for(r in seq_len(N)) { - + p <- split(par_draws[r, ], names(parameters)) - - prediction_result <- predict_single_raster(p, + + prediction_result <- predict_single_raster(p, objects_for_prediction, - link_function = model_output$model_setup$link) - + link_function = model_output$model_setup$link) + predictions[[r]] <- prediction_result$prediction } predictions <- raster::stack(predictions) - + probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2) predictions_ci <- raster::calc(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) names(predictions_ci) <- c('lower CI', 'upper CI') - + uncertainty <- list(realisations = predictions, predictions_ci = predictions_ci) - + return(uncertainty) } # Get coordinates from raster # -# @param data disag_data object -# +# @param data disag_data object +# # @return A matrix of the coordinates of the raster -# +# # @name getCoords getCoords <- function(data) { - + points_raster <- data$covariate_rasters[[1]] points_raster[is.na(points_raster)] <- -9999 raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) coords <- 
raster_pts@coords - + return(coords) } @@ -219,18 +220,18 @@ getCoords <- function(data) { # # @param mesh mesh used in the model fitting # @param coords coordinates extracted from raster -# +# # @return An Amatrix object for the field -# +# # @name getAmatrix getAmatrix <- function(mesh, coords) { - - spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] - n_s <- nrow(spde$M0) - + + spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] + n_s <- nrow(spde$M0) + Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - + return(Amatrix) } @@ -242,7 +243,7 @@ check_newdata <- function(newdata, model_output){ if(!is.null(newdata)){ if(!(inherits(newdata, c('RasterStack', 'RasterBrick', 'RasterLayer')))){ stop('newdata should be NULL or a RasterStack or a RasterBrick') - } + } if(!all(names(model_output$data$covariate_rasters) %in% names(newdata))){ stop('All covariates used to fit the model must be in newdata') } @@ -254,26 +255,26 @@ check_newdata <- function(newdata, model_output){ # Function to setup covariates, field and iid objects for prediction setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { - + newdata <- check_newdata(newdata, model_output) - + # Pull out original data data <- model_output$data - + # Decide which covariates to predict over if(is.null(newdata)){ covariates <- data$covariate_rasters } else { covariates <- newdata } - + data$covariate_rasters <- covariates - + # If there is no iid effect in the model, it cannot be predicted if(!model_output$model_setup$iid) { predict_iid <- FALSE } - + if(model_output$model_setup$field) { if(is.null(newdata)) { coords <- data$coordsForPrediction @@ -285,19 +286,19 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { } else { field_objects <- NULL } - + if(predict_iid) { tmp_shp <- model_output$data$polygon_shapefile tmp_shp@data <- data.frame(area_id = factor(model_output$data$polygon_data$area_id)) - shapefile_raster <- raster::rasterize(tmp_shp, - model_output$data$covariate_rasters, + shapefile_raster <- raster::rasterize(tmp_shp, + model_output$data$covariate_rasters, field = 'area_id') shapefile_ids <- raster::unique(shapefile_raster) iid_objects <- list(shapefile_raster = shapefile_raster, shapefile_ids = shapefile_ids) } else { iid_objects <- NULL } - + return(list(covariates = covariates, field_objects = field_objects, iid_objects = iid_objects)) @@ -305,24 +306,24 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { # Function to take model parameters and predict a single raster predict_single_raster <- function(model_parameters, objects, link_function) { - + # Create linear predictor covs_by_betas <- list() for(i in seq_len(raster::nlayers(objects$covariates))){ covs_by_betas[[i]] <- model_parameters$slope[i] * objects$covariates[[i]] } - + cov_by_betas <- raster::stack(covs_by_betas) if(raster::nlayers(cov_by_betas) > 1){ sum_cov_by_betas <- sum(cov_by_betas) - } else { + } else { # With only 1 covariate, there's nothing to sum. Do this to avoid warnings. 
sum_cov_by_betas <- cov_by_betas } cov_contribution <- sum_cov_by_betas + model_parameters$intercept - - linear_pred <- cov_contribution - + + linear_pred <- cov_contribution + if(!is.null(objects$field_objects)){ # Extract field values field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1] @@ -331,12 +332,12 @@ predict_single_raster <- function(model_parameters, objects, link_function) { } else { field_ras <- NULL } - + if(!is.null(objects$iid_objects)) { iid_ras <- objects$iid_objects$shapefile_raster iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau)) for(i in seq_along(model_parameters$iideffect)) { - iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <- + iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <- model_parameters$iideffect[i] na_pixels <- which(is.na(iid_ras@data@values)) na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd) @@ -355,7 +356,7 @@ predict_single_raster <- function(model_parameters, objects, link_function) { } else { iid_ras <- NULL } - + if(link_function == 'logit') { prediction_ras <- 1 / (1 + exp(-1 * linear_pred)) } else if(link_function == 'log') { @@ -363,11 +364,11 @@ predict_single_raster <- function(model_parameters, objects, link_function) { } else if(link_function == 'identity') { prediction_ras <- linear_pred } - - predictions <- list(prediction = prediction_ras, + + predictions <- list(prediction = prediction_ras, field = field_ras, iid = iid_ras, covariates = cov_contribution) - + return(predictions) } diff --git a/R/prepare_data.R b/R/prepare_data.R index c04e46d..0d6831e 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -128,7 +128,9 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) - covariate_rasters <- raster::dropLayer(covariate_rasters, raster::nlayers(covariate_rasters)) + # Remove the aggregation raster + covariate_rasters <- covariate_rasters[[seq(nlyr(covariate_rasters) - 1)]] + names(covariate_rasters) <- cov_names aggregation_pixels <- as.numeric(covariate_data[ , ncol(covariate_data)]) diff --git a/README.md b/README.md index 2497a62..cc08ed4 100644 --- a/README.md +++ b/README.md @@ -109,4 +109,3 @@ Summary functions for input data and model results summary(data_for_model) summary(model_result) ``` - diff --git a/cran-comments.md b/cran-comments.md index 6d8f73b..1cdb171 100644 --- a/cran-comments.md +++ b/cran-comments.md @@ -1,71 +1,71 @@ -## Update -This is a package update (version 0.2.0). The only real change in this version -is updating references to our Journal of Statistical Science paper that is in -press. - - - - -## Test environments -Windows, R release -Ubuntu 20, R release -Ubuntu 20, r Oldrel -Ubuntu 20, R devel - - -## R CMD check results -There were no ERRORs or WARNINGs. - -There were 3 NOTES. - - -* checking CRAN incoming feasibility ... 
[14s] NOTE -Maintainer: 'Tim Lucas ' - -Possibly misspelled words in DESCRIPTION: - Nandi (15:28) - -Suggests or Enhances not in mainstream repositories: - INLA -Availability using Additional_repositories specification: - INLA yes https://inla.r-inla-download.org/R/stable - -Found the following (possibly) invalid DOIs: - DOI: 10.18637/jss.v106.i11 - From: DESCRIPTION - inst/CITATION - Status: 404 - Message: Not Found - - -Examples with CPU (user + system) or elapsed time > 10s - user system elapsed -getPolygonData 9.89 0.17 10.08 - - - -Response: Anita Nandi's name is spelled correctly. The INLA availability -issue is the same as previous submissions. The doi is for our new Journal -of the Statistical Society paper and has been reserved but not registered yet. - - - -* checking package dependencies ... NOTE -Package suggested but not available for checking: 'INLA' - -Response: Same as above. - - -* checking examples ... [16s] NOTE -Examples with CPU (user + system) or elapsed time > 10s - user system elapsed -getPolygonData 9.89 0.17 10.08 - - - -Response: As this is only just over the 10 second limit we hope it is ok. We -have done our best to make the examples small throughout. - - -## Downstream dependencies -There are currently no downstream dependencies for this package +## Update +This is a package update (version 0.2.0). The only real change in this version +is updating references to our Journal of Statistical Science paper that is in +press. + + + + +## Test environments +Windows, R release +Ubuntu 20, R release +Ubuntu 20, r Oldrel +Ubuntu 20, R devel + + +## R CMD check results +There were no ERRORs or WARNINGs. + +There were 3 NOTES. + + +* checking CRAN incoming feasibility ... [14s] NOTE +Maintainer: 'Tim Lucas ' + +Possibly misspelled words in DESCRIPTION: + Nandi (15:28) + +Suggests or Enhances not in mainstream repositories: + INLA +Availability using Additional_repositories specification: + INLA yes https://inla.r-inla-download.org/R/stable + +Found the following (possibly) invalid DOIs: + DOI: 10.18637/jss.v106.i11 + From: DESCRIPTION + inst/CITATION + Status: 404 + Message: Not Found + + +Examples with CPU (user + system) or elapsed time > 10s + user system elapsed +getPolygonData 9.89 0.17 10.08 + + + +Response: Anita Nandi's name is spelled correctly. The INLA availability +issue is the same as previous submissions. The doi is for our new Journal +of the Statistical Society paper and has been reserved but not registered yet. + + + +* checking package dependencies ... NOTE +Package suggested but not available for checking: 'INLA' + +Response: Same as above. + + +* checking examples ... [16s] NOTE +Examples with CPU (user + system) or elapsed time > 10s + user system elapsed +getPolygonData 9.89 0.17 10.08 + + + +Response: As this is only just over the 10 second limit we hope it is ok. We +have done our best to make the examples small throughout. + + +## Downstream dependencies +There are currently no downstream dependencies for this package diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index f30a92f..5e17944 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -1,3 +1,4 @@ +<<<<<<< HEAD % Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_mesh.R \name{build_mesh} @@ -20,8 +21,8 @@ An inla.mesh object \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. 
} \details{ -The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary -and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest +The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary +and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. Six mesh parameters can be specified as arguments: \emph{convex}, \emph{concave} and \emph{resolution}, @@ -48,6 +49,5 @@ pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, my_mesh <- build_mesh(spdf) } - - } + diff --git a/man/fit_model.Rd b/man/fit_model.Rd index 1b4500c..c1dec35 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -1,148 +1,148 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fit_model.R -\name{fit_model} -\alias{fit_model} -\alias{disag_model} -\title{Fit the disaggregation model} -\usage{ -fit_model( - data, - priors = NULL, - family = "gaussian", - link = "identity", - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-04, - silent = TRUE -) - -disag_model( - data, - priors = NULL, - family = "gaussian", - link = "identity", - iterations = 100, - field = TRUE, - iid = TRUE, - hess_control_parscale = NULL, - hess_control_ndeps = 1e-04, - silent = TRUE -) -} -\arguments{ -\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} - -\item{priors}{list of prior values} - -\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} - -\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} - -\item{iterations}{number of iterations to run the optimisation for} - -\item{field}{logical. Flag the spatial field on or off} - -\item{iid}{logical. Flag the iid effect on or off} - -\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. -Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} - -\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. -Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. -Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. -See \code{\link[stats]{optimHess}} for details.} - -\item{silent}{logical. Suppress verbose output.} -} -\value{ -A list is returned of class \code{disag_model}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. -The list of class \code{disag_model} contains: - \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} - \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} - \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} - \item{data }{The \emph{disag_data} object used as an input to the model.} - \item{model_setup }{A list of information on the model setup. 
Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} -} -\description{ -\emph{fit_model} function takes a \emph{disag_data} object created by -\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. -} -\details{ -\strong{The model definition} - -The disaggregation model makes predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. 
-} -\examples{ -\dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - result <- fit_model(test_data, iterations = 2) - } - -} -\references{ -Nanda et al. (2023) disaggregation: An R Package for Bayesian -Spatial Disaggregation Modeling. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/fit_model.R +\name{fit_model} +\alias{fit_model} +\alias{disag_model} +\title{Fit the disaggregation model} +\usage{ +fit_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) + +disag_model( + data, + priors = NULL, + family = "gaussian", + link = "identity", + iterations = 100, + field = TRUE, + iid = TRUE, + hess_control_parscale = NULL, + hess_control_ndeps = 1e-04, + silent = TRUE +) +} +\arguments{ +\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} + +\item{priors}{list of prior values} + +\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} + +\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} + +\item{iterations}{number of iterations to run the optimisation for} + +\item{field}{logical. Flag the spatial field on or off} + +\item{iid}{logical. Flag the iid effect on or off} + +\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. +Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.} + +\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. +Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. +Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. +See \code{\link[stats]{optimHess}} for details.} + +\item{silent}{logical. Suppress verbose output.} +} +\value{ +A list is returned of class \code{disag_model}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. 
+The list of class \code{disag_model} contains: + \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} + \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} + \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.} + \item{data }{The \emph{disag_data} object used as an input to the model.} + \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function(\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).} +} +\description{ +\emph{fit_model} function takes a \emph{disag_data} object created by +\code{\link{prepare_data}} and performs a Bayesian disaggregation fit. +} +\details{ +\strong{The model definition} + +The disaggregation model makes predictions at the pixel level: +\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} + +And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): +\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} +\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} + +The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): +\itemize{ + \item Gaussian: + If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where + \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} + \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. + \item Binomial: + For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. + \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. + \item Poisson: + \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. +} + +Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field +are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect + +The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. +These are specified as strings. + +The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. + +The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. + +The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE. 
+} +\examples{ +\dontrun{ + polygons <- list() + for(i in 1:100) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + } + + polys <- do.call(raster::spPolygons, polygons) + response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) + spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) + + r <- raster::raster(ncol=20, nrow=20) + r <- raster::setExtent(r, raster::extent(spdf)) + r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) + r2 <- raster::raster(ncol=20, nrow=20) + r2 <- raster::setExtent(r2, raster::extent(spdf)) + r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) + cov_rasters <- raster::stack(r, r2) + + cl <- parallel::makeCluster(2) + doParallel::registerDoParallel(cl) + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_rasters) + parallel::stopCluster(cl) + foreach::registerDoSEQ() + + result <- fit_model(test_data, iterations = 2) + } + +} +\references{ +Nanda et al. (2023) disaggregation: An R Package for Bayesian +Spatial Disaggregation Modeling. +} diff --git a/man/getStartendindex.Rd b/man/getStartendindex.Rd index 4d35f90..6848ada 100644 --- a/man/getStartendindex.Rd +++ b/man/getStartendindex.Rd @@ -1,42 +1,42 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/matching.R -\name{getStartendindex} -\alias{getStartendindex} -\title{Function to match pixels to their corresponding polygon} -\usage{ -getStartendindex(covariates, polygon_data, id_var = "area_id") -} -\arguments{ -\item{covariates}{data.frame with each covariate as a column an and id column.} - -\item{polygon_data}{data.frame with polygon id and response data.} - -\item{id_var}{string with the name of the column in the covariate data.frame containing the polygon id.} -} -\value{ -A matrix with two columns and one row for each polygon. The first column is the index of the first row in -covariate data that corresponds to that polygon, the second column is the index of the last row in -covariate data that corresponds to that polygon. -} -\description{ -From the covariate data and polygon data, the function matches the polygon id between the two to find -which pixels from the covariate data are contained in each of the polygons. -} -\details{ -Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -and another data.frame containing polygon data with a polygon id, response and sample size column (as returned -by \code{getPolygonData} function). - -Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in -covariate data that corresponds to that polygon, the second column is the index of the last row in -covariate data that corresponds to that polygon. 
-} -\examples{ -{ - covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) - response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) - getStartendindex(covs, response, 'area_id') -} - - -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/matching.R +\name{getStartendindex} +\alias{getStartendindex} +\title{Function to match pixels to their corresponding polygon} +\usage{ +getStartendindex(covariates, polygon_data, id_var = "area_id") +} +\arguments{ +\item{covariates}{data.frame with each covariate as a column an and id column.} + +\item{polygon_data}{data.frame with polygon id and response data.} + +\item{id_var}{string with the name of the column in the covariate data.frame containing the polygon id.} +} +\value{ +A matrix with two columns and one row for each polygon. The first column is the index of the first row in +covariate data that corresponds to that polygon, the second column is the index of the last row in +covariate data that corresponds to that polygon. +} +\description{ +From the covariate data and polygon data, the function matches the polygon id between the two to find +which pixels from the covariate data are contained in each of the polygons. +} +\details{ +Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +by \code{getPolygonData} function). + +Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in +covariate data that corresponds to that polygon, the second column is the index of the last row in +covariate data that corresponds to that polygon. +} +\examples{ +{ + covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5)) + response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA)) + getStartendindex(covs, response, 'area_id') +} + + +} diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index 8348828..b040041 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -1,124 +1,124 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fit_model.R -\name{make_model_object} -\alias{make_model_object} -\title{Create the TMB model object for the disaggregation model} -\usage{ -make_model_object( - data, - priors = NULL, - family = "gaussian", - link = "identity", - field = TRUE, - iid = TRUE, - silent = TRUE -) -} -\arguments{ -\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting} - -\item{priors}{list of prior values} - -\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}} - -\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}} - -\item{field}{logical. Flag the spatial field on or off} - -\item{iid}{logical. Flag the iid effect on or off} - -\item{silent}{logical. Suppress verbose output.} -} -\value{ -The TMB model object returned by \code{\link[TMB]{MakeADFun}}. -} -\description{ -\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} -and creates a TMB model object to be used in fitting. 
-} -\details{ -\strong{The model definition} - -The disaggregation model make predictions at the pixel level: -\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u} - -And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}): -\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)} -\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)} - -The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): -\itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where - \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} - \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: - For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. - \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: - \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. -} - -Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field -where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. - -The precise names and default values for these priors are: -\itemize{ -\item priormean_intercept: 0 -\item priorsd_intercept: 10.0 -\item priormean_slope: 0.0 -\item priorsd_slope: 0.5 -\item prior_rho_min: A third the length of the diagonal of the bounding box. -\item prior_rho_prob: 0.1 -\item prior_sigma_max: sd(response/mean(response)) -\item prior_sigma_prob: 0.1 -\item prior_iideffect_sd_max: 0.1 -\item prior_iideffect_sd_prob: 0.01 -} - -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. -The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. -These are specified as strings. - -The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE. - -The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point. - -The \emph{silent} argument can be used to publish/supress verbose output. Default TRUE. 
-}
-\examples{
-\dontrun{
- polygons <- list()
- for(i in 1:100) {
-   row <- ceiling(i/10)
-   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
-   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
-
- polys <- do.call(raster::spPolygons, polygons)
- response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
- spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
- r <- raster::raster(ncol=20, nrow=20)
- r <- raster::setExtent(r, raster::extent(spdf))
- r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
- r2 <- raster::raster(ncol=20, nrow=20)
- r2 <- raster::setExtent(r2, raster::extent(spdf))
- r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
- cov_rasters <- raster::stack(r, r2)
-
- cl <- parallel::makeCluster(2)
- doParallel::registerDoParallel(cl)
- test_data <- prepare_data(polygon_shapefile = spdf,
-                           covariate_rasters = cov_rasters)
- parallel::stopCluster(cl)
- foreach::registerDoSEQ()
-
- result <- make_model_object(test_data)
- }
-
-}
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/fit_model.R
+\name{make_model_object}
+\alias{make_model_object}
+\title{Create the TMB model object for the disaggregation model}
+\usage{
+make_model_object(
+  data,
+  priors = NULL,
+  family = "gaussian",
+  link = "identity",
+  field = TRUE,
+  iid = TRUE,
+  silent = TRUE
+)
+}
+\arguments{
+\item{data}{disag_data object returned by \code{\link{prepare_data}} function that contains all the necessary objects for the model fitting}
+
+\item{priors}{list of prior values}
+
+\item{family}{likelihood function: \emph{gaussian}, \emph{binomial} or \emph{poisson}}
+
+\item{link}{link function: \emph{logit}, \emph{log} or \emph{identity}}
+
+\item{field}{logical. Flag the spatial field on or off}
+
+\item{iid}{logical. Flag the iid effect on or off}
+
+\item{silent}{logical. Suppress verbose output.}
+}
+\value{
+The TMB model object returned by \code{\link[TMB]{MakeADFun}}.
+}
+\description{
+\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}}
+and creates a TMB model object to be used in fitting.
+}
+\details{
+\strong{The model definition}
+
+The disaggregation model makes predictions at the pixel level:
+\deqn{link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i}{ link(predi) = \beta 0 + \beta X + GP + u}
+
+And then aggregates these predictions to the polygon level using the weighted sum (via the aggregation raster, \eqn{agg_i}{aggi}):
+\deqn{cases_j = \sum_{i \epsilon j} pred_i \times agg_i}{ casesj = \sum (predi x aggi)}
+\deqn{rate_j = \frac{\sum_{i \epsilon j} pred_i \times agg_i}{\sum_{i \epsilon j} agg_i}}{ratej = \sum(predi x aggi) / \sum (aggi)}
+
+The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
+\itemize{
+  \item Gaussian:
+   If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
+  \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
+   \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
+  \item Binomial:
+   For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
+   \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
+  \item Poisson:
+   \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
+}
+
+Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field
+are given as penalised complexity priors: you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
+where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
+where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.
+
+The precise names and default values for these priors are:
+\itemize{
+\item priormean_intercept: 0
+\item priorsd_intercept: 10.0
+\item priormean_slope: 0.0
+\item priorsd_slope: 0.5
+\item prior_rho_min: A third the length of the diagonal of the bounding box.
+\item prior_rho_prob: 0.1
+\item prior_sigma_max: sd(response/mean(response))
+\item prior_sigma_prob: 0.1
+\item prior_iideffect_sd_max: 0.1
+\item prior_iideffect_sd_prob: 0.01
+}
+
+The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
+The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
+The link function can be one of \emph{logit}, \emph{log} or \emph{identity}.
+These are specified as strings.
+
+The field and iid effect can be turned on or off via the \emph{field} and \emph{iid} logical flags. Both are default TRUE.
+
+The \emph{iterations} argument specifies the maximum number of iterations the model can run for to find an optimal point.
+
+The \emph{silent} argument can be used to publish/suppress verbose output. Default TRUE.
+}
+\examples{
+\dontrun{
+ polygons <- list()
+ for(i in 1:100) {
+   row <- ceiling(i/10)
+   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
+   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
+ }
+
+ polys <- do.call(raster::spPolygons, polygons)
+ response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+ spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+
+ r <- raster::raster(ncol=20, nrow=20)
+ r <- raster::setExtent(r, raster::extent(spdf))
+ r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
+ r2 <- raster::raster(ncol=20, nrow=20)
+ r2 <- raster::setExtent(r2, raster::extent(spdf))
+ r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
+ cov_rasters <- raster::stack(r, r2)
+
+ cl <- parallel::makeCluster(2)
+ doParallel::registerDoParallel(cl)
+ test_data <- prepare_data(polygon_shapefile = spdf,
+                           covariate_rasters = cov_rasters)
+ parallel::stopCluster(cl)
+ foreach::registerDoSEQ()
+
+ result <- make_model_object(test_data)
+ }
+
+}
diff --git a/man/plot.disag_data.Rd b/man/plot.disag_data.Rd
index bc4f774..f541c61 100644
--- a/man/plot.disag_data.Rd
+++ b/man/plot.disag_data.Rd
@@ -1,24 +1,24 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/plotting.R
-\name{plot.disag_data}
-\alias{plot.disag_data}
-\title{Plot input data for disaggregation}
-\usage{
-\method{plot}{disag_data}(x, which = c(1, 2, 3), ...)
-} -\arguments{ -\item{x}{Object of class \emph{disag_data} to be plotted.} - -\item{which}{If a subset of plots is required, specify a subset of the numbers 1:3} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) -} -\description{ -Plotting function for class \emph{disag_data} (the input data for disaggregation). -} -\details{ -Produces three plots: polygon response data, covariate rasters and INLA mesh. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_data} +\alias{plot.disag_data} +\title{Plot input data for disaggregation} +\usage{ +\method{plot}{disag_data}(x, which = c(1, 2, 3), ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_data} to be plotted.} + +\item{which}{If a subset of plots is required, specify a subset of the numbers 1:3} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of three plots: the polygon plot (ggplot), covariate plot (spplot) and INLA mesh plot (ggplot) +} +\description{ +Plotting function for class \emph{disag_data} (the input data for disaggregation). +} +\details{ +Produces three plots: polygon response data, covariate rasters and INLA mesh. +} diff --git a/man/plot.disag_model.Rd b/man/plot.disag_model.Rd index 6fbcab3..421e14c 100644 --- a/man/plot.disag_model.Rd +++ b/man/plot.disag_model.Rd @@ -1,22 +1,22 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_model} -\alias{plot.disag_model} -\title{Plot results of fitted model} -\usage{ -\method{plot}{disag_model}(x, ...) -} -\arguments{ -\item{x}{Object of class \emph{disag_model} to be plotted.} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot -} -\description{ -Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). -} -\details{ -Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_model} +\alias{plot.disag_model} +\title{Plot results of fitted model} +\usage{ +\method{plot}{disag_model}(x, ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_model} to be plotted.} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of two ggplot plots: results of the fixed effects and an in-sample observed vs predicted plot +} +\description{ +Plotting function for class \emph{disag_model} (the result of the disaggregation fitting). +} +\details{ +Produces two plots: results of the fixed effects and in-sample observed vs predicted plot. +} diff --git a/man/plot.disag_prediction.Rd b/man/plot.disag_prediction.Rd index 5942bf3..213027c 100644 --- a/man/plot.disag_prediction.Rd +++ b/man/plot.disag_prediction.Rd @@ -1,22 +1,22 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/plotting.R -\name{plot.disag_prediction} -\alias{plot.disag_prediction} -\title{Plot mean and uncertainty predictions from the disaggregation model results} -\usage{ -\method{plot}{disag_prediction}(x, ...) 
-} -\arguments{ -\item{x}{Object of class \emph{disag_prediction} to be plotted.} - -\item{...}{Further arguments to \emph{plot} function.} -} -\value{ -A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. -} -\description{ -Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). -} -\details{ -Produces raster plots of the mean prediction, and the lower and upper confidence intervals. -} +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/plotting.R +\name{plot.disag_prediction} +\alias{plot.disag_prediction} +\title{Plot mean and uncertainty predictions from the disaggregation model results} +\usage{ +\method{plot}{disag_prediction}(x, ...) +} +\arguments{ +\item{x}{Object of class \emph{disag_prediction} to be plotted.} + +\item{...}{Further arguments to \emph{plot} function.} +} +\value{ +A list of plots of rasters from the prediction: mean prediction, lower CI and upper CI. +} +\description{ +Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). +} +\details{ +Produces raster plots of the mean prediction, and the lower and upper confidence intervals. +} diff --git a/src/disaggregation.cpp b/src/disaggregation.cpp index 210e595..d476bbd 100644 --- a/src/disaggregation.cpp +++ b/src/disaggregation.cpp @@ -1,262 +1,262 @@ -// -// Author: Anita Nandi -// Date: 2019-02-14 - -// Data: Spatial field mesh and matrices, polygon data, covariate pixel data - - -#define TMB_LIB_INIT R_init_disaggregation -#include - -template -Type objective_function::operator()() -{ - - using namespace R_inla; - using namespace density; - using namespace Eigen; - - // ------------------------------------------------------------------------ // - // Spatial field data - // ------------------------------------------------------------------------ // - - // The A matrices are for projecting the mesh to a point for the pixel and point data respectively. - DATA_SPARSE_MATRIX(Apixel); - DATA_STRUCT(spde, spde_t); - - // ------------------------------------------------------------------------ // - // Polygon level data - // ------------------------------------------------------------------------ // - - // Covariate pixel data - DATA_MATRIX(x); - - // two col matrix with start end indices for each shape case. - DATA_IARRAY(startendindex); - - // Shape data. Cases and region id. - DATA_VECTOR(polygon_response_data); - DATA_VECTOR(response_sample_size); - - // Use to aggreagte pixel response values to polygon level - DATA_VECTOR(aggregation_values); - - // ------------------------------------------------------------------------ // - // Likelihood and link functions - // ------------------------------------------------------------------------ // - - DATA_INTEGER(family); - DATA_INTEGER(link); - - // ------------------------------------------------------------------------ // - // Parameters - // ------------------------------------------------------------------------ // - - PARAMETER(intercept); - PARAMETER_VECTOR(slope); - - DATA_SCALAR(priormean_intercept); - DATA_SCALAR(priorsd_intercept); - DATA_SCALAR(priormean_slope); - DATA_SCALAR(priorsd_slope); - - // Priors for likelihood - PARAMETER(log_tau_gaussian); - Type tau_gaussian = exp(log_tau_gaussian); - Type gaussian_sd = 1 / sqrt(tau_gaussian); - - // INLA defines a loggamma prior on log tau. - // We evaluate a gamma prior on tau, but the parameters are - // therefore the same. 
- Type prior_gamma_shape = 1; - Type prior_gamma_rate = 5e-05; - - PARAMETER_VECTOR(iideffect); - PARAMETER(iideffect_log_tau); - Type iideffect_tau = exp(iideffect_log_tau); - Type iideffect_sd = 1 / sqrt(iideffect_tau); - - Type iideffect_mean = 0.0; - - // Priors on iid random effect for polygons - DATA_SCALAR(prior_iideffect_sd_max); - DATA_SCALAR(prior_iideffect_sd_prob); - - // spde hyperparameters - PARAMETER(log_sigma); - PARAMETER(log_rho); - Type sigma = exp(log_sigma); - Type rho = exp(log_rho); - - // Priors on spde hyperparameters - DATA_SCALAR(prior_rho_min); - DATA_SCALAR(prior_rho_prob); - DATA_SCALAR(prior_sigma_max); - DATA_SCALAR(prior_sigma_prob); - - // Convert hyperparameters to natural scale - DATA_SCALAR(nu); - Type kappa = sqrt(8.0) / rho; - - // Random effect parameters - PARAMETER_VECTOR(nodemean); - - // Model component flags - DATA_INTEGER(field); - DATA_INTEGER(iid); - - // Number of polygons - int n_polygons = polygon_response_data.size(); - // Number of pixels - int n_pixels = x.rows(); - - Type nll = 0.0; - - // ------------------------------------------------------------------------ // - // Likelihood from priors - // ------------------------------------------------------------------------ // - - nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true); - for (int s = 0; s < slope.size(); s++) { - nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true); - } - - if(iid) { - // Likelihood of hyperparameter of polygon iid random effect. - // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3) - Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max; - Type log_pcdensity_iid = log(lambda / 2) - (3/2)*iideffect_log_tau - lambda * pow(iideffect_tau, -1/2); - // log(iideffect_sd) from the Jacobian - nll -= log_pcdensity_iid + iideffect_log_tau; - - // Likelihood of random effect for polygons - for(int p = 0; p < iideffect.size(); p++) { - nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true); - } - } - - // Likelihood from the gaussian prior. - // log(prec) ~ loggamma - // prec ~ gamma - if(family == 0) { - nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true); - } - - if(field) { - // Likelihood of hyperparameters for field. - // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6) - Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min; - Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max; - Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma; - // log_rho and log_sigma from the Jacobian - nll -= log_pcdensity + log_rho + log_sigma; - - // Build spde matrix - SparseMatrix Q = Q_spde(spde, kappa); - - // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance - Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu))); - - // Likelihood of the random field. 
- nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean); - } - - Type nll_priors = nll; - - // ------------------------------------------------------------------------ // - // Likelihood from data - // ------------------------------------------------------------------------ // - - vector pixel_linear_pred(n_pixels); - pixel_linear_pred = intercept + x * slope; - - if(field) { - // Calculate field for pixel data - vector linear_pred_field(n_pixels); - linear_pred_field = Apixel * nodemean; - pixel_linear_pred += linear_pred_field.array(); - } - - // recalculate startendindices to be in the form start, n - startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1; - - Type polygon_response; - Type normalised_polygon_response; - Type normalisation_total; - Type pred_polygoncases; - Type pred_polygonrate; - Type polygon_sd; - vector pixel_pred; - vector numerator_pixels; - vector normalisation_pixels; - vector reportnormalisation(n_polygons); - vector reportprediction_cases(n_polygons); - vector reportprediction_rate(n_polygons); - vector reportnll(n_polygons); - vector reportpolygonsd(n_polygons); - - // For each shape get pixel predictions within and aggregate to polygon level - for (int polygon = 0; polygon < n_polygons; polygon++) { - - // Get pixel level predictions (rate) - pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); - if(iid) { - pixel_pred += iideffect[polygon]; - } - // Use correct link function - if(link == 0) { - pixel_pred = invlogit(pixel_pred); - } else if(link == 1) { - pixel_pred = exp(pixel_pred); - } else if(link == 2){ - // Don't need to do anything, i.e. pixel_pred = pixel_pred; - } else { - error("Link function not implemented."); - } - - // Aggregate to polygon prediction - numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array(); - normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)); - normalisation_total = sum(normalisation_pixels); - pred_polygoncases = sum(numerator_pixels); - pred_polygonrate = pred_polygoncases/normalisation_total; - - reportnormalisation[polygon] = normalisation_total; - reportprediction_cases[polygon] = pred_polygoncases; - reportprediction_rate[polygon] = pred_polygonrate; - - // Use correct likelihood function - if(family == 0) { - // Scale the pixel sd to polygon level - polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total; - reportpolygonsd[polygon] = polygon_sd; - // Calculate normal likelihood in rate space - polygon_response = polygon_response_data(polygon); - normalised_polygon_response = polygon_response/normalisation_total; - nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); - reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true); - } else if(family == 1) { - nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); - reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true); - } else if(family == 2) { - nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true); - reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true); - } else { - error("Likelihood not implemented."); - } - - } - - REPORT(reportprediction_cases); - REPORT(reportprediction_rate); - REPORT(reportnormalisation); - 
REPORT(reportnll);
-  REPORT(polygon_response_data);
-  REPORT(nll_priors);
-  REPORT(nll);
-  if(family == 0) {
-    REPORT(reportpolygonsd);
-  }
-
-  return nll;
-}
+//
+// Author: Anita Nandi
+// Date: 2019-02-14
+
+// Data: Spatial field mesh and matrices, polygon data, covariate pixel data
+
+
+#define TMB_LIB_INIT R_init_disaggregation
+#include <TMB.hpp>
+
+template<class Type>
+Type objective_function<Type>::operator()()
+{
+
+  using namespace R_inla;
+  using namespace density;
+  using namespace Eigen;
+
+  // ------------------------------------------------------------------------ //
+  // Spatial field data
+  // ------------------------------------------------------------------------ //
+
+  // The A matrices are for projecting the mesh to a point for the pixel and point data respectively.
+  DATA_SPARSE_MATRIX(Apixel);
+  DATA_STRUCT(spde, spde_t);
+
+  // ------------------------------------------------------------------------ //
+  // Polygon level data
+  // ------------------------------------------------------------------------ //
+
+  // Covariate pixel data
+  DATA_MATRIX(x);
+
+  // two col matrix with start end indices for each shape case.
+  DATA_IARRAY(startendindex);
+
+  // Shape data. Cases and region id.
+  DATA_VECTOR(polygon_response_data);
+  DATA_VECTOR(response_sample_size);
+
+  // Used to aggregate pixel response values to polygon level
+  DATA_VECTOR(aggregation_values);
+
+  // ------------------------------------------------------------------------ //
+  // Likelihood and link functions
+  // ------------------------------------------------------------------------ //
+
+  DATA_INTEGER(family);
+  DATA_INTEGER(link);
+
+  // ------------------------------------------------------------------------ //
+  // Parameters
+  // ------------------------------------------------------------------------ //
+
+  PARAMETER(intercept);
+  PARAMETER_VECTOR(slope);
+
+  DATA_SCALAR(priormean_intercept);
+  DATA_SCALAR(priorsd_intercept);
+  DATA_SCALAR(priormean_slope);
+  DATA_SCALAR(priorsd_slope);
+
+  // Priors for likelihood
+  PARAMETER(log_tau_gaussian);
+  Type tau_gaussian = exp(log_tau_gaussian);
+  Type gaussian_sd = 1 / sqrt(tau_gaussian);
+
+  // INLA defines a loggamma prior on log tau.
+  // We evaluate a gamma prior on tau, but the parameters are
+  // therefore the same.
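+  // (That is, if log(tau) ~ loggamma(shape, rate) then tau ~ gamma(shape, rate),
+  // so the same shape and rate values can be reused below.)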
+  Type prior_gamma_shape = 1;
+  Type prior_gamma_rate = 5e-05;
+
+  PARAMETER_VECTOR(iideffect);
+  PARAMETER(iideffect_log_tau);
+  Type iideffect_tau = exp(iideffect_log_tau);
+  Type iideffect_sd = 1 / sqrt(iideffect_tau);
+
+  Type iideffect_mean = 0.0;
+
+  // Priors on iid random effect for polygons
+  DATA_SCALAR(prior_iideffect_sd_max);
+  DATA_SCALAR(prior_iideffect_sd_prob);
+
+  // spde hyperparameters
+  PARAMETER(log_sigma);
+  PARAMETER(log_rho);
+  Type sigma = exp(log_sigma);
+  Type rho = exp(log_rho);
+
+  // Priors on spde hyperparameters
+  DATA_SCALAR(prior_rho_min);
+  DATA_SCALAR(prior_rho_prob);
+  DATA_SCALAR(prior_sigma_max);
+  DATA_SCALAR(prior_sigma_prob);
+
+  // Convert hyperparameters to natural scale
+  DATA_SCALAR(nu);
+  Type kappa = sqrt(8.0) / rho;
+
+  // Random effect parameters
+  PARAMETER_VECTOR(nodemean);
+
+  // Model component flags
+  DATA_INTEGER(field);
+  DATA_INTEGER(iid);
+
+  // Number of polygons
+  int n_polygons = polygon_response_data.size();
+  // Number of pixels
+  int n_pixels = x.rows();
+
+  Type nll = 0.0;
+
+  // ------------------------------------------------------------------------ //
+  // Likelihood from priors
+  // ------------------------------------------------------------------------ //
+
+  nll -= dnorm(intercept, priormean_intercept, priorsd_intercept, true);
+  for (int s = 0; s < slope.size(); s++) {
+    nll -= dnorm(slope[s], priormean_slope, priorsd_slope, true);
+  }
+
+  if(iid) {
+    // Likelihood of hyperparameter of polygon iid random effect.
+    // From https://projecteuclid.org/euclid.ss/1491465621 (Eqn 3.3)
+    // Floating-point literals are needed here: the integer expressions (3/2)
+    // and (-1/2) truncate to 1 and 0 and silently change the density.
+    Type lambda = -log(prior_iideffect_sd_prob) / prior_iideffect_sd_max;
+    Type log_pcdensity_iid = log(lambda / 2) - (3.0/2.0)*iideffect_log_tau - lambda * pow(iideffect_tau, Type(-0.5));
+    // log(iideffect_sd) from the Jacobian
+    nll -= log_pcdensity_iid + iideffect_log_tau;
+
+    // Likelihood of random effect for polygons
+    for(int p = 0; p < iideffect.size(); p++) {
+      nll -= dnorm(iideffect[p], iideffect_mean, iideffect_sd, true);
+    }
+  }
+
+  // Likelihood from the gaussian prior.
+  // log(prec) ~ loggamma
+  // prec ~ gamma
+  if(family == 0) {
+    nll -= dgamma(tau_gaussian, prior_gamma_shape, prior_gamma_rate, true);
+  }
+
+  if(field) {
+    // Likelihood of hyperparameters for field.
+    // From https://www.tandfonline.com/doi/full/10.1080/01621459.2017.1415907 (Theorem 2.6)
+    Type lambdatilde1 = -log(prior_rho_prob) * prior_rho_min;
+    Type lambdatilde2 = -log(prior_sigma_prob) / prior_sigma_max;
+    Type log_pcdensity = log(lambdatilde1) + log(lambdatilde2) - 2*log_rho - lambdatilde1 * pow(rho, -1) - lambdatilde2 * sigma;
+    // log_rho and log_sigma from the Jacobian
+    nll -= log_pcdensity + log_rho + log_sigma;
+
+    // Build spde matrix
+    SparseMatrix<Type> Q = Q_spde(spde, kappa);
+
+    // From Lindgren (2011) https://doi.org/10.1111/j.1467-9868.2011.00777.x, see equation for the marginal variance
+    Type scaling_factor = sqrt(exp(lgamma(nu)) / (exp(lgamma(nu + 1)) * 4 * M_PI * pow(kappa, 2*nu)));
+
+    // Likelihood of the random field.
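+    // (Objects from TMB's density namespace, such as GMRF, return the
+    // *negative* log density, so the field contribution is added to nll.)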
+    nll += SCALE(GMRF(Q), sigma / scaling_factor)(nodemean);
+  }
+
+  Type nll_priors = nll;
+
+  // ------------------------------------------------------------------------ //
+  // Likelihood from data
+  // ------------------------------------------------------------------------ //
+
+  vector<Type> pixel_linear_pred(n_pixels);
+  pixel_linear_pred = intercept + x * slope;
+
+  if(field) {
+    // Calculate field for pixel data
+    vector<Type> linear_pred_field(n_pixels);
+    linear_pred_field = Apixel * nodemean;
+    pixel_linear_pred += linear_pred_field.array();
+  }
+
+  // recalculate startendindices to be in the form start, n
+  startendindex.col(1) = startendindex.col(1) - startendindex.col(0) + 1;
+
+  Type polygon_response;
+  Type normalised_polygon_response;
+  Type normalisation_total;
+  Type pred_polygoncases;
+  Type pred_polygonrate;
+  Type polygon_sd;
+  vector<Type> pixel_pred;
+  vector<Type> numerator_pixels;
+  vector<Type> normalisation_pixels;
+  vector<Type> reportnormalisation(n_polygons);
+  vector<Type> reportprediction_cases(n_polygons);
+  vector<Type> reportprediction_rate(n_polygons);
+  vector<Type> reportnll(n_polygons);
+  vector<Type> reportpolygonsd(n_polygons);
+
+  // For each shape get pixel predictions within and aggregate to polygon level
+  for (int polygon = 0; polygon < n_polygons; polygon++) {
+
+    // Get pixel level predictions (rate)
+    pixel_pred = pixel_linear_pred.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array();
+    if(iid) {
+      pixel_pred += iideffect[polygon];
+    }
+    // Use correct link function
+    if(link == 0) {
+      pixel_pred = invlogit(pixel_pred);
+    } else if(link == 1) {
+      pixel_pred = exp(pixel_pred);
+    } else if(link == 2){
+      // Don't need to do anything, i.e. pixel_pred = pixel_pred;
+    } else {
+      error("Link function not implemented.");
+    }
+
+    // Aggregate to polygon prediction
+    numerator_pixels = pixel_pred * aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1)).array();
+    normalisation_pixels = aggregation_values.segment(startendindex(polygon, 0), startendindex(polygon, 1));
+    normalisation_total = sum(normalisation_pixels);
+    pred_polygoncases = sum(numerator_pixels);
+    pred_polygonrate = pred_polygoncases/normalisation_total;
+
+    reportnormalisation[polygon] = normalisation_total;
+    reportprediction_cases[polygon] = pred_polygoncases;
+    reportprediction_rate[polygon] = pred_polygonrate;
+
+    // Use correct likelihood function
+    if(family == 0) {
+      // Scale the pixel sd to polygon level
+      polygon_sd = gaussian_sd * sqrt((normalisation_pixels * normalisation_pixels).sum()) / normalisation_total;
+      reportpolygonsd[polygon] = polygon_sd;
+      // Calculate normal likelihood in rate space
+      polygon_response = polygon_response_data(polygon);
+      normalised_polygon_response = polygon_response/normalisation_total;
+      nll -= dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true);
+      reportnll[polygon] = -dnorm(normalised_polygon_response, pred_polygonrate, polygon_sd, true);
+    } else if(family == 1) {
+      nll -= dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true);
+      reportnll[polygon] = -dbinom(polygon_response_data[polygon], response_sample_size[polygon], pred_polygonrate, true);
+    } else if(family == 2) {
+      nll -= dpois(polygon_response_data[polygon], pred_polygoncases, true);
+      reportnll[polygon] = -dpois(polygon_response_data[polygon], pred_polygoncases, true);
+    } else {
+      error("Likelihood not implemented.");
+    }
+
+  }
+
+  REPORT(reportprediction_cases);
+  REPORT(reportprediction_rate);
+  REPORT(reportnormalisation);
+
REPORT(reportnll); + REPORT(polygon_response_data); + REPORT(nll_priors); + REPORT(nll); + if(family == 0) { + REPORT(reportpolygonsd); + } + + return nll; +} diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 5cefeb8..37a9b73 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,168 +1,168 @@ - -context("Fitting model") - -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - -test_that("disag_model produces errors when expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - expect_error(disag_model(list())) - expect_error(disag_model(test_data, iterations = 'iterations')) - expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) - expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) - expect_error(disag_model(test_data, family = 'banana')) - expect_error(disag_model(test_data, link = 'apple')) - -}) - -test_that("disag_model behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - - -}) - - - - -test_that("disag_model with 1 covariate behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - test_data2 <- test_data - test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] - test_data2$covariate_data <- test_data2$covariate_data[, 1:3] - - result <- disag_model(test_data2, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - - # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. 
- expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - # Confirm only two covariates were fitted. - expect_equal(sum(names(result$opt$par) == 'slope'), 1) - -}) -test_that("user defined model setup is working as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - binom_data <- prepare_data(polygon_shapefile = spdf_binom, - covariate_rasters = cov_stack, - sample_size_var = 'sample_size') - - result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') - result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') - result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') - - expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) - - expect_is(result2, 'disag_model') - expect_equal(length(result2), 5) - expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) - expect_false(result2$model_setup$field) - expect_true(result2$model_setup$iid) - expect_equal(result2$model_setup$family, 'poisson') - expect_equal(result2$model_setup$link, 'log') - - expect_is(result3, 'disag_model') - expect_equal(length(result3), 5) - expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) - expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) - expect_true(result3$model_setup$field) - expect_false(result3$model_setup$iid) - expect_equal(result3$model_setup$family, 'binomial') - expect_equal(result3$model_setup$link, 'logit') - - expect_is(result4, 'disag_model') - expect_equal(length(result4), 5) - expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result4$sd_out$par.random)), NULL) - expect_false(result4$model_setup$field) - expect_false(result4$model_setup$iid) - expect_equal(result4$model_setup$family, 'gaussian') - expect_equal(result4$model_setup$link, 'identity') -}) - -test_that("make_model_object behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- make_model_object(test_data) - - expect_is(result, 'list') - expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) - -}) - -test_that("setup_hess_control behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - obj <- make_model_object(test_data) - - opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) - - hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) - - expect_is(hess_control, 'list') - expect_equal(length(hess_control$parscale), length(opt$par)) - expect_equal(length(hess_control$ndeps), length(opt$par)) - -}) - + +context("Fitting model") + +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) +} + +polys <- 
do.call(raster::spPolygons, polygons)
+N <- floor(runif(n_polygons, min = 1, max = 100))
+response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
+response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N)
+
+spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df)
+
+# Create raster stack
+r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+r <- raster::setExtent(r, raster::extent(spdf))
+r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
+r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+r2 <- raster::setExtent(r2, raster::extent(spdf))
+r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
+cov_stack <- raster::stack(r, r2)
+
+if(identical(Sys.getenv("NOT_CRAN"), "true")) {
+  test_data <- prepare_data(polygon_shapefile = spdf,
+                            covariate_rasters = cov_stack)
+} else {
+  test_data <- prepare_data(polygon_shapefile = spdf,
+                            covariate_rasters = cov_stack,
+                            makeMesh = FALSE)
+}
+
+test_that("disag_model produces errors when expected", {
+
+  skip_if_not_installed('INLA')
+  skip_on_cran()
+
+  expect_error(disag_model(list()))
+  expect_error(disag_model(test_data, iterations = 'iterations'))
+  expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4)))
+  expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2)))
+  expect_error(disag_model(test_data, family = 'banana'))
+  expect_error(disag_model(test_data, link = 'apple'))
+
+})
+
+test_that("disag_model behaves as expected", {
+
+  skip_if_not_installed('INLA')
+  skip_on_cran()
+
+  result <- disag_model(test_data, iterations = 2, iid = FALSE)
+
+  expect_is(result, 'disag_model')
+  expect_equal(length(result), 5)
+  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4)
+  expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))
+
+
+
+})
+
+
+
+
+test_that("disag_model with 1 covariate behaves as expected", {
+
+  skip_if_not_installed('INLA')
+  skip_on_cran()
+
+  test_data2 <- test_data
+  test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]]
+  test_data2$covariate_data <- test_data2$covariate_data[, 1:3]
+
+  result <- disag_model(test_data2, iterations = 2, iid = FALSE)
+
+  expect_is(result, 'disag_model')
+  expect_equal(length(result), 5)
+
+  # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore.
+  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3)
+  expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))
+
+  # Confirm only one covariate was fitted.
+ expect_equal(sum(names(result$opt$par) == 'slope'), 1) + +}) +test_that("user defined model setup is working as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + binom_data <- prepare_data(polygon_shapefile = spdf_binom, + covariate_rasters = cov_stack, + sample_size_var = 'sample_size') + + result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') + result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') + result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') + + expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) + + expect_is(result2, 'disag_model') + expect_equal(length(result2), 5) + expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) + expect_false(result2$model_setup$field) + expect_true(result2$model_setup$iid) + expect_equal(result2$model_setup$family, 'poisson') + expect_equal(result2$model_setup$link, 'log') + + expect_is(result3, 'disag_model') + expect_equal(length(result3), 5) + expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) + expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) + expect_true(result3$model_setup$field) + expect_false(result3$model_setup$iid) + expect_equal(result3$model_setup$family, 'binomial') + expect_equal(result3$model_setup$link, 'logit') + + expect_is(result4, 'disag_model') + expect_equal(length(result4), 5) + expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result4$sd_out$par.random)), NULL) + expect_false(result4$model_setup$field) + expect_false(result4$model_setup$iid) + expect_equal(result4$model_setup$family, 'gaussian') + expect_equal(result4$model_setup$link, 'identity') +}) + +test_that("make_model_object behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- make_model_object(test_data) + + expect_is(result, 'list') + expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) + +}) + +test_that("setup_hess_control behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + obj <- make_model_object(test_data) + + opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) + + hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) + + expect_is(hess_control, 'list') + expect_equal(length(hess_control$parscale), length(opt$par)) + expect_equal(length(hess_control$ndeps), length(opt$par)) + +}) + diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 322a5bf..fab8f87 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -26,19 +26,19 @@ r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_s cov_stack <- raster::stack(r, r2) if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, + test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) } else { - test_data <- prepare_data(polygon_shapefile = spdf, + test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, makeMesh = FALSE) } test_that("Check predict.disag_model 
function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, @@ -56,18 +56,18 @@ test_that("Check predict.disag_model function works as expected", { prior_iideffect_sd_prob = 0.01)) pred2 <- predict(result) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_is(pred2$mean_prediction$prediction, 'Raster') expect_is(pred2$mean_prediction$field, 'Raster') expect_true(is.null(pred2$mean_prediction$iid)) expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -77,11 +77,11 @@ test_that("Check predict.disag_model function works as expected", { expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) pred2 <- predict(result, predict_iid = TRUE, N = 10) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) @@ -89,7 +89,7 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction$field, 'Raster') expect_is(pred2$mean_prediction$iid, 'Raster') expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -97,25 +97,25 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 10) expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - - + + # For a model with no field or iid - + result <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE) - + pred2 <- predict(result) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_is(pred2$mean_prediction$prediction, 'Raster') expect_true(is.null(pred2$mean_prediction$field)) expect_true(is.null(pred2$mean_prediction$iid)) expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -123,16 +123,16 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - + }) test_that("Check predict.disag_model function works with newdata", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, 
priors = list(priormean_intercept = 0, priorsd_intercept = 1, @@ -144,15 +144,15 @@ test_that("Check predict.disag_model function works with newdata", { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - + newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) pred1 <- predict(result) pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) - + expect_is(pred2, 'disag_prediction') expect_equal(length(pred2), 2) expect_equal(names(pred2), c('mean_prediction', 'uncertainty_prediction')) - + expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) @@ -160,7 +160,7 @@ test_that("Check predict.disag_model function works with newdata", { expect_true(is.null(pred2$mean_prediction$field)) expect_is(pred2$mean_prediction$iid, 'Raster') expect_is(pred2$mean_prediction$covariates, 'Raster') - + expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) @@ -168,23 +168,23 @@ test_that("Check predict.disag_model function works with newdata", { expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 5) expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) - + expect_false(identical(raster::extent(pred1$mean_prediction$prediction), raster::extent(pred2$mean_prediction$prediction))) expect_false(identical(raster::extent(pred1$uncertainty_prediction$realisations), raster::extent(pred2$uncertainty_prediction$realisations))) - + }) test_that('Check that check_newdata works', { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, field = FALSE, iterations = 100) - + newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) nd1 <- check_newdata(newdata, result) expect_is(nd1, 'RasterBrick') - + nn <- newdata[[1]] names(nn) <- 'extra_uneeded' newdata2 <- raster::stack(newdata, nn) @@ -192,18 +192,18 @@ test_that('Check that check_newdata works', { newdata3 <- newdata[[1]] expect_error(check_newdata(newdata3, result), 'All covariates') - + newdata4 <- result$data$covariate_data expect_error(check_newdata(newdata4, result), 'newdata should be NULL or') - - + + }) test_that('Check that setup_objects works', { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iterations = 100, iid = TRUE, field = TRUE, @@ -217,9 +217,9 @@ test_that('Check that setup_objects works', { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.01, prior_iideffect_sd_prob = 0.01)) - + objects <- setup_objects(result) - + expect_is(objects, 'list') expect_equal(length(objects), 3) expect_equal(names(objects), c('covariates', 'field_objects', 'iid_objects')) @@ -228,28 +228,28 @@ test_that('Check that setup_objects works', { newdata <- raster::crop(raster::stack(r, r2), c(0, 180, -90, 90)) objects2 <- setup_objects(result, newdata) - + expect_is(objects2, 'list') expect_equal(length(objects2), 3) expect_equal(names(objects2), c('covariates', 'field_objects', 'iid_objects')) expect_is(objects2$field_objects, 'list') expect_true(is.null(objects$iid_objects)) - + objects3 <- setup_objects(result, predict_iid = TRUE) - + expect_is(objects3, 'list') expect_equal(length(objects3), 3) expect_equal(names(objects3), c('covariates', 'field_objects', 'iid_objects')) 
expect_is(objects3$field_objects, 'list') expect_is(objects3$iid_objects, 'list') - + }) test_that('Check that predict_single_raster works', { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iterations = 100, iid = TRUE, field = TRUE, @@ -263,16 +263,16 @@ test_that('Check that predict_single_raster works', { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.01, prior_iideffect_sd_prob = 0.01)) - + objects <- setup_objects(result) - + pars <- result$obj$env$last.par.best pars <- split(pars, names(pars)) - - pred2 <- predict_single_raster(pars, + + pred2 <- predict_single_raster(pars, objects = objects, link_function = result$model_setup$link) - + expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) @@ -280,13 +280,13 @@ test_that('Check that predict_single_raster works', { expect_is(pred2$field, 'Raster') expect_true(is.null(pred2$iid)) expect_is(pred2$covariates, 'Raster') - + objects2 <- setup_objects(result, predict_iid = TRUE) - - pred2 <- predict_single_raster(pars, + + pred2 <- predict_single_raster(pars, objects = objects2, link_function = result$model_setup$link) - + expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) @@ -294,7 +294,7 @@ test_that('Check that predict_single_raster works', { expect_is(pred2$field, 'Raster') expect_is(pred2$iid, 'Raster') expect_is(pred2$covariates, 'Raster') - + }) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 8d2944b..5967ce2 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -122,8 +122,8 @@ The user can also control the parameters of the mesh that is used to create the ```{r, fig.show='hold', eval= isINLA} data_for_model <- prepare_data(x = df, - cov_stack, - pop_raster, + covariate_rasters = cov_stack, + aggregation_raster = pop_raster, response_var = 'cases', id_var = 'censustract.FIPS', mesh.args = list(cut = 0.01, From f1497a424c6abf98382dfde9169eea3ec56aa1db Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 19:54:46 +0100 Subject: [PATCH 08/55] Fix parallelExtract rows id issue. 
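
The misalignment this fixes: extract() returns one row per extracted cell,
with an ID column mapping each cell back to its source polygon, so the
polygon attribute has to be indexed by that ID rather than bound once per
polygon. A minimal sketch with toy data (names here are illustrative, not
from the package):

    shape_df <- data.frame(area_id = c("a", "b", "c"))  # one row per polygon
    values   <- data.frame(ID = c(1, 1, 1, 2, 3, 3))    # one row per cell
    shape_df[values$ID, "area_id"]
    # "a" "a" "a" "b" "c" "c"  -- now aligned with the per-cell rows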
--- R/extract.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/extract.R b/R/extract.R index db6a302..0db30bc 100644 --- a/R/extract.R +++ b/R/extract.R @@ -72,7 +72,7 @@ parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){ } else { df <- values[, 2:(ncol(values) - 1)] - df <- cbind(as.data.frame(shape)[, id], values$cell, df) + df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df) names(df) <- c(id, 'cellid', names(raster)) return(df) From 2f74d59fe1de94504b6e3633e89cc8f0123e8201 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 20:31:48 +0100 Subject: [PATCH 09/55] Convert build_mesh to sf --- R/build_mesh.R | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 71946c9..7c3a1da 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -40,16 +40,16 @@ #' } #' #' -#' #' @export build_mesh <- function(shapes, mesh.args = NULL) { - stopifnot(inherits(shapes, 'SpatialPolygons')) +<<<<<<< HEAD + stopifnot(inherits(shapes, 'sf')) if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) - limits <- sp::bbox(shapes) - hypotenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2) + limits <- sf::st_bbox(shapes) + hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) maxedge <- hypotenuse/10 @@ -65,7 +65,9 @@ build_mesh <- function(shapes, mesh.args = NULL) { outline <- sf::st_union(sf::st_as_sf(shapes)) coords <- sf::st_coordinates(outline) - + #no sure which is needed + # outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes))) + # coords <- sf::st_coordinates(outline)[, c('X', 'Y')] outline.hull <- INLA::inla.nonconvex.hull(coords, @@ -79,6 +81,5 @@ build_mesh <- function(shapes, mesh.args = NULL) { cut = pars$cut, offset = pars$offset) - return(mesh) } From 74aacfac14b23eff94e8bd08e4d0ea76c96f7844 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Wed, 31 May 2023 21:18:12 +0100 Subject: [PATCH 10/55] Fix plot.disag_data --- R/plotting.R | 7 ++++--- R/prepare_data.R | 4 ++-- vignettes/disaggregation.Rmd | 3 --- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index d1ed93b..02f36e2 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -1,8 +1,8 @@ -<<<<<<< HEAD #' Plot input data for disaggregation #' #' Plotting function for class \emph{disag_data} (the input data for disaggregation). #' +>>>>>>> Fix plot.disag_data #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. @@ -22,12 +22,12 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { titles <- c() if(1 %in% which) { - plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) + plots$polygon <- plot_polygon_data(x$x, x$shapefile_names) titles <- c(titles, 'Polygon response data') } if(2 %in% which) { - stopifnot(inherits(x$covariate_rasters, c('RasterStack', 'RasterBrick'))) + stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) plots$covariates <- sp::spplot(x$covariate_rasters) titles <- c(titles, 'Covariate rasters') } @@ -118,6 +118,7 @@ plot.disag_model <- function(x, ...){ #' #' Plotting function for class \emph{disag_prediction} (the mean and uncertainty predictions of the disaggregation fitting). #' +#' #' Produces raster plots of the mean prediction, and the lower and upper confidence intervals. #' #' @param x Object of class \emph{disag_prediction} to be plotted. 
diff --git a/R/prepare_data.R b/R/prepare_data.R index 0d6831e..94b98ff 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -40,8 +40,8 @@ #' @return A list is returned of class \code{disag_data}. #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{x }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{x }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 5967ce2..7f4f223 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -48,14 +48,11 @@ We will demonstrate an example of the **disaggregation** package using areal dat ```{r} library(SpatialEpi, quietly = TRUE) library(dplyr, quietly = TRUE) -library(sp, quietly = TRUE) # Don't need to read data. So just here while I learn sf. -library(raster, quietly = TRUE) library(disaggregation, quietly = TRUE) library(ggplot2) library(sf) library(terra) - polygons <- sf::st_as_sf(NYleukemia$spatial.polygon) df <- cbind(polygons, NYleukemia$data) From 405cf740d2777141701901db89143b5e95f4e081 Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Fri, 2 Jun 2023 16:43:46 +0100 Subject: [PATCH 11/55] Continue updating vignette --- vignettes/disaggregation.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 7f4f223..1e74593 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -98,7 +98,7 @@ We also create a population raster. This is to allow the model to correctly aggr extracted <- terra::extract(r, terra::vect(df$geometry), fun = sum) n_cells <- terra::extract(r, terra::vect(df$geometry), fun = length) df$pop_per_cell <- df$population/n_cells$lyr.1 -pop_raster <- rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell') +pop_raster <- terra::rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell') ``` From c21747605c2f8e26aec120e63f6a8de9162152da Mon Sep 17 00:00:00 2001 From: Tim Lucas Date: Mon, 5 Jun 2023 14:48:58 +0100 Subject: [PATCH 12/55] Change imports in description to match new backends. --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c6ad584..3001c52 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -18,10 +18,8 @@ Encoding: UTF-8 LazyData: true RoxygenNote: 7.2.3 Imports: - raster, foreach, sp, - sf, parallel, doParallel, splancs, @@ -32,6 +30,8 @@ Imports: ggplot2, cowplot, sparseMVN, + terra, + sf, utils Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: @@ -45,3 +45,4 @@ LinkingTo: RcppEigen SystemRequirements: GNU make VignetteBuilder: knitr +>>>>>>> Change imports in description to match new backends. 
From 0f9cbbc9cdea511742ef4cd530974d12ed512b9e Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 17:03:06 +0100
Subject: [PATCH 13/55] Fix small bug in extract.

---
 R/extract.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/extract.R b/R/extract.R
index 0db30bc..75d1e16 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -175,7 +175,7 @@ extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) {
   if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric'))

   points_raster <- cov_rasters[[1]]
-  points_raster[is.na(terra::values(points_raster))] <- -9999
+  points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999
   raster_pts <- terra::as.points(points_raster)
   coords <- terra::crds(raster_pts)

From 2cbd6f6f76760c04f8ce1473ef6ae6a8ce9472d8 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 17:34:46 +0100
Subject: [PATCH 14/55] Update some docs to use sf.

---
 R/build_mesh.R | 33 +++++++++++++++++++--------------
 1 file changed, 19 insertions(+), 14 deletions(-)

diff --git a/R/build_mesh.R b/R/build_mesh.R
index 7c3a1da..7bc3eec 100644
--- a/R/build_mesh.R
+++ b/R/build_mesh.R
@@ -24,19 +24,25 @@
 #'
 #' @examples
 #' \dontrun{
-#' polygons <- list()
-#' for(i in 1:100) {
-#' row <- ceiling(i/10)
-#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
-#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-#' }
-#'
-#' polys <- do.call(raster::spPolygons, polygons)
-#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-#'
-#' my_mesh <- build_mesh(spdf)
+#' polygons <- list()
+#' for(i in 1:14) {
+#' row <- ceiling(i/10)
+#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
+#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin),
+#'                        c(xmin,ymin), c(xmin, ymax))
+#' }
+#'
+#'
+#'
+#'
+#'
+#' polys <- sf::st_sfc(sf::st_polygon(polygons))
+#' response_df <- data.frame(area_id = 1:100,
+#'                           response = runif(100, min = 0, max = 10))
+#' spdf <- sf::st_sf(polys, response_df)
+#'
+#' my_mesh <- build_mesh(spdf)
 #' }
 #'
 #'
@@ -44,7 +50,6 @@

 build_mesh <- function(shapes, mesh.args = NULL) {

-<<<<<<< HEAD
   stopifnot(inherits(shapes, 'sf'))

   if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list'))
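Aside: in the example above, `sf::st_sfc(sf::st_polygon(polygons))` builds a single polygon whose later rings are treated as holes, so the geometry count will not match the 100-row data frame (and the loop only builds 14 rings). A sketch of the one-feature-per-square pattern that the prepare_data examples adopt later in this series:

```r
library(sf)

polygons <- list()
for (i in 1:100) {
  row <- ceiling(i / 10)
  col <- ifelse(i %% 10 != 0, i %% 10, 10)
  xmin <- 2 * (col - 1); xmax <- 2 * col; ymin <- 2 * (row - 1); ymax <- 2 * row
  # One closed ring per square, wrapped in a list as st_polygon() expects.
  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
                              c(ymax, ymax, ymin, ymin, ymax)))
}

polys <- lapply(polygons, sf::st_polygon)   # one POLYGON per list element
response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
spdf <- sf::st_sf(response_df, geometry = polys)
```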
From 63f063324e2f092ee91a0dbf2ae0bca449d6d3cb Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 17:35:20 +0100
Subject: [PATCH 15/55] And some tidyup.

---
 R/build_mesh.R | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/R/build_mesh.R b/R/build_mesh.R
index 7bc3eec..0c9c16d 100644
--- a/R/build_mesh.R
+++ b/R/build_mesh.R
@@ -13,7 +13,7 @@
 #' Defaults are:
 #' pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0, 8), cut = 0.4, offset = c(1, 15)).
 #'
-#' @param shapes shapefile covering the region under investigation.
+#' @param shapes sf covering the region under investigation.
 #' @param mesh.args list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution},
 #' to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself,
 #' with the parameters having the same meaning as in the INLA functions \emph{inla.convex.hull} and \emph{inla.mesh.2d}.
@@ -31,11 +31,6 @@
 #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
 #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin),
 #'                        c(xmin,ymin), c(xmin, ymax))
-#' }
-#'
-#'
-#'
-#'
 #'
 #' polys <- sf::st_sfc(sf::st_polygon(polygons))
 #' response_df <- data.frame(area_id = 1:100,
 #'                           response = runif(100, min = 0, max = 10))

From 0e53f67985fbf426b3bd52655be7ba72907f8831 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 21:08:45 +0100
Subject: [PATCH 16/55] Remove raster from some print and summary functions.

---
 R/summary.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/summary.R b/R/summary.R
index 2a744e1..59e1dbf 100644
--- a/R/summary.R
+++ b/R/summary.R
@@ -116,7 +116,7 @@ print.disag_model <- function(x, ...){
 summary.disag_data <- function(object, ...) {

   n_polygons <- nrow(object$polygon_shapefile)
-  n_covariates <- raster::nlayers(object$covariate_rasters)
+  n_covariates <- terra::nlyr(object$covariate_rasters)

   cat(paste("They data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n"))

@@ -157,7 +157,7 @@ summary.disag_data <- function(object, ...) {
 print.disag_data <- function(x, ...){

   n_polygons <- nrow(x$polygon_shapefile)
-  n_covariates <- raster::nlayers(x$covariate_rasters)
+  n_covariates <- terra::nlyr(x$covariate_rasters)

   cat(paste("They data contains", n_polygons, "polygons and", nrow(x$covariate_data), "pixels\n"))

From 783ad21d76cea8203c9518792579db493fab131f Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 21:09:22 +0100
Subject: [PATCH 17/55] Remove raster from some print and summary functions.

---
 R/summary.R | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/R/summary.R b/R/summary.R
index 59e1dbf..87ce78c 100644
--- a/R/summary.R
+++ b/R/summary.R
@@ -187,7 +187,7 @@ print.disag_data <- function(x, ...){
 summary.disag_prediction <- function(object, ...) {

-  number_realisations <- raster::nlayers(object$uncertainty_prediction$realisations)
+  number_realisations <- terra::nlyr(object$uncertainty_prediction$realisations)
   max_mean <- max(object$mean_prediction$prediction@data@values)
   min_mean <- min(object$mean_prediction$prediction@data@values)
   max_iqr <- max((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values)

@@ -238,7 +238,7 @@ print.disag_prediction <- function(x, ...){
   if(!is.null(x$mean_prediction$field)) cat('field ')
   if(!is.null(x$mean_prediction$iid)) cat('iid ')
   cat('\n\n')
-  cat(paste0('There are ', raster::nlayers(x$uncertainty_prediction$realisations), ' uncertainty realisations'))
+  cat(paste0('There are ', terra::nlyr(x$uncertainty_prediction$realisations), ' uncertainty realisations'))

   return(invisible(x))
 }
\ No newline at end of file
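Aside: the raster-to-terra replacements in these two summary patches are one-liners; a small sketch with toy rasters:

```r
library(terra)

r1 <- terra::rast(ncols = 10, nrows = 10)
terra::values(r1) <- rnorm(terra::ncell(r1))
r2 <- terra::rast(ncols = 10, nrows = 10)
terra::values(r2) <- rnorm(terra::ncell(r2))

cov_stack <- c(r1, r2)     # c() stacks SpatRasters, like raster::stack()
terra::nlyr(cov_stack)     # 2; the terra equivalent of raster::nlayers()
```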
From 66d6bb80576c30a3e8614aa28d5b6289ce496e80 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 21:29:24 +0100
Subject: [PATCH 18/55] First pass at removing raster from predict. Currently
 makes massive numbers.

---
 R/predict.R | 55 ++++++++++++++++++++++++++++-----------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/R/predict.R b/R/predict.R
index 4e9c759..5033bfd 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -1,4 +1,3 @@
-<<<<<<< HEAD
 #' Predict mean and uncertainty from the disaggregation model result
 #'
 #' \emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and
@@ -213,7 +212,7 @@ getCoords <- function(data) {
   raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE)
   coords <- raster_pts@coords

-  return(coords)
+  return(coords)
 }

 # Get Amatrix for field
@@ -236,13 +235,12 @@ getAmatrix <- function(mesh, coords) {

 }

-
 # Helper to check and sort out new raster data.
 check_newdata <- function(newdata, model_output){
   if(is.null(newdata)) return(NULL)
   if(!is.null(newdata)){
-    if(!(inherits(newdata, c('RasterStack', 'RasterBrick', 'RasterLayer')))){
-      stop('newdata should be NULL or a RasterStack or a RasterBrick')
+    if(!(inherits(newdata, c('SpatRaster')))){
+      stop('newdata should be NULL or a SpatRaster')
     }
     if(!all(names(model_output$data$covariate_rasters) %in% names(newdata))){
       stop('All covariates used to fit the model must be in newdata')
@@ -288,17 +286,18 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) {
   }

   if(predict_iid) {
-    tmp_shp <- model_output$data$polygon_shapefile
-    tmp_shp@data <- data.frame(area_id = factor(model_output$data$polygon_data$area_id))
-    shapefile_raster <- raster::rasterize(tmp_shp,
+    tmp_shp <- model_output$data$x
+    tmp_shp <- dplyr::bind_cols(tmp_shp,
+                                area_id =
+                                  factor(model_output$data$polygon_data$area_id))
+    shapefile_raster <- terra::rasterize(tmp_shp,
                                          model_output$data$covariate_rasters,
                                          field = 'area_id')
-    shapefile_ids <- raster::unique(shapefile_raster)
+    shapefile_ids <- terra::unique(shapefile_raster)
     iid_objects <- list(shapefile_raster = shapefile_raster, shapefile_ids = shapefile_ids)
   } else {
     iid_objects <- NULL
   }
-
   return(list(covariates = covariates,
               field_objects = field_objects,
               iid_objects = iid_objects))
@@ -309,12 +308,12 @@ predict_single_raster <- function(model_parameters, objects, link_function) {

   # Create linear predictor
   covs_by_betas <- list()
-  for(i in seq_len(raster::nlayers(objects$covariates))){
+  for(i in seq_len(terra::nlyr(objects$covariates))){
     covs_by_betas[[i]] <- model_parameters$slope[i] * objects$covariates[[i]]
   }

-  cov_by_betas <- raster::stack(covs_by_betas)
-  if(raster::nlayers(cov_by_betas) > 1){
+  cov_by_betas <- terra::rast(covs_by_betas)
+  if(terra::nlyr(cov_by_betas) > 1){
     sum_cov_by_betas <- sum(cov_by_betas)
   } else {
     # With only 1 covariate, there's nothing to sum. Do this to avoid warnings.
@@ -327,7 +326,9 @@ predict_single_raster <- function(model_parameters, objects, link_function) {
   if(!is.null(objects$field_objects)){
     # Extract field values
     field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1]
-    field_ras <- raster::rasterFromXYZ(cbind(objects$field_objects$coords, field))
+    field_ras <- terra::rast(cbind(objects$field_objects$coords, field),
+                             type = 'xyz',
+                             crs = crs(linear_pred))
     linear_pred <- linear_pred + field_ras
   } else {
     field_ras <- NULL
@@ -336,28 +337,34 @@ predict_single_raster <- function(model_parameters, objects, link_function) {
   if(!is.null(objects$iid_objects)) {
     iid_ras <- objects$iid_objects$shapefile_raster
     iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau))
+    # todo
     for(i in seq_along(model_parameters$iideffect)) {
-      iid_ras@data@values[which(objects$iid_objects$shapefile_raster@data@values == objects$iid_objects$shapefile_ids[i])] <-
+      targetvals <- terra::values(objects$iid_objects$shapefile_raster,
+                                  dataframe = FALSE, mat = FALSE)
+      whichvals <- which(targetvals == objects$iid_objects$shapefile_ids[1, i])
+      values(iid_ras)[whichvals] <-
         model_parameters$iideffect[i]
-      na_pixels <- which(is.na(iid_ras@data@values))
+      na_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE)))
       na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd)
-      iid_ras@data@values[na_pixels] <- na_iid_values
+      values(iid_ras)[na_pixels] <- na_iid_values
     }
-    if(raster::extent(iid_ras) != raster::extent(linear_pred)) {
+    if(terra::ext(iid_ras) != terra::ext(linear_pred)) {
       # Extent of prediction space is different to the original model. Keep any overlapping iid values but predict to the new extent
       raster_new_extent <- linear_pred
-      raster_new_extent@data@values <- NA
-      iid_ras <- raster::merge(iid_ras, raster_new_extent, ext = raster::extent(raster_new_extent))
+      values(raster_new_extent) <- NA
+      # iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent))
+      # Not sure why we no longer need the ext argument
+      iid_ras <- terra::merge(iid_ras, raster_new_extent)
-      missing_pixels <- which(is.na(iid_ras@data@values))
+      missing_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE)))
       missing_iid_values <- stats::rnorm(length(missing_pixels), 0, iideffect_sd)
-      iid_ras@data@values[missing_pixels] <- missing_iid_values
+      values(iid_ras)[missing_pixels] <- missing_iid_values
     }
     linear_pred <- linear_pred + iid_ras
   } else {
     iid_ras <- NULL
   }

-  if(link_function == 'logit') {
+  if(link_function == 'logit') {
     prediction_ras <- 1 / (1 + exp(-1 * linear_pred))
   } else if(link_function == 'log') {
     prediction_ras <- exp(linear_pred)
@@ -365,7 +372,7 @@ predict_single_raster <- function(model_parameters, objects, link_function) {
     prediction_ras <- linear_pred
   }

-  predictions <- list(prediction = prediction_ras,
+  predictions <- list(prediction = prediction_ras,
                       field = field_ras,
                       iid = iid_ras,
                       covariates = cov_contribution)
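Aside: a sketch of the get/set pattern the predict changes above rely on. With terra, cell values are read with values(x, mat = FALSE) and written with values(x) <-, replacing the old @data@values slot access.

```r
library(terra)

iid_ras <- terra::rast(ncols = 10, nrows = 10)
terra::values(iid_ras) <- c(rep(NA, 50), rnorm(50))

vals <- terra::values(iid_ras, mat = FALSE)   # plain numeric vector
na_pixels <- which(is.na(vals))

# Fill NA pixels with draws from the iid effect distribution, as in
# predict_single_raster above (iideffect_sd = 1 here for illustration).
values(iid_ras)[na_pixels] <- stats::rnorm(length(na_pixels), 0, 1)
```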
From e334b3dec38b3807e4e49c311875d581010ae0 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 21:30:07 +0100
Subject: [PATCH 19/55] Line endings.

---
 R/build_mesh.R | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/R/build_mesh.R b/R/build_mesh.R
index 0c9c16d..0485ff2 100644
--- a/R/build_mesh.R
+++ b/R/build_mesh.R
@@ -31,6 +31,7 @@
 #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
 #' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin),
 #'                        c(xmin,ymin), c(xmin, ymax))
+#' }
 #'
 #' polys <- sf::st_sfc(sf::st_polygon(polygons))
 #' response_df <- data.frame(area_id = 1:100,
@@ -40,7 +41,6 @@
 #'                           response = runif(100, min = 0, max = 10))
 #' spdf <- sf::st_sf(polys, response_df)
 #'
 #' my_mesh <- build_mesh(spdf)
 #' }
 #'
-#'
 #' @export

 build_mesh <- function(shapes, mesh.args = NULL) {
@@ -63,12 +63,10 @@ build_mesh <- function(shapes, mesh.args = NULL) {

   pars[names(mesh.args)] <- mesh.args

-  outline <- sf::st_union(sf::st_as_sf(shapes))
-  coords <- sf::st_coordinates(outline)
-  #not sure which is needed
-  # outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes)))
-  # coords <- sf::st_coordinates(outline)[, c('X', 'Y')]
+  #outline <- maptools::unionSpatialPolygons(shapes_old, IDs = rep(1, length(shapes_old)))
+  outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes)))
+  coords <- sf::st_coordinates(outline)[, c('X', 'Y')]

   outline.hull <- INLA::inla.nonconvex.hull(coords,
                                             convex = pars$convex,
@@ -79,5 +79,6 @@ build_mesh <- function(shapes, mesh.args = NULL) {
                            cut = pars$cut,
                            offset = pars$offset)

+
   return(mesh)
 }
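Aside: a sketch of the outline construction build_mesh settles on here, assuming `spdf` is an sf polygon object and that INLA is installed. The unioned convex hulls feed INLA's non-convex hull with the same defaults as `pars` above.

```r
library(sf)

# Union the per-polygon convex hulls into one outline, then take its
# coordinates as the input points for the INLA hull.
outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(spdf)))
coords <- sf::st_coordinates(outline)[, c('X', 'Y')]

outline.hull <- INLA::inla.nonconvex.hull(coords,
                                          convex = -0.01,
                                          concave = -0.5,
                                          resolution = 300)
```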
From cb40b8161d216fbd114306b5b4011d74fa556639 Mon Sep 17 00:00:00 2001
From: Tim Lucas
Date: Mon, 5 Jun 2023 21:30:49 +0100
Subject: [PATCH 20/55] line endings.

---
 R/extract.R   | 384 +++++++++++++++++++++++------------------------
 R/fit_model.R |   7 +-
 2 files changed, 196 insertions(+), 195 deletions(-)

diff --git a/R/extract.R b/R/extract.R
index 75d1e16..808f0cb 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -1,192 +1,192 @@
-#' Parallel extraction of raster stack by shape file.
-#'
-#' Parallelisation is performed across rasters, not shapes.
-#' So this function is only useful if you are extracting
-#' data from many raster layers.
-#' As the overhead for parallel computation in windows is high
-#' it only makes sense to parallelise in this way.
-#'
-#'
-#' @param raster A RasterBrick or RasterStack object.
-#' @param shape A SpatialPolygons object.
-#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned.
-#' @param id Name of column in shape object to be used to bind an ID column to output.
-#' @param ... Other arguments to raster::extract.
-#'
-#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack
-#'
-#' @importFrom foreach %dopar%
-#' @importFrom parallel stopCluster
-#' @importFrom parallel makeCluster
-#' @importFrom doParallel registerDoParallel
-#'
-#' @export
-#' @examples
-#' \dontrun{
-#' polygons <- list()
-#' for(i in 1:100) {
-#' row <- ceiling(i/10)
-#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
-#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-#' }
-#'
-#' polys <- do.call(raster::spPolygons, polygons)
-#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-#'
-#' r <- raster::raster(ncol=20, nrow=20)
-#' r <- raster::setExtent(r, raster::extent(spdf))
-#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
-#' r2 <- raster::raster(ncol=20, nrow=20)
-#' r2 <- raster::setExtent(r2, raster::extent(spdf))
-#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
-#' cov_rasters <- raster::stack(r, r2)
-#'
-#' cl <- parallel::makeCluster(2)
-#' doParallel::registerDoParallel(cl)
-#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id')
-#' parallel::stopCluster(cl)
-#' foreach::registerDoSEQ()
-#' }
-
-parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
-
-  shape[, id] <- as.character(shape[, id, drop = TRUE])
-
-  # Run extract in parallel.
-  values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...)
-
-  if(!is.null(fun)){
-
-    # If a summary function was given, just bind everything together and add ID column
-    df <- values
-    if(inherits(shape, 'df')){
-      df <- cbind(ID = as.data.frame(shape)[, id], df)
-    } else{
-      df <- cbind(ID = names(shape), df)
-      id <- 'id'
-    }
-    names(df) <- c(id, names(raster))
-    return(df)
-
-  } else {
-    df <- values[, 2:(ncol(values) - 1)]
-    df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df)
-    names(df) <- c(id, 'cellid', names(raster))
-
-    return(df)
-  }
-
-}
-
-
-#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame
-#'
-#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
-#' not exist), this column will contain NAs.
-#'
-#' @param shape A sf object containing response data.
-#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'.
-#' @param response_var Name of column in shape object with the response data. Default 'response'.
-#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.
-#'
-#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
-#' not exist), this column will contain NAs.
-#'
-#' @export
-#' @examples {
-#' polygons <- list()
-#' for(i in 1:100) {
-#' row <- ceiling(i/10)
-#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
-#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-#' }
-#'
-#' polys <- do.call(raster::spPolygons, polygons)
-#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-#'
-#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response')
-#' }
-#'
-#'
-
-getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) {
-
-  if(is.null(sample_size_var)) {
-    polygon_df <- shape[, c(id_var, response_var), drop = TRUE]
-    polygon_df$N <- rep(NA, nrow(polygon_df))
-  } else {
-    polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE]
-  }
-
-  names(polygon_df) <- c('area_id', 'response', 'N')
-
-  return(polygon_df)
-}
-
-
-#' Get a RasterStack of covariates from a folder containing .tif files
-#'
-#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter.
-#'
-#' @param directory Filepath to the directory containing the rasters.
-#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif .
-#' @param shape An object with an extent that the rasters will be cropped to.
-#'
-#' @return A RasterStack of the raster files in the directory
-#'
-#' @export
-#' @examples
-#' \dontrun{
-#'  getCovariateRasters('/home/rasters', '.tif$', shape)
-#' }
-#'
-
-getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) {
-
-  stopifnot(dir.exists(directory))
-
-  covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE)
-  stopifnot(length(covariate_files) != 0)
-
-  covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x))
-  covariate_stack <- raster::stack(covariate_rasters)
-
-  covariate_stack <- raster::crop(covariate_stack, shape)
-
-  return(covariate_stack)
-}
-
-# Extract coordinates from raster to use constructing the INLA mesh
-#
-# @param cov_rasters RasterStack of the covariate rasters.
-# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters.
-#
-# @return A matrix containing the coordinates used to make the mesh
-
-extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) {
-
-  stopifnot(inherits(cov_rasters, 'SpatRaster'))
-  if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric'))
-
-  points_raster <- cov_rasters[[1]]
-  points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999
-  raster_pts <- terra::as.points(points_raster)
-  coords <- terra::crds(raster_pts)
-
-  # If specified, only retain certain pixel ids
-  if(!is.null(selectIds)) {
-    coords <- coords[selectIds, ]
-  }
-
-  return(coords)
-
-}
-
-
-
+#' Parallel extraction of raster stack by shape file.
+#'
+#' Parallelisation is performed across rasters, not shapes.
+#' So this function is only useful if you are extracting
+#' data from many raster layers.
+#' As the overhead for parallel computation in windows is high
+#' it only makes sense to parallelise in this way.
+#'
+#'
+#' @param raster A RasterBrick or RasterStack object.
+#' @param shape A SpatialPolygons object.
+#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned.
+#' @param id Name of column in shape object to be used to bind an ID column to output.
+#' @param ... Other arguments to raster::extract.
+#'
+#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack
+#'
+#' @importFrom foreach %dopar%
+#' @importFrom parallel stopCluster
+#' @importFrom parallel makeCluster
+#' @importFrom doParallel registerDoParallel
+#'
+#' @export
+#' @examples
+#' \dontrun{
+#' polygons <- list()
+#' for(i in 1:100) {
+#' row <- ceiling(i/10)
+#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
+#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
+#' }
+#'
+#' polys <- do.call(raster::spPolygons, polygons)
+#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+#'
+#' r <- raster::raster(ncol=20, nrow=20)
+#' r <- raster::setExtent(r, raster::extent(spdf))
+#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
+#' r2 <- raster::raster(ncol=20, nrow=20)
+#' r2 <- raster::setExtent(r2, raster::extent(spdf))
+#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
+#' cov_rasters <- raster::stack(r, r2)
+#'
+#' cl <- parallel::makeCluster(2)
+#' doParallel::registerDoParallel(cl)
+#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id')
+#' parallel::stopCluster(cl)
+#' foreach::registerDoSEQ()
+#' }
+
+parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
+
+  shape[, id] <- as.character(shape[, id, drop = TRUE])
+
+  # Run extract in parallel.
+  values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...)
+
+  if(!is.null(fun)){
+
+    # If a summary function was given, just bind everything together and add ID column
+    df <- values
+    if(inherits(shape, 'df')){
+      df <- cbind(ID = as.data.frame(shape)[, id], df)
+    } else{
+      df <- cbind(ID = names(shape), df)
+      id <- 'id'
+    }
+    names(df) <- c(id, names(raster))
+    return(df)
+
+  } else {
+    df <- values[, 2:(ncol(values) - 1)]
+    df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df)
+    names(df) <- c(id, 'cellid', names(raster))
+
+    return(df)
+  }
+
+}
+
+
+#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame
+#'
+#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
+#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
+#' not exist), this column will contain NAs.
+#'
+#' @param shape A sf object containing response data.
+#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'.
+#' @param response_var Name of column in shape object with the response data. Default 'response'.
+#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.
+#'
+#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
+#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
+#' not exist), this column will contain NAs.
+#'
+#' @export
+#' @examples {
+#' polygons <- list()
+#' for(i in 1:100) {
+#' row <- ceiling(i/10)
+#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
+#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
+#' }
+#'
+#' polys <- do.call(raster::spPolygons, polygons)
+#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+#'
+#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response')
+#' }
+#'
+#'
+
+getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) {
+
+  if(is.null(sample_size_var)) {
+    polygon_df <- shape[, c(id_var, response_var), drop = TRUE]
+    polygon_df$N <- rep(NA, nrow(polygon_df))
+  } else {
+    polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE]
+  }
+
+  names(polygon_df) <- c('area_id', 'response', 'N')
+
+  return(polygon_df)
+}
+
+
+#' Get a RasterStack of covariates from a folder containing .tif files
+#'
+#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter.
+#'
+#' @param directory Filepath to the directory containing the rasters.
+#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif .
+#' @param shape An object with an extent that the rasters will be cropped to.
+#'
+#' @return A RasterStack of the raster files in the directory
+#'
+#' @export
+#' @examples
+#' \dontrun{
+#'  getCovariateRasters('/home/rasters', '.tif$', shape)
+#' }
+#'
+
+getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) {
+
+  stopifnot(dir.exists(directory))
+
+  covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE)
+  stopifnot(length(covariate_files) != 0)
+
+  covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x))
+  covariate_stack <- raster::stack(covariate_rasters)
+
+  covariate_stack <- raster::crop(covariate_stack, shape)
+
+  return(covariate_stack)
+}
+
+# Extract coordinates from raster to use constructing the INLA mesh
+#
+# @param cov_rasters RasterStack of the covariate rasters.
+# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters.
+#
+# @return A matrix containing the coordinates used to make the mesh
+
+extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) {
+
+  stopifnot(inherits(cov_rasters, 'SpatRaster'))
+  if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric'))
+
+  points_raster <- cov_rasters[[1]]
+  points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999
+  raster_pts <- terra::as.points(points_raster)
+  coords <- terra::crds(raster_pts)
+
+  # If specified, only retain certain pixel ids
+  if(!is.null(selectIds)) {
+    coords <- coords[selectIds, ]
+  }
+
+  return(coords)
+
+}
+
+
+

diff --git a/R/fit_model.R b/R/fit_model.R
index 28df455..fe99c35 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -1,3 +1,4 @@
+<<<<<<< HEAD
 #' Fit the disaggregation model
 #'
 #' \emph{fit_model} function takes a \emph{disag_data} object created by
@@ -358,9 +359,9 @@ make_model_object <- function(data,
   }

   # Construct sensible default field hyperpriors
-  limits <- sp::bbox(data$polygon_shapefile)
-  hypontenuse <- sqrt((limits[1,2] - limits[1,1])^2 + (limits[2,2] - limits[2,1])^2)
-  prior_rho <- hypontenuse/3
+  limits <- sf::st_bbox(data$x)
+  hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2)
+  prior_rho <- hypotenuse/3

   prior_sigma <- sd(data$polygon_data$response/mean(data$polygon_data$response))
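Aside: the fit_model change just above in context; a sketch of how the default spatial hyperpriors are built from an sf bounding box, assuming `data` is a disag_data list.

```r
# Default range prior: a third of the diagonal of the region's bounding box.
limits <- sf::st_bbox(data$x)
hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2)
prior_rho <- hypotenuse / 3

# Default sd prior from the scaled polygon responses.
prior_sigma <- sd(data$polygon_data$response / mean(data$polygon_data$response))
```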
From be450ede41948c817355be179c668499b062d4f7 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Wed, 13 Sep 2023 14:42:14 +0100
Subject: [PATCH 21/55] migrate prepare_data to terra/sf

---
 R/extract.R      | 301 +++++++++++++++++------------------------------
 R/matching.R     | 106 ++++++++---------
 R/prepare_data.R | 183 ++++++++++++++--------------
 3 files changed, 256 insertions(+), 334 deletions(-)

diff --git a/R/extract.R b/R/extract.R
index 808f0cb..dcf704f 100644
--- a/R/extract.R
+++ b/R/extract.R
@@ -1,192 +1,109 @@
-#' Parallel extraction of raster stack by shape file.
-#'
-#' Parallelisation is performed across rasters, not shapes.
-#' So this function is only useful if you are extracting
-#' data from many raster layers.
-#' As the overhead for parallel computation in windows is high
-#' it only makes sense to parallelise in this way.
-#'
-#'
-#' @param raster A RasterBrick or RasterStack object.
-#' @param shape A SpatialPolygons object.
-#' @param fun The function used to aggregate the pixel data. If NULL, raw pixel data is returned.
-#' @param id Name of column in shape object to be used to bind an ID column to output.
-#' @param ... Other arguments to raster::extract.
-#'
-#' @return A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack
-#'
-#' @importFrom foreach %dopar%
-#' @importFrom parallel stopCluster
-#' @importFrom parallel makeCluster
-#' @importFrom doParallel registerDoParallel
-#'
-#' @export
-#' @examples
-#' \dontrun{
-#' polygons <- list()
-#' for(i in 1:100) {
-#' row <- ceiling(i/10)
-#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
-#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-#' }
-#'
-#' polys <- do.call(raster::spPolygons, polygons)
-#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-#'
-#' r <- raster::raster(ncol=20, nrow=20)
-#' r <- raster::setExtent(r, raster::extent(spdf))
-#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
-#' r2 <- raster::raster(ncol=20, nrow=20)
-#' r2 <- raster::setExtent(r2, raster::extent(spdf))
-#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
-#' cov_rasters <- raster::stack(r, r2)
-#'
-#' cl <- parallel::makeCluster(2)
-#' doParallel::registerDoParallel(cl)
-#' result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id')
-#' parallel::stopCluster(cl)
-#' foreach::registerDoSEQ()
-#' }
-
-parallelExtract <- function(raster, shape, fun = mean, id = 'OBJECTID', ...){
-
-  shape[, id] <- as.character(shape[, id, drop = TRUE])
-
-  # Run extract in parallel.
-  values <- terra::extract(raster, terra::vect(shape), fun = fun, na.rm = TRUE, cells = TRUE, ...)
-
-  if(!is.null(fun)){
-
-    # If a summary function was given, just bind everything together and add ID column
-    df <- values
-    if(inherits(shape, 'df')){
-      df <- cbind(ID = as.data.frame(shape)[, id], df)
-    } else{
-      df <- cbind(ID = names(shape), df)
-      id <- 'id'
-    }
-    names(df) <- c(id, names(raster))
-    return(df)
-
-  } else {
-    df <- values[, 2:(ncol(values) - 1)]
-    df <- cbind(as.data.frame(shape)[values$ID, id], values$cell, df)
-    names(df) <- c(id, 'cellid', names(raster))
-
-    return(df)
-  }
-
-}
-
-
-#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame
-#'
-#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
-#' not exist), this column will contain NAs.
-#'
-#' @param shape A sf object containing response data.
-#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'.
-#' @param response_var Name of column in shape object with the response data. Default 'response'.
-#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.
-#'
-#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
-#' not exist), this column will contain NAs.
-#'
-#' @export
-#' @examples {
-#' polygons <- list()
-#' for(i in 1:100) {
-#' row <- ceiling(i/10)
-#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
-#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-#' }
-#'
-#' polys <- do.call(raster::spPolygons, polygons)
-#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-#'
-#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response')
-#' }
-#'
-#'
-
-getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) {
-
-  if(is.null(sample_size_var)) {
-    polygon_df <- shape[, c(id_var, response_var), drop = TRUE]
-    polygon_df$N <- rep(NA, nrow(polygon_df))
-  } else {
-    polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE]
-  }
-
-  names(polygon_df) <- c('area_id', 'response', 'N')
-
-  return(polygon_df)
-}
-
-
-#' Get a RasterStack of covariates from a folder containing .tif files
-#'
-#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter.
-#'
-#' @param directory Filepath to the directory containing the rasters.
-#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif .
-#' @param shape An object with an extent that the rasters will be cropped to.
-#'
-#' @return A RasterStack of the raster files in the directory
-#'
-#' @export
-#' @examples
-#' \dontrun{
-#'  getCovariateRasters('/home/rasters', '.tif$', shape)
-#' }
-#'
-
-getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) {
-
-  stopifnot(dir.exists(directory))
-
-  covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE)
-  stopifnot(length(covariate_files) != 0)
-
-  covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x))
-  covariate_stack <- raster::stack(covariate_rasters)
-
-  covariate_stack <- raster::crop(covariate_stack, shape)
-
-  return(covariate_stack)
-}
-
-# Extract coordinates from raster to use constructing the INLA mesh
-#
-# @param cov_rasters RasterStack of the covariate rasters.
-# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters.
-#
-# @return A matrix containing the coordinates used to make the mesh
-
-extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) {
-
-  stopifnot(inherits(cov_rasters, 'SpatRaster'))
-  if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric'))
-
-  points_raster <- cov_rasters[[1]]
-  points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999
-  raster_pts <- terra::as.points(points_raster)
-  coords <- terra::crds(raster_pts)
-
-  # If specified, only retain certain pixel ids
-  if(!is.null(selectIds)) {
-    coords <- coords[selectIds, ]
-  }
-
-  return(coords)
-
-}
-
-
-
+#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame
+#'
+#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
+#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
+#' not exist), this column will contain NAs.
+#'
+#' @param shape A sf object containing response data.
+#' @param id_var Name of column in shape object with the polygon id. Default 'area_id'.
+#' @param response_var Name of column in shape object with the response data. Default 'response'.
+#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.
+#'
+#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
+#' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does
+#' not exist), this column will contain NAs.
+#'
+#' @export
+#' @examples {
+#' polygons <- list()
+#' for(i in 1:100) {
+#' row <- ceiling(i/10)
+#' col <- ifelse(i %% 10 != 0, i %% 10, 10)
+#' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
+#' }
+#'
+#' polys <- do.call(raster::spPolygons, polygons)
+#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+#'
+#' getPolygonData(spdf, id_var = 'area_id', response_var = 'response')
+#' }
+#'
+#'
+
+getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', sample_size_var = NULL) {
+
+  if(is.null(sample_size_var)) {
+    polygon_df <- shape[, c(id_var, response_var), drop = TRUE]
+    polygon_df$N <- rep(NA, nrow(polygon_df))
+  } else {
+    polygon_df <- shape[, c(id_var, response_var, sample_size_var), drop = TRUE]
+  }
+
+  names(polygon_df) <- c('area_id', 'response', 'N')
+
+  return(polygon_df)
+}
+
+
+#' Get a RasterStack of covariates from a folder containing .tif files
+#'
+#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter.
+#'
+#' @param directory Filepath to the directory containing the rasters.
+#' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif .
+#' @param shape An object with an extent that the rasters will be cropped to.
+#'
+#' @return A RasterStack of the raster files in the directory
+#'
+#' @export
+#' @examples
+#' \dontrun{
+#'  getCovariateRasters('/home/rasters', '.tif$', shape)
+#' }
+#'
+
+getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) {
+
+  stopifnot(dir.exists(directory))
+
+  covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE)
+  stopifnot(length(covariate_files) != 0)
+
+  covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x))
+  covariate_stack <- raster::stack(covariate_rasters)
+
+  covariate_stack <- raster::crop(covariate_stack, shape)
+
+  return(covariate_stack)
+}
+
+# Extract coordinates from raster to use constructing the INLA mesh
+#
+# @param cov_rasters RasterStack of the covariate rasters.
+# @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters.
+#
+# @return A matrix containing the coordinates used to make the mesh
+
+extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) {
+
+  stopifnot(inherits(cov_rasters, 'SpatRaster'))
+  if(!is.null(selectIds)) stopifnot(inherits(selectIds, 'numeric'))
+
+  points_raster <- cov_rasters[[1]]
+  points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999
+  raster_pts <- terra::as.points(points_raster)
+  coords <- terra::crds(raster_pts)
+
+  # If specified, only retain certain pixel ids
+  if(!is.null(selectIds)) {
+    coords <- coords[selectIds, ]
+  }
+
+  return(coords)
+
+}
+
+
+

diff --git a/R/matching.R b/R/matching.R
index 18684c3..615b5fc 100644
--- a/R/matching.R
+++ b/R/matching.R
@@ -1,53 +1,53 @@
-#' Function to match pixels to their corresponding polygon
-#'
-#' From the covariate data and polygon data, the function matches the polygon id between the two to find
-#' which pixels from the covariate data are contained in each of the polygons.
-#'
-#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate,
-#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned
-#' by \code{getPolygonData} function).
-#'
-#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in
-#' covariate data that corresponds to that polygon, the second column is the index of the last row in
-#' covariate data that corresponds to that polygon.
-#'
-#' @param covariates data.frame with each covariate as a column an and id column.
-#' @param polygon_data data.frame with polygon id and response data.
-#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id.
-#'
-#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in
-#' covariate data that corresponds to that polygon, the second column is the index of the last row in
-#' covariate data that corresponds to that polygon.
-#'
-#' @name getStartendindex
-#'
-#' @examples {
-#'  covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
-#'  response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
-#'  getStartendindex(covs, response, 'area_id')
-#' }
-#'
-#'
-#' @export
-
-getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') {
-
-  stopifnot(ncol(polygon_data) == 3)
-  stopifnot(ncol(covariates) >= 2)
-  stopifnot(nrow(covariates) > nrow(polygon_data))
-  stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data))
-
-  # Create startendindex matrix
-  # This defines which pixels in the matrix are associated with which polygon.
-  startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x)))
-
-  startendindex <- do.call(rbind, startendindex)
-
-  whichindices <- match(polygon_data$area_id, unique(covariates[, id_var]))
-
-  # c++ is zero indexed.
-  startendindex <- startendindex[whichindices, ] - 1L
-
-  return(startendindex)
-}
-
+#' Function to match pixels to their corresponding polygon
+#'
+#' From the covariate data and polygon data, the function matches the polygon id between the two to find
+#' which pixels from the covariate data are contained in each of the polygons.
+#'
+#' Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate,
+#' and another data.frame containing polygon data with a polygon id, response and sample size column (as returned
+#' by \code{getPolygonData} function).
+#'
+#' Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in
+#' covariate data that corresponds to that polygon, the second column is the index of the last row in
+#' covariate data that corresponds to that polygon.
+#'
+#' @param covariates data.frame with each covariate as a column an and id column.
+#' @param polygon_data data.frame with polygon id and response data.
+#' @param id_var string with the name of the column in the covariate data.frame containing the polygon id.
+#'
+#' @return A matrix with two columns and one row for each polygon. The first column is the index of the first row in
+#' covariate data that corresponds to that polygon, the second column is the index of the last row in
+#' covariate data that corresponds to that polygon.
+#'
+#' @name getStartendindex
+#'
+#' @examples {
+#'  covs <- data.frame(area_id = c(1, 1, 1, 2, 2, 3, 3, 3, 3), response = c(3, 9, 5, 2, 3, 6, 7, 3, 5))
+#'  response <- data.frame(area_id = c(1, 2, 3), response = c(4, 7, 2), N = c(NA, NA, NA))
+#'  getStartendindex(covs, response, 'area_id')
+#' }
+#'
+#'
+#' @export
+
+getStartendindex <- function(covariates, polygon_data, id_var = 'area_id') {
+
+  stopifnot(ncol(polygon_data) == 3)
+  stopifnot(ncol(covariates) >= 2)
+  stopifnot(nrow(covariates) > nrow(polygon_data))
+  stopifnot(sum(polygon_data$area_id %in% covariates[, id_var]) == nrow(polygon_data))
+
+  # Create startendindex matrix
+  # This defines which pixels in the matrix are associated with which polygon.
+  startendindex <- lapply(unique(covariates[, id_var]), function(x) range(which(covariates[, id_var] == x)))
+
+  startendindex <- do.call(rbind, startendindex)
+
+  whichindices <- terra::match(polygon_data$area_id, unique(covariates[, id_var]))
+
+  # c++ is zero indexed.
+  startendindex <- startendindex[whichindices, ] - 1L
+
+  return(startendindex)
+}
+

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 94b98ff..3c40d1e 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -1,31 +1,31 @@
 #' Prepare data for disaggregation modelling
-#'
-#' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model.
+#'
+#' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model.
 #' Designed to be used in the \emph{disaggregation::fit_model} function.
-#'
-#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates.
-#'
-#' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons
-#' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores
+#'
+#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates.
+#'
+#' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons
+#' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores
 #' to use for covariate extraction. This can be the number of covariates used in the model.
-#'
-#' The aggregation raster defines how the pixels within each polygon are aggregated.
+#'
+#' The aggregation raster defines how the pixels within each polygon are aggregated.
 #' The disaggregation model performs a weighted sum of the pixel prediction, weighted by the pixel values in the aggregation raster.
-#' For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases
-#' (rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions
+#' For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases
+#' (rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions
 #' are aggregated to polygon level by summing the pixel values.
-#'
-#' Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field
+#'
+#' Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field
 #' (\emph{getStartendindex} function).
-#'
-#' The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field
+#'
+#' The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field
 #' (\emph{build_mesh} function).
-#'
-#' The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the
-#' \emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE
-#' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
+#'
+#' The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the
+#' \emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE
+#' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
 #' and sets covariate NAs pixels to the median value for the that covariate.
-#'
+#'
 #' @param x sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).
 #' @param covariate_rasters RasterStack of covariate rasters to be used in the model.
 #' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.
@@ -37,11 +37,11 @@
 #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE.
 #' @param ncores Number of cores used to perform covariate extraction.
 #'
-#' @return A list is returned of class \code{disag_data}.
-#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
+#' @return A list is returned of class \code{disag_data}.
+#' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
 #' The list of class \code{disag_data} contains:
-#' \item{x }{The sf object used as an input.}
-#' \item{covariate_rasters }{The SpatRaster used as an input.}
+#' \item{x }{The sf object used as an input.}
+#' \item{covariate_rasters }{The SpatRaster used as an input.}
 #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.}
 #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.}
 #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.}
@@ -53,43 +53,46 @@
 #' @import utils
 #' @name prepare_data
 #'
-#' @examples
+#' @examples
 #' \donttest{
-#' polygons <- list()
-#' for(i in 1:100) {
+#' polygons <- list()
+#' for(i in 1:100) {
 #' row <- ceiling(i/10)
 #' col <- ifelse(i %% 10 != 0, i %% 10, 10)
 #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-#' }
-#'
-#' polys <- do.call(raster::spPolygons, polygons)
-#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-#'
-#' r <- raster::raster(ncol=20, nrow=20)
-#' r <- raster::setExtent(r, raster::extent(spdf))
-#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
-#' r2 <- raster::raster(ncol=20, nrow=20)
-#' r2 <- raster::setExtent(r2, raster::extent(spdf))
-#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
-#' cov_rasters <- raster::stack(r, r2)
-#'
-#' test_data <- prepare_data(x = spdf,
-#'                           covariate_rasters = cov_rasters)
-#' }
-#'
+#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+#'                             c(ymax, ymax, ymin, ymin, ymax)))
+#' }
+#'
+#' polys <- lapply(polygons,sf::st_polygon)
+#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+#' spdf <- sf::st_sf(response_df,geometry=polys)
+#'
+#' plot(spdf)
+#'
+#' r <- terra::rast(nrow=20,ncol=20)
+#' terra::ext(r) <- terra::ext(spdf)
+#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3))
+#'
+#' r2 <- terra::rast(nrow=20,ncol=20)
+#' terra::ext(r2) <- terra::ext(spdf)
+#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
+#' cov_rasters <- c(r, r2)
+#' test_data <- prepare_data(x = spdf,
+#'                           covariate_rasters = cov_rasters)
+#' }
+#'
 #' @export
-#'
-#'
+#'
+#'

-prepare_data <- function(x,
+prepare_data <- function(x,
                          covariate_rasters,
                          aggregation_raster = NULL,
-                         id_var = 'area_id',
-                         response_var = 'response',
+                         id_var = 'area_id',
+                         response_var = 'response',
                          sample_size_var = NULL,
-                         mesh.args = NULL,
+                         mesh.args = NULL,
                          na.action = FALSE,
                          makeMesh = TRUE,
                          ncores = 2) {
@@ -100,7 +103,7 @@ prepare_data <- function(x,
   stopifnot(inherits(id_var, 'character'))
   stopifnot(inherits(response_var, 'character'))
   if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list'))
-
+
   # Check for NAs in response data
   na_rows <- is.na(x[, response_var, drop = TRUE])
   if(sum(na_rows) != 0) {
@@ -110,9 +113,9 @@ prepare_data <- function(x,
       stop('There are NAs in the response data. Please deal with these, or set na.action = TRUE')
     }
   }
-
+
   polygon_data <- getPolygonData(x, id_var, response_var, sample_size_var)
-
+
   # Save raster layer names so we can reassign it to make sure names don't change.
   cov_names <- names(covariate_rasters)

@@ -124,18 +127,20 @@ prepare_data <- function(x,
   }

   names(aggregation_raster) <- 'aggregation_raster'
-
+
   covariate_rasters <- c(covariate_rasters, aggregation_raster)
-  covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var)
+  #covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var)
+  covariate_data <- terra::extract(covariate_rasters, x, cells=TRUE, na.rm=TRUE, ID=TRUE)
+  names(covariate_data)[1] <- id_var

   # Remove the aggregation raster
-  covariate_rasters <- covariate_rasters[[seq(nlyr(covariate_rasters) - 1)]]
-
+  covariate_rasters <- covariate_rasters[[seq(terra::nlyr(covariate_rasters) - 1)]]
+
   names(covariate_rasters) <- cov_names
-
-  aggregation_pixels <- as.numeric(covariate_data[ , ncol(covariate_data)])
-  covariate_data <- covariate_data[, -ncol(covariate_data)]
-
+
+  aggregation_pixels <- as.numeric(covariate_data[ , terra::ncol(covariate_data)])
+  covariate_data <- covariate_data[, -terra::ncol(covariate_data)]
+
   # Check for NAs in population data
   if(sum(is.na(aggregation_pixels)) != 0) {
     if(na.action) {
@@ -144,7 +149,7 @@ prepare_data <- function(x,
     } else {
       stop('There are NAs in the aggregation rasters within polygons. Please deal with these, or set na.action = TRUE')
     }
   }
-
+
   # Check for NAs in covariate data
   if(sum(is.na(covariate_data)) != 0) {
     if(na.action) {
@@ -153,13 +158,13 @@ prepare_data <- function(x,
       stop('There are NAs in the covariate rasters within polygons. Please deal with these, or set na.action = TRUE')
     }
   }
-
+
   coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cellid)
-
+
   coordsForPrediction <- extractCoordsForMesh(covariate_rasters)
-
+
   startendindex <- getStartendindex(covariate_data, polygon_data, id_var = id_var)
-
+
   if(makeMesh) {
     if(!requireNamespace('INLA', quietly = TRUE)) {
       mesh <- NULL
@@ -171,7 +176,7 @@ prepare_data <- function(x,
     mesh <- NULL
     message("A mesh is not being built. You will not be able to run a spatial model without a mesh.")
   }
-
+
   disag_data <- list(x = x,
                      shapefile_names = list(id_var = id_var, response_var = response_var),
                      covariate_rasters = covariate_rasters,
@@ -182,16 +187,16 @@ prepare_data <- function(x,
                      coordsForPrediction = coordsForPrediction,
                      startendindex = startendindex,
                      mesh = mesh)
-
+
   class(disag_data) <- c('disag_data', 'list')
-
+
   return(disag_data)
-
+
 }

 #' Function to fit the disaggregation model
 #'
-#' @param x SpatialPolygonDataFrame containing the response data
+#' @param x SpatialPolygonDataFrame containing the response data
 #' @param shapefile_names List of 2: polygon id variable name and response variable name from x
 #' @param covariate_rasters RasterStack of covariates
 #' @param polygon_data data.frame with two columns: polygon id and response
@@ -201,12 +206,12 @@ prepare_data <- function(x,
 #' @param coordsForPrediction coordinates of the covariate data points in the whole raster extent
 #' @param startendindex matrix containing the start and end index for each polygon
 #' @param mesh inla.mesh object to use in the fit
-#'
-#' @return A list is returned of class \code{disag_data}.
+#'
+#' @return A list is returned of class \code{disag_data}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
 #' The list of class \code{disag_data} contains:
-#' \item{x }{The SpatialPolygonDataFrame used as an input.}
-#' \item{covariate_rasters }{The RasterStack used as an input.}
+#' \item{x }{The SpatialPolygonDataFrame used as an input.}
+#' \item{covariate_rasters }{The RasterStack used as an input.}
 #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.}
 #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.}
 #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.}
@@ -216,21 +221,21 @@ prepare_data <- function(x,
 #' \item{mesh }{A INLA mesh to be used for the spatial field of the disaggregation model.}
 #'
 #' @name as.disag_data
-#'
+#'
 #' @export

-as.disag_data <- function(x,
+as.disag_data <- function(x,
                           shapefile_names,
-                          covariate_rasters,
-                          polygon_data,
-                          covariate_data,
+                          covariate_rasters,
+                          polygon_data,
+                          covariate_data,
                           aggregation_pixels,
-                          coordsForFit,
+                          coordsForFit,
                           coordsForPrediction,
-                          startendindex,
+                          startendindex,
                           mesh = NULL) {
-
+
   stopifnot(inherits(x, 'SpatialPolygonsDataFrame'))
   stopifnot(inherits(shapefile_names, 'list'))
   stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack')))
@@ -243,7 +248,7 @@ as.disag_data <- function(x,
   if(!is.null(mesh)) {
     stopifnot(inherits(mesh, 'inla.mesh'))
   }
-
+
   disag_data <- list(x = x,
                      shapefile_names = shapefile_names,
                      covariate_rasters = covariate_rasters,
@@ -254,8 +259,8 @@ as.disag_data <- function(x,
                      coordsForPrediction = coordsForPrediction,
                      startendindex = startendindex,
                      mesh = mesh)
-
+
   class(disag_data) <- c('disag_data', 'list')
-
+
   return(disag_data)
 }
'response') #' } @@ -49,13 +50,13 @@ getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', #' Get a RasterStack of covariates from a folder containing .tif files #' -#' Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter. +#' Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter. #' #' @param directory Filepath to the directory containing the rasters. #' @param file_pattern Pattern the filenames must match. Default is all files ending in .tif . #' @param shape An object with an extent that the rasters will be cropped to. #' -#' @return A RasterStack of the raster files in the directory +#' @return A multi-layered SpatRaster of the raster files in the directory #' #' @export #' @examples @@ -71,10 +72,11 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { covariate_files <- list.files(directory, pattern = file_pattern, full.names = TRUE) stopifnot(length(covariate_files) != 0) - covariate_rasters <- lapply(covariate_files, function(x) raster::raster(x)) - covariate_stack <- raster::stack(covariate_rasters) + covariate_rasters <- lapply(covariate_files, function(x) terra::rast(x)) + covariate_stack <- terra::rast(covariate_rasters) - covariate_stack <- raster::crop(covariate_stack, shape) + covariate_stack <- terra::crop(covariate_stack, shape) + covariate_stack <- terra::mask(covariate_stack, shape) return(covariate_stack) } diff --git a/R/plotting.R b/R/plotting.R index 02f36e2..e87f645 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -2,7 +2,6 @@ #' #' Plotting function for class \emph{disag_data} (the input data for disaggregation). #' ->>>>>>> Fix plot.disag_data #' Produces three plots: polygon response data, covariate rasters and INLA mesh. #' #' @param x Object of class \emph{disag_data} to be plotted. @@ -28,7 +27,7 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { if(2 %in% which) { stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) - plots$covariates <- sp::spplot(x$covariate_rasters) + plots$covariates <- plot(x$covariate_rasters) titles <- c(titles, 'Covariate rasters') } @@ -70,7 +69,7 @@ plot.disag_model <- function(x, ...){ posteriors$type <- ifelse(posteriors$fixed, 'Slope', 'Other') # Check name lengths match before substituting. - lengths_match <- raster::nlayers(x$data$covariate_rasters) == sum(posteriors$fixed) + lengths_match <- terra::nlyr(x$data$covariate_rasters) == sum(posteriors$fixed) if(lengths_match){ posteriors$parameter[grepl('slope', posteriors$parameter)] <- names(x$data$covariate_rasters) } @@ -133,10 +132,10 @@ plot.disag_model <- function(x, ...){ plot.disag_prediction <- function(x, ...) 
{ - rasters_to_plot <- raster::stack(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci) + rasters_to_plot <- terra::rast(list(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci)) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - plots <- sp::spplot(rasters_to_plot) + plots <- plot(rasters_to_plot) print(plots) @@ -163,7 +162,6 @@ plot_polygon_data <- function(x, names) { area_id <- long <- lat <- group <- response <- NULL stopifnot(inherits(shp, 'sf')) - shp <- dplyr::mutate(shp, area_id = as.character(area_id)) p <- ggplot(shp, aes(fill = response)) + diff --git a/R/predict.R b/R/predict.R index 5033bfd..0aeeedf 100644 --- a/R/predict.R +++ b/R/predict.R @@ -185,10 +185,12 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS predictions[[r]] <- prediction_result$prediction } - predictions <- raster::stack(predictions) + predictions <- terra::rast(predictions) probs <- c((1 - CI) / 2, 1 - (1 - CI) / 2) - predictions_ci <- raster::calc(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) + predictions_ci <- terra::app(predictions, function(x) stats::quantile(x, probs = probs, na.rm = TRUE)) + + names(predictions_ci) <- c('lower CI', 'upper CI') uncertainty <- list(realisations = predictions, @@ -208,9 +210,9 @@ predict_uncertainty <- function(model_output, newdata = NULL, predict_iid = FALS getCoords <- function(data) { points_raster <- data$covariate_rasters[[1]] - points_raster[is.na(points_raster)] <- -9999 - raster_pts <- raster::rasterToPoints(points_raster, spatial = TRUE) - coords <- raster_pts@coords + points_raster[is.na(terra::values(points_raster, mat = FALSE))] <- -9999 + raster_pts <- terra::as.points(points_raster) + coords <- terra::crds(raster_pts) return(coords) } diff --git a/R/prepare_data.R b/R/prepare_data.R index 3c40d1e..eeb4a65 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -68,8 +68,6 @@ #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) #' spdf <- sf::st_sf(response_df,geometry=polys) #' -#' plot(spdf) -#' #' r <- terra::rast(nrow=20,ncol=20) #' terra::ext(r) <- terra::ext(spdf) #' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) @@ -78,6 +76,7 @@ #' terra::ext(r2) <- terra::ext(spdf) #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- c(r, r2) +#' #' test_data <- prepare_data(x = spdf, #' covariate_rasters = cov_rasters) #' } @@ -129,7 +128,6 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) - #covariate_data <- parallelExtract(covariate_rasters, x, fun = NULL, id = id_var) covariate_data <- terra::extract(covariate_rasters, x, cells=TRUE, na.rm=TRUE, ID=TRUE) names(covariate_data)[1] <- id_var From 91d7cad0f6a9eb742df6cb4ca1414d26e3c62b4f Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 15 Sep 2023 12:27:04 +0100 Subject: [PATCH 23/55] migrate test-extract and minor git fixes --- DESCRIPTION | 1 - NAMESPACE | 1 - R/extract.R | 4 +- R/fit_model.R | 1 - tests/testthat/test-extract.R | 112 +++++++++++++--------------------- 5 files changed, 44 insertions(+), 75 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 3001c52..873a8bd 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -45,4 +45,3 @@ LinkingTo: RcppEigen SystemRequirements: GNU make VignetteBuilder: knitr ->>>>>>> Change imports in description to match new backends. 
diff --git a/NAMESPACE b/NAMESPACE index b298c2b..bd927c7 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -18,7 +18,6 @@ export(getCovariateRasters) export(getPolygonData) export(getStartendindex) export(make_model_object) -export(parallelExtract) export(predict_model) export(predict_uncertainty) export(prepare_data) diff --git a/R/extract.R b/R/extract.R index c704f2c..6bf26b4 100644 --- a/R/extract.R +++ b/R/extract.R @@ -26,7 +26,7 @@ #' #' polys <- lapply(polygons,sf::st_polygon) #' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sf::st_sf(response_df,geometry=polys) +#' spdf <- sf::st_sf(response_df, geometry = polys) #' #' getPolygonData(spdf, id_var = 'area_id', response_var = 'response') #' } @@ -76,7 +76,7 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { covariate_stack <- terra::rast(covariate_rasters) covariate_stack <- terra::crop(covariate_stack, shape) - covariate_stack <- terra::mask(covariate_stack, shape) + #covariate_stack <- terra::mask(covariate_stack, shape) return(covariate_stack) } diff --git a/R/fit_model.R b/R/fit_model.R index fe99c35..b1fff72 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -1,4 +1,3 @@ -<<<<<<< HEAD #' Fit the disaggregation model #' #' \emph{fit_model} function takes a \emph{disag_data} object created by diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 4a50ef5..8a0e253 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -10,115 +10,87 @@ for(i in 1:n_polygons) { row <- ceiling(i/n_polygon_per_side) col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) } -polys <- do.call(raster::spPolygons, polygons) +polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -test_that("parallelExtract gives errors when it should", { - - skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - - expect_error(parallelExtract(spdf, cov_stack, fun = NULL, id = 'area_id')) - expect_error(parallelExtract(cov_stack, spdf, fun = NULL, id = 'id')) - - parallel::stopCluster(cl) - foreach::registerDoSEQ() -}) - -test_that("parallelExtract give the right form of output", { - - skip_on_cran() - - cl <- 
parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - - expect_is(cov_data, 'data.frame') - expect_equal(sort(as.numeric(unique(cov_data$area_id))), spdf$area_id)# - expect_equal(ncol(cov_data), raster::nlayers(cov_stack) + 2)# - expect_equal(names(cov_stack), names(cov_data)[-c(1,2)])# - expect_equal(length(unique(cov_data$area_id)), length(spdf)) - -}) +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) test_that("getPolygonData function", { - + skip_on_cran() - + expect_error(getPolygonData(spdf, id_var = 'id', response_var = 'response')) expect_error(getPolygonData(spdf, id_var = 'area_id', response_var = 'data')) - + result <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response') result_binom <- getPolygonData(spdf_binom, id_var = 'area_id', response_var = 'response', sample_size_var = 'sample_size') - + expect_is(result, 'data.frame') expect_equal(ncol(result), 3) expect_equal(nrow(result), nrow(spdf)) expect_equal(result$area_id, spdf$area_id) expect_equal(result$response, spdf$response) expect_equal(result$N, rep(NA, nrow(result))) - + expect_is(result_binom, 'data.frame') expect_equal(ncol(result_binom), 3) expect_equal(nrow(result_binom), nrow(spdf_binom)) expect_equal(result_binom$area_id, spdf_binom$area_id) expect_equal(result_binom$response, spdf_binom$response) expect_equal(result_binom$N, spdf_binom$sample_size) - + }) test_that("getCovariateData function gives errors when it should", { - + skip_on_cran() - + expect_error(getCovariateRasters('/home/rasters', '.tif$', spdf)) - + # Save .tif files in tempdir() - r <- raster::raster(ncol=20, nrow=20) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_stack <- raster::stack(r, r2) - raster::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) - raster::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) - - expect_is(getCovariateRasters(tempdir(), '.tif$', spdf), 'RasterBrick') - + r <- terra::rast(ncol=20, nrow=20) + r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) + r2 <- terra::rast(ncol=20, nrow=20) + r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) + cov_stack <- c(r, r2) + terra::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) + terra::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) + + expect_is(getCovariateRasters(tempdir(), '.tif$', spdf), 'SpatRaster') + }) test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - + + # cl <- parallel::makeCluster(2) + # doParallel::registerDoParallel(cl) + # cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = ) 
+ # parallel::stopCluster(cl) + # foreach::registerDoSEQ() + + cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) + names(cov_data)[1] <- 'area_id' + result <- extractCoordsForMesh(cov_stack, cov_data$cellid) - + result2 <- extractCoordsForMesh(cov_stack) expect_error(extractCoordsForMesh(cov_data$cellid, cov_stack)) From ba3a522ff433fc3f582a833a7e3fe2a3789bbbd0 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 15 Sep 2023 16:15:05 +0100 Subject: [PATCH 24/55] migrate test-prepare-data and further migration in prepare_data --- R/build_mesh.R | 2 +- R/prepare_data.R | 34 +++++---- tests/testthat/test-prepare-data.R | 118 ++++++++++++++--------------- 3 files changed, 77 insertions(+), 77 deletions(-) diff --git a/R/build_mesh.R b/R/build_mesh.R index 0485ff2..7ad5017 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -64,7 +64,7 @@ build_mesh <- function(shapes, mesh.args = NULL) { pars[names(mesh.args)] <- mesh.args #outline <- maptools::unionSpatialPolygons(shapes_old, IDs = rep(1, length(shapes_old))) - outline <- st_sf(sf::st_union(sf::st_convex_hull(shapes))) + outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes))) coords <- sf::st_coordinates(outline)[, c('X', 'Y')] diff --git a/R/prepare_data.R b/R/prepare_data.R index eeb4a65..96e132c 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -85,7 +85,7 @@ #' #' -prepare_data <- function(x, +prepare_data <- function(polygon_shapefile, covariate_rasters, aggregation_raster = NULL, id_var = 'area_id', @@ -96,7 +96,7 @@ prepare_data <- function(x, makeMesh = TRUE, ncores = 2) { - stopifnot(inherits(x, 'sf')) + stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(covariate_rasters, 'SpatRaster')) if(!is.null(aggregation_raster)) stopifnot(inherits(aggregation_raster, 'SpatRaster')) stopifnot(inherits(id_var, 'character')) @@ -104,16 +104,16 @@ prepare_data <- function(x, if(!is.null(mesh.args)) stopifnot(inherits(mesh.args, 'list')) # Check for NAs in response data - na_rows <- is.na(x[, response_var, drop = TRUE]) + na_rows <- is.na(polygon_shapefile[, response_var, drop = TRUE]) if(sum(na_rows) != 0) { if(na.action) { - x <- x[!na_rows, ] + polygon_shapefile <- polygon_shapefile[!na_rows, ] } else { stop('There are NAs in the response data. Please deal with these, or set na.action = TRUE') } } - polygon_data <- getPolygonData(x, id_var, response_var, sample_size_var) + polygon_data <- getPolygonData(polygon_shapefile, id_var, response_var, sample_size_var) # Save raster layer names so we can reassign it to make sure names don't change. 
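
The next hunk keeps the single `terra::extract()` call that replaced `parallelExtract()`: with `ID = TRUE` it returns one row per pixel per polygon with a polygon index column, and `cells = TRUE` appends the raster cell number that `prepare_data()` later passes to `extractCoordsForMesh()` via `covariate_data$cell`. A self-contained illustration of that return shape (toy square polygon and 4x4 raster, invented for this note):

```r
library(terra)
library(sf)

# A single 2x2-unit square polygon as an sf object
poly <- sf::st_sf(area_id = 1,
                  geometry = sf::st_sfc(sf::st_polygon(list(
                    cbind(c(0, 2, 2, 0, 0), c(0, 0, 2, 2, 0))))))

r <- terra::rast(ncols = 4, nrows = 4, xmin = 0, xmax = 4, ymin = 0, ymax = 4)
terra::values(r) <- seq_len(terra::ncell(r))

# One row per pixel falling in the polygon: ID (polygon index), layer value, cell number
covariate_data <- terra::extract(r, poly, cells = TRUE, na.rm = TRUE, ID = TRUE)
names(covariate_data)[1] <- 'area_id'  # prepare_data() renames the ID column the same way
covariate_data
```
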
@@ -128,7 +128,7 @@ prepare_data <- function(x, covariate_rasters <- c(covariate_rasters, aggregation_raster) - covariate_data <- terra::extract(covariate_rasters, x, cells=TRUE, na.rm=TRUE, ID=TRUE) + covariate_data <- terra::extract(covariate_rasters, polygon_shapefile, cells=TRUE, na.rm=TRUE, ID=TRUE) names(covariate_data)[1] <- id_var # Remove the aggregation raster @@ -136,8 +136,9 @@ prepare_data <- function(x, names(covariate_rasters) <- cov_names - aggregation_pixels <- as.numeric(covariate_data[ , terra::ncol(covariate_data)]) - covariate_data <- covariate_data[, -terra::ncol(covariate_data)] + agg_filter <- names(covariate_data) %in% c('aggregation_raster') + aggregation_pixels <- as.numeric(covariate_data[ , agg_filter]) + covariate_data <- covariate_data[, !agg_filter] # Check for NAs in population data if(sum(is.na(aggregation_pixels)) != 0) { @@ -151,13 +152,14 @@ prepare_data <- function(x, # Check for NAs in covariate data if(sum(is.na(covariate_data)) != 0) { if(na.action) { - covariate_data[-c(1:2)] <- sapply(covariate_data[-c(1:2)], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) + cov_filter <- !(names(covariate_data) %in% c(id_var,'cell')) + covariate_data[ , cov_filter] <- sapply(covariate_data[ , cov_filter], function(x) { x[is.na(x)] <- stats::median(x, na.rm = T); return(x) }) } else { stop('There are NAs in the covariate rasters within polygons. Please deal with these, or set na.action = TRUE') } } - coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cellid) + coordsForFit <- extractCoordsForMesh(covariate_rasters, selectIds = covariate_data$cell) coordsForPrediction <- extractCoordsForMesh(covariate_rasters) @@ -168,14 +170,14 @@ prepare_data <- function(x, mesh <- NULL message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") } else { - mesh <- build_mesh(x, mesh.args) + mesh <- build_mesh(polygon_shapefile, mesh.args) } } else { mesh <- NULL message("A mesh is not being built. 
You will not be able to run a spatial model without a mesh.") } - disag_data <- list(x = x, + disag_data <- list(polygon_shapefile = polygon_shapefile, shapefile_names = list(id_var = id_var, response_var = response_var), covariate_rasters = covariate_rasters, polygon_data = polygon_data, @@ -223,7 +225,7 @@ prepare_data <- function(x, #' @export -as.disag_data <- function(x, +as.disag_data <- function(polygon_shapefile, shapefile_names, covariate_rasters, polygon_data, @@ -234,9 +236,9 @@ as.disag_data <- function(x, startendindex, mesh = NULL) { - stopifnot(inherits(x, 'SpatialPolygonsDataFrame')) + stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(shapefile_names, 'list')) - stopifnot(inherits(covariate_rasters, c('RasterBrick', 'RasterStack'))) + stopifnot(inherits(covariate_rasters, 'SpatRaster')) stopifnot(inherits(polygon_data, 'data.frame')) stopifnot(inherits(covariate_data, 'data.frame')) stopifnot(inherits(aggregation_pixels, 'numeric')) @@ -247,7 +249,7 @@ as.disag_data <- function(x, stopifnot(inherits(mesh, 'inla.mesh')) } - disag_data <- list(x = x, + disag_data <- list(polygon_shapefile = polygon_shapefile, shapefile_names = shapefile_names, covariate_rasters = covariate_rasters, polygon_data = polygon_data, diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 7cdcee8..3702171 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -10,44 +10,45 @@ for(i in 1:n_polygons) { row <- ceiling(i/n_polygon_per_side) col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) } -polys <- do.call(raster::spPolygons, polygons) +polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_na <- sp::SpatialPolygonsDataFrame(polys, response_na_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) 
+terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) test_that("Check prepare_data function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - result <- prepare_data(polygon_shapefile = spdf, + + result <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -58,25 +59,25 @@ test_that("Check prepare_data function works as expected", { expect_equal(sum(is.na(result$polygon_data$N)), length(result$polygon_data$N)) expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) - + }) test_that("Check prepare_data function with sample size works as expected", { - + skip_on_cran() - - result <- prepare_data(polygon_shapefile = spdf_binom, + + result <- prepare_data(polygon_shapefile = spdf_binom, covariate_rasters = cov_stack, sample_size_var = 'sample_size', makeMesh = FALSE) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -87,36 +88,36 @@ test_that("Check prepare_data function with sample size works as expected", { expect_equal(sum(is.na(result$polygon_data$N)), 0) expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) - + }) test_that("Check prepare_data function deals with NAs as expected", { - + skip_on_cran() - + cov_stack_na <- cov_stack cov_stack_na[[1]][c(1:10)] <- NA - + aggregation_raster_na <- r aggregation_raster_na[c(1:10)] <- NA - + expect_error(prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack, makeMesh = FALSE)) expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack_na, makeMesh = FALSE)) expect_error(prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack, aggregation_raster = aggregation_raster_na, makeMesh = FALSE)) - - result <- 
prepare_data(polygon_shapefile = spdf_na, + + result <- prepare_data(polygon_shapefile = spdf_na, covariate_rasters = cov_stack_na, aggregation_raster = aggregation_raster_na, na.action = TRUE, makeMesh = FALSE) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -133,43 +134,40 @@ test_that("Check prepare_data function deals with NAs as expected", { test_that("Check as.disag_data function works as expected", { - + skip_on_cran() - + polygon_data <- getPolygonData(spdf, id_var = 'area_id', response_var = 'response') - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - + + cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) + names(cov_data)[1] <- 'area_id' + aggregation_data <- rep(1, nrow(cov_data)) - + coordsForFit <- extractCoordsForMesh(cov_stack, cov_data$cellid) - + coordsForPrediction <- extractCoordsForMesh(cov_stack) - + startendindex <- getStartendindex(cov_data, polygon_data, 'area_id') - - result <- as.disag_data(spdf, + + result <- as.disag_data(spdf, list('area_id', 'response'), cov_stack, - polygon_data, - cov_data, + polygon_data, + cov_data, aggregation_data, - coordsForFit, + coordsForFit, coordsForPrediction, startendindex, mesh = NULL) - + expect_is(result, 'disag_data') expect_equal(length(result), 10) - expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', + expect_equal(names(result), c('polygon_shapefile', 'shapefile_names', 'covariate_rasters', 'polygon_data', 'covariate_data', 'aggregation_pixels', 'coordsForFit', 'coordsForPrediction', 'startendindex', 'mesh')) - expect_is(result$polygon_shapefile, 'SpatialPolygonsDataFrame') + expect_is(result$polygon_shapefile, 'sf') expect_is(result$shapefile_names, 'list') - expect_is(result$covariate_rasters, c('RasterBrick', 'RasterStack')) + expect_is(result$covariate_rasters, 'SpatRaster') expect_is(result$polygon_data, 'data.frame') expect_is(result$covariate_data, 'data.frame') expect_is(result$aggregation_pixels, 'numeric') @@ -179,6 +177,6 @@ test_that("Check as.disag_data function works as expected", { expect_true(is.null(result$mesh)) expect_equal(nrow(result$polygon_data), nrow(result$startendindex)) expect_equal(nrow(result$covariate_data), nrow(result$coordsForFit)) - + }) From 0b83bd7c936406d3ca537a706752d23be751b88c Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 15 Sep 2023 17:00:28 +0100 Subject: [PATCH 25/55] start to migrate test-fit-model --- R/fit_model.R | 2 +- tests/testthat/test-fit-model.R | 340 +++++++++++++++-------------- tests/testthat/test-prepare-data.R | 2 - 3 files changed, 173 
insertions(+), 171 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index b1fff72..dba8bb0 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -358,7 +358,7 @@ make_model_object <- function(data, } # Construct sensible default field hyperpriors - limits <- sf::st_bbox(data$x) + limits <- sf::st_bbox(data$polygon_shapefile) hypotenuse <- sqrt((limits$xmax - limits$xmin)^2 + (limits$ymax - limits$ymin)^2) prior_rho <- hypotenuse/3 diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 37a9b73..f1b76b4 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,168 +1,172 @@ - -context("Fitting model") - -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf_binom <- sp::SpatialPolygonsDataFrame(polys, response_binom_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - -test_that("disag_model produces errors when expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - expect_error(disag_model(list())) - expect_error(disag_model(test_data, iterations = 'iterations')) - expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) - expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) - expect_error(disag_model(test_data, family = 'banana')) - expect_error(disag_model(test_data, link = 'apple')) - -}) - -test_that("disag_model behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- disag_model(test_data, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - - -}) - - - - -test_that("disag_model with 1 covariate behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - test_data2 <- test_data - test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] - test_data2$covariate_data <- 
test_data2$covariate_data[, 1:3] - - result <- disag_model(test_data2, iterations = 2, iid = FALSE) - - expect_is(result, 'disag_model') - expect_equal(length(result), 5) - - # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. - expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) - expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - - # Confirm only two covariates were fitted. - expect_equal(sum(names(result$opt$par) == 'slope'), 1) - -}) -test_that("user defined model setup is working as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - binom_data <- prepare_data(polygon_shapefile = spdf_binom, - covariate_rasters = cov_stack, - sample_size_var = 'sample_size') - - result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') - result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') - result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') - - expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) - - expect_is(result2, 'disag_model') - expect_equal(length(result2), 5) - expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) - expect_false(result2$model_setup$field) - expect_true(result2$model_setup$iid) - expect_equal(result2$model_setup$family, 'poisson') - expect_equal(result2$model_setup$link, 'log') - - expect_is(result3, 'disag_model') - expect_equal(length(result3), 5) - expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) - expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) - expect_true(result3$model_setup$field) - expect_false(result3$model_setup$iid) - expect_equal(result3$model_setup$family, 'binomial') - expect_equal(result3$model_setup$link, 'logit') - - expect_is(result4, 'disag_model') - expect_equal(length(result4), 5) - expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) - expect_equal(unique(names(result4$sd_out$par.random)), NULL) - expect_false(result4$model_setup$field) - expect_false(result4$model_setup$iid) - expect_equal(result4$model_setup$family, 'gaussian') - expect_equal(result4$model_setup$link, 'identity') -}) - -test_that("make_model_object behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - result <- make_model_object(test_data) - - expect_is(result, 'list') - expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) - -}) - -test_that("setup_hess_control behaves as expected", { - - skip_if_not_installed('INLA') - skip_on_cran() - - obj <- make_model_object(test_data) - - opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) - - hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) - - expect_is(hess_control, 'list') - expect_equal(length(hess_control$parscale), length(opt$par)) - expect_equal(length(hess_control$ndeps), length(opt$par)) - -}) - + +context("Fitting model") + +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- 
ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) +response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) + +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) + + +if(identical(Sys.getenv("NOT_CRAN"), "true")) { + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) +} else { + test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack, + makeMesh = FALSE) +} + +test_that("disag_model produces errors when expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + expect_error(disag_model(list())) + expect_error(disag_model(test_data, iterations = 'iterations')) + expect_error(disag_model(test_data, priors = list(polygon_sd_men = 0.3, polygon_sd_sd = 0.4))) + expect_error(disag_model(test_data, priors = c(polygon_sd_mean = 1.2))) + expect_error(disag_model(test_data, family = 'banana')) + expect_error(disag_model(test_data, link = 'apple')) + +}) + +test_that("disag_model behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- disag_model(test_data, iterations = 2, iid = FALSE) + + expect_is(result, 'disag_model') + expect_equal(length(result), 5) + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + + + +}) + + + + +test_that("disag_model with 1 covariate behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + test_data2 <- test_data + test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]] + test_data2$covariate_data <- test_data2$covariate_data[, 1:3] + + result <- disag_model(test_data2, iterations = 2, iid = FALSE) + + expect_is(result, 'disag_model') + expect_equal(length(result), 5) + + # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore. + expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) + expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) + + # Confirm only two covariates were fitted. 
+ expect_equal(sum(names(result$opt$par) == 'slope'), 1) + +}) +test_that("user defined model setup is working as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + binom_data <- prepare_data(polygon_shapefile = spdf_binom, + covariate_rasters = cov_stack, + sample_size_var = 'sample_size') + + result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') + result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') + result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') + + expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) + + expect_is(result2, 'disag_model') + expect_equal(length(result2), 5) + expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) + expect_false(result2$model_setup$field) + expect_true(result2$model_setup$iid) + expect_equal(result2$model_setup$family, 'poisson') + expect_equal(result2$model_setup$link, 'log') + + expect_is(result3, 'disag_model') + expect_equal(length(result3), 5) + expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) + expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) + expect_true(result3$model_setup$field) + expect_false(result3$model_setup$iid) + expect_equal(result3$model_setup$family, 'binomial') + expect_equal(result3$model_setup$link, 'logit') + + expect_is(result4, 'disag_model') + expect_equal(length(result4), 5) + expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(unique(names(result4$sd_out$par.random)), NULL) + expect_false(result4$model_setup$field) + expect_false(result4$model_setup$iid) + expect_equal(result4$model_setup$family, 'gaussian') + expect_equal(result4$model_setup$link, 'identity') +}) + +test_that("make_model_object behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + result <- make_model_object(test_data) + + expect_is(result, 'list') + expect_equal(sum(sapply(c("par", "fn", "gr", "report"), function(x) !(x %in% names(result)))), 0) + +}) + +test_that("setup_hess_control behaves as expected", { + + skip_if_not_installed('INLA') + skip_on_cran() + + obj <- make_model_object(test_data) + + opt <- stats::nlminb(obj$par, obj$fn, obj$gr, control = list(iter.max = 2, trace = 0)) + + hess_control <- setup_hess_control(opt, hess_control_parscale = c(rep(c(0.9, 1.1), 3), 1), hess_control_ndeps = 1e-3) + + expect_is(hess_control, 'list') + expect_equal(length(hess_control$parscale), length(opt$par)) + expect_equal(length(hess_control$ndeps), length(opt$par)) + +}) + diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 3702171..6af1b94 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -17,11 +17,9 @@ for(i in 1:n_polygons) { polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) spdf <- sf::st_sf(response_df, geometry = polys) -spdf_na 
<- sf::st_sf(response_na_df, geometry = polys) spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack From 0a095c23960cbfca927c01ded343b674a43fef75 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 10 Oct 2023 14:24:35 +0100 Subject: [PATCH 26/55] more test migration, migrate to fmesher, update docs --- DESCRIPTION | 1 + NAMESPACE | 5 -- R/build_mesh.R | 5 +- R/predict.R | 32 +++++---- man/as.disag_data.Rd | 16 ++--- man/fit_model.Rd | 42 +++++------ man/getCovariateRasters.Rd | 4 +- man/getPolygonData.Rd | 27 +++---- man/getStartendindex.Rd | 6 +- man/make_model_object.Rd | 26 +++---- man/parallelExtract.Rd | 58 --------------- man/predict.disag_model.Rd | 16 ++--- man/predict_model.Rd | 8 +-- man/predict_uncertainty.Rd | 10 +-- man/prepare_data.Rd | 80 +++++++++++---------- tests/testthat/test-fit-model.R | 22 +++--- tests/testthat/test-predict-model.R | 106 ++++++++++++++-------------- tests/testthat/test-prepare-data.R | 2 + 18 files changed, 207 insertions(+), 259 deletions(-) delete mode 100644 man/parallelExtract.Rd diff --git a/DESCRIPTION b/DESCRIPTION index 873a8bd..faddfb0 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -30,6 +30,7 @@ Imports: ggplot2, cowplot, sparseMVN, + fmesher, terra, sf, utils diff --git a/NAMESPACE b/NAMESPACE index bd927c7..47705eb 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -24,12 +24,7 @@ export(prepare_data) import(ggplot2) import(splancs) import(utils) -importFrom(doParallel,registerDoParallel) -importFrom(foreach,"%dopar%") -importFrom(parallel,makeCluster) -importFrom(parallel,stopCluster) importFrom(stats,cor) importFrom(stats,quantile) importFrom(stats,sd) useDynLib(disaggregation) - diff --git a/R/build_mesh.R b/R/build_mesh.R index 7ad5017..97a2bcc 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -63,17 +63,16 @@ build_mesh <- function(shapes, mesh.args = NULL) { pars[names(mesh.args)] <- mesh.args - #outline <- maptools::unionSpatialPolygons(shapes_old, IDs = rep(1, length(shapes_old))) outline <- sf::st_sf(sf::st_union(sf::st_convex_hull(shapes))) coords <- sf::st_coordinates(outline)[, c('X', 'Y')] - outline.hull <- INLA::inla.nonconvex.hull(coords, + outline.hull <- fmesher::fm_nonconvex_hull_inla(coords, convex = pars$convex, concave = pars$concave, resolution = pars$resolution) - mesh <- INLA::inla.mesh.2d( + mesh <- fmesher::fm_mesh_2d( boundary = outline.hull, max.edge = pars$max.edge, cut = pars$cut, diff --git a/R/predict.R b/R/predict.R index 0aeeedf..f27bf2b 100644 --- a/R/predict.R +++ b/R/predict.R @@ -236,7 +236,6 @@ getAmatrix <- function(mesh, coords) { return(Amatrix) } - # Helper to check and sort out new raster data. 
check_newdata <- function(newdata, model_output){ if(is.null(newdata)) return(NULL) @@ -288,10 +287,13 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) { } if(predict_iid) { - tmp_shp <- model_output$data$x - tmp_shp <- dplyr::bind_cols(tmp_shp, - area_id = - factor(model_output$data$polygon_data$area_id)) + tmp_shp <- model_output$data$polygon_shapefile + #needed to avoid errors in testing + if (!("area_id" %in% names(model_output$data$polygon_shapefile))){ + tmp_shp <- dplyr::bind_cols(tmp_shp, + area_id = + factor(model_output$data$polygon_data$area_id)) + } shapefile_raster <- terra::rasterize(tmp_shp, model_output$data$covariate_rasters, field = 'area_id') @@ -330,13 +332,13 @@ predict_single_raster <- function(model_parameters, objects, link_function) { field <- (objects$field_objects$Amatrix %*% model_parameters$nodemean)[, 1] field_ras <- terra::rast(cbind(objects$field_objects$coords, field), type = 'xyz', - crs = crs(linear_pred)) + crs = terra::crs(linear_pred)) linear_pred <- linear_pred + field_ras } else { field_ras <- NULL } - if(!is.null(objects$iid_objects)) { + if(!is.null(objects$iid_objects)) { iid_ras <- objects$iid_objects$shapefile_raster iideffect_sd <- 1/sqrt(exp(model_parameters$iideffect_log_tau)) # todo @@ -344,22 +346,24 @@ predict_single_raster <- function(model_parameters, objects, link_function) { targetvals <- terra::values(objects$iid_objects$shapefile_raster, dataframe = FALSE, mat = FALSE) whichvals <- which(targetvals == objects$iid_objects$shapefile_ids[1, i]) - values(iid_ras)[whichvals] <- + terra::values(iid_ras)[whichvals] <- model_parameters$iideffect[i] - na_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE))) + na_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE))) na_iid_values <- stats::rnorm(length(na_pixels), 0, iideffect_sd) - values(iid_ras)[na_pixels] <- na_iid_values + terra::values(iid_ras)[na_pixels] <- na_iid_values } if(terra::ext(iid_ras) != terra::ext(linear_pred)) { # Extent of prediction space is different to the original model. 
Keep any overlapping iid values but predict to the new extent raster_new_extent <- linear_pred - values(raster_new_extent) <- NA - # iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent)) + terra::values(raster_new_extent) <- NA + #iid_ras <- terra::merge(iid_ras, raster_new_extent, ext = terra::ext(raster_new_extent)) # NOt sure why we no longer need the ext argument + # SS - added a crop which I think does the same thing iid_ras <- terra::merge(iid_ras, raster_new_extent) - missing_pixels <- which(is.na(values(iid_ras, dataframe = FALSE, mat = FALSE))) + iid_ras <- terra::crop(iid_ras, raster_new_extent) + missing_pixels <- which(is.na(terra::values(iid_ras, dataframe = FALSE, mat = FALSE))) missing_iid_values <- stats::rnorm(length(missing_pixels), 0, iideffect_sd) - values(iid_ras)[missing_pixels] <- missing_iid_values + terra::values(iid_ras)[missing_pixels] <- missing_iid_values } linear_pred <- linear_pred + iid_ras } else { diff --git a/man/as.disag_data.Rd b/man/as.disag_data.Rd index 01903e3..c8e92f9 100644 --- a/man/as.disag_data.Rd +++ b/man/as.disag_data.Rd @@ -18,9 +18,7 @@ as.disag_data( ) } \arguments{ -\item{polygon_shapefile}{SpatialPolygonDataFrame containing the response data} - -\item{shapefile_names}{List of 2: polygon id variable name and response variable name from polygon_shapefile} +\item{shapefile_names}{List of 2: polygon id variable name and response variable name from x} \item{covariate_rasters}{RasterStack of covariates} @@ -30,20 +28,22 @@ as.disag_data( \item{aggregation_pixels}{vector with value of aggregation raster at each pixel} -\item{coordsForFit}{coordinates of the covariate data points within the polygons in polygon_shapefile} +\item{coordsForFit}{coordinates of the covariate data points within the polygons in x} \item{coordsForPrediction}{coordinates of the covariate data points in the whole raster extent} \item{startendindex}{matrix containing the start and end index for each polygon} \item{mesh}{inla.mesh object to use in the fit} + +\item{x}{SpatialPolygonDataFrame containing the response data} } \value{ -A list is returned of class \code{disag_data}. -The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. +A list is returned of class \code{disag_data}. +The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. The list of class \code{disag_data} contains: - \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} - \item{covariate_rasters }{The RasterStack used as an input.} + \item{x }{The SpatialPolygonDataFrame used as an input.} + \item{covariate_rasters }{The RasterStack used as an input.} \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/fit_model.Rd b/man/fit_model.Rd index c1dec35..6a6b2d3 100644 --- a/man/fit_model.Rd +++ b/man/fit_model.Rd @@ -46,28 +46,28 @@ disag_model( \item{iid}{logical. Flag the iid effect on or off} -\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian. 
+\item{hess_control_parscale}{Argument to scale parameters during the calculation of the Hessian.
 Must be the same length as the number of parameters. See \code{\link[stats]{optimHess}} for details.}

-\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian. 
-Either length 1 (same step size applied to all parameters) or the same length as the number of parameters. 
-Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters. 
+\item{hess_control_ndeps}{Argument to control step sizes during the calculation of the Hessian.
+Either length 1 (same step size applied to all parameters) or the same length as the number of parameters.
+Default is 1e-3, try setting a smaller value if you get NaNs in the standard error of the parameters.
 See \code{\link[stats]{optimHess}} for details.}

 \item{silent}{logical. Suppress verbose output.}
 }
 \value{
-A list is returned of class \code{disag_model}. 
-The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}. 
+A list is returned of class \code{disag_model}.
+The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_model}.
 The list of class \code{disag_model} contains:
-  \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.} 
-  \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.} 
+  \item{obj }{The TMB model object returned by \code{\link[TMB]{MakeADFun}}.}
+  \item{opt }{The optimized model object returned by \code{\link[stats]{nlminb}}.}
   \item{sd_out }{The TMB object returned by \code{\link[TMB]{sdreport}}.}
   \item{data }{The \emph{disag_data} object used as an input to the model.}
   \item{model_setup }{A list of information on the model setup. Likelihood function (\emph{family}), link function (\emph{link}), logical: whether a field was used (\emph{field}) and logical: whether an iid effect was used (\emph{iid}).}
 }
 \description{
-\emph{fit_model} function takes a \emph{disag_data} object created by 
+\emph{fit_model} function takes a \emph{disag_data} object created by
 \code{\link{prepare_data}} and performs a Bayesian disaggregation fit.
 }
 \details{
@@ -82,24 +82,24 @@ And then aggregates these predictions to the polygon level using the weighted su

 The different likelihoods correspond to slightly different models (\eqn{y_j}{yj} is the response count data):
 \itemize{
-  \item Gaussian: 
-  If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where 
+  \item Gaussian:
+  If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where
   \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi}
   \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate.
-  \item Binomial: 
+  \item Binomial:
   For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested.
   \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate.
-  \item Poisson: 
+  \item Poisson:
   \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count.
 }

-Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field 
-are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field 
-where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field 
+Specify priors for the regression parameters, field and iid effect as a single list. Hyperpriors for the field
+are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field
+where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field
 where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect.

-The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. 
-The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. 
+The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively.
+The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}.
 The link function can be one of \emph{logit}, \emph{log} or \emph{identity}.
 These are specified as strings.

@@ -133,16 +133,16 @@ The \emph{silent} argument can be used to publish/suppress verbose output. Defau
   cl <- parallel::makeCluster(2)
   doParallel::registerDoParallel(cl)
-  test_data <- prepare_data(polygon_shapefile = spdf, 
+  test_data <- prepare_data(polygon_shapefile = spdf,
                             covariate_rasters = cov_rasters)
   parallel::stopCluster(cl)
   foreach::registerDoSEQ()
-  
+
   result <- fit_model(test_data, iterations = 2)
  }
}
\references{
-Nanda et al. (2023) disaggregation: An R Package for Bayesian 
+Nanda et al. (2023) disaggregation: An R Package for Bayesian
 Spatial Disaggregation Modeling.
}
diff --git a/man/getCovariateRasters.Rd b/man/getCovariateRasters.Rd
index eeb6cc0..fcac9b0 100644
--- a/man/getCovariateRasters.Rd
+++ b/man/getCovariateRasters.Rd
@@ -14,10 +14,10 @@ getCovariateRasters(directory, file_pattern = ".tif$", shape)

 \item{shape}{An object with an extent that the rasters will be cropped to.}
 }
 \value{
-A RasterStack of the raster files in the directory
+A multi-layered SpatRaster of the raster files in the directory
 }
 \description{
-Looks in a specified folder for raster files. Returns a RasterStack of the rasters cropped to the extent specified by the shape parameter.
+Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter.
 }
 \examples{
 \dontrun{
diff --git a/man/getPolygonData.Rd b/man/getPolygonData.Rd
index 3b57903..e6ec032 100644
--- a/man/getPolygonData.Rd
+++ b/man/getPolygonData.Rd
@@ -12,7 +12,7 @@ getPolygonData(
 )
 }
 \arguments{
-\item{shape}{A SpatialPolygons object containing response data.}
+\item{shape}{An sf object containing response data.}

 \item{id_var}{Name of column in shape object with the polygon id. Default 'area_id'.}

@@ -22,27 +22,28 @@ getPolygonData(
 }
 \value{
 A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the
-polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does 
} \description{ Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the -polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does +polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. } \examples{ { - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } +polygons <- list() +for(i in 1:100) { + row <- ceiling(i/10) + col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) +polys <- lapply(polygons,sf::st_polygon) +response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) +spdf <- sf::st_sf(response_df, geometry = polys) getPolygonData(spdf, id_var = 'area_id', response_var = 'response') } diff --git a/man/getStartendindex.Rd b/man/getStartendindex.Rd index 6848ada..e30350e 100644 --- a/man/getStartendindex.Rd +++ b/man/getStartendindex.Rd @@ -19,12 +19,12 @@ covariate data that corresponds to that polygon, the second column is the index covariate data that corresponds to that polygon. } \description{ -From the covariate data and polygon data, the function matches the polygon id between the two to find +From the covariate data and polygon data, the function matches the polygon id between the two to find which pixels from the covariate data are contained in each of the polygons. } \details{ -Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, -and another data.frame containing polygon data with a polygon id, response and sample size column (as returned +Takes a data.frame containing the covariate data with a polygon id column and one column for each covariate, +and another data.frame containing polygon data with a polygon id, response and sample size column (as returned by \code{getPolygonData} function). Returns a matrix with two columns and one row for each polygon. The first column is the index of the first row in diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index b040041..d855661 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -33,7 +33,7 @@ make_model_object( The TMB model object returned by \code{\link[TMB]{MakeADFun}}. } \description{ -\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} +\emph{make_model_object} function takes a \emph{disag_data} object created by \code{\link{prepare_data}} and creates a TMB model object to be used in fitting. 
} \details{ @@ -48,20 +48,20 @@ And then aggregates these predictions to the polygon level using the weighted su The different likelihood correspond to slightly different models (\eqn{y_j}{yi} is the response count data): \itemize{ - \item Gaussian: - If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where + \item Gaussian: + If \eqn{\sigma} is the dispersion of the pixel data, \eqn{\sigma_j}{\sigmaj} is the dispersion of the polygon data, where \eqn{\sigma_j = \sigma \sqrt{\sum agg_i^2} / \sum agg_i }{\sigmaj = \sigma x { \sqrt \sum (aggi ^ 2) } / \sum aggi} \deqn{dnorm(y_j/\sum agg_i, rate_j, \sigma_j)}{dnorm(yj / \sum aggi, ratej, \sigmaj)} - predicts incidence rate. - \item Binomial: + \item Binomial: For a survey in polygon j, \eqn{y_j}{yj} is the number positive and \eqn{N_j}{Nj} is the number tested. \deqn{dbinom(y_j, N_j, rate_j)}{dbinom(yj, Nj, ratej)} - predicts prevalence rate. - \item Poisson: + \item Poisson: \deqn{dpois(y_j, cases_j)}{dpois(yj, casesj)} - predicts incidence count. } -Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field -are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field -where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field +Specify priors for the regression parameters, field and iid effect as a single named list. Hyperpriors for the field +are given as penalised complexity priors you specify \eqn{\rho_{min}} and \eqn{\rho_{prob}} for the range of the field +where \eqn{P(\rho < \rho_{min}) = \rho_{prob}}, and \eqn{\sigma_{min}} and \eqn{\sigma_{prob}} for the variation of the field where \eqn{P(\sigma > \sigma_{min}) = \sigma_{prob}}. Also, specify pc priors for the iid effect. The precise names and default values for these priors are: @@ -78,8 +78,8 @@ The precise names and default values for these priors are: \item prior_iideffect_sd_prob: 0.01 } -The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. -The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. +The \emph{family} and \emph{link} arguments are used to specify the likelihood and link function respectively. +The likelihood function can be one of \emph{gaussian}, \emph{poisson} or \emph{binomial}. The link function can be one of \emph{logit}, \emph{log} or \emph{identity}. These are specified as strings. @@ -113,12 +113,12 @@ The \emph{silent} argument can be used to publish/supress verbose output. Defaul cl <- parallel::makeCluster(2) doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, + test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_rasters) parallel::stopCluster(cl) foreach::registerDoSEQ() - + result <- make_model_object(test_data) } - + } diff --git a/man/parallelExtract.Rd b/man/parallelExtract.Rd deleted file mode 100644 index 52be889..0000000 --- a/man/parallelExtract.Rd +++ /dev/null @@ -1,58 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/extract.R -\name{parallelExtract} -\alias{parallelExtract} -\title{Parallel extraction of raster stack by shape file.} -\usage{ -parallelExtract(raster, shape, fun = mean, id = "OBJECTID", ...) 
-} -\arguments{ -\item{raster}{A RasterBrick or RasterStack object.} - -\item{shape}{A SpatialPolygons object.} - -\item{fun}{The function used to aggregate the pixel data. If NULL, raw pixel data is returned.} - -\item{id}{Name of column in shape object to be used to bind an ID column to output.} - -\item{...}{Other arguments to raster::extract.} -} -\value{ -A data.frame with columns of polygon id, cell id (if fun = NULL) and a column for each raster in the stack -} -\description{ -Parallelisation is performed across rasters, not shapes. -So this function is only useful if you are extracting -data from many raster layers. -As the overhead for parallel computation in windows is high -it only makes sense to parallelise in this way. -} -\examples{ - \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - result <- parallelExtract(cov_rasters, spdf, fun = NULL, id = 'area_id') - parallel::stopCluster(cl) - foreach::registerDoSEQ() - } -} diff --git a/man/predict.disag_model.Rd b/man/predict.disag_model.Rd index a94b1ce..edea4ac 100644 --- a/man/predict.disag_model.Rd +++ b/man/predict.disag_model.Rd @@ -9,7 +9,7 @@ \arguments{ \item{object}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data.} \item{predict_iid}{logical. If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -21,14 +21,14 @@ If this is a raster stack or brick, predictions will be made over this data.} \item{...}{Further arguments passed to or from other methods.} } \value{ -An object of class \emph{disag_prediction} which consists of a list of two objects: +An object of class \emph{disag_prediction} which consists of a list of two objects: \item{mean_prediction }{List of: \itemize{ \item \emph{prediction} Raster of mean predictions based. \item \emph{field} Raster of the field component of the linear predictor. \item \emph{iid} Raster of the iid component of the linear predictor. \item \emph{covariates} Raster of the covariate component of the linear predictor. - }} + }} \item{uncertainty_prediction: }{List of: \itemize{ \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. 
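The parallelExtract() helper deleted above is superseded in later commits of this series by a plain terra::extract() call; a minimal sketch of the form the converted tests use, assuming cov_stack is a multi-layer SpatRaster and spdf an sf polygon object as built in the test fixtures.

# ID = TRUE prepends a polygon index column and cells = TRUE appends the
# cell number of each extracted pixel, giving one row per pixel per polygon
cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE)
names(cov_data)[1] <- 'area_id'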
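And as a worked instance of the Gaussian dispersion formula documented in the fit_model and make_model_object help above (all numbers invented for illustration):

sigma <- 2                # pixel-level dispersion
agg <- c(10, 20, 5, 15)   # aggregation (e.g. population) values for one polygon

# sigma_j = sigma * sqrt(sum(agg_i^2)) / sum(agg_i)
sigma_j <- sigma * sqrt(sum(agg^2)) / sum(agg)
sigma_j                   # 2 * sqrt(750) / 50, approximately 1.095
# The polygon-level likelihood is then dnorm(y_j / sum(agg), rate_j, sigma_j)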
@@ -36,17 +36,17 @@ An object of class \emph{disag_prediction} which consists of a list of two objec }} } \description{ -\emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and +\emph{predict.disag_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts mean and uncertainty maps. } \details{ -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. -The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. +The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. -For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated +For the uncertainty calculations, the number of the realisations and the size of the confidence interval to be calculated are given by the arguments \emph{N} and \emph{CI} respectively. } \examples{ diff --git a/man/predict_model.Rd b/man/predict_model.Rd index 93a9647..3fbbc26 100644 --- a/man/predict_model.Rd +++ b/man/predict_model.Rd @@ -9,7 +9,7 @@ predict_model(model_output, newdata = NULL, predict_iid = FALSE) \arguments{ \item{model_output}{disag_model object returned by disag_model function} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. If this is a raster stack or brick, predictions will be made over this data. Default NULL.} \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.} @@ -24,15 +24,15 @@ The mean prediction, which is a list of: } } \description{ -\emph{predict_model} function takes a \emph{disag_model} object created by +\emph{predict_model} function takes a \emph{disag_model} object created by \emph{disaggregation::disag_model} and predicts mean maps. } \details{ Function returns rasters of the mean predictions as well as the covariate and field contributions to the linear predictor. -To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +To predict over a different spatial extent to that used in the model, +a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_uncertainty.Rd b/man/predict_uncertainty.Rd index b51c09e..5297230 100644 --- a/man/predict_uncertainty.Rd +++ b/man/predict_uncertainty.Rd @@ -15,7 +15,7 @@ predict_uncertainty( \arguments{ \item{model_output}{disag_model object returned by disag_model function.} -\item{newdata}{If NULL, predictions are made using the data in model_output. +\item{newdata}{If NULL, predictions are made using the data in model_output. 
If this is a raster stack or brick, predictions will be made over this data. Default NULL.}

 \item{predict_iid}{If TRUE, any polygon iid effect from the model will be used in the prediction. Default FALSE.}
@@ -32,17 +32,17 @@ The uncertainty prediction, which is a list of:
 }
}
\description{
-\emph{predict_uncertainty} function takes a \emph{disag_model} object created by 
+\emph{predict_uncertainty} function takes a \emph{disag_model} object created by
 \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps.
}
\details{
 Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters.

-To predict over a different spatial extent to that used in the model, 
-a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. 
+To predict over a different spatial extent to that used in the model,
+a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}.
 If this is not given predictions are made over the data used in the fit.

-The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. 
+The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction.

 The number of the realisations and the size of the confidence interval to be calculated
 are given by the arguments \emph{N} and \emph{CI} respectively.
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index e66a503..83792d2 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -18,8 +18,6 @@ prepare_data(
 )
}
\arguments{
-\item{polygon_shapefile}{SpatialPolygonDataFrame containing at least two columns: one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).}
-
 \item{covariate_rasters}{RasterStack of covariate rasters to be used in the model.}

 \item{aggregation_raster}{Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.}
@@ -37,13 +35,15 @@ prepare_data(

 \item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.}

 \item{ncores}{Number of cores used to perform covariate extraction.}
+
+\item{x}{sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).}
}
\value{
-A list is returned of class \code{disag_data}. 
-The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. 
+A list is returned of class \code{disag_data}.
+The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
The list of class \code{disag_data} contains:
-  \item{polygon_shapefile }{The SpatialPolygonDataFrame used as an input.} 
-  \item{covariate_rasters }{The RasterStack used as an input.} 
+  \item{x }{The sf object used as an input.}
+  \item{covariate_rasters }{The SpatRaster used as an input.}
  \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.}
  \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.}
  \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.}
@@ -53,57 +53,59 @@ The list of class \code{disag_data} contains:
  \item{mesh }{An INLA mesh to be used for the spatial field of the disaggregation model.}
}
\description{
-\emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. 
+\emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model.
 Designed to be used in the \emph{disaggregation::fit_model} function.
}
\details{
-Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. 
+Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates.

-Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons 
-(\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores 
+Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons
+(\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores
 to use for covariate extraction. This can be the number of covariates used in the model.

-The aggregation raster defines how the pixels within each polygon are aggregated. 
+The aggregation raster defines how the pixels within each polygon are aggregated.
 The disaggregation model performs a weighted sum of the pixel prediction, weighted by the pixel values in the aggregation raster.
-For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases 
-(rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions 
+For disease incidence rate you use the population raster to aggregate pixel incidence rate by summing the number of cases
+(rate weighted by population). If no aggregation raster is provided a uniform distribution is assumed, i.e. the pixel predictions
 are aggregated to polygon level by summing the pixel values.

-Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field 
+Makes a matrix that contains the start and end pixel index for each polygon. Builds an INLA mesh to use for the spatial field
 (\emph{getStartendindex} function).

-The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field 
+The \emph{mesh.args} argument allows you to supply a list of INLA mesh parameters to control the mesh used for the spatial field
 (\emph{build_mesh} function).

-The \emph{na.action} flag is automatically off. If there are any NAs in the response or covariate data within the polygons the 
+\emph{prepare_data} method will error. Ideally the NAs in the data would be dealt with beforehand, however, setting na.action = TRUE 
+will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero 
 and sets covariate NA pixels to the median value for that covariate.
}
\examples{
\donttest{
-  polygons <- list()
-  for(i in 1:100) {
+polygons <- list()
+for(i in 1:100) {
   row <- ceiling(i/10)
   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-  polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
-  }
-
-  polys <- do.call(raster::spPolygons, polygons)
-  response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
-  spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
-  r <- raster::raster(ncol=20, nrow=20)
-  r <- raster::setExtent(r, raster::extent(spdf))
-  r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
-  r2 <- raster::raster(ncol=20, nrow=20)
-  r2 <- raster::setExtent(r2, raster::extent(spdf))
-  r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
-  cov_rasters <- raster::stack(r, r2)
-
-  test_data <- prepare_data(polygon_shapefile = spdf, 
-                            covariate_rasters = cov_rasters)
-}
-
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}
+
+polys <- lapply(polygons,sf::st_polygon)
+response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
+spdf <- sf::st_sf(response_df,geometry=polys)
+
+r <- terra::rast(nrow=20,ncol=20)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
+
+r2 <- terra::rast(nrow=20,ncol=20)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
+cov_rasters <- c(r, r2)
+
+test_data <- prepare_data(x = spdf,
+                          covariate_rasters = cov_rasters)
+}
+
}
diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R
index f1b76b4..9a46eed 100644
--- a/tests/testthat/test-fit-model.R
+++ b/tests/testthat/test-fit-model.R
@@ -62,11 +62,11 @@ test_that("disag_model behaves as expected", {
   skip_if_not_installed('INLA')
   skip_on_cran()

-  result <- disag_model(test_data, iterations = 2, iid = FALSE)
+  result <- disag_model(test_data, iterations = 100, iid = FALSE)

   expect_is(result, 'disag_model')
   expect_equal(length(result), 5)
-  expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 4)
+  expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 4)

   expect_equal(unique(names(result$sd_out$par.random)), c("nodemean"))

@@ -85,13 +85,13 @@ test_that("disag_model with 1 covariate behaves as expected", {

   test_data2$covariate_rasters <- test_data2$covariate_rasters[[1]]
   test_data2$covariate_data <- test_data2$covariate_data[, 1:3]

-  result <- disag_model(test_data2, iterations = 2, iid = FALSE)
+  result <- disag_model(test_data2, iterations = 100, iid = FALSE)

   expect_is(result, 'disag_model')
   expect_equal(length(result), 5)

   # Should be intercept, 1 slope, tau gaussian, and 2 for space. None for iid anymore.
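The raster-to-terra substitutions recurring through these test edits, collected as one short sketch (toy sizes and extents assumed for illustration):

library(terra)
r <- terra::rast(ncols = 20, nrows = 20)  # was raster::raster(ncol = 20, nrow = 20)
terra::ext(r) <- c(0, 20, 0, 20)          # was raster::setExtent(r, extent)
r[] <- stats::rnorm(terra::ncell(r))      # cell assignment is unchanged
cov_stack <- c(r, r)                      # was raster::stack(r, r2)
terra::nlyr(cov_stack)                    # was raster::nlayers(cov_stack); returns 2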
- expect_equal(length(result$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 3) + expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 3) expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) # Confirm only two covariates were fitted. @@ -107,15 +107,15 @@ test_that("user defined model setup is working as expected", { covariate_rasters = cov_stack, sample_size_var = 'sample_size') - result2 <- disag_model(test_data, iterations = 2, field = FALSE, family = 'poisson', link = 'log') - result3 <- disag_model(binom_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit') - result4 <- disag_model(test_data, iterations = 2, field = FALSE, iid = FALSE, link = 'identity') + result2 <- disag_model(test_data, iterations = 100, field = FALSE, family = 'poisson', link = 'log') + result3 <- disag_model(binom_data, iterations = 100, iid = FALSE, family = 'binomial', link = 'logit') + result4 <- disag_model(test_data, iterations = 100, field = FALSE, iid = FALSE, link = 'identity') - expect_error(disag_model(test_data, iterations = 2, iid = FALSE, family = 'binomial', link = 'logit')) + expect_error(disag_model(test_data, iterations = 100, iid = FALSE, family = 'binomial', link = 'logit')) expect_is(result2, 'disag_model') expect_equal(length(result2), 5) - expect_equal(length(result2$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(length(result2$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 2) expect_equal(unique(names(result2$sd_out$par.random)), c("iideffect")) expect_false(result2$model_setup$field) expect_true(result2$model_setup$iid) @@ -124,7 +124,7 @@ test_that("user defined model setup is working as expected", { expect_is(result3, 'disag_model') expect_equal(length(result3), 5) - expect_equal(length(result3$sd_out$par.fixed), raster::nlayers(binom_data$covariate_rasters) + 3) + expect_equal(length(result3$sd_out$par.fixed), terra::nlyr(binom_data$covariate_rasters) + 3) expect_equal(unique(names(result3$sd_out$par.random)), c("nodemean")) expect_true(result3$model_setup$field) expect_false(result3$model_setup$iid) @@ -133,7 +133,7 @@ test_that("user defined model setup is working as expected", { expect_is(result4, 'disag_model') expect_equal(length(result4), 5) - expect_equal(length(result4$sd_out$par.fixed), raster::nlayers(test_data$covariate_rasters) + 2) + expect_equal(length(result4$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 2) expect_equal(unique(names(result4$sd_out$par.random)), NULL) expect_false(result4$model_setup$field) expect_false(result4$model_setup$iid) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index fab8f87..3bd32f5 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -9,21 +9,23 @@ for(i in 1:n_polygons) { row <- ceiling(i/n_polygon_per_side) col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) } -polys <- do.call(raster::spPolygons, polygons) +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- 
sp::SpatialPolygonsDataFrame(polys, response_df) +spdf <- sf::st_sf(response_df, geometry = polys) # Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) if(identical(Sys.getenv("NOT_CRAN"), "true")) { test_data <- prepare_data(polygon_shapefile = spdf, @@ -63,18 +65,18 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) - expect_is(pred2$mean_prediction$prediction, 'Raster') - expect_is(pred2$mean_prediction$field, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') + expect_is(pred2$mean_prediction$field, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$iid)) - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 100) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) pred2 <- predict(result, predict_iid = TRUE, N = 10) @@ -85,18 +87,18 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$mean_prediction$prediction, 'Raster') - expect_is(pred2$mean_prediction$field, 'Raster') - expect_is(pred2$mean_prediction$iid, 'Raster') - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') + expect_is(pred2$mean_prediction$field, 'SpatRaster') + expect_is(pred2$mean_prediction$iid, 'SpatRaster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - 
expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 10) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 10) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) # For a model with no field or iid @@ -111,18 +113,18 @@ test_that("Check predict.disag_model function works as expected", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) - expect_is(pred2$mean_prediction$prediction, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$field)) expect_true(is.null(pred2$mean_prediction$iid)) - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 100) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 100) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) }) @@ -145,7 +147,7 @@ test_that("Check predict.disag_model function works with newdata", { prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) + newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) pred1 <- predict(result) pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) @@ -156,21 +158,21 @@ test_that("Check predict.disag_model function works with newdata", { expect_is(pred2$mean_prediction, 'list') expect_equal(length(pred2$mean_prediction), 4) expect_equal(names(pred2$mean_prediction), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$mean_prediction$prediction, 'Raster') + expect_is(pred2$mean_prediction$prediction, 'SpatRaster') expect_true(is.null(pred2$mean_prediction$field)) - expect_is(pred2$mean_prediction$iid, 'Raster') - expect_is(pred2$mean_prediction$covariates, 'Raster') + expect_is(pred2$mean_prediction$iid, 'SpatRaster') + expect_is(pred2$mean_prediction$covariates, 'SpatRaster') expect_is(pred2$uncertainty_prediction, 'list') expect_equal(length(pred2$uncertainty_prediction), 2) expect_equal(names(pred2$uncertainty_prediction), c('realisations', 'predictions_ci')) - expect_is(pred2$uncertainty_prediction$realisations, 'RasterStack') - expect_is(pred2$uncertainty_prediction$predictions_ci, 'RasterBrick') - expect_equal(raster::nlayers(pred2$uncertainty_prediction$realisations), 5) - expect_equal(raster::nlayers(pred2$uncertainty_prediction$predictions_ci), 2) + expect_is(pred2$uncertainty_prediction$realisations, 'SpatRaster') + expect_is(pred2$uncertainty_prediction$predictions_ci, 'SpatRaster') + 
expect_equal(terra::nlyr(pred2$uncertainty_prediction$realisations), 5) + expect_equal(terra::nlyr(pred2$uncertainty_prediction$predictions_ci), 2) - expect_false(identical(raster::extent(pred1$mean_prediction$prediction), raster::extent(pred2$mean_prediction$prediction))) - expect_false(identical(raster::extent(pred1$uncertainty_prediction$realisations), raster::extent(pred2$uncertainty_prediction$realisations))) + expect_false(identical(terra::ext(pred1$mean_prediction$prediction), terra::ext(pred2$mean_prediction$prediction))) + expect_false(identical(terra::ext(pred1$uncertainty_prediction$realisations), terra::ext(pred2$uncertainty_prediction$realisations))) }) @@ -181,13 +183,13 @@ test_that('Check that check_newdata works', { result <- disag_model(test_data, field = FALSE, iterations = 100) - newdata <- raster::crop(raster::stack(r, r2), c(0, 10, 0, 10)) + newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) nd1 <- check_newdata(newdata, result) - expect_is(nd1, 'RasterBrick') + expect_is(nd1, 'SpatRaster') nn <- newdata[[1]] names(nn) <- 'extra_uneeded' - newdata2 <- raster::stack(newdata, nn) + newdata2 <- c(newdata, nn) expect_error(check_newdata(newdata2, result), NA) newdata3 <- newdata[[1]] @@ -226,7 +228,7 @@ test_that('Check that setup_objects works', { expect_is(objects$field_objects, 'list') expect_true(is.null(objects$iid_objects)) - newdata <- raster::crop(raster::stack(r, r2), c(0, 180, -90, 90)) + newdata <- terra::crop(c(r, r2), c(0, 180, -90, 90)) objects2 <- setup_objects(result, newdata) expect_is(objects2, 'list') @@ -276,10 +278,10 @@ test_that('Check that predict_single_raster works', { expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$prediction, 'Raster') - expect_is(pred2$field, 'Raster') + expect_is(pred2$prediction, 'SpatRaster') + expect_is(pred2$field, 'SpatRaster') expect_true(is.null(pred2$iid)) - expect_is(pred2$covariates, 'Raster') + expect_is(pred2$covariates, 'SpatRaster') objects2 <- setup_objects(result, predict_iid = TRUE) @@ -290,10 +292,10 @@ test_that('Check that predict_single_raster works', { expect_is(pred2, 'list') expect_equal(length(pred2), 4) expect_equal(names(pred2), c('prediction', 'field', 'iid', 'covariates')) - expect_is(pred2$prediction, 'Raster') - expect_is(pred2$field, 'Raster') - expect_is(pred2$iid, 'Raster') - expect_is(pred2$covariates, 'Raster') + expect_is(pred2$prediction, 'SpatRaster') + expect_is(pred2$field, 'SpatRaster') + expect_is(pred2$iid, 'SpatRaster') + expect_is(pred2$covariates, 'SpatRaster') }) diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 6af1b94..3702171 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -17,9 +17,11 @@ for(i in 1:n_polygons) { polys <- lapply(polygons,sf::st_polygon) N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) # Create raster stack From 4130016a543a06f811ab583bf25d8e6486c3da7d Mon Sep 17 00:00:00 2001 From: 
simon-smart88 Date: Tue, 10 Oct 2023 15:18:50 +0100 Subject: [PATCH 27/55] fix predict tests and remove some dependencies --- DESCRIPTION | 3 --- tests/testthat/test-predict-model.R | 13 +++++++------ 2 files changed, 7 insertions(+), 9 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index faddfb0..5f757ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -20,8 +20,6 @@ RoxygenNote: 7.2.3 Imports: foreach, sp, - parallel, - doParallel, splancs, Matrix, stats, @@ -37,7 +35,6 @@ Imports: Additional_repositories: https://inla.r-inla-download.org/R/stable Suggests: testthat, - INLA, knitr, rmarkdown, SpatialEpi diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 3bd32f5..79b2960 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -27,6 +27,8 @@ terra::ext(r2) <- terra::ext(spdf) r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') + if(identical(Sys.getenv("NOT_CRAN"), "true")) { test_data <- prepare_data(polygon_shapefile = spdf, covariate_rasters = cov_stack) @@ -38,7 +40,6 @@ if(identical(Sys.getenv("NOT_CRAN"), "true")) { test_that("Check predict.disag_model function works as expected", { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 1000, @@ -132,7 +133,6 @@ test_that("Check predict.disag_model function works as expected", { test_that("Check predict.disag_model function works with newdata", { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, @@ -148,6 +148,7 @@ test_that("Check predict.disag_model function works with newdata", { prior_iideffect_sd_prob = 0.01)) newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) + names(newdata) <- c('layer1', 'layer2') pred1 <- predict(result) pred2 <- predict(result, newdata, predict_iid = TRUE, N = 5) @@ -178,17 +179,18 @@ test_that("Check predict.disag_model function works with newdata", { test_that('Check that check_newdata works', { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iterations = 100) newdata <- terra::crop(c(r, r2), c(0, 10, 0, 10)) + names(newdata) <- c('layer1', 'layer2') + nd1 <- check_newdata(newdata, result) expect_is(nd1, 'SpatRaster') nn <- newdata[[1]] - names(nn) <- 'extra_uneeded' + names(nn) <- 'extra_unneeded' newdata2 <- c(newdata, nn) expect_error(check_newdata(newdata2, result), NA) @@ -203,7 +205,6 @@ test_that('Check that check_newdata works', { test_that('Check that setup_objects works', { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, @@ -229,6 +230,7 @@ test_that('Check that setup_objects works', { expect_true(is.null(objects$iid_objects)) newdata <- terra::crop(c(r, r2), c(0, 180, -90, 90)) + names(newdata) <- c('layer1', 'layer2') objects2 <- setup_objects(result, newdata) expect_is(objects2, 'list') @@ -249,7 +251,6 @@ test_that('Check that setup_objects works', { test_that('Check that predict_single_raster works', { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, From aca877b332cab4b048c481e18a2a70f48516fee9 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 10 Oct 2023 15:37:56 +0100 Subject: [PATCH 28/55] move test data to helper_data --- tests/testthat/helper_data.R | 35 +++++++++++ tests/testthat/test-build-mesh.R | 24 +------- tests/testthat/test-extract.R | 
41 ------------- tests/testthat/test-fit-model.R | 45 -------------- tests/testthat/test-plotting.R | 93 +++++++++------------------- tests/testthat/test-predict-model.R | 38 ------------ tests/testthat/test-prepare-data.R | 34 ----------- tests/testthat/test-summary.R | 94 +++++++++-------------------- 8 files changed, 96 insertions(+), 308 deletions(-) create mode 100644 tests/testthat/helper_data.R diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R new file mode 100644 index 0000000..01f1f0c --- /dev/null +++ b/tests/testthat/helper_data.R @@ -0,0 +1,35 @@ +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) + xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) +response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) + +spdf <- sf::st_sf(response_df, geometry = polys) +spdf_na <- sf::st_sf(response_na_df, geometry = polys) +spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') + +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) diff --git a/tests/testthat/test-build-mesh.R b/tests/testthat/test-build-mesh.R index 97e1d98..8bd19a2 100644 --- a/tests/testthat/test-build-mesh.R +++ b/tests/testthat/test-build-mesh.R @@ -2,26 +2,8 @@ context("Build mesh") test_that("build_mesh behaves as expected", { - - skip_if_not_installed('INLA') + skip_on_cran() - - polygons <- list() - n_polygon_per_side <- 10 - n_polygons <- n_polygon_per_side * n_polygon_per_side - n_pixels_per_side <- n_polygon_per_side * 2 - - for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } - - polys <- do.call(raster::spPolygons, polygons) - - response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) my_mesh <- build_mesh(spdf) @@ -29,5 +11,5 @@ test_that("build_mesh behaves as expected", { expect_error(build_mesh(spdf, mesh.args = c(4, 8))) expect_is(my_mesh, 'inla.mesh') expect_is(build_mesh(spdf, mesh.args = list(max.edge = c(50, 100))), 'inla.mesh') - -}) \ 
No newline at end of file + +}) diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 8a0e253..74e16f5 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -1,36 +1,6 @@ context("Extract covariates and polygon data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sf::st_sf(response_df, geometry = polys) -spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - test_that("getPolygonData function", { skip_on_cran() @@ -64,11 +34,6 @@ test_that("getCovariateData function gives errors when it should", { expect_error(getCovariateRasters('/home/rasters', '.tif$', spdf)) # Save .tif files in tempdir() - r <- terra::rast(ncol=20, nrow=20) - r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) - r2 <- terra::rast(ncol=20, nrow=20) - r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_stack <- c(r, r2) terra::writeRaster(r, paste0(tempdir(), '/cov1.tif'), overwrite = TRUE) terra::writeRaster(r2, paste0(tempdir(), '/cov2.tif'), overwrite = TRUE) @@ -80,12 +45,6 @@ test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - # cl <- parallel::makeCluster(2) - # doParallel::registerDoParallel(cl) - # cov_data <- parallelExtract(cov_stack, spdf, fun = NULL, id = ) - # parallel::stopCluster(cl) - # foreach::registerDoSEQ() - cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) names(cov_data)[1] <- 'area_id' diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 9a46eed..4c7a384 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -1,51 +1,8 @@ context("Fitting model") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- 
data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sf::st_sf(response_df, geometry = polys) -spdf_na <- sf::st_sf(response_na_df, geometry = polys) -spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("disag_model produces errors when expected", { - skip_if_not_installed('INLA') skip_on_cran() expect_error(disag_model(list())) @@ -59,7 +16,6 @@ test_that("disag_model produces errors when expected", { test_that("disag_model behaves as expected", { - skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, iid = FALSE) @@ -70,7 +26,6 @@ test_that("disag_model behaves as expected", { expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - }) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 7e4e567..e4f32f6 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -1,111 +1,74 @@ context("Plotting data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_df2 <- data.frame(area_id = 1:n_polygons, n_positive = runif(n_polygons, min = 0, max = 1), sample_size = floor(runif(n_polygons, min = 1, max = 100))) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -spdf2 <- sp::SpatialPolygonsDataFrame(polys, response_df2) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) -r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data 
<- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check plot_polygon_data function works as expected", { - + skip_on_cran() - + p <- plot_polygon_data(spdf, list(id_var = 'area_id', response_var = 'response')) expect_error(plot_polygon_data(polys, list(id_var = 'area_id', response_var = 'response'))) expect_is(p, 'ggplot') - + p2 <- plot_polygon_data(spdf2, list(id_var = 'area_id', response_var = 'n_positive')) expect_is(p2, 'ggplot') - + }) test_that("Check plot.disag.data function works as expected", { - - skip_if_not_installed('INLA') + skip_on_cran() - - test_data2 <- prepare_data(polygon_shapefile = spdf2, + + test_data2 <- prepare_data(polygon_shapefile = spdf2, covariate_rasters = cov_stack, response_var = 'n_positive') - + p <- plot(test_data) - + expect_is(p, 'list') expect_equal(length(p), 3) expect_equal(names(p), c('polygon', 'covariates', 'mesh')) - + p2 <- plot(test_data2) - + expect_is(p2, 'list') expect_equal(length(p2), 3) expect_equal(names(p2), c('polygon', 'covariates', 'mesh')) - + p3 <- plot(test_data, which = c(1,3)) - + expect_is(p3, 'list') expect_equal(length(p3), 2) expect_equal(names(p3), c('polygon', 'mesh')) - + }) test_that("Check plot.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + fit_result <- disag_model(test_data, iterations = 10) - + fit_result_nofield <- disag_model(test_data, iterations = 10, field = FALSE) - + p1 <- plot(fit_result) - + p2 <- plot(fit_result_nofield) - + expect_is(p1, 'list') expect_equal(length(p1), 2) - + expect_is(p2, 'list') expect_equal(length(p2), 2) - - + + }) test_that("Check plot.disag_prediction function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - - + + fit_result <- disag_model(test_data, iterations = 1000, iid = TRUE, field = TRUE, @@ -123,8 +86,8 @@ test_that("Check plot.disag_prediction function works as expected", { prior_iideffect_sd_prob = 0.01)) pred <- predict(fit_result) p <- plot(pred) - + expect_is(p, 'trellis') - + }) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 79b2960..ae95e13 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -1,43 +1,5 @@ context("Predict model") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- sf::st_sf(response_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - -names(cov_stack) <- c('layer1', 
'layer2') - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check predict.disag_model function works as expected", { skip_on_cran() diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R index 3702171..bef721e 100644 --- a/tests/testthat/test-prepare-data.R +++ b/tests/testthat/test-prepare-data.R @@ -1,39 +1,5 @@ - context("Preparing data") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), - c(ymax, ymax, ymin, ymin, ymax))) -} - -polys <- lapply(polygons,sf::st_polygon) -N <- floor(runif(n_polygons, min = 1, max = 100)) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) -response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) - -spdf <- sf::st_sf(response_df, geometry = polys) -spdf_na <- sf::st_sf(response_na_df, geometry = polys) -spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) - -# Create raster stack -r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r) <- terra::ext(spdf) -r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -terra::ext(r2) <- terra::ext(spdf) -r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- c(r, r2) - - test_that("Check prepare_data function works as expected", { skip_if_not_installed('INLA') diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R index c913ecf..56961be 100644 --- a/tests/testthat/test-summary.R +++ b/tests/testthat/test-summary.R @@ -1,45 +1,11 @@ context("Summary functions") -polygons <- list() -n_polygon_per_side <- 10 -n_polygons <- n_polygon_per_side * n_polygon_per_side -n_pixels_per_side <- n_polygon_per_side * 2 - -for(i in 1:n_polygons) { - row <- ceiling(i/n_polygon_per_side) - col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) - xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -} - -polys <- do.call(raster::spPolygons, polygons) -response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) -spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - -# Create raster stack -r <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r <- raster::setExtent(r, raster::extent(spdf)) -r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) -r2 <- raster::raster(ncol=n_pixels_per_side, nrow=n_pixels_per_side) -r2 <- raster::setExtent(r2, raster::extent(spdf)) 
-r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) -cov_stack <- raster::stack(r, r2) - -if(identical(Sys.getenv("NOT_CRAN"), "true")) { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack) -} else { - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_stack, - makeMesh = FALSE) -} - test_that("Check summary.disag_data function works as expected", { - + skip_on_cran() - + data_summary <- summary(test_data) - + expect_is(data_summary, 'list') expect_equal(length(data_summary), 3) expect_equal(names(data_summary), c('number_polygons', 'number_covariates', 'covariate_summary')) @@ -47,29 +13,29 @@ test_that("Check summary.disag_data function works as expected", { expect_is(data_summary$number_covariates, 'integer') expect_is(data_summary$covariate_summary, 'table') expect_equal(ncol(data_summary$covariate_summary), data_summary$number_covariates) - + }) test_that("Check print.disag_data function works as expected", { - + skip_on_cran() - + print_output <- print(test_data) - + expect_is(print_output, 'disag_data') expect_equal(print_output, test_data) - + }) test_that("Check summary.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iterations = 2) - + model_summary <- summary(result) - + expect_is(model_summary, 'list') expect_equal(length(model_summary), 3) expect_equal(names(model_summary), c('model_params', 'nll', 'metrics')) @@ -77,28 +43,28 @@ test_that("Check summary.disag_model function works as expected", { expect_is(model_summary$nll, 'numeric') expect_is(model_summary$metrics, 'data.frame') expect_equal(dim(model_summary$metrics), c(1, 5)) - + }) test_that("Check print.disag_model function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, field = FALSE, iterations = 2) - + print_output <- print(result) - + expect_is(print_output, 'disag_model') expect_equal(print_output, result) - + }) test_that("Check summary.disag_predictions function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iid = FALSE, iterations = 100, list(priormean_intercept = 0, priorsd_intercept = 0.1, @@ -110,25 +76,25 @@ test_that("Check summary.disag_predictions function works as expected", { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - + pred <- predict(result) - + model_summary <- summary(pred) - + expect_is(model_summary, 'list') expect_equal(length(model_summary), 3) expect_equal(names(model_summary), c('number_realisations', 'range_mean_values', 'range_iqr_values')) expect_is(model_summary$number_realisations, 'integer') expect_is(model_summary$range_mean_values, 'numeric') expect_is(model_summary$range_iqr_values, 'numeric') - + }) test_that("Check print.disag_predictions function works as expected", { - + skip_if_not_installed('INLA') skip_on_cran() - + result <- disag_model(test_data, iid = FALSE, iterations = 100, list(priormean_intercept = 0, priorsd_intercept = 0.1, @@ -140,12 +106,12 @@ test_that("Check print.disag_predictions function works as expected", { prior_sigma_prob = 0.01, prior_iideffect_sd_max = 0.0001, prior_iideffect_sd_prob = 0.01)) - + pred <- predict(result) - + print_output <- print(pred) - + expect_is(print_output, 'disag_prediction') expect_equal(print_output, pred) - -}) \ No newline at end of file + +}) From 
b41253766c49ac68f7ff82004b16b24ddd854dbf Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 10 Oct 2023 15:57:19 +0100 Subject: [PATCH 29/55] update examples and docs. remove sp from imports --- DESCRIPTION | 1 - R/build_mesh.R | 6 +- R/fit_model.R | 106 +++++++++++++++++++---------------- man/build_mesh.Rd | 28 ++++----- man/fit_model.Rd | 53 ++++++++++-------- man/make_model_object.Rd | 53 ++++++++++-------- tests/testthat/helper_data.R | 2 +- 7 files changed, 134 insertions(+), 115 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 5f757ad..21731a5 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -19,7 +19,6 @@ LazyData: true RoxygenNote: 7.2.3 Imports: foreach, - sp, splancs, Matrix, stats, diff --git a/R/build_mesh.R b/R/build_mesh.R index 97a2bcc..1a14713 100644 --- a/R/build_mesh.R +++ b/R/build_mesh.R @@ -29,11 +29,11 @@ #' row <- ceiling(i/10) #' col <- ifelse(i %% 10 != 0, i %% 10, 10) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), -#' c(xmin,ymin), c(xmin, ymax)) +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) #' } #' -#' polys <- sf::st_sfc(sf::st_polygon(polygons)) +#' polys <- lapply(polygons, sf::st_polygon) #' response_df <- data.frame(area_id = 1:100, #' response = runif(100, min = 0, max = 10)) #' spdf <- sf::st_sf(polys, response_df) diff --git a/R/fit_model.R b/R/fit_model.R index dba8bb0..5fc7966 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -72,32 +72,37 @@ #' #' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' polygons <- list() +#' n_polygon_per_side <- 10 +#' n_polygons <- n_polygon_per_side * n_polygon_per_side +#' n_pixels_per_side <- n_polygon_per_side * 2 +#' +#' for(i in 1:n_polygons) { +#' row <- ceiling(i/n_polygon_per_side) +#' col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' N <- floor(runif(n_polygons, min = 1, max = 100)) +#' response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +#' +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' +#' # Create raster stack +#' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +#' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +#' cov_stack <- c(r, r2) +#' names(cov_stack) <- c('layer1', 'layer2') #' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, 
nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() +#' test_data <- prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_stack) #' #' result <- fit_model(test_data, iterations = 2) #' } @@ -264,32 +269,37 @@ disag_model <- function(data, #' #' @examples #' \dontrun{ -#' polygons <- list() -#' for(i in 1:100) { -#' row <- ceiling(i/10) -#' col <- ifelse(i %% 10 != 0, i %% 10, 10) +#' polygons <- list() +#' n_polygon_per_side <- 10 +#' n_polygons <- n_polygon_per_side * n_polygon_per_side +#' n_pixels_per_side <- n_polygon_per_side * 2 +#' +#' for(i in 1:n_polygons) { +#' row <- ceiling(i/n_polygon_per_side) +#' col <- ifelse(i %% n_polygon_per_side != 0, i %% n_polygon_per_side, n_polygon_per_side) #' xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row -#' polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) -#' } +#' polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), +#' c(ymax, ymax, ymin, ymin, ymax))) +#' } +#' +#' polys <- lapply(polygons,sf::st_polygon) +#' N <- floor(runif(n_polygons, min = 1, max = 100)) +#' response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) +#' +#' spdf <- sf::st_sf(response_df, geometry = polys) +#' +#' # Create raster stack +#' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r) <- terra::ext(spdf) +#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)) +#' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +#' terra::ext(r2) <- terra::ext(spdf) +#' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +#' cov_stack <- c(r, r2) +#' names(cov_stack) <- c('layer1', 'layer2') #' -#' polys <- do.call(raster::spPolygons, polygons) -#' response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) -#' spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) -#' -#' r <- raster::raster(ncol=20, nrow=20) -#' r <- raster::setExtent(r, raster::extent(spdf)) -#' r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x %% 20 != 0, x %% 20, 20), 3)) -#' r2 <- raster::raster(ncol=20, nrow=20) -#' r2 <- raster::setExtent(r2, raster::extent(spdf)) -#' r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) -#' cov_rasters <- raster::stack(r, r2) -#' -#' cl <- parallel::makeCluster(2) -#' doParallel::registerDoParallel(cl) -#' test_data <- prepare_data(polygon_shapefile = spdf, -#' covariate_rasters = cov_rasters) -#' parallel::stopCluster(cl) -#' foreach::registerDoSEQ() +#' test_data <- prepare_data(polygon_shapefile = spdf, +#' covariate_rasters = cov_stack) #' #' result <- make_model_object(test_data) #' } diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index 5e17944..fb0df25 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -1,4 +1,3 @@ -<<<<<<< HEAD % Generated by roxygen2: do not edit by hand % Please edit documentation in R/build_mesh.R \name{build_mesh} @@ -8,7 +7,7 @@ build_mesh(shapes, mesh.args = NULL) } \arguments{ -\item{shapes}{shapefile covering the region under 
investigation.}
+\item{shapes}{sf object covering the region under investigation.}

 \item{mesh.args}{list of parameters that control the mesh structure. \emph{convex}, \emph{concave} and \emph{resolution},
 to control the boundary of the inner mesh, and \emph{max.edge}, \emph{cut} and \emph{offset}, to control the mesh itself,
@@ -34,20 +33,21 @@ pars <- list(convex = -0.01, concave = -0.5, resolution = 300, max.edge = c(3.0,
 }
 \examples{
 \dontrun{
- polygons <- list()
- for(i in 1:100) {
-   row <- ceiling(i/10)
-   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
-   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
+polygons <- list()
+for(i in 1:100) {
+  row <- ceiling(i/10)
+  col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
+  xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
+polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                            c(ymax, ymax, ymin, ymin, ymax)))
+}

- polys <- do.call(raster::spPolygons, polygons)
- response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
- spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
+polys <- lapply(polygons, sf::st_polygon)
+response_df <- data.frame(area_id = 1:100,
+                          response = runif(100, min = 0, max = 10))
+spdf <- sf::st_sf(polys, response_df)

- my_mesh <- build_mesh(spdf)
+my_mesh <- build_mesh(spdf)
 }
 }
-
diff --git a/man/fit_model.Rd b/man/fit_model.Rd
index 6a6b2d3..f06c963 100644
--- a/man/fit_model.Rd
+++ b/man/fit_model.Rd
@@ -111,32 +111,37 @@ The \emph{silent} argument can be used to publish/suppress verbose output. Defau
 }
 \examples{
 \dontrun{
- polygons <- list()
- for(i in 1:100) {
-   row <- ceiling(i/10)
-   col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10)
+polygons <- list()
+n_polygon_per_side <- 10
+n_polygons <- n_polygon_per_side * n_polygon_per_side
+n_pixels_per_side <- n_polygon_per_side * 2
+
+for(i in 1:n_polygons) {
+  row <- ceiling(i/n_polygon_per_side)
+  col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side)
   xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row
-   polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin))
- }
+  polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin),
+                              c(ymax, ymax, ymin, ymin, ymax)))
+}
+
+polys <- lapply(polygons,sf::st_polygon)
+N <- floor(runif(n_polygons, min = 1, max = 100))
+response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000))
+
+spdf <- sf::st_sf(response_df, geometry = polys)
+
+# Create raster stack
+r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r) <- terra::ext(spdf)
+r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))
+r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
+terra::ext(r2) <- terra::ext(spdf)
+r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
+cov_stack <- c(r, r2)
+names(cov_stack) <- c('layer1', 'layer2')

- polys <- do.call(raster::spPolygons, polygons)
- response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10))
- spdf <- sp::SpatialPolygonsDataFrame(polys, response_df)
-
- r <- raster::raster(ncol=20, nrow=20)
- r <- raster::setExtent(r, raster::extent(spdf))
- r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3))
- r2 <- raster::raster(ncol=20, nrow=20)
- r2
<- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) result <- fit_model(test_data, iterations = 2) } diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd index d855661..1f12a97 100644 --- a/man/make_model_object.Rd +++ b/man/make_model_object.Rd @@ -91,32 +91,37 @@ The \emph{silent} argument can be used to publish/supress verbose output. Defaul } \examples{ \dontrun{ - polygons <- list() - for(i in 1:100) { - row <- ceiling(i/10) - col <- ifelse(i \%\% 10 != 0, i \%\% 10, 10) +polygons <- list() +n_polygon_per_side <- 10 +n_polygons <- n_polygon_per_side * n_polygon_per_side +n_pixels_per_side <- n_polygon_per_side * 2 + +for(i in 1:n_polygons) { + row <- ceiling(i/n_polygon_per_side) + col <- ifelse(i \%\% n_polygon_per_side != 0, i \%\% n_polygon_per_side, n_polygon_per_side) xmin = 2*(col - 1); xmax = 2*col; ymin = 2*(row - 1); ymax = 2*row - polygons[[i]] <- rbind(c(xmin, ymax), c(xmax,ymax), c(xmax, ymin), c(xmin,ymin)) - } + polygons[[i]] <- list(cbind(c(xmin, xmax, xmax, xmin, xmin), + c(ymax, ymax, ymin, ymin, ymax))) +} + +polys <- lapply(polygons,sf::st_polygon) +N <- floor(runif(n_polygons, min = 1, max = 100)) +response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) + +spdf <- sf::st_sf(response_df, geometry = polys) + +# Create raster stack +r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r) <- terra::ext(spdf) +r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)) +r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) +terra::ext(r2) <- terra::ext(spdf) +r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3)) +cov_stack <- c(r, r2) +names(cov_stack) <- c('layer1', 'layer2') - polys <- do.call(raster::spPolygons, polygons) - response_df <- data.frame(area_id = 1:100, response = runif(100, min = 0, max = 10)) - spdf <- sp::SpatialPolygonsDataFrame(polys, response_df) - - r <- raster::raster(ncol=20, nrow=20) - r <- raster::setExtent(r, raster::extent(spdf)) - r[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ifelse(x \%\% 20 != 0, x \%\% 20, 20), 3)) - r2 <- raster::raster(ncol=20, nrow=20) - r2 <- raster::setExtent(r2, raster::extent(spdf)) - r2[] <- sapply(1:raster::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) - cov_rasters <- raster::stack(r, r2) - - cl <- parallel::makeCluster(2) - doParallel::registerDoParallel(cl) - test_data <- prepare_data(polygon_shapefile = spdf, - covariate_rasters = cov_rasters) - parallel::stopCluster(cl) - foreach::registerDoSEQ() +test_data <- prepare_data(polygon_shapefile = spdf, + covariate_rasters = cov_stack) result <- make_model_object(test_data) } diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R index 01f1f0c..b3aa456 100644 --- a/tests/testthat/helper_data.R +++ b/tests/testthat/helper_data.R @@ -11,7 +11,7 @@ for(i in 1:n_polygons) { c(ymax, ymax, ymin, ymin, ymax))) } -polys <- lapply(polygons,sf::st_polygon) +polys <- lapply(polygons, sf::st_polygon) N <- floor(runif(n_polygons, min 
= 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) From 6ad196c926cc10eb467fcfe5e080dd526e4a1632 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 11 Oct 2023 10:42:10 +0100 Subject: [PATCH 30/55] partially migrate plotting (not functional yet) --- R/plotting.R | 4 ++-- tests/testthat/helper_data.R | 2 ++ tests/testthat/test-plotting.R | 2 -- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/R/plotting.R b/R/plotting.R index e87f645..2a0a568 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -21,7 +21,7 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) { titles <- c() if(1 %in% which) { - plots$polygon <- plot_polygon_data(x$x, x$shapefile_names) + plots$polygon <- plot_polygon_data(x$polygon_shapefile, x$shapefile_names) titles <- c(titles, 'Polygon response data') } @@ -155,7 +155,7 @@ plot.disag_prediction <- function(x, ...) { plot_polygon_data <- function(x, names) { # Rename the response variable for plotting - shp <- sf::st_as_sf(x) + shp <- x shp <- dplyr::rename(shp, 'response' = names$response_var) shp <- dplyr::rename(shp, 'area_id' = names$id_var) diff --git a/tests/testthat/helper_data.R b/tests/testthat/helper_data.R index b3aa456..89deef8 100644 --- a/tests/testthat/helper_data.R +++ b/tests/testthat/helper_data.R @@ -16,10 +16,12 @@ N <- floor(runif(n_polygons, min = 1, max = 100)) response_df <- data.frame(area_id = 1:n_polygons, response = runif(n_polygons, min = 0, max = 1000)) response_na_df <- data.frame(area_id = 1:n_polygons, response = c(runif(n_polygons - 1, min = 0, max = 1000), NA)) response_binom_df <- data.frame(area_id = 1:n_polygons, response = N*runif(n_polygons, min = 0, max = 1), sample_size = N) +response_df2 <- data.frame(area_id = 1:n_polygons, n_positive = runif(n_polygons, min = 0, max = 1), sample_size = floor(runif(n_polygons, min = 1, max = 100))) spdf <- sf::st_sf(response_df, geometry = polys) spdf_na <- sf::st_sf(response_na_df, geometry = polys) spdf_binom <- sf::st_sf(response_binom_df, geometry = polys) +spdf2 <- sf::st_sf(response_df2, geometry = polys) # Create raster stack r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index e4f32f6..cb55e51 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -43,7 +43,6 @@ test_that("Check plot.disag.data function works as expected", { test_that("Check plot.disag_model function works as expected", { - skip_if_not_installed('INLA') skip_on_cran() fit_result <- disag_model(test_data, iterations = 10) @@ -65,7 +64,6 @@ test_that("Check plot.disag_model function works as expected", { test_that("Check plot.disag_prediction function works as expected", { - skip_if_not_installed('INLA') skip_on_cran() From 793077b42641acd53de4805775f53110b7ad82e9 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 11 Oct 2023 12:17:10 +0100 Subject: [PATCH 31/55] migrate summary --- R/summary.R | 152 ++++++++++++++++++++++++++-------------------------- 1 file changed, 76 insertions(+), 76 deletions(-) diff --git a/R/summary.R b/R/summary.R index 87ce78c..2c34c07 100644 --- a/R/summary.R +++ b/R/summary.R @@ -1,28 +1,28 @@ #' Summary function for disaggregation fit result -#' +#' #' Function that summarises the result of the fit from the disaggregation model. 
-#' +#' #' Prints the negative log likelihood, model parameters and calculates metrics from in-sample performance. #' #' @param object Object returned from disag_model. #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the model parameters, negative log likelihood and metrics from in-sample performance. -#' +#' #' @method summary disag_model -#' +#' #' @export #' @importFrom stats cor quantile sd summary.disag_model <- function(object, ...) { - + pred <- obs <- NULL - + model_params <- summary(object$sd_out, select = 'fixed') - + report <- object$obj$report() nll <- report$nll - + # Form of the observed and predicted results depends on the likelihood function used if(object$model_setup$family == 'gaussian') { observed_data = report$polygon_response_data/report$reportnormalisation @@ -34,165 +34,165 @@ summary.disag_model <- function(object, ...) { observed_data = report$polygon_response_data predicted_data = report$reportprediction_cases } - + in_sample <- data.frame(obs = observed_data, pred = predicted_data) in_sample_reduced <- in_sample[!is.na(in_sample$pred), ] - metrics <- dplyr::summarise(in_sample_reduced, + metrics <- dplyr::summarise(in_sample_reduced, RMSE = sqrt(mean((pred - obs) ^ 2)), MAE = mean(abs(pred - obs)), pearson = cor(pred, obs, method = 'pearson'), spearman = cor(pred, obs, method = 'spearman'), log_pearson = cor(log1p(pred), log1p(obs), method = 'pearson')) - + cat(paste('Likelihood function:', object$model_setup$family, '\n')) cat(paste('Link function:', object$model_setup$link, '\n')) - + cat('Model parameters:\n') print(model_params) - + cat(paste0('\nModel convergence: ', object$opt$convergence, ' (', object$opt$message, ')')) - + cat(paste('\nNegative log likelihood: ', nll, '\n')) - + cat('\nIn sample performance:\n') print(metrics) - + summary <- list(model_params = model_params, nll = nll, metrics = metrics) - + return(invisible(summary)) - + } #' Print function for disaggregation fit result. -#' +#' #' Function that prints the result of the fit from the disaggregation model. -#' +#' #' Prints the negative log likelihood, model parameters and calculates metrics from in-sample performance. #' #' @param x Object returned from disag_model. #' @param ... Further arguments to \emph{print} function. -#' +#' #' @return NULL -#' +#' #' @method print disag_model -#' +#' #' @export #' @importFrom stats cor quantile sd print.disag_model <- function(x, ...){ - + model_params <- summary(x$sd_out, select = 'fixed') - + cat('Bayesian disaggregation model result\n') cat('\n') cat(paste('Likelihood function:', x$model_setup$family, '\n')) cat(paste('Link function:', x$model_setup$link, '\n')) - + cat('\nParameter values:\n') print(model_params[ , 1]) - + return(invisible(x)) } #' Summary function for disaggregation input data -#' +#' #' Function that summarizes the input data from the disaggregation model. -#' +#' #' Prints the number of polyons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates. #' #' @param object Object returned from prepare_data. #' @param ... Further arguments to \emph{summary} function. -#' +#' #' @return A list of the number of polyons, the number of covariates and summaries of the covariates. -#' +#' #' @method summary disag_data -#' +#' #' @export summary.disag_data <- function(object, ...) 
{

  n_polygons <- nrow(object$polygon_shapefile)
-  n_covariates <- terra::nlyr(object$covariate_rasters)
-  
+  n_covariates <- as.integer(terra::nlyr(object$covariate_rasters))
+
   cat(paste("The data contains", n_polygons, "polygons and", nrow(object$covariate_data), "pixels\n"))
-  
-  cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels", 
+
+  cat(paste("The largest polygon contains", max(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels",
       "and the smallest polygon contains", min(table(object$covariate_data[ , object$shapefile_names$id_var])), "pixels\n"))
-  
+
   cat(paste("There are", n_covariates, "covariates\n"))
-  
+
   covariate_summary <- summary(object$covariate_data[ , names(object$covariate_rasters)])
-  
+
   cat("\nCovariate summary:\n")
   print(covariate_summary)
-  
+
   summary <- list(number_polygons = n_polygons,
                   number_covariates = n_covariates,
                   covariate_summary = covariate_summary)
-  
+
   return(invisible(summary))
-  
+
 }

 #' Print function for disaggregation input data
-#' 
+#'
 #' Function that prints the input data from the disaggregation model.
-#' 
+#'
 #' Prints the number of polygons and pixels, the number of pixels in the largest and smallest polygons and summaries of the covariates.
 #'
 #' @param x Object returned from prepare_data.
 #' @param ... Further arguments to \emph{print} function.
-#' 
+#'
 #' @return NULL
-#' 
+#'
 #' @method print disag_data
-#' 
+#'
 #' @export

 print.disag_data <- function(x, ...){
-  
+
   n_polygons <- nrow(x$polygon_shapefile)
   n_covariates <- terra::nlyr(x$covariate_rasters)
-  
+
   cat(paste("The data contains", n_polygons, "polygons and", nrow(x$covariate_data), "pixels\n"))
-  
-  cat(paste("The largest polygon contains", max(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels", 
+
+  cat(paste("The largest polygon contains", max(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels",
       "and the smallest polygon contains", min(table(x$covariate_data[ , x$shapefile_names$id_var])), "pixels\n"))
-  
+
   cat(paste("There are", n_covariates, "covariates\n"))
-  
+
   return(invisible(x))
 }

 #' Summary function for disaggregation prediction
-#' 
+#'
 #' Function that summarizes the prediction from the disaggregation model.
-#' 
+#'
 #' Prints the number of realisations in the prediction and the ranges of the mean and IQR predicted values.
 #'
 #' @param object Object returned from predict.disag_model
 #' @param ... Further arguments to \emph{summary} function.
-#' 
+#'
 #' @return A list of the number of realisations and the ranges of the mean and IQR predicted values.
-#' 
+#'
 #' @method summary disag_prediction
-#' 
+#'
 #' @export

summary.disag_prediction <- function(object, ...)
{
-  
-  number_realisations <- terra::nlyr(object$uncertainty_prediction$realisations)
-  max_mean <- max(object$mean_prediction$prediction@data@values)
-  min_mean <- min(object$mean_prediction$prediction@data@values)
-  max_iqr <- max((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values)
-  min_iqr <- min((object$uncertainty_prediction$predictions_ci[[2]] - object$uncertainty_prediction$predictions_ci[[1]])@data@values)
-  
+
+  number_realisations <- as.integer(terra::nlyr(object$uncertainty_prediction$realisations))
+  max_mean <- max(terra::values(object$mean_prediction$prediction))
+  min_mean <- min(terra::values(object$mean_prediction$prediction))
+  max_iqr <- max((terra::values(object$uncertainty_prediction$predictions_ci[[2]]) - terra::values(object$uncertainty_prediction$predictions_ci[[1]])))
+  min_iqr <- min((terra::values(object$uncertainty_prediction$predictions_ci[[2]]) - terra::values(object$uncertainty_prediction$predictions_ci[[1]])))
+
   cat('Prediction from disaggregation model\n')
   cat('\n')
   cat('Components of the model: ')
@@ -204,33 +204,33 @@ summary.disag_prediction <- function(object, ...) {
   cat('\n')
   cat(paste('The mean predicted values range from', signif(min_mean, 3), 'to', signif(max_mean, 3), '\n'))
   cat(paste('The predicted IQR takes values from', signif(min_iqr, 3), 'to', signif(max_iqr, 3), '\n'))
-  
+
   summary <- list(number_realisations = number_realisations,
                   range_mean_values = c(min_mean, max_mean),
                   range_iqr_values = c(min_iqr, max_iqr))
-  
+
   return(invisible(summary))
-  
+
 }

 #' Print function for disaggregation prediction
-#' 
+#'
 #' Function that prints the prediction from the disaggregation model.
-#' 
+#'
 #' Prints the components of the model and the number of uncertainty realisations in the prediction.
 #'
 #' @param x Object returned from predict.disag_model.
 #' @param ... Further arguments to \emph{print} function.
-#' 
+#'
 #' @return NULL
-#' 
+#'
 #' @method print disag_prediction
-#' 
+#'
 #' @export

 print.disag_prediction <- function(x, ...){
-  
+
   cat('Prediction from disaggregation model\n')
   cat('\n')
   cat('Components of the model: ')
@@ -241,4 +241,4 @@ print.disag_prediction <- function(x, ...){
   cat(paste0('There are ', terra::nlyr(x$uncertainty_prediction$realisations), ' uncertainty realisations'))

   return(invisible(x))
-} \ No newline at end of file
+
+}
From 268e7e0a287ebed2c9b8d2cdc7bb04abd52fd25b Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Fri, 13 Oct 2023 14:40:55 +0100
Subject: [PATCH 32/55] update prepare docs

---
 R/prepare_data.R | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 96e132c..f849754 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -27,11 +27,11 @@
 #' and sets covariate NA pixels to the median value for that covariate.
 #'
 #' @param x sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).
-#' @param covariate_rasters RasterStack of covariate rasters to be used in the model.
-#' @param aggregation_raster Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.
-#' @param id_var Name of column in SpatialPolygonDataFrame object with the polygon id.
-#' @param response_var Name of column in SpatialPolygonDataFrame object with the response data. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. +#' @param covariate_rasters SpatRaster of covariate rasters to be used in the model. +#' @param aggregation_raster SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used. +#' @param id_var Name of column in sf object with the polygon id. +#' @param response_var Name of column in sf object with the response data. +#' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. #' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA. #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors). #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE. @@ -77,7 +77,7 @@ #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3)) #' cov_rasters <- c(r, r2) #' -#' test_data <- prepare_data(x = spdf, +#' test_data <- prepare_data(polygon_shapefile = spdf, #' covariate_rasters = cov_rasters) #' } #' From 3d2cc960f656e1f05bc9dcae86667121a5310b83 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Fri, 13 Oct 2023 16:17:16 +0100 Subject: [PATCH 33/55] fix id_var handling in prepare_data --- R/prepare_data.R | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index f849754..cf5afcd 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -129,7 +129,12 @@ prepare_data <- function(polygon_shapefile, covariate_rasters <- c(covariate_rasters, aggregation_raster) covariate_data <- terra::extract(covariate_rasters, polygon_shapefile, cells=TRUE, na.rm=TRUE, ID=TRUE) - names(covariate_data)[1] <- id_var + #merge to transfer area_id and then tidy up + polygon_data$area_n <- 1:nrow(polygon_data) + covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n") + covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "cell", "response", "N"))] + colnames(covariate_data )[colnames(covariate_data ) == "area_id"] <- id_var + polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))] # Remove the aggregation raster covariate_rasters <- covariate_rasters[[seq(terra::nlyr(covariate_rasters) - 1)]] From e6a057640088e6633d43f5bbeca34c84cb951dc8 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 16 Oct 2023 17:06:04 +0100 Subject: [PATCH 34/55] switch plotting to use tidyterra and update some docs --- DESCRIPTION | 1 + R/plotting.R | 4 ++-- R/predict.R | 2 +- man/predict_model.Rd | 2 +- man/prepare_data.Rd | 12 ++++++------ tests/testthat/test-matching.R | 2 +- tests/testthat/test-plotting.R | 3 +-- 7 files changed, 13 insertions(+), 13 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 21731a5..c24d931 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -28,6 +28,7 @@ Imports: cowplot, sparseMVN, fmesher, + tidyterra, terra, sf, utils diff --git a/R/plotting.R b/R/plotting.R index 2a0a568..8603e75 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -27,7 +27,7 @@ plot.disag_data <- function(x, which = c(1,2,3), ...) 
{ if(2 %in% which) { stopifnot(inherits(x$covariate_rasters, c('SpatRaster'))) - plots$covariates <- plot(x$covariate_rasters) + plots$covariates <- ggplot2::ggplot() + tidyterra::geom_spatraster(data=x$covariate_rasters) + ggplot2::facet_wrap(~lyr) + tidyterra::scale_fill_terrain_c() titles <- c(titles, 'Covariate rasters') } @@ -135,7 +135,7 @@ plot.disag_prediction <- function(x, ...) { rasters_to_plot <- terra::rast(list(x$mean_prediction$prediction, x$uncertainty_prediction$predictions_ci)) names(rasters_to_plot) <- c('mean prediction', 'lower CI', 'upper CI') - plots <- plot(rasters_to_plot) + plots <- ggplot2::ggplot() + tidyterra::geom_spatraster(data=rasters_to_plot) + ggplot2::facet_wrap(~lyr) + tidyterra::scale_fill_terrain_c() print(plots) diff --git a/R/predict.R b/R/predict.R index f27bf2b..250cb7a 100644 --- a/R/predict.R +++ b/R/predict.R @@ -68,7 +68,7 @@ predict.disag_model <- function(object, newdata = NULL, predict_iid = FALSE, N = #' to the linear predictor. #' #' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_model.Rd b/man/predict_model.Rd index 3fbbc26..3e0387a 100644 --- a/man/predict_model.Rd +++ b/man/predict_model.Rd @@ -32,7 +32,7 @@ Function returns rasters of the mean predictions as well as the covariate and f to the linear predictor. To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 83792d2..0a8110b 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -18,15 +18,15 @@ prepare_data( ) } \arguments{ -\item{covariate_rasters}{RasterStack of covariate rasters to be used in the model.} +\item{covariate_rasters}{SpatRaster of covariate rasters to be used in the model.} -\item{aggregation_raster}{Raster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.} +\item{aggregation_raster}{SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. 
If this is not supplied a uniform raster will be used.}

-\item{id_var}{Name of column in SpatialPolygonDataFrame object with the polygon id.}
+\item{id_var}{Name of column in sf object with the polygon id.}

-\item{response_var}{Name of column in SpatialPolygonDataFrame object with the response data.}
+\item{response_var}{Name of column in sf object with the response data.}

-\item{sample_size_var}{For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data.}
+\item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data.}

 \item{mesh.args}{list of parameters that control the mesh structure with the same names as used by INLA.}
@@ -104,7 +104,7 @@ terra::ext(r2) <- terra::ext(spdf)
 r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/10), 3))
 cov_rasters <- c(r, r2)

-test_data <- prepare_data(x = spdf,
+test_data <- prepare_data(polygon_shapefile = spdf,
                           covariate_rasters = cov_rasters)
 }

diff --git a/tests/testthat/test-matching.R b/tests/testthat/test-matching.R
index e22d433..42b152a 100644
--- a/tests/testthat/test-matching.R
+++ b/tests/testthat/test-matching.R
@@ -17,7 +17,7 @@ test_that("Getting start and end index returns the right object", {
   result <- getStartendindex(covs, response, 'id')

   save(result, file = paste0(tempdir(), '/test_startendindex.RData'))
-  
+
   expect_is(result, "matrix")
   expect_equal(nrow(result), nrow(response))
   expect_equal(ncol(result), 2)
diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R
index cb55e51..306317c 100644
--- a/tests/testthat/test-plotting.R
+++ b/tests/testthat/test-plotting.R
@@ -66,7 +66,6 @@ test_that("Check plot.disag_prediction function works as expected", {

   skip_on_cran()

-
   fit_result <- disag_model(test_data, iterations = 1000,
                             iid = TRUE,
                             field = TRUE,
@@ -85,7 +84,7 @@ test_that("Check plot.disag_prediction function works as expected", {
   pred <- predict(fit_result)
   p <- plot(pred)

-  expect_is(p, 'trellis')
+  expect_is(p, 'gg')

 })

From b1874a554518433dfdcccae4c229d34b8099377b Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Mon, 16 Oct 2023 17:19:07 +0100
Subject: [PATCH 35/55] update docs after migration

---
 R/build_mesh.R             |  2 +-
 R/extract.R                | 15 ++++++---------
 R/plotting.R               |  6 +++---
 R/predict.R                | 14 +++++++-------
 R/prepare_data.R           | 10 +++++-----
 man/as.disag_data.Rd       | 10 +++++-----
 man/build_mesh.Rd          |  2 +-
 man/getCovariateRasters.Rd |  2 +-
 man/getPolygonData.Rd      |  8 ++++----
 man/predict.disag_model.Rd |  6 +++---
 man/predict_uncertainty.Rd |  8 ++++----
 man/prepare_data.Rd        |  2 +-
 12 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/R/build_mesh.R b/R/build_mesh.R
index 1a14713..b005a55 100644
--- a/R/build_mesh.R
+++ b/R/build_mesh.R
@@ -1,6 +1,6 @@
 #' Build mesh for disaggregation model
 #'
-#' \emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field.
+#' \emph{build_mesh} function takes an sf object and mesh arguments to build an appropriate mesh for the spatial field.
 #'
 #' The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary
 #' and a coarser mesh outside.
This speeds up computation time by only having a very fine mesh within the area of interest diff --git a/R/extract.R b/R/extract.R index 6bf26b4..dd7fd75 100644 --- a/R/extract.R +++ b/R/extract.R @@ -1,15 +1,15 @@ -#' Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame +#' Extract polygon id and response data into a data.frame from a sf object #' -#' Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' Returns a data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the #' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does #' not exist), this column will contain NAs. #' #' @param shape A sf object containing response data. #' @param id_var Name of column in shape object with the polygon id. Default 'area_id'. #' @param response_var Name of column in shape object with the response data. Default 'response'. -#' @param sample_size_var For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL. +#' @param sample_size_var For survey data, name of column in sf object (if it exists) with the sample size data. Default NULL. #' -#' @return A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +#' @return A data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the #' polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does #' not exist), this column will contain NAs. #' @@ -48,7 +48,7 @@ getPolygonData <- function(shape, id_var = 'area_id', response_var = 'response', } -#' Get a RasterStack of covariates from a folder containing .tif files +#' Get a SpatRaster of covariates from a folder containing .tif files #' #' Looks in a specified folder for raster files. Returns a multi-layered SpatRaster of the rasters cropped to the extent specified by the shape parameter. #' @@ -83,7 +83,7 @@ getCovariateRasters <- function(directory, file_pattern = '.tif$', shape) { # Extract coordinates from raster to use constructing the INLA mesh # -# @param cov_rasters RasterStack of the covariate rasters. +# @param cov_rasters SpatRaster of the covariate rasters. # @param selectIds numeric vector containing cell ids to retain. Default NULL retains all cell ids in the covariate rasters. # # @return A matrix containing the coordinates used to make the mesh @@ -106,6 +106,3 @@ extractCoordsForMesh <- function(cov_rasters, selectIds = NULL) { return(coords) } - - - diff --git a/R/plotting.R b/R/plotting.R index 8603e75..f9b9a2b 100644 --- a/R/plotting.R +++ b/R/plotting.R @@ -143,7 +143,7 @@ plot.disag_prediction <- function(x, ...) { } -# Plot polygon data from SpatialPolygonDataFrame +# Plot polygon data from sf object # # @param x Object to be plotted # @param names list of 2 names: polygon id variable and response variable names @@ -152,10 +152,10 @@ plot.disag_prediction <- function(x, ...) 
{ # # @name plot_polygon_data -plot_polygon_data <- function(x, names) { +plot_polygon_data <- function(polygon_shapefile, names) { # Rename the response variable for plotting - shp <- x + shp <- polygon_shapefile shp <- dplyr::rename(shp, 'response' = names$response_var) shp <- dplyr::rename(shp, 'area_id' = names$id_var) diff --git a/R/predict.R b/R/predict.R index 250cb7a..9f6713c 100644 --- a/R/predict.R +++ b/R/predict.R @@ -4,7 +4,7 @@ #' predicts mean and uncertainty maps. #' #' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. @@ -30,8 +30,8 @@ #' }} #' \item{uncertainty_prediction: }{List of: #' \itemize{ -#' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. -#' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. +#' \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. +#' \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. #' }} #' #' @@ -115,10 +115,10 @@ predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { #' \emph{predict_uncertainty} function takes a \emph{disag_model} object created by #' \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. #' -#' Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. +#' Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. #' #' To predict over a different spatial extent to that used in the model, -#' a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +#' a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. #' If this is not given predictions are made over the data used in the fit. #' #' The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. @@ -135,8 +135,8 @@ predict_model <- function(model_output, newdata = NULL, predict_iid = FALSE) { #' #' @return The uncertainty prediction, which is a list of: #' \itemize{ -#' \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. -#' \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. +#' \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. +#' \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. #' } #' #' @name predict_uncertainty diff --git a/R/prepare_data.R b/R/prepare_data.R index cf5afcd..c653260 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -3,7 +3,7 @@ #' \emph{prepare_data} function is used to extract all the data required for fitting a disaggregation model. 
#' Designed to be used in the \emph{disaggregation::fit_model} function. #' -#' Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. +#' Takes a sf object with the response data and a SpatRaster of covariates. #' #' Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons #' (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores @@ -201,9 +201,9 @@ prepare_data <- function(polygon_shapefile, #' Function to fit the disaggregation model #' -#' @param x SpatialPolygonDataFrame containing the response data +#' @param polygon_shapefile sf object containing the response data #' @param shapefile_names List of 2: polygon id variable name and response variable name from x -#' @param covariate_rasters RasterStack of covariates +#' @param covariate_rasters SpatRaster of covariates #' @param polygon_data data.frame with two columns: polygon id and response #' @param covariate_data data.frame with cell id, polygon id and covariate columns #' @param aggregation_pixels vector with value of aggregation raster at each pixel @@ -215,8 +215,8 @@ prepare_data <- function(polygon_shapefile, #' @return A list is returned of class \code{disag_data}. #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. #' The list of class \code{disag_data} contains: -#' \item{x }{The SpatialPolygonDataFrame used as an input.} -#' \item{covariate_rasters }{The RasterStack used as an input.} +#' \item{x }{The sf object used as an input.} +#' \item{covariate_rasters }{The SpatRaster used as an input.} #' \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). Each row represents a polygon.} #' \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} #' \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/as.disag_data.Rd b/man/as.disag_data.Rd index c8e92f9..803d050 100644 --- a/man/as.disag_data.Rd +++ b/man/as.disag_data.Rd @@ -18,9 +18,11 @@ as.disag_data( ) } \arguments{ +\item{polygon_shapefile}{sf object containing the response data} + \item{shapefile_names}{List of 2: polygon id variable name and response variable name from x} -\item{covariate_rasters}{RasterStack of covariates} +\item{covariate_rasters}{SpatRaster of covariates} \item{polygon_data}{data.frame with two columns: polygon id and response} @@ -35,15 +37,13 @@ as.disag_data( \item{startendindex}{matrix containing the start and end index for each polygon} \item{mesh}{inla.mesh object to use in the fit} - -\item{x}{SpatialPolygonDataFrame containing the response data} } \value{ A list is returned of class \code{disag_data}. The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}. The list of class \code{disag_data} contains: - \item{x }{The SpatialPolygonDataFrame used as an input.} - \item{covariate_rasters }{The RasterStack used as an input.} + \item{x }{The sf object used as an input.} + \item{covariate_rasters }{The SpatRaster used as an input.} \item{polygon_data }{A data frame with columns of \emph{area_id}, \emph{response} and \emph{N} (sample size: all NAs unless using binomial data). 
Each row represents a polygon.} \item{covariate_data }{A data frame with columns of \emph{area_id}, \emph{cell_id} and one for each covariate in \emph{covariate_rasters}. Each row represents a pixel in a polygon.} \item{aggregation_pixels }{An array with the value of the aggregation raster for each pixel in the same order as the rows of \emph{covariate_data}.} diff --git a/man/build_mesh.Rd b/man/build_mesh.Rd index fb0df25..77561ca 100644 --- a/man/build_mesh.Rd +++ b/man/build_mesh.Rd @@ -17,7 +17,7 @@ with the parameters having the same meaning as in the INLA functions \emph{inla. An inla.mesh object } \description{ -\emph{build_mesh} function takes a SpatialPolygons object and mesh arguments to build an appropriate mesh for the spatial field. +\emph{build_mesh} function takes a sf object and mesh arguments to build an appropriate mesh for the spatial field. } \details{ The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary diff --git a/man/getCovariateRasters.Rd b/man/getCovariateRasters.Rd index fcac9b0..c39619b 100644 --- a/man/getCovariateRasters.Rd +++ b/man/getCovariateRasters.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract.R \name{getCovariateRasters} \alias{getCovariateRasters} -\title{Get a RasterStack of covariates from a folder containing .tif files} +\title{Get a SpatRaster of covariates from a folder containing .tif files} \usage{ getCovariateRasters(directory, file_pattern = ".tif$", shape) } diff --git a/man/getPolygonData.Rd b/man/getPolygonData.Rd index e6ec032..27759e6 100644 --- a/man/getPolygonData.Rd +++ b/man/getPolygonData.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/extract.R \name{getPolygonData} \alias{getPolygonData} -\title{Extract polygon id and response data into a data.frame from a SpatialPolygonsDataFrame} +\title{Extract polygon id and response data into a data.frame from a sf object} \usage{ getPolygonData( shape, @@ -18,15 +18,15 @@ getPolygonData( \item{response_var}{Name of column in shape object with the response data. Default 'response'.} -\item{sample_size_var}{For survey data, name of column in SpatialPolygonDataFrame object (if it exists) with the sample size data. Default NULL.} +\item{sample_size_var}{For survey data, name of column in sf object (if it exists) with the sample size data. Default NULL.} } \value{ -A data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +A data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. } \description{ -Returns a data.frame with a row for each polygon in the SpatialPolygonDataFrame and columns: area_id, response and N, containing the id of the +Returns a data.frame with a row for each polygon in the sf object and columns: area_id, response and N, containing the id of the polygon, the values of the response for that polygon, and the sample size respectively. If the data is not survey data (the sample size does not exist), this column will contain NAs. 
} diff --git a/man/predict.disag_model.Rd b/man/predict.disag_model.Rd index edea4ac..f85ed7a 100644 --- a/man/predict.disag_model.Rd +++ b/man/predict.disag_model.Rd @@ -31,8 +31,8 @@ An object of class \emph{disag_prediction} which consists of a list of two objec }} \item{uncertainty_prediction: }{List of: \itemize{ - \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. - \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. + \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. + \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. }} } \description{ @@ -41,7 +41,7 @@ predicts mean and uncertainty maps. } \details{ To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/predict_uncertainty.Rd b/man/predict_uncertainty.Rd index 5297230..ccf361d 100644 --- a/man/predict_uncertainty.Rd +++ b/man/predict_uncertainty.Rd @@ -27,8 +27,8 @@ If this is a raster stack or brick, predictions will be made over this data. Def \value{ The uncertainty prediction, which is a list of: \itemize{ - \item \emph{realisations} RasterStack of realisations of predictions. Number of realisations defined by argument \emph{N}. - \item \emph{predictions_ci} RasterStack of the upper and lower credible intervals. Defined by argument \emph{CI}. + \item \emph{realisations} SpatRaster of realisations of predictions. Number of realisations defined by argument \emph{N}. + \item \emph{predictions_ci} SpatRaster of the upper and lower credible intervals. Defined by argument \emph{CI}. } } \description{ @@ -36,10 +36,10 @@ The uncertainty prediction, which is a list of: \emph{disaggregation::disag_model} and predicts upper and lower credible interval maps. } \details{ -Function returns a RasterStack of the realisations as well as the upper and lower credible interval rasters. +Function returns a SpatRaster of the realisations as well as the upper and lower credible interval rasters. To predict over a different spatial extent to that used in the model, -a RasterStack covering the region to make predictions over is passed to the argument \emph{newdata}. +a SpatRaster covering the region to make predictions over is passed to the argument \emph{newdata}. If this is not given predictions are made over the data used in the fit. The \emph{predict_iid} logical flag should be set to TRUE if the results of the iid effect from the model are to be used in the prediction. diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index 0a8110b..c310421 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -57,7 +57,7 @@ The list of class \code{disag_data} contains: Designed to be used in the \emph{disaggregation::fit_model} function. } \details{ -Takes a SpatialPolygonDataFrame with the response data and a RasterStack of covariates. +Takes a sf object with the response data and a SpatRaster of covariates. 
Extract the values of the covariates (as well as the aggregation raster, if given) at each pixel within the polygons (\emph{parallelExtract} function). This is done in parallel and \emph{n.cores} argument is used to set the number of cores From 225c86681a33476a35ac4f4cc842da8bfbaa3bb4 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 16:40:55 +0100 Subject: [PATCH 36/55] Update prepare_data.R more robust removal of aggregation data --- R/prepare_data.R | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index c653260..2466398 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -128,7 +128,7 @@ prepare_data <- function(polygon_shapefile, covariate_rasters <- c(covariate_rasters, aggregation_raster) - covariate_data <- terra::extract(covariate_rasters, polygon_shapefile, cells=TRUE, na.rm=TRUE, ID=TRUE) + covariate_data <- terra::extract(covariate_rasters, terra::vect(polygon_shapefile), cells=TRUE, na.rm=TRUE, ID=TRUE) #merge to transfer area_id and then tidy up polygon_data$area_n <- 1:nrow(polygon_data) covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n") @@ -137,8 +137,8 @@ prepare_data <- function(polygon_shapefile, polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))] # Remove the aggregation raster - covariate_rasters <- covariate_rasters[[seq(terra::nlyr(covariate_rasters) - 1)]] - + cov_filter <- !(names(covariate_data) %in% c('aggregation_raster')) + covariate_rasters <- covariate_rasters[[cov_filter]] names(covariate_rasters) <- cov_names agg_filter <- names(covariate_data) %in% c('aggregation_raster') From 5ff89dc38823e9fab055d58464e3f9e3aa23c053 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 16:52:15 +0100 Subject: [PATCH 37/55] Update fit_model.R fix cov_matrix column selection bug --- R/fit_model.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/fit_model.R b/R/fit_model.R index 5fc7966..b6fefd6 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -358,7 +358,7 @@ make_model_object <- function(data, Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A n_s <- nrow(spde$M0) - cov_matrix <- as.matrix(data$covariate_data[, -c(1:2)]) + cov_matrix <- as.matrix(data$covariate_data[, -c("area_id")]) # If we have exactly one column we don't have to transpose. Sure this # this could be cleaner but I don't know how. 
if(ncol(cov_matrix) == 1){ From 1db8d22b00e4914b61e6000d3c626afc4e9cc012 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 16:58:32 +0100 Subject: [PATCH 38/55] migrate INLA::inla.mesh.project to fmesher::fm_evaluate --- R/fit_model.R | 3 ++- R/predict.R | 4 ++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/R/fit_model.R b/R/fit_model.R index b6fefd6..7e957b7 100644 --- a/R/fit_model.R +++ b/R/fit_model.R @@ -355,7 +355,8 @@ make_model_object <- function(data, nu = 1 # Sort out mesh bits spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")] - Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A + # Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A + Apix <- fmesher::fm_evaluate(data$mesh, loc = data$coordsForFit)$A n_s <- nrow(spde$M0) cov_matrix <- as.matrix(data$covariate_data[, -c("area_id")]) diff --git a/R/predict.R b/R/predict.R index 9f6713c..6880874 100644 --- a/R/predict.R +++ b/R/predict.R @@ -231,8 +231,8 @@ getAmatrix <- function(mesh, coords) { spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] n_s <- nrow(spde$M0) - Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - + # Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A + Amatrix <- fmesher::fm_evaluate(mesh, loc = as.matrix(coords))$A return(Amatrix) } From 9d66d11aeac0e7cdf242d66106b6a06796e6d1ac Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Mon, 23 Oct 2023 17:11:11 +0100 Subject: [PATCH 39/55] remove getamatrix function --- R/predict.R | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/R/predict.R b/R/predict.R index 6880874..0c2eaa8 100644 --- a/R/predict.R +++ b/R/predict.R @@ -217,25 +217,6 @@ getCoords <- function(data) { return(coords) } -# Get Amatrix for field -# -# @param mesh mesh used in the model fitting -# @param coords coordinates extracted from raster -# -# @return An Amatrix object for the field -# -# @name getAmatrix - -getAmatrix <- function(mesh, coords) { - - spde <- (INLA::inla.spde2.matern(mesh, alpha = 2)$param.inla)[c("M0", "M1", "M2")] - n_s <- nrow(spde$M0) - - # Amatrix <- INLA::inla.mesh.project(mesh, loc = as.matrix(coords))$A - Amatrix <- fmesher::fm_evaluate(mesh, loc = as.matrix(coords))$A - return(Amatrix) -} - # Helper to check and sort out new raster data. 
check_newdata <- function(newdata, model_output){
   if(is.null(newdata)) return(NULL)
@@ -280,7 +261,7 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) {
   } else {
     coords <- getCoords(data)
   }
-  Amatrix <- getAmatrix(data$mesh, coords)
+  Amatrix <- fmesher::fm_evaluate(data$mesh, loc = as.matrix(coords))$A
   field_objects <- list(coords = coords, Amatrix = Amatrix)
 } else {
   field_objects <- NULL

From ad953faab683fd56bdc34df7420c07fe804c279f Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 24 Oct 2023 10:17:18 +0100
Subject: [PATCH 40/55] tweaks to yesterday's changes

---
 R/fit_model.R | 5 ++---
 R/predict.R   | 2 +-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/R/fit_model.R b/R/fit_model.R
index 7e957b7..47dacab 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -355,11 +355,10 @@ make_model_object <- function(data,
   nu = 1
   # Sort out mesh bits
   spde <- (INLA::inla.spde2.matern(data$mesh, alpha = nu + 1)$param.inla)[c("M0", "M1", "M2")]
-  # Apix <- INLA::inla.mesh.project(data$mesh, loc = data$coordsForFit)$A
-  Apix <- fmesher::fm_evaluate(data$mesh, loc = data$coordsForFit)$A
+  Apix <- fmesher::fm_evaluator(data$mesh, loc = data$coordsForFit)$proj$A
   n_s <- nrow(spde$M0)
 
-  cov_matrix <- as.matrix(data$covariate_data[, -c("area_id")])
+  cov_matrix <- as.matrix(data$covariate_data[, (names(data$covariate_data) %in% names(data$covariate_rasters))])
   # If we have exactly one column we don't have to transpose. Sure this
   # this could be cleaner but I don't know how.
   if(ncol(cov_matrix) == 1){
diff --git a/R/predict.R b/R/predict.R
index 0c2eaa8..6cbfc67 100644
--- a/R/predict.R
+++ b/R/predict.R
@@ -261,7 +261,7 @@ setup_objects <- function(model_output, newdata = NULL, predict_iid = FALSE) {
   } else {
     coords <- getCoords(data)
   }
-  Amatrix <- fmesher::fm_evaluate(data$mesh, loc = as.matrix(coords))$A
+  Amatrix <- fmesher::fm_evaluator(data$mesh, loc = as.matrix(coords))$proj$A
   field_objects <- list(coords = coords, Amatrix = Amatrix)
 } else {
   field_objects <- NULL

From bedcb09902836a0151441ae8b02c6e37bc4095cf Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 24 Oct 2023 15:16:32 +0100
Subject: [PATCH 41/55] deprecate ncores

---
 R/prepare_data.R    | 7 +++++--
 man/prepare_data.Rd | 4 ++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 2466398..716e294 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -35,7 +35,7 @@
 #' @param mesh.args list of parameters that control the mesh structure with the same names as used by INLA.
 #' @param na.action logical. If TRUE, NAs in response will be removed, covariate NAs will be given the median value, aggregation NAs will be set to zero. Default FALSE (NAs in response or covariate data within the polygons will give errors).
 #' @param makeMesh logical. If TRUE, build INLA mesh, takes some time. Default TRUE.
-#' @param ncores Number of cores used to perform covariate extraction.
+#' @param ncores Deprecated.
 #'
 #' @return A list is returned of class \code{disag_data}.
 #' The functions \emph{summary}, \emph{print} and \emph{plot} can be used on \code{disag_data}.
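The hunk below implements the change with a `missing()` guard. As a minimal standalone sketch of the same soft-deprecation pattern (a hypothetical function, not the package code):

```r
# Keep the deprecated argument in the signature so existing calls still run,
# but warn whenever a caller actually supplies it.
old_api <- function(x, ncores = NULL) {
  if (!missing(ncores)) {
    warning("The ncores argument has been deprecated")
  }
  x
}

old_api(1)              # returns 1 silently
old_api(1, ncores = 2)  # warns, then returns 1
```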
@@ -94,7 +94,10 @@ prepare_data <- function(polygon_shapefile, mesh.args = NULL, na.action = FALSE, makeMesh = TRUE, - ncores = 2) { + ncores = NULL) { + + if (!missing("ncores")) + warning("The ncores argument has been deprecated") stopifnot(inherits(polygon_shapefile, 'sf')) stopifnot(inherits(covariate_rasters, 'SpatRaster')) diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd index c310421..150ca58 100644 --- a/man/prepare_data.Rd +++ b/man/prepare_data.Rd @@ -14,7 +14,7 @@ prepare_data( mesh.args = NULL, na.action = FALSE, makeMesh = TRUE, - ncores = 2 + ncores = NULL ) } \arguments{ @@ -34,7 +34,7 @@ prepare_data( \item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time. Default TRUE.} -\item{ncores}{Number of cores used to perform covariate extraction.} +\item{ncores}{Deprecated.} \item{x}{sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).} } From dc94f2876c1c6f05279c00790fb589e9eae76ce1 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 09:39:27 +0000 Subject: [PATCH 42/55] fix prepare bug --- R/prepare_data.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index 716e294..2d970ff 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -135,7 +135,7 @@ prepare_data <- function(polygon_shapefile, #merge to transfer area_id and then tidy up polygon_data$area_n <- 1:nrow(polygon_data) covariate_data <- merge(covariate_data, polygon_data, by.x = "ID", by.y = "area_n") - covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "cell", "response", "N"))] + covariate_data <- covariate_data[ , !(names(covariate_data) %in% c("ID", "response", "N"))] colnames(covariate_data )[colnames(covariate_data ) == "area_id"] <- id_var polygon_data <- polygon_data[ , !(names(polygon_data) %in% c("area_n"))] From 4df4c4df7140d35b890f0bc1a77320418fe1e50b Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 10:22:37 +0000 Subject: [PATCH 43/55] Update vignette --- vignettes/disaggregation.Rmd | 24 +++++++++--------------- 1 file changed, 9 insertions(+), 15 deletions(-) diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd index 1e74593..ace52cf 100644 --- a/vignettes/disaggregation.Rmd +++ b/vignettes/disaggregation.Rmd @@ -23,7 +23,7 @@ knitr::opts_chunk$set( isINLA <- requireNamespace('INLA', quietly = TRUE) ``` -The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogenous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). +The **disaggregation** package contains functions to run Bayesian disaggregation models. Aggregated response data over large heterogeneous regions can be used alongside fine-scale covariate information to predict fine-scale response across the region. The github page for this package can be found [here](https://github.com/aknandi/disaggregation). Install **disaggregation** using: @@ -39,7 +39,7 @@ devtools::install_github("aknandi/disaggregation") The key functions are `prepare_data`, `fit_model` and `predict`. 
The `prepare_data` function takes the aggregated data and covariate data to be used in the model and produces an object to be used by `fit_model`. This function runs the disaggregation model and the output can be passed to `predict` to produce fine-scale predicted maps of the response variable.
 
-To use the disaggregation `prepare_data` function, you must have the aggregated data as a `SpatialPolygonDataFrame` object and a `RasterStack` of the covariate data to be used in the model.
+To use the disaggregation `prepare_data` function, you must have the aggregated data as an `sf` object and a `SpatRaster` of the covariate data to be used in the model.
 
 ## Example
 
@@ -62,7 +62,7 @@ ggplot() + geom_sf(data = df, aes(fill = cases / population))
 ```
 
-Now we simulate two covariate rasters for the area of interest and make a `RasterStack`. They are simulated at the resolution of approximately 1km2.
+Now we simulate two covariate rasters for the area of interest and make a two-layered `SpatRaster`. They are simulated at a resolution of approximately 1km2.
 
 ```{r, fig.show='hold'}
 
@@ -103,22 +103,17 @@ pop_raster <- terra::rasterize(terra::vect(df), cov_stack, field = 'pop_per_cell
 ```
 
 To correct small inconsistencies in the polygon geometry, we run the code below.
-We are a bit inbetween frameworks at the moment.
-Most of the package is built on sp. But as rgeos has been depreciated we have to
-switch the polygons to simple features and back again.
 
 ```{r, fig.show='hold'}
-polygon_data <- sf:::as_Spatial(st_buffer(st_as_sf(polygon_data), dist = 0))
-
 df <- sf::st_buffer(df, dist = 0)
 ```
 
-Now we have setup the data we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `SpatialPolygonsDataFrame` should be specified.
+Now that we have set up the data, we can use the `prepare_data` function to create the objects needed to run the disaggregation model. The name of the response variable and id variable in the `sf` object should be specified.
 
-The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects. The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [inla.noncovex.hull function](https://rdrr.io/github/andrewzm/INLA/man/inla.nonconvex.hull.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [inla.mesh.2d function](https://rdrr.io/github/andrewzm/INLA/man/inla.mesh.2d.html).
+The user can also control the parameters of the mesh that is used to create the spatial field. The mesh is created by finding a tight boundary around the polygon data, and creating a fine mesh within the boundary and a coarser mesh outside. This speeds up computation time by only having a very fine mesh within the area of interest and having a small region outside with a coarser mesh to avoid edge effects.
The mesh parameters: `concave`, `convex` and `resolution` refer to the parameters used to create the mesh boundary using the [fm_nonconvex_hull_inla function](https://rdrr.io/cran/fmesher/man/fm_nonconvex_hull_inla.html), while the mesh parameters `max.edge`, `cut` and `offset` refer to the parameters used to create the mesh using the [fm_mesh_2d function](https://rdrr.io/cran/fmesher/man/fm_mesh_2d.html).
 
 ```{r, fig.show='hold', eval= isINLA}
-data_for_model <- prepare_data(x = df,
+data_for_model <- prepare_data(polygon_shapefile = df,
                                covariate_rasters = cov_stack,
                                aggregation_raster = pop_raster,
                                response_var = 'cases',
                                id_var = 'censustract.FIPS',
                                mesh.args = list(cut = 0.01,
                                                 offset = c(0.1, 0.5),
                                                 max.edge = c(0.1, 0.2),
                                                 resolution = 250),
-                               na.action = TRUE,
-                               ncores = 1)
+                               na.action = TRUE)
 ```
 
 ```{r, fig.show='hold', eval= isINLA}
 plot(data_for_model)
 ```
 
-Now have our data object we are ready to run the model. Here we can specify
+Now that we have our data object, we are ready to run the model. Here we can specify
 the likelihood function as Gaussian, binomial or Poisson, and we can specify
 the link function as logit, log or identity.
 
 The disaggregation model makes predictions at the pixel level:
 
 $link(pred_i) = \beta_0 + \beta X + GP(s_i) + u_i$
 
-where $X$ are the covariates, $GP$ is the gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
+where $X$ are the covariates, $GP$ is the Gaussian random field and $u_i$ is the iid random effect. The pixel predictions are then aggregated to the polygon level using the weighted sum (via the aggregation raster, $agg_i$):
 
 $cases_j = \sum_{i \in j} pred_i \times agg_i$
 
From ee0a1c07857538344744faa866ca5eedfd12b1f7 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 31 Oct 2023 11:16:20 +0000
Subject: [PATCH 44/55] update readme

---
 README.md | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/README.md b/README.md
index cc08ed4..c0dd5ac 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ Overview
 
 ## Data preparation
 
-Function prepare_data takes in SpatialPolygonDataFrame (response) and RasterStack (covariates) to produce a data structure required for the disaggregation modelling. Calls functions to extract covariate data, polygon data, aggregation (population data), match points to polygons and build an INLA mesh for the spatial field (build_mesh)
+Function prepare_data takes in an sf object (response) and a SpatRaster (covariates) to produce a data structure required for the disaggregation modelling. Calls functions to extract covariate data, polygon data, aggregation (population data), match points to polygons and build an INLA mesh for the spatial field (build_mesh)
 
 ```R
 data_for_model <- prepare_data(polygon_shapefile = shps, 
@@ -33,9 +33,9 @@ data_for_model <- prepare_data(polygon_shapefile = shps,
 
 ### Input data
 
-* A RasterStack of covariate rasters to be used in the model (covariate_rasters)
-* A SpatialPolygonsDataFrame (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e survey data, it can also contain a sample size column (sample_size_var).
-* (Optional) Raster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population). If this is not supplied a uniform raster will be used
+* A SpatRaster of covariate rasters to be used in the model (covariate_rasters)
+* An sf object (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e. survey data, it can also contain a sample size column (sample_size_var).
+* (Optional) SpatRaster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population).
If this is not supplied a uniform raster will be used +* A SpatRaster of covariate rasters to be used in the model (covariate_rasters) +* A sf (polygon_shapefile) containing at least two columns: one with the id for the polygons (id_var) and one with the response count data (response_var); for binomial data, i.e survey data, it can also contain a sample size column (sample_size_var). +* (Optional) SpatRaster used to aggregate the pixel level predictions (aggregation_raster) to polygon level (usually population). If this is not supplied a uniform raster will be used ### Controlling the mesh From ca2a9d1207f3515a74529757be0b54192993cf48 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 11:21:35 +0000 Subject: [PATCH 45/55] update description --- DESCRIPTION | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index c24d931..8906a47 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,12 +1,13 @@ Package: disaggregation Type: Package Title: Disaggregation Modelling -Version: 0.2.1 +Version: 0.3.0 Authors@R: c( person("Anita", "Nandi", email = "anita.k.nandi@gmail.com", role = "aut", comment = c(ORCID = "0000-0002-5087-2494")), person("Tim", "Lucas", email = "timcdlucas@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-4694-8107")), person("Rohan", "Arambepola", email = "rarambepola@gmail.com", role = "aut"), - person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226")) + person("Andre", "Python", email = "python.andre@gmail.com", role = "aut", comment = c(ORCID = "0000-0001-8094-7226")), + person("Simon", "Smart", email = "simon.smart@cantab.net", role = "ctb") ) Description: Fits disaggregation regression models using 'TMB' ('Template Model Builder'). 
When the response data are aggregated to polygon level but From ffd17111dfbe3989c492b47cc9fa63b00e9ec388 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 12:24:11 +0000 Subject: [PATCH 46/55] adjust test-extract --- tests/testthat/test-extract.R | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/testthat/test-extract.R b/tests/testthat/test-extract.R index 74e16f5..84ba7b4 100644 --- a/tests/testthat/test-extract.R +++ b/tests/testthat/test-extract.R @@ -45,10 +45,10 @@ test_that("extractCoordsForMesh function behaves as it should", { skip_on_cran() - cov_data <- terra::extract(cov_stack, spdf, cells=TRUE, na.rm=TRUE, ID=TRUE) + cov_data <- terra::extract(cov_stack, spdf, cells = TRUE, na.rm = TRUE, ID = TRUE) names(cov_data)[1] <- 'area_id' - result <- extractCoordsForMesh(cov_stack, cov_data$cellid) + result <- extractCoordsForMesh(cov_stack, cov_data$cell) result2 <- extractCoordsForMesh(cov_stack) From 7ef6593a4b6af7d5f0dcbae06ce6628ab91663df Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 31 Oct 2023 12:29:17 +0000 Subject: [PATCH 47/55] replace skip_if_not_installed('INLA') --- tests/testthat/test-fit-model.R | 1 + tests/testthat/test-plotting.R | 3 +++ tests/testthat/test-predict-model.R | 4 ++++ tests/testthat/test-prepare-data.R | 1 - 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index 4c7a384..df4146d 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -3,6 +3,7 @@ context("Fitting model") test_that("disag_model produces errors when expected", { + skip_if_not_installed('INLA') skip_on_cran() expect_error(disag_model(list())) diff --git a/tests/testthat/test-plotting.R b/tests/testthat/test-plotting.R index 306317c..ab0e0e6 100644 --- a/tests/testthat/test-plotting.R +++ b/tests/testthat/test-plotting.R @@ -15,6 +15,7 @@ test_that("Check plot_polygon_data function works as expected", { test_that("Check plot.disag.data function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() test_data2 <- prepare_data(polygon_shapefile = spdf2, @@ -43,6 +44,7 @@ test_that("Check plot.disag.data function works as expected", { test_that("Check plot.disag_model function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() fit_result <- disag_model(test_data, iterations = 10) @@ -64,6 +66,7 @@ test_that("Check plot.disag_model function works as expected", { test_that("Check plot.disag_prediction function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() fit_result <- disag_model(test_data, iterations = 1000, diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index ae95e13..33ac3ef 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -2,6 +2,7 @@ context("Predict model") test_that("Check predict.disag_model function works as expected", { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 1000, @@ -95,6 +96,7 @@ test_that("Check predict.disag_model function works as expected", { test_that("Check predict.disag_model function works with newdata", { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, field = FALSE, iid = TRUE, iterations = 100, @@ -141,6 +143,7 @@ test_that("Check predict.disag_model function works with newdata", { test_that('Check that check_newdata works', { + skip_if_not_installed('INLA') skip_on_cran() result <- 
disag_model(test_data, field = FALSE, iterations = 100)
@@ -213,6 +216,7 @@ test_that('Check that setup_objects works', {
 
 test_that('Check that predict_single_raster works', {
 
+  skip_if_not_installed('INLA')
   skip_on_cran()
 
   result <- disag_model(test_data, iterations = 100,
diff --git a/tests/testthat/test-prepare-data.R b/tests/testthat/test-prepare-data.R
index bef721e..f71c4dd 100644
--- a/tests/testthat/test-prepare-data.R
+++ b/tests/testthat/test-prepare-data.R
@@ -2,7 +2,6 @@ context("Preparing data")
 
 test_that("Check prepare_data function works as expected", {
 
-  skip_if_not_installed('INLA')
   skip_on_cran()
 
   result <- prepare_data(polygon_shapefile = spdf,

From 17aa2ca57a53b71505d301c4b7ffdc59be700ad5 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Wed, 1 Nov 2023 09:43:11 +0000
Subject: [PATCH 48/55] add missing INLA skip and update prepare_data docs

---
 R/prepare_data.R                |  2 +-
 man/prepare_data.Rd             |  4 ++--
 tests/testthat/test-fit-model.R |  5 +----
 3 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/R/prepare_data.R b/R/prepare_data.R
index 2d970ff..dc5029b 100644
--- a/R/prepare_data.R
+++ b/R/prepare_data.R
@@ -26,7 +26,7 @@
 #' will automatically deal with NAs. It removes any polygons that have NAs as a response, sets any aggregation pixels with NA to zero
 #' and sets covariate NA pixels to the median value for that covariate.
 #'
-#' @param x sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).
+#' @param polygon_shapefile sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).
 #' @param covariate_rasters SpatRaster of covariate rasters to be used in the model.
 #' @param aggregation_raster SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.
 #' @param id_var Name of column in sf object with the polygon id.
diff --git a/man/prepare_data.Rd b/man/prepare_data.Rd
index 150ca58..5b5d5ab 100644
--- a/man/prepare_data.Rd
+++ b/man/prepare_data.Rd
@@ -18,6 +18,8 @@ prepare_data(
 )
 }
 \arguments{
+\item{polygon_shapefile}{sf object containing at least three columns: one with the geometries, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e. survey data, it can also contain a sample size column (\emph{sample_size_var}).}
+
 \item{covariate_rasters}{SpatRaster of covariate rasters to be used in the model.}
 
 \item{aggregation_raster}{SpatRaster to aggregate pixel level predictions to polygon level e.g. population to aggregate prevalence. If this is not supplied a uniform raster will be used.}
@@ -35,8 +37,6 @@ prepare_data(
 
 \item{makeMesh}{logical. If TRUE, build INLA mesh, takes some time.
Default TRUE.} \item{ncores}{Deprecated.} - -\item{x}{sf object containing at least three columns: one with the geometried, one with the id for the polygons (\emph{id_var}) and one with the response count data (\emph{response_var}); for binomial data, i.e survey data, it can also contain a sample size column (\emph{sample_size_var}).} } \value{ A list is returned of class \code{disag_data}. diff --git a/tests/testthat/test-fit-model.R b/tests/testthat/test-fit-model.R index df4146d..a3a7774 100644 --- a/tests/testthat/test-fit-model.R +++ b/tests/testthat/test-fit-model.R @@ -17,6 +17,7 @@ test_that("disag_model produces errors when expected", { test_that("disag_model behaves as expected", { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, iid = FALSE) @@ -26,12 +27,8 @@ test_that("disag_model behaves as expected", { expect_equal(length(result$sd_out$par.fixed), terra::nlyr(test_data$covariate_rasters) + 4) expect_equal(unique(names(result$sd_out$par.random)), c("nodemean")) - }) - - - test_that("disag_model with 1 covariate behaves as expected", { skip_if_not_installed('INLA') From fa9f1d552522bf99ae9c7170de276772a077d81a Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Wed, 1 Nov 2023 16:04:16 +0000 Subject: [PATCH 49/55] another missing INLA skip --- tests/testthat/test-predict-model.R | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/testthat/test-predict-model.R b/tests/testthat/test-predict-model.R index 33ac3ef..d2cd848 100644 --- a/tests/testthat/test-predict-model.R +++ b/tests/testthat/test-predict-model.R @@ -170,6 +170,7 @@ test_that('Check that check_newdata works', { test_that('Check that setup_objects works', { + skip_if_not_installed('INLA') skip_on_cran() result <- disag_model(test_data, iterations = 100, From e867911c322e3735a4c2f6542f50979831505437 Mon Sep 17 00:00:00 2001 From: simon-smart88 Date: Tue, 7 Nov 2023 08:30:49 +0000 Subject: [PATCH 50/55] remove INLA requirement from prepare data --- R/prepare_data.R | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/R/prepare_data.R b/R/prepare_data.R index dc5029b..f95cd31 100644 --- a/R/prepare_data.R +++ b/R/prepare_data.R @@ -174,13 +174,8 @@ prepare_data <- function(polygon_shapefile, startendindex <- getStartendindex(covariate_data, polygon_data, id_var = id_var) if(makeMesh) { - if(!requireNamespace('INLA', quietly = TRUE)) { - mesh <- NULL - message("Cannot build mesh as INLA is not installed. If you need a spatial field in your model, you must install INLA.") - } else { mesh <- build_mesh(polygon_shapefile, mesh.args) - } - } else { + } else { mesh <- NULL message("A mesh is not being built. 
You will not be able to run a spatial model without a mesh.")
   }

From 0e2031c73a1a60dd2de715d00b63c80a3e826926 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 7 Nov 2023 09:07:38 +0000
Subject: [PATCH 51/55] remove foreach dependency

---
 DESCRIPTION | 1 -
 1 file changed, 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index 8906a47..85351f8 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -19,7 +19,6 @@ Encoding: UTF-8
 LazyData: true
 RoxygenNote: 7.2.3
 Imports:
-    foreach,
     splancs,
     Matrix,
     stats,

From 8e0e63a244daeca635b541a095ab9d147a563692 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 7 Nov 2023 09:15:32 +0000
Subject: [PATCH 52/55] fix long lines in docs

---
 R/fit_model.R            | 6 ++++--
 man/fit_model.Rd         | 3 ++-
 man/make_model_object.Rd | 3 ++-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/R/fit_model.R b/R/fit_model.R
index 47dacab..e448890 100644
--- a/R/fit_model.R
+++ b/R/fit_model.R
@@ -94,7 +94,8 @@
 #' # Create raster stack
 #' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 #' terra::ext(r) <- terra::ext(spdf)
-#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
+#' r[] <- sapply(1:terra::ncell(r), function(x){
+#' rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)})
 #' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 #' terra::ext(r2) <- terra::ext(spdf)
 #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
@@ -291,7 +292,8 @@ disag_model <- function(data,
 #' # Create raster stack
 #' r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 #' terra::ext(r) <- terra::ext(spdf)
-#' r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3))
+#' r[] <- sapply(1:terra::ncell(r), function(x){
+#' rnorm(1, ifelse(x %% n_pixels_per_side != 0, x %% n_pixels_per_side, n_pixels_per_side), 3)})
 #' r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 #' terra::ext(r2) <- terra::ext(spdf)
 #' r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
diff --git a/man/fit_model.Rd b/man/fit_model.Rd
index f06c963..771fac7 100644
--- a/man/fit_model.Rd
+++ b/man/fit_model.Rd
@@ -133,7 +133,8 @@ spdf <- sf::st_sf(response_df, geometry = polys)
 # Create raster stack
 r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 terra::ext(r) <- terra::ext(spdf)
-r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))
+r[] <- sapply(1:terra::ncell(r), function(x){
+rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)})
 r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 terra::ext(r2) <- terra::ext(spdf)
 r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))
diff --git a/man/make_model_object.Rd b/man/make_model_object.Rd
index 1f12a97..25022c7 100644
--- a/man/make_model_object.Rd
+++ b/man/make_model_object.Rd
@@ -113,7 +113,8 @@ spdf <- sf::st_sf(response_df, geometry = polys)
 # Create raster stack
 r <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 terra::ext(r) <- terra::ext(spdf)
-r[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3))
+r[] <- sapply(1:terra::ncell(r), function(x){
+rnorm(1, ifelse(x \%\% n_pixels_per_side != 0, x \%\% n_pixels_per_side, n_pixels_per_side), 3)})
 r2 <- terra::rast(ncol=n_pixels_per_side, nrow=n_pixels_per_side)
 terra::ext(r2) <- terra::ext(spdf)
 r2[] <- sapply(1:terra::ncell(r), function(x) rnorm(1, ceiling(x/n_pixels_per_side), 3))

From 166b08509e3724718aed564777afde47ddd19202 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 7 Nov 2023 09:39:22 +0000
Subject: [PATCH 53/55] improve vignette plot

---
 vignettes/disaggregation.Rmd | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vignettes/disaggregation.Rmd b/vignettes/disaggregation.Rmd
index ace52cf..9892b98 100644
--- a/vignettes/disaggregation.Rmd
+++ b/vignettes/disaggregation.Rmd
@@ -57,8 +57,8 @@ polygons <- sf::st_as_sf(NYleukemia$spatial.polygon)
 
 df <- cbind(polygons, NYleukemia$data)
 
-ggplot() + geom_sf(data = df, aes(fill = cases / population))
+ggplot() + geom_sf(data = df, aes(fill = cases / population)) + scale_fill_viridis_c(lim = c(0, 0.003))
 
 ```
 

From 3b80068ec213e373c1c73f911af597b09cf13b24 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 7 Nov 2023 12:00:13 +0000
Subject: [PATCH 54/55] add missing suggest

---
 DESCRIPTION | 1 +
 1 file changed, 1 insertion(+)

diff --git a/DESCRIPTION b/DESCRIPTION
index 85351f8..c49bce4 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -35,6 +35,7 @@ Imports:
 Additional_repositories: https://inla.r-inla-download.org/R/stable
 Suggests:
     testthat,
+    INLA,
     knitr,
     rmarkdown,
     SpatialEpi

From d8a5e1ba7b38ccbb4df3628c5bcf2eefb6596bc8 Mon Sep 17 00:00:00 2001
From: simon-smart88
Date: Tue, 7 Nov 2023 13:42:47 +0000
Subject: [PATCH 55/55] tweak priors

---
 tests/testthat/test-summary.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/testthat/test-summary.R b/tests/testthat/test-summary.R
index 56961be..0538441 100644
--- a/tests/testthat/test-summary.R
+++ b/tests/testthat/test-summary.R
@@ -74,7 +74,7 @@ test_that("Check summary.disag_predictions function works as expected", {
                                  prior_rho_prob = 0.01,
                                  prior_sigma_max = 0.1,
                                  prior_sigma_prob = 0.01,
-                                 prior_iideffect_sd_max = 0.0001,
+                                 prior_iideffect_sd_max = 0.00001,
                                  prior_iideffect_sd_prob = 0.01))
 
 pred <- predict(result)
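A closing note on the fmesher migration in patches 38-40 above: the projector matrix that `getAmatrix()` used to wrap is now built inline with `fm_evaluator()`. A minimal sketch of that call on simulated data (assumes only that the fmesher package is installed; all locations are made up):

```r
library(fmesher)

set.seed(1)
pts  <- cbind(runif(100), runif(100))        # dummy locations to mesh over
mesh <- fm_mesh_2d(loc = pts, max.edge = c(0.1, 0.3))

coords  <- cbind(runif(10), runif(10))       # points to project the field to
Amatrix <- fm_evaluator(mesh, loc = coords)$proj$A

dim(Amatrix)  # one row per location, one column per mesh node
```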